diff --git a/RELEASE.md b/RELEASE.md index 2717c75740aeea7821fb6c57dfc85908e86e9d51..84d9d52868ecd55d38d6073315749d11c2340e8c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,7 +6,7 @@ * Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor. * Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability. * `tf.contrib.bayesflow` is moving out to it's own repo. -* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication. +* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication<sup>[1](#rpc-issue)</sup>. ## Bug Fixes and Other Changes * `tf.data`: @@ -49,13 +49,14 @@ * Fix non-uniformity of orthogonal matrices. * Fix bug where multi-image Estimator eval summaries were not displayed correctly. +<a name="rpc-issue"></a><sup>1</sup> The cancellation logic of the RPC op contains a concurrency error. A fix has been submitted to master and will be part of the next release. + ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: 4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, 
qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu - # Release 1.7.0 ## Major Features And Improvements @@ -235,7 +236,7 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田 * Add `complex64` support to XLA compiler. * `bfloat` support is now added to XLA infrastructure. * Make `ClusterSpec` propagation work with XLA devices. - * Use a determinisitic executor to generate XLA graph. + * Use a deterministic executor to generate XLA graph. * `tf.contrib`: * `tf.contrib.distributions`: * Add `tf.contrib.distributions.Autoregressive`. diff --git a/SECURITY.md b/SECURITY.md index a5ce3a62ee202f6e7d83f0fedc2777d9c88ba9b5..01886b613e5d93793953124331b57f075fe7a373 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -173,7 +173,7 @@ the progress being made towards a fix and announcement. In addition, please include the following information along with your report: * Your name and affiliation (if any). -* A description the technical details of the vulnerabilities. It is very +* A description of the technical details of the vulnerabilities. It is very important to let us know how we can reproduce your findings. * An explanation who can exploit this vulnerability, and what they gain when doing so -- write an attack scenario. 
This will help us evaluate your report diff --git a/configure.py b/configure.py index b745e374a2baaffec73f9f9382e1bab322e7f0fd..6d9aba61bbc73ba1b80321d6859877c371dc5427 100644 --- a/configure.py +++ b/configure.py @@ -845,8 +845,8 @@ def reformat_version_sequence(version_str, sequence_count): def set_tf_cuda_version(environ_cp): """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" ask_cuda_version = ( - 'Please specify the CUDA SDK version you want to use, ' - 'e.g. 7.0. [Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION + 'Please specify the CUDA SDK version you want to use. ' + '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): # Configure the Cuda SDK version to use. @@ -1226,6 +1226,9 @@ def set_tf_cuda_compute_capabilities(environ_cp): ask_cuda_compute_capabilities, default_cuda_compute_capabilities) # Check whether all capabilities from the input is valid all_valid = True + # Remove all whitespace characters before splitting the string + # that users may insert by accident, as this will result in error + tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split()) for compute_capability in tf_cuda_compute_capabilities.split(','): m = re.match('[0-9]+.[0-9]+', compute_capability) if not m: diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 18eeb2816807ec9986999cfc2c9a4c0f032683c0..b86b277ac3200b88ae03490a6c1b64d464e81950 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -2097,7 +2097,7 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def, for (int i = 0; i < size; ++i) { TensorId id = results.missing_unused_input_map_keys[i]; - tf_results->missing_unused_key_names_data.push_back(id.first.ToString()); + tf_results->missing_unused_key_names_data.push_back(std::string(id.first)); tf_results->missing_unused_key_names[i] = tf_results->missing_unused_key_names_data.back().c_str(); tf_results->missing_unused_key_indexes[i] = 
id.second; diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 82dbd3cdbc6e8fb0c6fbcddb33b6a95c87a83225..95b04f9058afdfaadbc24f0238860279fcd3e800 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -8407,3 +8407,51 @@ TF_Tensor* TF_DequeueNamedTensor(TF_Session* session, int tensor_id, } return ret; } + +void TF_EnqueueNamedTensor(TF_Session* session, int tensor_id, + TF_Tensor* tensor, TF_Status* status) { + assert(session); + { + tensorflow::mutex_lock c(session->graph->mu); + if (VLOG_IS_ON(1)) { + VLOG(1) << "Enqueuing named tensor with id " << tensor_id + << ", with input graph: " + << session->graph->graph.ToGraphDefDebug().DebugString(); + tensorflow::Tensor internal_tensor; + if (tensorflow::TF_TensorToTensor(tensor, &internal_tensor).ok()) { + VLOG(1) << "Enqueu'ing tensor content: " + << internal_tensor.DebugString(); + } + } + } + + TF_Operation* enqueue_op = TF_GraphOperationByName( + session->graph, + tensorflow::strings::StrCat("fifo_queue_enqueue_", tensor_id).c_str()); + if (enqueue_op == nullptr) { + status->status = tensorflow::errors::Internal( + "Unable to find the enqueue node in the TF graph."); + return; + } + + TF_Operation* placeholder_op = TF_GraphOperationByName( + session->graph, + tensorflow::strings::StrCat("arg_tensor_enqueue_", tensor_id).c_str()); + if (placeholder_op == nullptr) { + status->status = tensorflow::errors::Internal( + "Unable to find the placeholder node as input to enqueue in the TF " + "graph."); + return; + } + + VLOG(1) << "Running the enqueue op"; + TF_Output input{placeholder_op, 0}; + TF_SessionRun(session, /*run_options*/ nullptr, + // input related parameters + /*inputs*/ &input, /*input_values*/ &tensor, /*ninputs*/ 1, + // output related parameters + /*outputs*/ nullptr, /*output_values*/ nullptr, /*noutputs*/ 0, + /*targets*/ &enqueue_op, /*ntargets*/ 1, + /*run_metadata*/ nullptr, status); + VLOG(1) << "Enqueuing is done."; +} 
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index e6757c065fc540fa789cdbb694e66ca0b00c4832..20bdace40f1272ded06e710034053a7610326e7f 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -87,8 +87,11 @@ TF_CAPI_EXPORT extern TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( unsigned char is_mnist, TF_Status* status); // On success, dequeues a tensor from a TF-managed FifoQueue given by -// `tensor_id`, associated with `session`. Caller must call TF_DeleteTensor() -// over the returned tensor. If the queue is empty, this call is blocked. +// `tensor_id`, associated with `session`. There must be a graph node named +// "fifo_queue_dequeue_<tensor_id>", to be executed by this API call. +// +// Caller must call TF_DeleteTensor() over the returned tensor. If the queue is +// empty, this call is blocked. // // Tensors are enqueued via the corresponding TF enqueue op. // TODO(hongm): Add support for `timeout_ms`. @@ -96,6 +99,22 @@ TF_CAPI_EXPORT extern TF_Tensor* TF_DequeueNamedTensor(TF_Session* session, int tensor_id, TF_Status* status); +// On success, enqueues `tensor` into a TF-managed FifoQueue given by +// `tensor_id`, associated with `session`. There must be a graph node named +// "fifo_queue_enqueue_<tensor_id>", to be executed by this API call. It reads +// from a placeholder node "arg_tensor_enqueue_<tensor_id>". +// +// `tensor` is still owned by the caller. This call will be blocked if the queue +// has reached its capacity, and will be unblocked when the queued tensors again +// drop below the capacity due to dequeuing. +// +// Tensors are dequeued via the corresponding TF dequeue op. +// TODO(hongm): Add support for `timeout_ms`. 
+TF_CAPI_EXPORT extern void TF_EnqueueNamedTensor(TF_Session* session, + int tensor_id, + TF_Tensor* tensor, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 9b86425aa5fbc2be2872b3f5d2809eaa844f9d68..577f10c5e69ea9ecbe8ce821c6bd5167e98bef25 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -1368,7 +1368,7 @@ TEST(CAPI, SavedModel) { } const tensorflow::string input_op_name = - tensorflow::ParseTensorName(input_name).first.ToString(); + std::string(tensorflow::ParseTensorName(input_name).first); TF_Operation* input_op = TF_GraphOperationByName(graph, input_op_name.c_str()); ASSERT_TRUE(input_op != nullptr); @@ -1376,7 +1376,7 @@ TEST(CAPI, SavedModel) { ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); const tensorflow::string output_op_name = - tensorflow::ParseTensorName(output_name).first.ToString(); + std::string(tensorflow::ParseTensorName(output_name).first); TF_Operation* output_op = TF_GraphOperationByName(graph, output_op_name.c_str()); ASSERT_TRUE(output_op != nullptr); diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc index b1f7bdaa5420a56386e6983052df20aa976aa867..74bc25a491ac01cb725d1c004197e48727c30230 100644 --- a/tensorflow/c/checkpoint_reader.cc +++ b/tensorflow/c/checkpoint_reader.cc @@ -125,7 +125,7 @@ CheckpointReader::BuildV2VarMaps() { const auto& slice_proto = entry.slices(i); CHECK(filtered_keys .insert(EncodeTensorNameSlice( - v2_reader_->key().ToString() /* full var's name */, + std::string(v2_reader_->key()) /* full var's name */, TensorSlice(slice_proto))) .second); } @@ -138,11 +138,11 @@ CheckpointReader::BuildV2VarMaps() { new TensorSliceReader::VarToDataTypeMap); v2_reader_->Seek(kHeaderEntryKey); for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) { - if (filtered_keys.count(v2_reader_->key().ToString()) > 0) continue; + if 
(filtered_keys.count(std::string(v2_reader_->key())) > 0) continue; CHECK(entry.ParseFromArray(v2_reader_->value().data(), v2_reader_->value().size())) << entry.InitializationErrorString(); - string key = v2_reader_->key().ToString(); + string key = std::string(v2_reader_->key()); (*var_to_shape_map)[key] = TensorShape(entry.shape()); (*var_to_data_type_map)[key] = DataType(entry.dtype()); } diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 97c323b87228039ba10f4ed5e434aa83621b1220..e9ed3395c448305bcd6317b0b292b4e4e0b659b1 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -130,13 +130,15 @@ class GradientTape { } } - bool ShouldRecord(gtl::ArraySlice tensor_ids); + bool ShouldRecord(gtl::ArraySlice tensor_ids, + gtl::ArraySlice dtypes); void Watch(int64 tensor_id); void RecordOperation(const string& op_type, gtl::ArraySlice output_tensors, gtl::ArraySlice input_tensor_id, + gtl::ArraySlice input_dtypes, BackwardFunction* backward_function, const std::function& backward_function_deleter); @@ -170,12 +172,30 @@ class GradientTape { // Template instantiations here +inline bool IsDtypeTrainable(DataType dtype) { + switch (dtype) { + case DT_HALF: + case DT_BFLOAT16: + case DT_FLOAT: + case DT_DOUBLE: + case DT_COMPLEX64: + case DT_COMPLEX128: + case DT_RESOURCE: + case DT_VARIANT: + return true; + default: + return false; + } +} + template bool GradientTape::ShouldRecord( - gtl::ArraySlice tensor_ids) { - for (int64 i : tensor_ids) { - if (tensor_tape_.find(i) != tensor_tape_.end()) { - return true; + gtl::ArraySlice tensor_ids, + gtl::ArraySlice dtypes) { + CHECK_EQ(tensor_ids.size(), dtypes.size()); + for (int i = 0; i < tensor_ids.size(); ++i) { + if (tensor_tape_.find(tensor_ids[i]) != tensor_tape_.end()) { + if (IsDtypeTrainable(dtypes[i])) return true; } } return false; @@ -189,9 +209,11 @@ void GradientTape::Watch(int64 tensor_id) { template void GradientTape::RecordOperation( const string& op_type, gtl::ArraySlice 
output_tensors, - gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function, + gtl::ArraySlice input_tensor_id, + gtl::ArraySlice input_dtypes, + BackwardFunction* backward_function, const std::function& backward_function_deleter) { - if (!ShouldRecord(input_tensor_id)) { + if (!ShouldRecord(input_tensor_id, input_dtypes)) { backward_function_deleter(); return; } @@ -380,49 +402,39 @@ Status InitialGradients(const VSpace& vspace, gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, const OpTape& op_tape, - const gtl::FlatMap& tensor_usage_counts, gtl::FlatMap>* result) { for (int i = 0; i < target_tensor_ids.size(); ++i) { const int64 id = target_tensor_ids[i]; - if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) { - if (!output_gradients.empty() && output_gradients[i] != nullptr) { - // TODO(apassos) figure out how to print debugging information here. - return errors::InvalidArgument( - "A gradient was provided for a tensor which is used as part of the " - "computation."); - } - } else { - if (output_gradients.empty() || output_gradients[i] == nullptr) { - auto tensor_it = tensor_tape.find(id); - if (tensor_it != tensor_tape.end() && tensor_it->second != -1) { - auto op_it = op_tape.find(tensor_it->second); - if (op_it == op_tape.end()) { - return errors::Internal( - "Internal state of the gradient tape is invalid: " - "failed to find operation producing a tensor"); - } - bool found = false; - for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { - if (op_it->second.output_tensor_info[j].id == id) { - found = true; - (*result)[id].push_back( - vspace.Ones(op_it->second.output_tensor_info[j].shape, - op_it->second.output_tensor_info[j].dtype)); - break; - } - } - if (!found) { - return errors::Internal( - "Internal state of the gradient tape is invalid: " - "none of operations outputs match expected tensor"); + if (output_gradients.empty() || output_gradients[i] == nullptr) { + auto tensor_it = tensor_tape.find(id); 
+ if (tensor_it != tensor_tape.end() && tensor_it->second != -1) { + auto op_it = op_tape.find(tensor_it->second); + if (op_it == op_tape.end()) { + return errors::Internal( + "Internal state of the gradient tape is invalid: " + "failed to find operation producing a tensor"); + } + bool found = false; + for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) { + if (op_it->second.output_tensor_info[j].id == id) { + found = true; + (*result)[id].push_back( + vspace.Ones(op_it->second.output_tensor_info[j].shape, + op_it->second.output_tensor_info[j].dtype)); + break; } - } else { - // No record of the target tensor found on the tape, so no gradient - // needs to be computed from it. Do nothing. + } + if (!found) { + return errors::Internal( + "Internal state of the gradient tape is invalid: " + "none of operations outputs match expected tensor"); } } else { - (*result)[id].push_back(output_gradients[i]); + // No record of the target tensor found on the tape, so no gradient + // needs to be computed from it. Do nothing. 
} + } else { + (*result)[id].push_back(output_gradients[i]); } } return Status::OK(); @@ -451,8 +463,7 @@ Status GradientTape::ComputeGradient( InitialStack(state.op_tape, state.op_missing_tensor); gtl::FlatMap> gradients; Status s = InitialGradients(vspace, target_tensor_ids, output_gradients, - tensor_tape_, state.op_tape, - state.tensor_usage_counts, &gradients); + tensor_tape_, state.op_tape, &gradients); auto cleanup = [this, &state]() { if (!persistent_) { // Release all backprop functions diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index d73121c7b701ec06c03836d1a765f4b35d88fe92..d6a4f141b6bb8ccadb77f1fa83b5fb742d78f70f 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -440,7 +440,7 @@ string AvoidCPPKeywords(StringPiece name) { if (IsCPPKeyword(name)) { return strings::StrCat(name, "_"); } - return name.ToString(); + return std::string(name); } void InferArgAttributes(const OpDef::ArgDef& arg, diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc index c143b978338815ebc7134eb0a07867c5d8b13dca..62a889181e787f2e181135ab0563c45e1bab8812 100644 --- a/tensorflow/cc/framework/scope.cc +++ b/tensorflow/cc/framework/scope.cc @@ -220,7 +220,7 @@ std::unordered_set Scope::Impl::GetColocationConstraints( for (const string& entry : node_constraints) { StringPiece s(entry); if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) { - current_constraints.insert(s.ToString()); + current_constraints.insert(std::string(s)); } } } else { diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc index 31044ff85d6f0d72b34d03669fe508866d7d3358..bbc35da2ef6d14ff0d3570ef2d5cf6743456c674 100644 --- a/tensorflow/compiler/aot/compile.cc +++ b/tensorflow/compiler/aot/compile.cc @@ -44,7 +44,7 @@ namespace { // Compiles the XLA computation into executable code. 
Status CompileXla(xla::CompileOnlyClient* client, - const xla::Computation& computation, + const xla::XlaComputation& computation, const xla::cpu::CpuAotCompilationOptions& aot_opts, CompileResult* compile_result) { // Retrieves arg and result layouts from the computation. @@ -62,7 +62,7 @@ Status CompileXla(xla::CompileOnlyClient* client, for (int i = 0; i < pshape->parameters_size(); ++i) { arg_layouts.push_back(pshape->mutable_parameters(i)); } - xla::CompileOnlyClient::AotComputationInstance instance; + xla::CompileOnlyClient::AotXlaComputationInstance instance; instance.computation = &computation; instance.argument_layouts = std::move(arg_layouts); instance.result_layout = &pshape->result(); @@ -93,14 +93,14 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config, xla::CompileOnlyClient* client = xla::ClientLibrary::GetOrCreateCompileOnlyClient(cpu_platform) .ValueOrDie(); - xla::Computation computation; + xla::XlaComputation computation; TF_RETURN_IF_ERROR( ConvertGraphDefToXla(graph_def, config, client, &computation)); if (!flags.out_session_module.empty()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr module, + TF_ASSIGN_OR_RETURN(std::unique_ptr module, computation.Snapshot()); - // Serialize the SessionModule deterministically so that all the outputs of - // a tf_library genrule are deterministic. + // Serialize the HloSnapshot deterministically so that all the outputs of a + // tf_library genrule are deterministic. 
string proto; TF_RET_CHECK(SerializeToStringDeterministic(*module, &proto)); TF_RETURN_IF_ERROR( diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD index 222e26810ac1157152ea81a56749b6652aa1f137..fd2cf2b67d4618dd626b8eef78eed044d7fde0a4 100644 --- a/tensorflow/compiler/aot/tests/BUILD +++ b/tensorflow/compiler/aot/tests/BUILD @@ -15,6 +15,7 @@ test_suite( ":test_graph_tfadd_with_ckpt_saver_test", ":test_graph_tfadd_with_ckpt_test", ":test_graph_tfassert_eq_test", + ":test_graph_tfcond_test", ":test_graph_tffunction_test", ":test_graph_tfgather_test", ":test_graph_tfmatmul_test", @@ -55,6 +56,7 @@ genrule( "test_graph_tfadd_with_ckpt_saver.pb", "test_graph_tfadd_with_ckpt_saver.saver", "test_graph_tfassert_eq.pb", + "test_graph_tfcond.pb", "test_graph_tffunction.pb", "test_graph_tfgather.pb", "test_graph_tfmatmul.pb", @@ -118,6 +120,17 @@ tf_library( ], ) +tf_library( + name = "test_graph_tfcond", + testonly = 1, + config = "test_graph_tfcond.config.pbtxt", + cpp_class = "CondComp", + graph = "test_graph_tfcond.pb", + tags = [ + "manual", + ], +) + tf_library( name = "test_graph_tffunction", testonly = 1, @@ -194,6 +207,7 @@ tf_cc_test( ":test_graph_tfadd_with_ckpt", ":test_graph_tfadd_with_ckpt_saver", ":test_graph_tfassert_eq", + ":test_graph_tfcond", ":test_graph_tffunction", ":test_graph_tfgather", ":test_graph_tfmatmul", diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py index 67767f55dae9b15aafbd8b129328bde2c59a9ef3..9ec7df163b1425f917e9ec51559efad3e6f05e75 100644 --- a/tensorflow/compiler/aot/tests/make_test_graphs.py +++ b/tensorflow/compiler/aot/tests/make_test_graphs.py @@ -78,6 +78,22 @@ def tfadd_with_ckpt_saver(out_dir): f.write(saver.as_saver_def().SerializeToString()) +def tfassert_eq(_): + x = array_ops.placeholder(dtypes.int32, name='x_hold') + y = array_ops.placeholder(dtypes.int32, name='y_hold') + control_flow_ops.Assert( + math_ops.equal(x, y), 
['Expected x == y.'], name='assert_eq') + math_ops.add(x, math_ops.negative(y), name='x_y_diff') + + +def tfcond(_): + p = array_ops.placeholder(dtypes.bool, name='p_hold') + x = array_ops.placeholder(dtypes.int32, name='x_hold') + y = array_ops.placeholder(dtypes.int32, name='y_hold') + z = control_flow_ops.cond(p, lambda: x, lambda: y) + array_ops.identity(z, name='result') + + def tfgather(_): params = array_ops.placeholder(dtypes.float32, name='params') indices = array_ops.placeholder(dtypes.int32, name='indices') @@ -126,14 +142,6 @@ def tfsplits(_): array_ops.identity(y, name='result') -def tfassert_eq(_): - x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = array_ops.placeholder(dtypes.int32, name='y_hold') - control_flow_ops.Assert( - math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq') - math_ops.add(x, math_ops.negative(y), name='x_y_diff') - - def write_graph(build_graph, out_dir): """Build a graph using build_graph and write it out.""" g = ops.Graph() @@ -148,12 +156,13 @@ def main(_): write_graph(tfadd, FLAGS.out_dir) write_graph(tfadd_with_ckpt, FLAGS.out_dir) write_graph(tfadd_with_ckpt_saver, FLAGS.out_dir) + write_graph(tfassert_eq, FLAGS.out_dir) + write_graph(tfcond, FLAGS.out_dir) + write_graph(tffunction, FLAGS.out_dir) write_graph(tfgather, FLAGS.out_dir) write_graph(tfmatmul, FLAGS.out_dir) write_graph(tfmatmulandadd, FLAGS.out_dir) - write_graph(tffunction, FLAGS.out_dir) write_graph(tfsplits, FLAGS.out_dir) - write_graph(tfassert_eq, FLAGS.out_dir) if __name__ == '__main__': diff --git a/tensorflow/compiler/aot/tests/test_graph_tfcond.config.pbtxt b/tensorflow/compiler/aot/tests/test_graph_tfcond.config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..94a01ad4abfaab5e4b087b7cc219e86c1d0179b8 --- /dev/null +++ b/tensorflow/compiler/aot/tests/test_graph_tfcond.config.pbtxt @@ -0,0 +1,20 @@ +# Text form of tensorflow.tf2xla.Config proto. 
+feed { + id { node_name: "p_hold" } + shape {} +} +feed { + id { node_name: "x_hold" } + shape { + dim { size: 1 } + } +} +feed { + id { node_name: "y_hold" } + shape { + dim { size: 1 } + } +} +fetch { + id { node_name: "result" } +} diff --git a/tensorflow/compiler/aot/tests/tfcompile_test.cc b/tensorflow/compiler/aot/tests/tfcompile_test.cc index aa9d968265b4619ff2e3c910e3d7455ae07bc49d..309a991fc11ab74ddd58a6345d9d40ad84fb2734 100644 --- a/tensorflow/compiler/aot/tests/tfcompile_test.cc +++ b/tensorflow/compiler/aot/tests/tfcompile_test.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt.h" #include "tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt_saver.h" #include "tensorflow/compiler/aot/tests/test_graph_tfassert_eq.h" +#include "tensorflow/compiler/aot/tests/test_graph_tfcond.h" #include "tensorflow/compiler/aot/tests/test_graph_tffunction.h" #include "tensorflow/compiler/aot/tests/test_graph_tfgather.h" #include "tensorflow/compiler/aot/tests/test_graph_tfmatmul.h" @@ -150,6 +151,31 @@ TEST(TFCompileTest, AddWithCkptSaver) { EXPECT_EQ(add_const.result0_data(), add_const.results()[0]); } +TEST(TFCompileTest, Cond) { + CondComp cond; + EXPECT_EQ(cond.arg0_data(), cond.args()[0]); + EXPECT_EQ(cond.arg1_data(), cond.args()[1]); + EXPECT_EQ(cond.arg2_data(), cond.args()[2]); + cond.arg1() = 10; + cond.arg2() = 20; + { + cond.arg0() = true; + const int32 expected_result = cond.arg1(); + EXPECT_TRUE(cond.Run()); + EXPECT_EQ(cond.result0(), expected_result); + EXPECT_EQ(cond.result0_data()[0], expected_result); + EXPECT_EQ(cond.result0_data(), cond.results()[0]); + } + { + cond.arg0() = false; + const int32 expected_result = cond.arg2(); + EXPECT_TRUE(cond.Run()); + EXPECT_EQ(cond.result0(), expected_result); + EXPECT_EQ(cond.result0_data()[0], expected_result); + EXPECT_EQ(cond.result0_data(), cond.results()[0]); + } +} + TEST(TFCompileTest, Gather) { GatherComp gather; 
EXPECT_EQ(gather.arg0_data(), gather.args()[0]); @@ -525,14 +551,16 @@ TEST(TFCompileTest, HloProfiling) { auto header = HasSubstr("Execution profile for"); auto total_cycles_profile_line = HasSubstr("[total]"); auto dot_profile_line = HasSubstr( - "%dot = f32[2,2]{1,0} dot(f32[2,2]{1,0} %arg0, f32[2,2]{1,0} %arg1)"); + "%dot.0.2 = f32[2,2]{1,0} dot(f32[2,2]{1,0} %arg0.0.0, f32[2,2]{1,0} " + "%arg1.0.1)"); auto add_profile_line = HasSubstr( - "%add = f32[2,2]{1,0} add(f32[2,2]{1,0} %arg0, f32[2,2]{1,0} %arg1)"); + "%add.0.5 = f32[2,2]{1,0} add(f32[2,2]{1,0} %arg0.0.0, f32[2,2]{1,0} " + "%arg1.0.1)"); auto tuple_profile_line = HasSubstr( - "%tuple.2 = (f32[2,2]{1,0}, f32[2,2]{1,0}) tuple(f32[2,2]{1,0} %dot, " - "f32[2,2]{1,0} %add)"); - auto arg0_profile_line = HasSubstr("%arg0 = f32[2,2]{1,0} parameter(0)"); - auto arg1_profile_line = HasSubstr("%arg1 = f32[2,2]{1,0} parameter(1)"); + "%tuple.0.8 = (f32[2,2]{1,0}, f32[2,2]{1,0}) tuple(f32[2,2]{1,0} " + "%dot.0.2, f32[2,2]{1,0} %add.0.5)"); + auto arg0_profile_line = HasSubstr("%arg0.0.0 = f32[2,2]{1,0} parameter(0)"); + auto arg1_profile_line = HasSubstr("%arg1.0.1 = f32[2,2]{1,0} parameter(1)"); hlo_profile_lines.erase(hlo_profile_lines.begin() + 7, hlo_profile_lines.end()); diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index af2965bba5b91a66e206f05bb8945b0dcde1d2b4..a6b3ce394c6859c4f45bbde4e39dde9229da3388 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -261,6 +261,7 @@ cc_library( name = "create_xla_launch_op", srcs = [ "create_xla_launch_op.cc", + "create_xla_launch_op.h", ], deps = [ ":common", @@ -270,6 +271,29 @@ cc_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +tf_cc_test( + name = "create_xla_launch_op_test", + srcs = [ + "create_xla_launch_op.h", + "create_xla_launch_op_test.cc", + ], + 
deps = [ + ":create_xla_launch_op", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "@com_google_absl//absl/memory", ], ) @@ -360,6 +384,31 @@ tf_cc_test( ], ) +tf_cc_test( + name = "xla_launch_util_test", + size = "small", + srcs = ["xla_launch_util_test.cc"], + deps = [ + ":common", + ":xla_compilation_cache", + ":xla_launch_util", + ":xla_tensor", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:gpu_runtime", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core/kernels:variable_ops", + ], +) + # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library. cc_header_only_library( name = "xla_jit_headers_lib", diff --git a/tensorflow/compiler/jit/create_xla_launch_op.cc b/tensorflow/compiler/jit/create_xla_launch_op.cc index 18d901323f108505979be484c2bfad5998ab0748..f35e916eb937faf7e1afd53a4a5dfdb95a8bbe43 100644 --- a/tensorflow/compiler/jit/create_xla_launch_op.cc +++ b/tensorflow/compiler/jit/create_xla_launch_op.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/compiler/jit/create_xla_launch_op.h" +#include "absl/memory/memory.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/kernels/xla_launch_op.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" @@ -25,78 +27,189 @@ limitations under the License. namespace tensorflow { namespace { -// Givens a NodeDef 'ndef' and the function library runtime 'flr', if -// 'ndef' is a call to a compilable function defined in 'flr', returns OK -// and fills in 'kernel' with a XlaLaunchOp kernel which computes the -// node. Otherwise, returns a non-OK. +// Utility which searches for values in a sorted list by scanning over it once. +// No matter how many times ScanForValue is called, the list is scanned at most +// once. However, if a call to ScanForValue skips over a value, that value is +// not revisited in future calls to ScanForValue, so callers must take +// care to order their calls. // -// This routine is here so that FunctionLibraryRuntime can jit a -// specific function call as requested. -Status CreateXlaLaunchOp(FunctionLibraryRuntime* flr, const NodeDef& ndef, - std::unique_ptr* kernel) { - bool xla_compile = false; - if (!flr->GetFunctionLibraryDefinition() - ->GetAttr(ndef, kXlaCompileAttr, &xla_compile) - .ok() || - !xla_compile) { - // Not marked as _XlaCompile=true. - return errors::InvalidArgument("No ", kXlaCompileAttr, " for ", ndef.op()); +// Useful for merging multiple sorted lists in O(n) time. +class SinglePassSearch { + public: + // Creates a SinglePassSearch object that can be used to search in `values`. + // Does not take ownership of `values`. `values` must outlive this. + // `values` must be sorted. + explicit SinglePassSearch(const std::vector* values) + : current_index_(0), values_(values) {} + + // Scans forward in the vector looking for "value", updating the internal + // position in to the vector. 
+ // Returns true iff the vector contains the given value at or after current + // position. + // Not thread-safe. + bool ScanForValue(int value) { + while (current_index_ < values_->size() && + (*values_)[current_index_] <= value) { + if ((*values_)[current_index_] == value) { + current_index_++; + return true; + } + current_index_++; + } + return false; } - // Make sure that kernels have been registered on the JIT device. - XlaOpRegistry::RegisterCompilationKernels(); - if (!IsCompilable(flr, ndef)) { - // ndef is calling a function that XLA can't compile. - return errors::InvalidArgument("Not compilable: ", ndef.ShortDebugString()); + + private: + int current_index_; + const std::vector* values_; +}; + +Status CompilationRequested(const FunctionLibraryRuntime& flr, + const NodeDef& node_def) { + bool xla_compile = false; + // Check if op is marked _XlaCompile=true. + Status status = flr.GetFunctionLibraryDefinition()->GetAttr( + node_def, kXlaCompileAttr, &xla_compile); + if (!status.ok() || !xla_compile) { + if (VLOG_IS_ON(3)) { + if (!status.ok()) { + VLOG(3) << "No " << kXlaCompileAttr << " attr defined for " + << node_def.op() << ". status=" << status.ToString(); + } else { + VLOG(3) << node_def.op() << " is explicitly marked not to be compiled"; + } + } + return Status(error::INVALID_ARGUMENT, ""); } + return Status::OK(); +} + +// Given a FunctionLibraryRuntime and a NodeDef calling a function in the +// runtime, returns this function's body in `fbody` as well as the indices +// of its constant and resource arguments. +// `fbody` is owned by `flr`. +// `constant_arg_indices` and `resource_arg_indices` should be empty vector. +// They are sorted in ascending order on this function's return. 
+Status GetBodyAndConstantsAndResources(FunctionLibraryRuntime* flr, + const NodeDef& node_def, + const FunctionBody** fbody, + std::vector* constant_arg_indices, + std::vector* resource_arg_indices) { FunctionLibraryRuntime::Handle handle; - // If ndef is not instantiable, e.g., the function does not exist, + // If node_def is not instantiable, e.g., the function does not exist, // simply bail out. TF_RETURN_IF_ERROR( - flr->Instantiate(ndef.op(), AttrSlice(&ndef.attr()), &handle)); - const FunctionBody* fbody = flr->GetFunctionBody(handle); - CHECK(fbody); // Can't be nullptr since we just instantiated it. - std::vector const_args(fbody->arg_types.size()); + flr->Instantiate(node_def.op(), AttrSlice(&node_def.attr()), &handle)); + *fbody = flr->GetFunctionBody(handle); + CHECK(*fbody); // Can't be nullptr since we just instantiated it. + const DataTypeVector& arg_types = (*fbody)->arg_types; + std::vector const_args(arg_types.size()); // If we can't analyze the const args. Bail out. - TF_RETURN_IF_ERROR(BackwardsConstAnalysis(*(fbody->graph), &const_args)); + TF_RETURN_IF_ERROR(BackwardsConstAnalysis(*((*fbody)->graph), &const_args)); for (int i = 0; i < const_args.size(); ++i) { if (const_args[i]) { - // There is a const arg. Bail out. - return errors::InvalidArgument("Const arg: ", i, " in ", - DebugString(fbody->fdef)); + constant_arg_indices->push_back(i); + } + } + + // There can be hundreds of resource variables. Reserve the space for them. + // We don't reserve for constants above as they are usually few. 
+ resource_arg_indices->reserve(arg_types.size()); + for (int i = 0; i < arg_types.size(); ++i) { + if (arg_types[i] == DT_RESOURCE) { + resource_arg_indices->push_back(i); } } - NodeDef launch_def; - launch_def.set_name(ndef.name()); - launch_def.set_op("_XlaLaunch"); - launch_def.set_device(flr->device()->name()); - AddNodeAttr("Tconstants", DataTypeVector{}, &launch_def); - AddNodeAttr("Nresources", 0, &launch_def); - AddNodeAttr("Targs", fbody->arg_types, &launch_def); - AddNodeAttr("Tresults", fbody->ret_types, &launch_def); - NameAttrList func; - func.set_name(ndef.op()); - *(func.mutable_attr()) = ndef.attr(); - AddNodeAttr("function", func, &launch_def); - - // TODO(b/32387911): Handles the host memory types across function - // calls properly. For now, we assume all inputs and outputs are on - // the device memory. + return Status::OK(); +} + +} // namespace + +Status CreateXlaLaunchOp(FunctionLibraryRuntime* flr, const NodeDef& node_def, + std::unique_ptr* kernel) { + TF_RETURN_IF_ERROR(CompilationRequested(*flr, node_def)); + + VLOG(3) << "Creating XlaLaunchOp for " << node_def.DebugString(); + + // Make sure that kernels have been registered on the JIT device. + XlaOpRegistry::RegisterCompilationKernels(); + if (!IsCompilable(flr, node_def)) { + // node_def is calling a function that XLA can't compile. + return errors::InvalidArgument("Not compilable: ", + node_def.ShortDebugString()); + } + + // Get function body, constant args, and resource args. + const FunctionBody* fbody = nullptr; + std::vector constant_arg_indices; + std::vector resource_arg_indices; + TF_RETURN_IF_ERROR(GetBodyAndConstantsAndResources( + flr, node_def, &fbody, &constant_arg_indices, &resource_arg_indices)); + + // Set input and output memory types. MemoryTypeVector input_memory_types(fbody->arg_types.size(), DEVICE_MEMORY); + // These indices are used only for optimization purposes. 
They allow us + // to loop over constant_arg_indices and resource_arg_indices only once + // while iterating over all the function arguments checking if it is a + // resource or a constant. + // The reason we optimized this code is because functions can have a lot of + // captured arguments. For example, the backward pass of ResNet50 takes in all + // 214 variables and a similar number of activations. + SinglePassSearch constants_search(&constant_arg_indices); + SinglePassSearch resources_search(&resource_arg_indices); + for (int i = 0; i < fbody->arg_types.size(); ++i) { + if (resources_search.ScanForValue(i) || constants_search.ScanForValue(i)) { + // Compile-time constants and resource handles are expected to be in + // host memory. + input_memory_types[i] = HOST_MEMORY; + } + } + // One might wonder about the case where a compile-time constant argument + // (which must be in host memory) is also used as an input into an op, + // e.g. Add, that expects its inputs in device memory. Here is how it + // works now. + // First, what do we mean by "op expects an input in XYZ memory"? + // There are two types of "ops" here: the tf2xla kernel and the HLO + // computation it builds. The tf2xla kernel needs to retrieve the actual + // numeric value of the compile-time constant tensors, so it really expects + // them to be in host memory. However, for other inputs, it refers to them + // using xla::ComputationDataHandle, which is just a symbolic handle that + // xla::ComputationBuilder assigns. How does this handle get assigned for + // constant arguments? Even constant arguments get an _Arg node in the graph + // instantiated for Function compilation. The tf2xla kernel for constant _Arg + // nodes takes the constant value, converts it to XlaLiteral, and feeds it + // to xla::ComputationBuilder.ConstantLiteral, which returns the handle.
This + // constant XlaLiteral is included in the HLO graph, and subsequently, in + // the actual executable, which is copied to the device before being + // executed. Thus, when this executable runs, the constant is available in + // device memory. + + // XlaLaunch kernel keeps all outputs (including constants, which it copies), + // in device memory MemoryTypeVector output_memory_types(fbody->ret_types.size(), DEVICE_MEMORY); + // Create the kernel. + NameAttrList function; + function.set_name(node_def.op()); + *(function.mutable_attr()) = node_def.attr(); + Device* dev = flr->device(); Status s; OpKernelConstruction construction( DeviceType(dev->device_type()), dev, - dev->GetAllocator(AllocatorAttributes()), &launch_def, + dev->GetAllocator(AllocatorAttributes()), &node_def, &fbody->fdef.signature(), flr, fbody->arg_types, input_memory_types, fbody->ret_types, output_memory_types, flr->graph_def_version(), &s); - kernel->reset(new XlaLocalLaunchOp(&construction)); + + *kernel = absl::make_unique( + &construction, constant_arg_indices, resource_arg_indices, function); return s; } +namespace { + bool RegisterLaunchOpCreator() { RegisterDefaultCustomKernelCreator(CreateXlaLaunchOp); return true; diff --git a/tensorflow/compiler/jit/create_xla_launch_op.h b/tensorflow/compiler/jit/create_xla_launch_op.h new file mode 100644 index 0000000000000000000000000000000000000000..98a22e351532c197c69c5ea908305d885fd2c9d0 --- /dev/null +++ b/tensorflow/compiler/jit/create_xla_launch_op.h @@ -0,0 +1,35 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_JIT_CREATE_XLA_LAUNCH_OP_H_ +#define TENSORFLOW_COMPILER_JIT_CREATE_XLA_LAUNCH_OP_H_ + +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class FunctionLibraryRuntime; +class OpKernel; + +// Given a NodeDef 'node_def' and the function library runtime 'flr', if +// 'node_def' is a call to a compilable function defined in 'flr', returns OK +// and fills in 'kernel' with a XlaLaunchOp kernel which computes the +// node. Otherwise, returns a non-OK. +Status CreateXlaLaunchOp(FunctionLibraryRuntime* flr, const NodeDef& node_def, + std::unique_ptr* kernel); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_CREATE_XLA_LAUNCH_OP_H_ diff --git a/tensorflow/compiler/jit/create_xla_launch_op_test.cc b/tensorflow/compiler/jit/create_xla_launch_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..bcd5e75c7e4c021a9be874ed96e994768bb80811 --- /dev/null +++ b/tensorflow/compiler/jit/create_xla_launch_op_test.cc @@ -0,0 +1,145 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/create_xla_launch_op.h" + +#include "absl/memory/memory.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +NodeDef ToNodeDef(const string& text) { + NodeDef node_def; + EXPECT_TRUE(protobuf::TextFormat::MergeFromString(text, &node_def)); + return node_def; +} + +// Create a FunctionDef that takes one resource and one regular param +FunctionDef XTimesY() { + return FunctionDefHelper::Define( + // Name + "XTimesY", + // Args + {"x: float", "y: resource"}, + // Return values + {"z: float"}, + // Attr def + {}, + // Nodes + { + {{"y0"}, "ReadVariableOp", {"y"}, {{"dtype", DT_FLOAT}}}, + {{"z"}, "Mul", {"x", "y0"}, {{"T", DT_FLOAT}}}, + }); +} + +class CreateXlaLaunchOpTest : public ::testing::Test { + protected: + void Init(const std::vector& flib) { + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", 1}); + TF_CHECK_OK(DeviceFactory::AddDevices( + options, "/job:localhost/replica:0/task:0", &devices_)); + + FunctionDefLibrary proto; + for (const 
auto& fdef : flib) { + *(proto.add_function()) = fdef; + } + lib_def_ = absl::make_unique( + OpRegistry::Global(), proto); + OptimizerOptions opts; + device_mgr_ = absl::make_unique(devices_); + pflr_ = absl::make_unique( + device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), + opts, /*default_thread_pool=*/nullptr, /*cluster_flr=*/nullptr); + flr_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + } + + FunctionLibraryRuntime* flr_; + std::vector devices_; + std::unique_ptr device_mgr_; + std::unique_ptr lib_def_; + std::unique_ptr pflr_; + + std::unique_ptr kernel_; +}; + +AttrValue BoolAttr(bool b) { + AttrValue v; + v.set_b(b); + return v; +} + +TEST_F(CreateXlaLaunchOpTest, OneFloatOneResourceArgument) { + FunctionDef fdef = XTimesY(); + (*fdef.mutable_attr())["_XlaCompile"] = BoolAttr(true); + Init({fdef}); + + Status status = CreateXlaLaunchOp( + flr_, ToNodeDef(R"pb( + name: 'XTimesY' op: 'XTimesY' input: 'a' input: 'b' + )pb"), &kernel_); + ASSERT_TRUE(status.ok()) << status.ToString(); + + EXPECT_EQ("XTimesY", kernel_->name()); + EXPECT_EQ("XTimesY", kernel_->type_string()); + + EXPECT_EQ(2, kernel_->num_inputs()); + EXPECT_EQ(DT_FLOAT, kernel_->input_type(0)); + EXPECT_EQ(DT_RESOURCE, kernel_->input_type(1)); + EXPECT_EQ(DEVICE_MEMORY, kernel_->input_memory_types()[0]); + EXPECT_EQ(HOST_MEMORY, kernel_->input_memory_types()[1]); + + EXPECT_EQ(1, kernel_->num_outputs()); + EXPECT_EQ(DT_FLOAT, kernel_->output_type(0)); + EXPECT_EQ(DEVICE_MEMORY, kernel_->output_memory_types()[0]); +} + +TEST_F(CreateXlaLaunchOpTest, FailsIfXlaCompileAttrNotSet) { + FunctionDef fdef = XTimesY(); + Init({fdef}); + + Status status = CreateXlaLaunchOp(flr_, ToNodeDef(R"proto( + name: 'XTimesY' + op: 'XTimesY' + input: 'a' + input: 'b' + )proto"), &kernel_); + EXPECT_TRUE(errors::IsInvalidArgument(status)) << status.ToString(); +} + +TEST_F(CreateXlaLaunchOpTest, FailsIfXlaCompileAttrIsSetToFalse) { + FunctionDef fdef = XTimesY(); + 
(*fdef.mutable_attr())["_XlaCompile"] = BoolAttr(false); + Init({fdef}); + + Status status = CreateXlaLaunchOp(flr_, ToNodeDef(R"proto( + name: 'XTimesY' + op: 'XTimesY' + input: 'a' + input: 'b' + )proto"), &kernel_); + EXPECT_TRUE(errors::IsInvalidArgument(status)) << status.ToString(); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index f06debaf316c0172a5683e56aa5de6ebb83fbece..6d1e3325ebd35b9608ea273fb7de39bad381e60d 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -240,7 +240,7 @@ class Encapsulator { // Once edges between compiled and outside_compilation clusters have been // replaced by send/recv ops, some dependencies may no longer be apparent. // A clustering pass finds all the dependencies between HC nodes that are only - // present as a result of edges between nodes in outside_compilaton clusters. + // present as a result of edges between nodes in outside_compilation clusters. // Suppose there is a path from outside_compilation cluster C in subgraph S // to outside_compilation cluster D in subgraph T. If S != T then a control // edge is added from the call node for S to the call node for T, which diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 049d170fa48928474b894f2d0e1f2243c5f87275..86a9fd3b8e124e581bc4b73f264dbd5be46c790a 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -39,15 +39,15 @@ limitations under the License. 
namespace tensorflow { -XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) - : OpKernel(ctx), device_type_(ctx->device_type()) { - const NameAttrList* func; - OP_REQUIRES_OK(ctx, ctx->GetAttr("function", &func)); - function_ = *func; - DataTypeVector constant_types; - OP_REQUIRES_OK(ctx, ctx->GetAttr("Tconstants", &constant_types)); - num_constant_args_ = constant_types.size(); - OP_REQUIRES_OK(ctx, ctx->GetAttr("Nresources", &num_resource_args_)); +XlaLocalLaunchBase::XlaLocalLaunchBase(OpKernelConstruction* ctx, + const std::vector& constants, + const std::vector& resources, + const NameAttrList& function) + : OpKernel(ctx), + constants_(constants), + resources_(resources), + device_type_(ctx->device_type()), + function_(function) { if (device_type_ == DeviceType(DEVICE_CPU)) { platform_id_ = se::host::kHostPlatformId; } else if (device_type_ == DeviceType(DEVICE_GPU)) { @@ -57,8 +57,8 @@ XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) } } -Status XlaLocalLaunchOp::BuildCompilationCache(OpKernelContext* ctx, - XlaCompilationCache** cache) { +Status XlaLocalLaunchBase::BuildCompilationCache(OpKernelContext* ctx, + XlaCompilationCache** cache) { const XlaDevice::Metadata* metadata; Status s = XlaDevice::GetMetadata(ctx, &metadata); if (s.ok()) { @@ -90,8 +90,8 @@ Status XlaLocalLaunchOp::BuildCompilationCache(OpKernelContext* ctx, return Status::OK(); } -void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { - VLOG(1) << "XlaLocalLaunchOp::Compute " +void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { + VLOG(1) << "XlaLocalLaunchOpBase::Compute " << Canonicalize(function_.name(), AttrSlice(&function_.attr())); // We store information about the JIT-compiled XLA computation // in the ResourceMgr. 
@@ -124,7 +124,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { } std::map variables = - SnapshotResourceVariables(ctx, num_resource_args_); + SnapshotResourceVariables(ctx, resources_); xla::LocalClient* client = static_cast(cache->client()); @@ -161,7 +161,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { xla::LocalExecutable* executable; std::map constant_args; - for (int i = 0; i < num_constant_args_; ++i) { + for (int i : constants_) { constant_args.insert({i, ctx->input(i)}); } OP_REQUIRES_OK(ctx, cache->Compile(options, function_, constant_args, @@ -170,8 +170,8 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Executing XLA Computation..."; - XlaComputationLaunchContext launch_context( - num_resource_args_, client, xla_allocator, allocate_xla_tensors); + XlaComputationLaunchContext launch_context(client, xla_allocator, + allocate_xla_tensors); launch_context.PopulateInputs(ctx, kernel, variables); // Execute the computation. @@ -194,6 +194,62 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Done"; } +namespace { + +// OP_REQUIRES_OK_RETURN is the same as OP_REQUIRES_OK except that +// in error case, it returns RET instead of void. +#define OP_REQUIRES_OK_RETURN(CTX, RET, ...) \ + do { \ + ::tensorflow::Status _s(__VA_ARGS__); \ + if (!TF_PREDICT_TRUE(_s.ok())) { \ + (CTX)->CtxFailureWithWarning(__FILE__, __LINE__, _s); \ + return RET; \ + } \ + } while (0) + +// Helper static functions to construct parameters for +// XlaLocalLaunchBase constructor from OpKernelConstruction. 
+std::vector ConstantsVector(OpKernelConstruction* ctx) { + DataTypeVector constant_types; + OP_REQUIRES_OK_RETURN(ctx, std::vector(), + ctx->GetAttr("Tconstants", &constant_types)); + std::vector constants(constant_types.size()); + std::iota(constants.begin(), constants.end(), 0); + return constants; +} + +std::vector ResourcesVector(OpKernelConstruction* ctx) { + DataTypeVector constant_types; + OP_REQUIRES_OK_RETURN(ctx, std::vector(), + ctx->GetAttr("Tconstants", &constant_types)); + + DataTypeVector arg_types; + OP_REQUIRES_OK_RETURN(ctx, std::vector(), + ctx->GetAttr("Targs", &arg_types)); + + int num_resources; + OP_REQUIRES_OK_RETURN(ctx, std::vector(), + ctx->GetAttr("Nresources", &num_resources)); + + std::vector resources(num_resources); + std::iota(resources.begin(), resources.end(), + constant_types.size() + arg_types.size()); + return resources; +} + +NameAttrList FunctionAttr(OpKernelConstruction* ctx) { + const NameAttrList* func; + OP_REQUIRES_OK_RETURN(ctx, NameAttrList(), ctx->GetAttr("function", &func)); + return *func; +} + +#undef OP_REQUIRES_OK_RETURN +} // namespace + +XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) + : XlaLocalLaunchBase(ctx, ConstantsVector(ctx), ResourcesVector(ctx), + FunctionAttr(ctx)) {} + XlaLocalLaunchOp::~XlaLocalLaunchOp() { VLOG(1) << "XlaLocalLaunchOp destroyed"; } diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.h b/tensorflow/compiler/jit/kernels/xla_launch_op.h index 8f8e646f0ff6d94dfdf56721cacfce7fa658beb6..8dfc4b382d51151b6383fe7dd75429f3124d39be 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.h +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.h @@ -26,6 +26,41 @@ limitations under the License. namespace tensorflow { +// XlaLocalLaunchBase is almost the same as XlaLocalLaunchOp. +// The only difference is that it does not require arguments to follow +// the "constants, then regular args, then resources" order. 
+// It takes vectors of constant and resource arguments explicitly. +// It does not have corresponding OpDef because it is never present +// in the GraphDef. +// Currently, it is used by eager runtime. FunctionLibraryRuntime creates +// this kernel when asked to create a kernel for an XLA-compiled function. +class XlaLocalLaunchBase : public OpKernel { + public: + XlaLocalLaunchBase(OpKernelConstruction* ctx, + const std::vector& constants, + const std::vector& resources, + const NameAttrList& function); + XlaLocalLaunchBase(const XlaLocalLaunchBase&) = delete; + XlaLocalLaunchBase& operator=(const XlaLocalLaunchBase&) = delete; + ~XlaLocalLaunchBase() override = default; + + void Compute(OpKernelContext* ctx) override; + + protected: + // Builds a XlaCompilationCache class suitable for the current device. + Status BuildCompilationCache(OpKernelContext* ctx, + XlaCompilationCache** cache); + + // Indexes of compile-time constant inputs + std::vector constants_; + // Indexes of resource inputs + std::vector resources_; + + DeviceType device_type_; + NameAttrList function_; + se::Platform::Id platform_id_; +}; + // XlaLocalLaunchOp is used to replace a region of the TensorFlow graph // which will be compiled and executed using XLA. The XlaLocalLaunchOp is // responsible for handling interactions with the TensorFlow executor. @@ -35,26 +70,12 @@ namespace tensorflow { // XlaLocalLaunchOp uses xla::LocalClient::Compile() and // xla::LocalExecutable::Run(), and passes arguments into/out of XLA in device // memory. -class XlaLocalLaunchOp : public OpKernel { +class XlaLocalLaunchOp : public XlaLocalLaunchBase { public: explicit XlaLocalLaunchOp(OpKernelConstruction* ctx); ~XlaLocalLaunchOp() override; - void Compute(OpKernelContext* ctx) override; - private: - // Builds a XlaCompilationCache class suitable for the current device. 
- Status BuildCompilationCache(OpKernelContext* ctx, - XlaCompilationCache** compiler); - - DeviceType device_type_; - NameAttrList function_; - int num_constant_args_; - // Number of resource variable arguments. - int num_resource_args_; - - se::Platform::Id platform_id_; - TF_DISALLOW_COPY_AND_ASSIGN(XlaLocalLaunchOp); }; diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 60458f6f3314b2c3b65be1c90e051b2a670383bc..6b83cf67ffc571f235ae84d0de58254c5d7e4962 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -48,13 +48,12 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, const XlaCompiler::CompilationResult* result, xla::LocalExecutable* executable) { std::map variables = GetVariables(ctx); - int64 num_resource_args = variables.size(); xla::LocalClient* client = metadata.client(); // Builds an XLA allocator for the device. XlaComputationLaunchContext launch_context( - num_resource_args, client, client->backend().memory_allocator(), true); + client, client->backend().memory_allocator(), true); launch_context.PopulateInputs(ctx, result, variables); diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 2a7f04271d4b7ea330f32b88ea1e3f4037988a91..0223f97a032cf9efe56005248ce65d412e340b78 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -38,14 +38,13 @@ using xla::ScopedShapedBuffer; using xla::ShapedBuffer; } // anonymous namespace -std::map SnapshotResourceVariables(OpKernelContext* ctx, - int num_variables) { +std::map SnapshotResourceVariables( + OpKernelContext* ctx, const std::vector& variables) { std::map snapshot; - int first_variable = ctx->num_inputs() - num_variables; - for (int i = 0; i < num_variables; ++i) { + for (int i : variables) { Var* variable = nullptr; - ResourceHandle handle = 
HandleFromInput(ctx, first_variable + i); - OptionalTensor& tensor = snapshot[first_variable + i]; + ResourceHandle handle = HandleFromInput(ctx, i); + OptionalTensor& tensor = snapshot[i]; if (LookupResource(ctx, handle, &variable).ok()) { tf_shared_lock lock(*variable->mu()); tensor.name = handle.name(); @@ -77,16 +76,16 @@ Status XlaAllocator::Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) { return Status::OK(); } -namespace { +namespace internal { // Return the 'index''th subtree of the given ShapedBuffer as a // ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the // subtree, and sets the input's buffer pointers to nullptr for the subtree. ScopedShapedBuffer ExtractSubShapedBuffer( ShapedBuffer* shaped_buffer, int index, xla::DeviceMemoryAllocator* allocator) { - xla::Shape on_host_shape = xla::ShapeUtil::GetTupleElementShape( + const xla::Shape& on_host_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_host_shape(), index); - xla::Shape on_device_shape = xla::ShapeUtil::GetTupleElementShape( + const xla::Shape& on_device_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_device_shape(), index); ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape, @@ -98,20 +97,23 @@ ScopedShapedBuffer ExtractSubShapedBuffer( sub_shape_tree.CopySubtreeFrom(shape_tree, /*source_base_index=*/{index}, /*target_base_index=*/{}); - for (auto& index_to_buffer : shape_tree) { - if (!index_to_buffer.first.empty() && index_to_buffer.first[0] == index) { - index_to_buffer.second = se::DeviceMemoryBase(nullptr, 0); - } - } + shape_tree.ForEachMutableElement( + [index](const xla::ShapeIndex& shape_index, + tensorflow::se::DeviceMemoryBase* data) { + // shape_index is empty for the root node. Ignore that. 
+ if (!shape_index.empty() && shape_index[0] == index) { + *data = tensorflow::se::DeviceMemoryBase(nullptr, 0); + } + }); return ScopedShapedBuffer(std::move(sub_shaped_buffer), allocator); } -} // namespace +} // namespace internal +using internal::ExtractSubShapedBuffer; XlaComputationLaunchContext::XlaComputationLaunchContext( - int64 num_resource_args, xla::LocalClient* client, - xla::DeviceMemoryAllocator* xla_allocator, bool allocate_xla_tensors) - : num_resource_args_(num_resource_args), - client_(client), + xla::LocalClient* client, xla::DeviceMemoryAllocator* xla_allocator, + bool allocate_xla_tensors) + : client_(client), xla_allocator_(xla_allocator), allocate_xla_tensors_(allocate_xla_tensors) {} diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 8a6ff3b0c751206d184da63ef1a36e750a1252a5..a2431253f8c44bdd9b99a253f79bdb14722d7c72 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -31,15 +31,17 @@ limitations under the License. namespace tensorflow { class XlaAllocator; -// Takes a snapshot of the values of resource variable arguments, which are -// the last `num_variables` arguments. We snapshot tensors that back +// Takes a snapshot of the values of resource variable arguments, whose +// indices are specified in `variables` argument. We snapshot tensors that back // resource variables since concurrent updates may modify the shape, and it is // important that the shapes used for compilation match the true shapes of the // buffers. // -// Returns a map of TensorFlow argument index to resource variable. -std::map SnapshotResourceVariables(OpKernelContext* ctx, - int num_variables); +// Returns a map of TensorFlow argument index to resource variable. If a +// resource variable is not initialized, the corresponding OptionalTensor +// will have its `present` field set to false. 
+std::map SnapshotResourceVariables( + OpKernelContext* ctx, const std::vector& variables); // Adapter class that wraps a Tensorflow allocator as an XLA allocator. // Assumes that the Tensorflow allocator permits asynchronous deallocation: @@ -72,7 +74,7 @@ class XlaComputationLaunchContext { // Create a new launch context. 'allocate_xla_tensors' is true if allocated // output tensors and variables are always XlaTensors. If false they are // assumed to be "normal" device pointers. - XlaComputationLaunchContext(int64 num_resource_args, xla::LocalClient* client, + XlaComputationLaunchContext(xla::LocalClient* client, xla::DeviceMemoryAllocator* xla_allocator, bool allocate_xla_tensors); @@ -92,7 +94,6 @@ class XlaComputationLaunchContext { const std::vector& arguments() const { return arg_ptrs_; } private: - int64 num_resource_args_; xla::LocalClient* client_; xla::DeviceMemoryAllocator* xla_allocator_; bool allocate_xla_tensors_; @@ -140,6 +141,17 @@ class XlaTensorBuffer : public TensorBuffer { Allocator* allocator_; }; +// Exposed in this header file for microbenchmarking purposes, but this is an +// internal implementation detail. +namespace internal { +// Return the 'index''th subtree of the given ShapedBuffer as a +// ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the +// subtree, and sets the input's buffer pointers to nullptr for the subtree. +xla::ScopedShapedBuffer ExtractSubShapedBuffer( + xla::ShapedBuffer* shaped_buffer, int index, + xla::DeviceMemoryAllocator* allocator); +} // namespace internal + } // namespace tensorflow #endif diff --git a/tensorflow/compiler/jit/xla_launch_util_test.cc b/tensorflow/compiler/jit/xla_launch_util_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..27813efc0bc0aecdbea2dfce5ca27ba704ea45e2 --- /dev/null +++ b/tensorflow/compiler/jit/xla_launch_util_test.cc @@ -0,0 +1,64 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Contains microbenchmarks for performance critical functions in +// xla_launch_util.cc. + +#include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +// Test ExtractSubBuffer with different depths (depth of ShapeTree) and fan-outs +// (cardinality of each non-leaf node's children). +void BM_ExtractSubBuffer(int iters, int depth, int fan_out) { + tensorflow::testing::StopTiming(); + xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128}); + for (int i = 0; i < depth; ++i) { + std::vector shapes(fan_out, shape); + shape = xla::ShapeUtil::MakeTupleShape(shapes); + } + xla::ShapedBuffer shaped_buffer(shape, shape, /*platform=*/nullptr, + /*device_ordinal=*/0); + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + // Extract a buffer from approximately the middle of the first level of the + // tree. 
+ tensorflow::internal::ExtractSubShapedBuffer(&shaped_buffer, + /*index=*/fan_out / 2, + /*allocator=*/nullptr) + .release(); + } +} + +BENCHMARK(BM_ExtractSubBuffer) + ->ArgPair(1, 4) + ->ArgPair(1, 8) + ->ArgPair(1, 32) + ->ArgPair(1, 64) + ->ArgPair(1, 128) + ->ArgPair(1, 256) + ->ArgPair(1, 512) + ->ArgPair(2, 4) + ->ArgPair(2, 8) + ->ArgPair(2, 32) + ->ArgPair(2, 64) + ->ArgPair(2, 128); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + tensorflow::testing::RunBenchmarks(); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/compiler/jit/xla_tensor.h b/tensorflow/compiler/jit/xla_tensor.h index 922a91897312096e4bb6ee2a1cc153e0039e2c7a..6b29c82ec11e39ad525663991e179443c2b6dca7 100644 --- a/tensorflow/compiler/jit/xla_tensor.h +++ b/tensorflow/compiler/jit/xla_tensor.h @@ -54,7 +54,7 @@ class XlaTensor { // Some Tensors can have complex on-device shapes, including tuple shapes. To // manage the memory for these tensors a ShapedBuffer may be required. - // Return true if this TensorInfo contains a ShapedBuffer. + // Return true if this XlaTensor contains a ShapedBuffer. bool has_shaped_buffer() const { return shaped_buffer_ != nullptr; } // Return the contained ShapedBuffer. // REQUIRES: has_shaped_buffer() @@ -62,7 +62,7 @@ class XlaTensor { CHECK(has_shaped_buffer()); return *shaped_buffer_; } - // Mutates the TensorInfo to set the ShapedBuffer. + // Mutates the XlaTensor to set the ShapedBuffer. void set_shaped_buffer(xla::ScopedShapedBuffer shaped_buffer) { shaped_buffer_ = xla::MakeUnique(std::move(shaped_buffer)); @@ -72,7 +72,7 @@ class XlaTensor { // in on-demand mode to avoid re-copying values from the device if we know the // host value already. - // Return true if this TensorInfo contains a host tensor. + // Return true if this XlaTensor contains a host tensor. bool has_host_tensor() const { return host_tensor_ != nullptr; } // Return the contained host tensor. 
// REQUIRES: has_host_tensor() diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index a94b298f87832057c6ec86a1ea250a54ed1b4ee0..9791792f29ca05f4ece77cca6305ed05343d1d38 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -300,6 +300,10 @@ tf_xla_py_test( name = "extract_image_patches_op_test", size = "small", srcs = ["extract_image_patches_op_test.py"], + tags = [ + "manual", + "notap", + ], deps = [ ":xla_test", "//tensorflow/python:array_ops", @@ -323,7 +327,11 @@ tf_xla_py_test( ":xla_test", "//tensorflow/python:array_ops", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:layers", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn", "//tensorflow/python:platform_test", + "//tensorflow/python/eager:function", ], ) diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py index bdd0185dfe4abe9d9acecc5381ff82c54b8c0705..5ab1585f8c6e07d6e3f0f40c99840b176492e523 100644 --- a/tensorflow/compiler/tests/eager_test.py +++ b/tensorflow/compiler/tests/eager_test.py @@ -24,10 +24,16 @@ from tensorflow.compiler.tests.xla_test import XLATestCase from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.layers import convolutional +from tensorflow.python.layers import pooling from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import googletest @@ -43,7 +49,7 @@ class EagerTest(XLATestCase): def testExecuteListOutputLen0(self): with 
self.test_scope(): - empty = constant_op.constant([], dtype=dtypes.int32) + empty = constant_op.constant([], dtype=dtypes.float32) result = array_ops.unstack(empty, 0) self.assertTrue(isinstance(result, list)) self.assertEqual(0, len(result)) @@ -51,7 +57,7 @@ class EagerTest(XLATestCase): def testExecuteListOutputLen1(self): with self.test_scope(): split_dim = constant_op.constant(1) - value = constant_op.constant([[0, 1, 2], [3, 4, 5]]) + value = constant_op.constant([[0., 1., 2.], [3., 4., 5.]]) result = array_ops.split(value, 1, axis=split_dim) self.assertTrue(isinstance(result, list)) self.assertEqual(1, len(result)) @@ -60,7 +66,7 @@ class EagerTest(XLATestCase): def testExecuteListOutputLen3(self): with self.test_scope(): split_dim = constant_op.constant(1) - value = constant_op.constant([[0, 1, 2], [3, 4, 5]]) + value = constant_op.constant([[0., 1., 2.], [3., 4., 5.]]) result = array_ops.split(value, 3, axis=split_dim) self.assertTrue(isinstance(result, list)) self.assertEqual(3, len(result)) @@ -131,7 +137,105 @@ class EagerTest(XLATestCase): self.assertEqual(2., grads[0][0].numpy()) -if __name__ == "__main__": +class EagerFunctionTest(XLATestCase): + + def testBasic(self): + with self.test_scope(): + matmul = function.defun(math_ops.matmul, compiled=True) + t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + sq = matmul(t, t, transpose_a=True) + self.assertAllEqual(sq.numpy().reshape(-1), [10, 14, 14, 20]) + + def testConv(self): + if 'GPU' in self.device: + # TODO(b/32333178) + self.skipTest('Current implementation of RandomStandardNormal kernel ' + 'is very slow on GPU, and has been blacklisted.') + with self.test_scope(): + data_format = 'channels_last' + conv = convolutional.Conv2D( + filters=1, kernel_size=2, padding='VALID', + data_format=data_format, activation=nn_ops.relu, + kernel_initializer=init_ops.ones_initializer(), + bias_initializer=init_ops.zeros_initializer()) + pool = pooling.MaxPooling2D(2, 2, data_format=data_format) + + def 
model(x): + x = conv(x) + return pool(x) + model = function.defun(model, compiled=True) + + x = array_ops.ones([1, 4, 4, 1]) + y = model(x) + self.assertAllEqual(y.numpy(), [[[[4.]]]]) + + def testReadVariable(self): + with self.test_scope(): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun(compiled=True) + def f(): + return v.read_value() + + var = f() + self.assertEqual(1.0, var.numpy()) + + def testUpdateVariable(self): + with self.test_scope(): + v = resource_variable_ops.ResourceVariable(1.0) + + def f(v): + v.assign_add(1.0) + return v + + f = function.defun(f, compiled=True) + + var = f(v) + self.assertEqual(2.0, var.numpy()) + + def testAllArgumentKinds(self): + """Test a complex function that takes different argument kinds. + + tf2xla machinery that translates, compiles, and runs defuns + classifies arguments into: compile-time constants, regular tensors, + and resources. This test creates a function with a mix of all these + kinds. Moreover, the order of function arguments is intentionally mixed up. + + This also tests the case when the same argument is a compile-time constant + as well as used in an operation that normally expects its inputs to be + in device memory - addition in this case. 
+ """ + with self.test_scope(): + def foo(c1, r1, v1, c2, v2, r2): + # c1 and c2 are compile-time constants + # r1 and r2 are regular tensors + # v1 and v2 are resource variables + a = c1 + r1 + b = math_ops.cast(c2, dtypes.float32) + v2 + c = array_ops.slice(v1, c1, c2) + d = r2 * v2 + return a, b, c, d + + foo = function.defun(foo, compiled=True) + + c1 = [0, 0] + c2 = array_ops.ones([2], dtype=dtypes.int32) + + r1 = array_ops.ones([2]) + r2 = [[2., 2.], [3., 3.]] + + v1 = resource_variable_ops.ResourceVariable([[1., 2.], [3., 4.]]) + v2 = resource_variable_ops.ResourceVariable([[10., 20.], [30., 40.]]) + + a, b, c, d = foo(c1, r1, v1, c2, v2, r2) + + self.assertAllEqual([1, 1], a.numpy()) + self.assertAllEqual([[11., 21.], [31., 41.]], b.numpy()) + self.assertAllEqual([[1.]], c.numpy()) + self.assertAllEqual([[20., 40.], [90., 120.]], d.numpy()) + + +if __name__ == '__main__': ops.enable_eager_execution( config=config_pb2.ConfigProto(log_device_placement=True)) googletest.main() diff --git a/tensorflow/compiler/tests/oom_test.py b/tensorflow/compiler/tests/oom_test.py index 1434e965e3d7eaeca94ad0fa97498f884e30e115..d68d32057a367776d5b70d5ac21d5618297c605d 100644 --- a/tensorflow/compiler/tests/oom_test.py +++ b/tensorflow/compiler/tests/oom_test.py @@ -22,6 +22,8 @@ from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest @@ -42,20 +44,33 @@ class OutOfMemoryTest(xla_test.XLATestCase): """ def test_loop(): - size = 2e8 + size = int(2e8) while True: with self.test_session(): - # Force the compiled code to not be constant by feeding in an addend. - p = array_ops.placeholder(dtypes.float32, shape=[]) + # Force the compiled code to not be constant by feeding in a + # parameter. 
+ p = array_ops.placeholder(dtypes.float32, shape=[2, 1, 1]) with self.test_scope(): - # Create a large R1 tensor. - c = array_ops.zeros([size, 1]) + p + # Create a computation that produces a large R1 tensor as an + # intermediate result. Reduce it down so that if this file was + # compiled without --config=cuda, we don't force a D2H copy of a + # large tensor and potentially OOM the host. + # + # This is a bit tricky because XLA:GPU doesn't currently support RNG + # ops. Here we rely on the fact that XLA doesn't do algebraic + # simplifications on conv(<ones>, <filter>). + c = math_ops.reduce_sum( + nn_ops.convolution( + array_ops.ones([1, size, 1]), + p, + padding='SAME', + data_format='NWC')) - c.eval(feed_dict={p: 1.0}) + c.eval(feed_dict={p: [[[1.0]], [[2.0]]]}) size *= 2 self.assertRaises(errors.ResourceExhaustedError, test_loop) -if __name__ == "__main__": +if __name__ == '__main__': googletest.main() diff --git a/tensorflow/compiler/tests/reduce_ops_test.py b/tensorflow/compiler/tests/reduce_ops_test.py index 2c084b04fa2f67ad0d86508109522d7bead206eb..7420724bdbeab63b39542ada59328621febad895 100644 --- a/tensorflow/compiler/tests/reduce_ops_test.py +++ b/tensorflow/compiler/tests/reduce_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools +import itertools import numpy as np from tensorflow.compiler.tests.xla_test import XLATestCase @@ -155,5 +156,68 @@ class ReduceOpsTest(XLATestCase): self._testReduction(math_ops.reduce_any, np.any, np.bool, self.BOOL_DATA) +class ReduceOpPrecisionTest(XLATestCase): + + def _testReduceSum(self, + expected_result, + dtype, + test_inputs, + rtol=1e-3, + atol=1e-4): + """Tests reduce sum on a list of input arrays. + + For each array in test_inputs, check that performing reduce sum on the array + produces a value that is close to the expected result. + + Args: + expected_result: the expected result. + dtype: the data type of the reduce sum operation. 
+ test_inputs: a list of input arrays for the reduce sum operation. + rtol: the relative error. + atol: the absolute error. + """ + + for test_input in test_inputs: + with self.test_session() as sess: + with self.test_scope(): + a = array_ops.placeholder(dtype) + index = array_ops.placeholder(dtypes.int32) + out = math_ops.reduce_sum(a, index) + result = sess.run(out, { + a: np.array(test_input, dtype=dtype), + index: [0] + }) + # Compare the results using float32 type. + self.assertAllClose( + np.float32(result), + np.float32(expected_result), + rtol=rtol, + atol=atol) + + def testReduceSumF16(self): + """Tests the reduce sum of float16 doesn't lose too much precision.""" + + if np.float16 not in self.all_types: + return + + f16_max = np.finfo(np.float16).max + self._testReduceSum( + f16_max, np.float16, + itertools.permutations([f16_max, f16_max, f16_max * (-1.0)], 3)) + + def testReduceSumBF16(self): + """Tests the reduce sum of bfloat16 doesn't lose too much precision.""" + + if dtypes.bfloat16.as_numpy_dtype not in self.all_types: + return + + bf16_max = np.float32(dtypes.bfloat16.max) + f32_max = dtypes.float32.max + value = min(bf16_max, f32_max - bf16_max) + self._testReduceSum( + dtypes.bfloat16.as_numpy_dtype(value), dtypes.bfloat16.as_numpy_dtype, + itertools.permutations([bf16_max, value, bf16_max * (-1.0)], 3)) + + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py index 4336ebdbd184a081619f0a6951dd4514735c6eb6..b6f8390a45d43bf7666b90e14cc6ff2f3f61947e 100644 --- a/tensorflow/compiler/tests/stateless_random_ops_test.py +++ b/tensorflow/compiler/tests/stateless_random_ops_test.py @@ -86,6 +86,15 @@ class StatelessRandomOpsTest(XLATestCase): # seed were not fixed. 
self.assertTrue(self._chi_squared(y, 10) < 16.92) + def testRandomNormalIsFinite(self): + with self.test_session() as sess, self.test_scope(): + for dtype in self._random_types(): + seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) + x = stateless.stateless_random_normal( + shape=[10000], seed=seed_t, dtype=dtype) + y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]}) + self.assertTrue(np.all(np.isfinite(y))) + def _normal_cdf(self, x): """Cumulative distribution function for a standard normal distribution.""" return 0.5 + 0.5 * np.vectorize(math.erf)(x / math.sqrt(2)) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 942504e6bd4c9ce93c9482251823efcbb46ab1c8..4fca51f54d320e843343f80d7df1177f80f1d99f 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -81,7 +81,7 @@ cc_library( "//tensorflow/compiler/tf2xla/kernels:xla_cpu_only_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla/client", - "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", @@ -168,9 +168,9 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", @@ -215,7 +215,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla/client:computation_builder", 
"//tensorflow/compiler/xla/client:sharding_builder", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index b20c1ffc7d8956f3f5530ee63e9b711a26439be5..8115a26210a8e9e95e851f350e34dcdfa2519a64 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -51,6 +51,7 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph, const std::vector& expressions, std::vector* args) { auto builder = ctx->builder(); + auto client = ctx->compiler()->client(); std::vector compile_time_constant_flags(expressions.size()); TF_RETURN_IF_ERROR( @@ -72,8 +73,10 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph, arg.kind = XlaCompiler::Argument::kConstant; TF_RET_CHECK(expressions[i]->resource() == nullptr) << "Input with resource is not yet implemented."; + TF_ASSIGN_OR_RETURN(auto constant_graph, builder->BuildConstantSubGraph( + expressions[i]->handle())); TF_ASSIGN_OR_RETURN(auto literal, - builder->ComputeConstant(expressions[i]->handle())); + client->ComputeConstant(constant_graph)); TF_RETURN_IF_ERROR( LiteralToHostTensor(*literal, arg.type, &arg.constant_value)); } else { @@ -212,7 +215,7 @@ Status GraphCompiler::CompileFunctionalNode(Node* n, TF_RET_CHECK(arguments.size() == expressions.size()); - std::vector handles; + std::vector handles; for (int64 i = 0; i < expressions.size(); ++i) { if (arguments[i].kind == XlaCompiler::Argument::kConstant) { continue; diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 00fd08b1a0750739445a124adc7ccf436a4a9b71..85ab4c41bf6a754236066260819f103970e603ae 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -114,8 +114,8 @@ tf_kernel_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", 
"//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/core:framework", "//tensorflow/core:image_ops_op_lib", "//tensorflow/core:lib", @@ -151,7 +151,7 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -167,7 +167,7 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -203,8 +203,8 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/kernels:argmax_op", diff --git a/tensorflow/compiler/tf2xla/kernels/aggregate_ops.cc b/tensorflow/compiler/tf2xla/kernels/aggregate_ops.cc index 5c9f66df101bfb731d6114c23933e241af5dcbeb..1e59868621475cf72f4cc8b14dafec2dd8cd5c95 100644 --- a/tensorflow/compiler/tf2xla/kernels/aggregate_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/aggregate_ops.cc @@ -29,7 +29,7 @@ class AddNOp : public XlaOpKernel { OP_REQUIRES(ctx, ctx->num_inputs() >= 1, errors::InvalidArgument("AddN requires at least one 
argument")); - xla::ComputationDataHandle sum = ctx->Input(0); + xla::XlaOp sum = ctx->Input(0); for (int i = 1; i < ctx->num_inputs(); ++i) { sum = ctx->builder()->Add(sum, ctx->Input(i)); } diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc index 931175be1111ed5f70afbdf351ee53c59c1367de..15e1815a4cf07ff50dd1431b6790d14781da590f 100644 --- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc @@ -48,9 +48,9 @@ class FusedBatchNormOp : public XlaOpKernel { OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(ctx->input_type(1), &scale_type)); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); TensorShape input_shape = ctx->InputShape(0); int feature_index = @@ -62,7 +62,7 @@ class FusedBatchNormOp : public XlaOpKernel { input = builder->ConvertElementType(input, scale_type); if (is_training_) { - xla::ComputationDataHandle output = builder->BatchNormTraining( + xla::XlaOp output = builder->BatchNormTraining( input, ctx->Input(1), ctx->Input(2), epsilon_, feature_index); // In training mode, outputs the normalized value as well as the @@ -79,7 +79,7 @@ class FusedBatchNormOp : public XlaOpKernel { ctx->SetOutput(3, builder->GetTupleElement(output, 1)); ctx->SetOutput(4, builder->GetTupleElement(output, 2)); } else { - xla::ComputationDataHandle output = builder->BatchNormInference( + xla::XlaOp output = builder->BatchNormInference( input, ctx->Input(1), ctx->Input(2), ctx->Input(3), ctx->Input(4), epsilon_, feature_index); ctx->SetOutput(0, builder->ConvertElementType(output, input_type)); @@ -118,7 +118,7 @@ class FusedBatchNormGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); DataType 
input_dtype = ctx->input_type(0); DataType scale_dtype = ctx->input_type(2); @@ -137,11 +137,11 @@ class FusedBatchNormGradOp : public XlaOpKernel { const int feature_index = GetTensorFeatureDimIndex(input_dims, data_format_); - xla::ComputationDataHandle x_backprop; - xla::ComputationDataHandle scale_backprop; - xla::ComputationDataHandle offset_backprop; + xla::XlaOp x_backprop; + xla::XlaOp scale_backprop; + xla::XlaOp offset_backprop; if (is_training_) { - xla::ComputationDataHandle output = + xla::XlaOp output = b->BatchNormGrad(activations, scale, mean, var, grad_backprop, epsilon_, feature_index); diff --git a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc index 569950c2dfaeb61028049a263a962dfa54a62e09..642278ab994bf3cc84396f093ed56b009a1435c1 100644 --- a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc @@ -20,9 +20,8 @@ limitations under the License. namespace tensorflow { namespace { -void BatchToSpace(XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, DataType input_dtype, - const TensorShape& input_tensor_shape, +void BatchToSpace(XlaOpKernelContext* ctx, const xla::XlaOp& input, + DataType input_dtype, const TensorShape& input_tensor_shape, gtl::ArraySlice block_shape, const xla::Literal& crops) { const int input_rank = input_tensor_shape.dims(); @@ -46,7 +45,7 @@ void BatchToSpace(XlaOpKernelContext* ctx, ", 2] instead of ", xla::ShapeUtil::HumanString(crops.shape()))); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const int64 batch_size = input_shape[0]; // Compute the product of the block_shape values. 
@@ -73,7 +72,7 @@ void BatchToSpace(XlaOpKernelContext* ctx, reshaped_shape[block_rank] = batch_size / block_num_elems; std::copy(input_shape.begin() + 1, input_shape.end(), reshaped_shape.begin() + block_rank + 1); - xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape); + xla::XlaOp reshaped = b->Reshape(input, reshaped_shape); // 2. Permute dimensions of `reshaped` to produce `permuted` of shape // [batch / prod(block_shape), @@ -91,7 +90,7 @@ void BatchToSpace(XlaOpKernelContext* ctx, } std::iota(permutation.begin() + 1 + block_rank * 2, permutation.end(), 1 + block_rank * 2); - xla::ComputationDataHandle permuted = b->Transpose(reshaped, permutation); + xla::XlaOp permuted = b->Transpose(reshaped, permutation); // 3. Reshape `permuted` to produce `reshaped_permuted` of shape // [batch / prod(block_shape), @@ -111,8 +110,7 @@ void BatchToSpace(XlaOpKernelContext* ctx, std::copy(remainder_shape.begin(), remainder_shape.end(), reshaped_permuted_shape.begin() + 1 + block_rank); - xla::ComputationDataHandle reshaped_permuted = - b->Reshape(permuted, reshaped_permuted_shape); + xla::XlaOp reshaped_permuted = b->Reshape(permuted, reshaped_permuted_shape); // 4. 
Crop the start and end of dimensions `[1, ..., M]` of // `reshaped_permuted` according to `crops` to produce the output of shape: @@ -139,7 +137,7 @@ void BatchToSpace(XlaOpKernelContext* ctx, "Cropped size must be non-negative: start: ", crop_start, " end: ", crop_end, " size ", reshaped_permuted_shape[1 + i])); } - xla::ComputationDataHandle output = + xla::XlaOp output = b->Slice(reshaped_permuted, start_indices, end_indices, strides); ctx->SetOutput(0, output); } diff --git a/tensorflow/compiler/tf2xla/kernels/bias_ops.cc b/tensorflow/compiler/tf2xla/kernels/bias_ops.cc index ed33b8ed2e823f313a9a7fe220390bc617288405..9d677f426650ea17a49e5ab1401078f04623fe97 100644 --- a/tensorflow/compiler/tf2xla/kernels/bias_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/bias_ops.cc @@ -60,7 +60,7 @@ class BiasOp : public XlaOpKernel { "of the input tensor: ", bias_shape.DebugString(), " vs. ", input_shape.DebugString())); - xla::ComputationDataHandle result = + xla::XlaOp result = ctx->builder()->Add(ctx->Input(0), ctx->Input(1), {feature_dim}); ctx->SetOutput(0, result); } @@ -103,7 +103,7 @@ class BiasAddGradOp : public XlaOpKernel { std::iota(reduce_dims.begin(), reduce_dims.begin() + feature_dim, 0); std::iota(reduce_dims.begin() + feature_dim, reduce_dims.end(), feature_dim + 1); - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); const DataType accumulation_type = XlaHelpers::SumAccumulationType(input_type(0)); auto converted = diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index 2436a6074a11ad66387b232dd1c5aa135875bfc3..f04cde878e98002d9442e0f3ec251c5197ef7969 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" @@ -34,14 +34,13 @@ namespace { class NAME##Op : public XlaBinaryOp { \ public: \ explicit NAME##Op(OpKernelConstruction* ctx) : XlaBinaryOp(ctx) {} \ - xla::ComputationDataHandle Computation( \ - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& lhs, \ - const gtl::ArraySlice& lhs_shape, \ - const xla::ComputationDataHandle& rhs, \ + xla::XlaOp Computation( \ + XlaOpKernelContext* ctx, const xla::XlaOp& lhs, \ + const gtl::ArraySlice& lhs_shape, const xla::XlaOp& rhs, \ const gtl::ArraySlice& rhs_shape, \ const BCast& broadcast_helper, \ const std::vector& extend_dimensions) override { \ - xla::ComputationBuilder* b = ctx->builder(); \ + xla::XlaBuilder* b = ctx->builder(); \ return HLO; \ } \ }; \ @@ -63,11 +62,8 @@ XLA_MAKE_BINARY(Complex, b->Complex(lhs, rhs, extend_dimensions)); // } else { // return x / y; // } -static xla::ComputationDataHandle FloorDivImpl(xla::ComputationBuilder* b, - DataType dtype, - xla::ComputationDataHandle x, - xla::ComputationDataHandle y, - const BCast& broadcast_helper) { +static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, + xla::XlaOp y, const BCast& broadcast_helper) { std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto one = XlaHelpers::One(b, dtype); @@ -87,11 +83,8 @@ XLA_MAKE_BINARY(FloorDiv, // Implementation of FloorMod. Pseudo-code: // T trunc_mod = std::fmod(x, y); // return (x < T(0)) == (y < T(0)) ? 
trunc_mod : std::fmod(trunc_mod + y, y); -static xla::ComputationDataHandle FloorModImpl(xla::ComputationBuilder* b, - DataType dtype, - xla::ComputationDataHandle x, - xla::ComputationDataHandle y, - const BCast& broadcast_helper) { +static xla::XlaOp FloorModImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, + xla::XlaOp y, const BCast& broadcast_helper) { std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto same_sign = b->Eq(b->Lt(x, zero), b->Lt(y, zero)); @@ -127,8 +120,7 @@ XLA_MAKE_BINARY(SqrtGrad, XlaHelpers::FloatLiteral(b, input_type(0), 0.5)), lhs, extend_dimensions)); -static xla::ComputationDataHandle Square(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& x) { +static xla::XlaOp Square(xla::XlaBuilder* builder, const xla::XlaOp& x) { return builder->Mul(x, x); } @@ -175,11 +167,11 @@ class ApproximateEqualOp : public XlaOpKernel { // Computes the max of the scalar input x and 0. void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); auto abs = b->Abs(b->Sub(ctx->Input(0), ctx->Input(1))); auto abs_shape = b->GetShape(abs); OP_REQUIRES_OK(ctx, abs_shape.status()); - auto abs_type = abs_shape.ValueOrDie()->element_type(); + auto abs_type = abs_shape.ValueOrDie().element_type(); auto result = b->Lt( abs, b->ConvertElementType(b->ConstantR0(tolerance_), abs_type)); ctx->SetOutput(0, result); diff --git a/tensorflow/compiler/tf2xla/kernels/cast_op.cc b/tensorflow/compiler/tf2xla/kernels/cast_op.cc index c52b2dcb7e9ef81fd52565dfbda05e33a52ed43a..e9d98c768572c52825fa5192ecec834889f040fe 100644 --- a/tensorflow/compiler/tf2xla/kernels/cast_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/cast_op.cc @@ -33,9 +33,9 @@ class CastOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); - xla::ComputationDataHandle input = 
ctx->Input(0); - xla::ComputationDataHandle output; + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + xla::XlaOp output; if (src_dtype_ == dst_dtype_) { output = input; @@ -72,9 +72,9 @@ class BitcastOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); - xla::ComputationDataHandle output; + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + xla::XlaOp output; if (src_dtype_ == dst_dtype_) { output = input; diff --git a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc index 545aa364f937b2dc972dbe7b8c18b5897aa8e5c3..835a7f568945f0bee86fe2b39491c3326726e1aa 100644 --- a/tensorflow/compiler/tf2xla/kernels/categorical_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/categorical_op.cc @@ -34,7 +34,7 @@ class CategoricalOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { // Get the logits - const xla::ComputationDataHandle& logits = ctx->Input(0); + const xla::XlaOp& logits = ctx->Input(0); TensorShape logits_shape = ctx->InputShape(0); int64 num_samples; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_samples)); @@ -56,7 +56,7 @@ class CategoricalOp : public XlaOpKernel { const int64 batch_size = logits_shape.dim_size(0); const int64 num_classes = logits_shape.dim_size(1); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); std::array uniform_shape_array = { {batch_size, num_samples, num_classes}}; @@ -78,7 +78,7 @@ class CategoricalOp : public XlaOpKernel { /*broadcast_dimensions=*/{0, 2}); TensorShape softmax_shape(uniform_shape_array); - xla::ComputationDataHandle argmax; + xla::XlaOp argmax; OP_REQUIRES_OK( ctx, XlaHelpers::ArgMax(builder, ctx, softmax_entries, softmax_shape, diff --git a/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc 
b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc index fdf75be7b1156540d762e3bc04a51f2478f00f46..a00bc912f9f40052565446c6bf9390629af9a4cd 100644 --- a/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/clip_by_value_op.cc @@ -29,7 +29,7 @@ class ClipByValueOp : public XlaOpKernel { const TensorShape min_shape = ctx->InputShape(1); const TensorShape max_shape = ctx->InputShape(2); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); auto input = ctx->Input(0); auto min = ctx->Input(1); auto max = ctx->Input(2); diff --git a/tensorflow/compiler/tf2xla/kernels/concat_op.cc b/tensorflow/compiler/tf2xla/kernels/concat_op.cc index 1a246e8df9b2cd83147b50d960744332f8582a51..78285affa1c399ae107a9172fb85cf257457c368 100644 --- a/tensorflow/compiler/tf2xla/kernels/concat_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/concat_op.cc @@ -54,7 +54,7 @@ class ConcatBaseOp : public XlaOpKernel { // TODO(annarev): add a helper to support int64 input. const int32 concat_dim = literal.Get({}); - std::vector values; + std::vector values; std::vector shapes; OP_REQUIRES_OK(ctx, ctx->InputList("values", &values, &shapes)); const int N = values.size(); @@ -70,13 +70,13 @@ class ConcatBaseOp : public XlaOpKernel { "[", -input_dims, ", ", input_dims, "), but got ", concat_dim)); - // Make a vector holding the ComputationDataHandles for each of - // the inputs that has non-zero elements. - std::vector input_data; + // Make a vector holding the XlaOp for each of the inputs that has non-zero + // elements. 
+ std::vector input_data; int output_concat_dim = 0; const bool input_is_scalar = IsLegacyScalar(input_shape); for (int i = 0; i < N; ++i) { - xla::ComputationDataHandle handle = values[i]; + xla::XlaOp handle = values[i]; const TensorShape& in_shape = shapes[i]; const bool in_is_scalar = IsLegacyScalar(in_shape); OP_REQUIRES( diff --git a/tensorflow/compiler/tf2xla/kernels/const_op.cc b/tensorflow/compiler/tf2xla/kernels/const_op.cc index 8f78b4c8f90cf00d5fa9ba71a78bb1c0fe280dc6..59d06c654de18c9003fe0bdc706d0c2443de6d7b 100644 --- a/tensorflow/compiler/tf2xla/kernels/const_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/const_op.cc @@ -45,7 +45,7 @@ class ConstOp : public XlaOpKernel { ctx->SetInvalidOutput(0); return; } - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); // To avoid blowups for large constants filled with the same value, // recognize that case and emit a scalar broadcast instead. diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index c0ee0c9c2ea849a692bee70bba36d32335eed9b5..627bad12f33c82e91bc3c6f3323f562bc8174056 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -47,9 +47,8 @@ TensorShape ExpandedFilterShapeForDepthwiseConvolution( } // Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution. -xla::ComputationDataHandle CreateExpandedZero( - const TensorShape& filter_shape, DataType dtype, - xla::ComputationBuilder* builder) { +xla::XlaOp CreateExpandedZero(const TensorShape& filter_shape, DataType dtype, + xla::XlaBuilder* builder) { TensorShape expanded_filter_shape = ExpandedFilterShapeForDepthwiseConvolution(filter_shape); return builder->Broadcast(XlaHelpers::Zero(builder, dtype), @@ -87,8 +86,8 @@ xla::ComputationDataHandle CreateExpandedZero( // // Finally compare A and broadcasted B in dimension 2 amd return the result at // the beginning of the comment. 
-xla::ComputationDataHandle CreateExpandedFilterMask( - const TensorShape& filter_shape, xla::ComputationBuilder* builder) { +xla::XlaOp CreateExpandedFilterMask(const TensorShape& filter_shape, + xla::XlaBuilder* builder) { TensorShape expanded_filter_shape = ExpandedFilterShapeForDepthwiseConvolution(filter_shape); int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1); @@ -96,11 +95,11 @@ xla::ComputationDataHandle CreateExpandedFilterMask( // Create a M sized linspace and an M*N sized linspace that will be // broadcasted into perpendicular dimensions and compared. - xla::ComputationDataHandle input_feature_iota; + xla::XlaOp input_feature_iota; // DT_INT32 Iota will always return status::OK(). TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, input_feature, &input_feature_iota)); - xla::ComputationDataHandle expanded_feature_iota; + xla::XlaOp expanded_feature_iota; TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, input_feature * depthwise_multiplier, &expanded_feature_iota)); @@ -126,10 +125,10 @@ xla::ComputationDataHandle CreateExpandedFilterMask( // Expands a filter of shape [H, W, ..., M, N] to [H, W, ..., M, M*N] by adding // zeros for the cross-depth filters. Used to build a depthwise convolution. -xla::ComputationDataHandle ExpandFilterForDepthwiseConvolution( - const TensorShape& filter_shape, DataType dtype, - const xla::ComputationDataHandle& filter, - xla::ComputationBuilder* builder) { +xla::XlaOp ExpandFilterForDepthwiseConvolution(const TensorShape& filter_shape, + DataType dtype, + const xla::XlaOp& filter, + xla::XlaBuilder* builder) { int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1); int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2); TensorShape expanded_filter_shape = @@ -156,10 +155,11 @@ xla::ComputationDataHandle ExpandFilterForDepthwiseConvolution( } // Inverse of ExpandFilterForDepthwiseConvolution. 
-xla::ComputationDataHandle ContractFilterForDepthwiseBackprop( - XlaOpKernelContext* ctx, const TensorShape& filter_shape, DataType dtype, - const xla::ComputationDataHandle& filter_backprop, - xla::ComputationBuilder* builder) { +xla::XlaOp ContractFilterForDepthwiseBackprop(XlaOpKernelContext* ctx, + const TensorShape& filter_shape, + DataType dtype, + const xla::XlaOp& filter_backprop, + xla::XlaBuilder* builder) { TensorShape expanded_filter_shape = ExpandedFilterShapeForDepthwiseConvolution(filter_shape); auto masked_expanded_filter = builder->Select( @@ -248,9 +248,9 @@ class ConvOp : public XlaOpKernel { "input and filter must have the same depth: ", in_depth, " vs ", input_shape.dim_size(feature_dim))); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); - xla::ComputationDataHandle filter = ctx->Input(1); + xla::XlaOp filter = ctx->Input(1); TensorShape expanded_filter_shape = filter_shape; if (depthwise_) { filter = ExpandFilterForDepthwiseConvolution( @@ -288,7 +288,7 @@ class ConvOp : public XlaOpKernel { &unused_output_size, &padding[i].first, &padding[i].second)); } - xla::ComputationDataHandle conv = + xla::XlaOp conv = b->ConvGeneralDilated(ctx->Input(0), filter, window_strides, padding, lhs_dilation, rhs_dilation, dims); ctx->SetOutput(0, conv); @@ -391,7 +391,7 @@ class ConvBackpropInputOp : public XlaOpKernel { expanded_filter_shape, out_backprop_shape, dilations_, strides_, padding_, data_format_, &dims)); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); auto filter = ctx->Input(1); auto out_backprop = ctx->Input(2); @@ -435,12 +435,11 @@ class ConvBackpropInputOp : public XlaOpKernel { } // Mirror the filter in the spatial dimensions. 
- xla::ComputationDataHandle mirrored_weights = - b->Rev(filter, kernel_spatial_dims); + xla::XlaOp mirrored_weights = b->Rev(filter, kernel_spatial_dims); // activation gradients // = gradients (with padding and dilation) mirrored_weights - xla::ComputationDataHandle in_backprop = b->ConvGeneralDilated( + xla::XlaOp in_backprop = b->ConvGeneralDilated( out_backprop, mirrored_weights, /*window_strides=*/ones, padding, lhs_dilation, rhs_dilation, dnums); @@ -546,9 +545,9 @@ class ConvBackpropFilterOp : public XlaOpKernel { expanded_filter_shape, out_backprop_shape, dilations_, strides_, padding_, data_format_, &dims)); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle activations = ctx->Input(0); - xla::ComputationDataHandle gradients = ctx->Input(2); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp activations = ctx->Input(0); + xla::XlaOp gradients = ctx->Input(2); // The filter gradients are computed by a convolution of the input // activations and the output gradients, with some appropriate padding. diff --git a/tensorflow/compiler/tf2xla/kernels/cross_op.cc b/tensorflow/compiler/tf2xla/kernels/cross_op.cc index 3df8c00f1b83556d7d954aedc8eeac0728251c3e..7fcd4170fb79a574663c1abffe873d4b53f471d3 100644 --- a/tensorflow/compiler/tf2xla/kernels/cross_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/cross_op.cc @@ -53,7 +53,7 @@ class CrossOp : public XlaOpKernel { } std::vector strides(in0_shape.dims(), 1); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); auto in0 = ctx->Input(0); auto in1 = ctx->Input(1); starts.back() = 0; diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc index 0cf03ceb948a5165a71e902eef5264eaddbd71e9..01aa1a83e7967921f1583b3ef18ec57e452dcfea 100644 --- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc @@ -22,7 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/util/bcast.h" @@ -75,7 +75,7 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) { } // Call virtual method to emit the computation. - xla::ComputationDataHandle output = + xla::XlaOp output = Computation(ctx, lhs_handle, lhs_shape.dim_sizes(), rhs_handle, rhs_shape.dim_sizes(), bcast, extend_dimension); @@ -85,11 +85,9 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) { ctx->SetOutput(0, output); } -/* static */ std::pair -XlaBinaryOp::Broadcast(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& lhs, - const xla::ComputationDataHandle& rhs, - const BCast& broadcast_helper) { +/* static */ std::pair XlaBinaryOp::Broadcast( + xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs, + const BCast& broadcast_helper) { // Manually construct the broadcasting since MapN does not do // automatic broadcasting. The bcast helper ensures that // lhs.reshape(bcast.x_reshape()).broadcast(bcast.x_bcast()) and diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h index 5bc1d5fb1f08fb576df654e1f4068b6be9114096..4f92dbc8740b697322424058530b8477c35d809a 100644 --- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h +++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/util/bcast.h" @@ -30,7 +30,7 @@ namespace tensorflow { // inputs that can be broadcast to the same shape. The base class // contains pure virtual methods to override: description is a textual // description of the operation; and Computation adds the -// implementation of the operation to a xla::ComputationBuilder. For most +// implementation of the operation to a xla::XlaBuilder. For most // arithmetic Ops XLA handles the broadcasting automatically given the input // tensors. class XlaBinaryOp : public XlaOpKernel { @@ -55,10 +55,9 @@ class XlaBinaryOp : public XlaOpKernel { // higher-rank input should be matched when broadcasting the // lower-rank input. See comment below and the documentation on broadcasting // in the XLA documentation. - virtual xla::ComputationDataHandle Computation( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& lhs, - const gtl::ArraySlice& lhs_shape, - const xla::ComputationDataHandle& rhs, + virtual xla::XlaOp Computation( + XlaOpKernelContext* ctx, const xla::XlaOp& lhs, + const gtl::ArraySlice& lhs_shape, const xla::XlaOp& rhs, const gtl::ArraySlice& rhs_shape, const BCast& broadcast_helper, const std::vector& extend_dimensions) = 0; @@ -67,11 +66,9 @@ class XlaBinaryOp : public XlaOpKernel { // Helper function that performs the broadcasting described by // 'broadcast_helper', yielding arguments 'lhs' and 'rhs' that have the same // shape. 
- static std::pair - Broadcast(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& lhs, - const xla::ComputationDataHandle& rhs, - const BCast& broadcast_helper); + static std::pair Broadcast( + xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs, + const BCast& broadcast_helper); }; } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc index 96d7809f7995634b6bc31ab801b93526d9da7e6f..23243f62462c6315e359d9621823b19fc98c6218 100644 --- a/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/depthtospace_op.cc @@ -50,8 +50,8 @@ class DepthToSpaceOp : public XlaOpKernel { const gtl::InlinedVector input_shape = input_tensor_shape.dim_sizes(); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp input = ctx->Input(0); int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_); int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_); @@ -130,7 +130,7 @@ class DepthToSpaceOp : public XlaOpKernel { ") is not divisible by square of the block size (", block_size_, ")")); - xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape); + xla::XlaOp reshaped = b->Reshape(input, reshaped_shape); // 2. Permute dimensions of `reshaped` to produce // `permuted_reshaped` of shape: @@ -141,8 +141,7 @@ class DepthToSpaceOp : public XlaOpKernel { // input_shape[2], // block_size_, // depth / (block_size_ * block_size_)] - xla::ComputationDataHandle permuted_reshaped = - b->Transpose(reshaped, transpose_order); + xla::XlaOp permuted_reshaped = b->Transpose(reshaped, transpose_order); // 3. 
Reshape `permuted_reshaped` to flatten `block_shape` into the // batch dimension, producing an output tensor of shape: @@ -152,8 +151,7 @@ class DepthToSpaceOp : public XlaOpKernel { // input_shape[2] * block_size_, // depth / (block_size_ * block_size_)] // - xla::ComputationDataHandle output = - b->Reshape(permuted_reshaped, output_shape); + xla::XlaOp output = b->Reshape(permuted_reshaped, output_shape); ctx->SetOutput(0, output); } diff --git a/tensorflow/compiler/tf2xla/kernels/diag_op.cc b/tensorflow/compiler/tf2xla/kernels/diag_op.cc index 765ea922a532a085a552192348ab360c4c30ff0a..931705ba837153e1175cd9a209876ef5ec93f0fc 100644 --- a/tensorflow/compiler/tf2xla/kernels/diag_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/diag_op.cc @@ -25,10 +25,10 @@ namespace tensorflow { namespace { // Create a diagonal / batch diagonal matrix with 'input' on the diagonal. -xla::StatusOr CreateDiagonal( - const xla::ComputationDataHandle& input, int64 last_dim_size, +xla::StatusOr CreateDiagonal( + const xla::XlaOp& input, int64 last_dim_size, tensorflow::gtl::ArraySlice other_dims, XlaOpKernelContext* ctx, - xla::ComputationBuilder* builder) { + xla::XlaBuilder* builder) { // Create two matrices that have the following forms, and compare them: // // [[0, 0, 0, 0] [[0, 1, 2, 3] @@ -38,12 +38,11 @@ xla::StatusOr CreateDiagonal( // // This produces a predicate matrix of the right size, with "true" on the // diagonal. - xla::ComputationDataHandle iota; + xla::XlaOp iota; TF_RETURN_IF_ERROR( XlaHelpers::Iota(builder, DataType::DT_INT32, last_dim_size, &iota)); - xla::ComputationDataHandle iota_broadcast = - builder->Broadcast(iota, {last_dim_size}); - xla::ComputationDataHandle mask = builder->Eq(iota_broadcast, iota, {0}); + xla::XlaOp iota_broadcast = builder->Broadcast(iota, {last_dim_size}); + xla::XlaOp mask = builder->Eq(iota_broadcast, iota, {0}); // If this is a batched diagonal, broadcast the mask across the other // dimensions. 
@@ -65,8 +64,7 @@ xla::StatusOr CreateDiagonal( std::vector broadcast_dims(other_dims.begin(), other_dims.end()); broadcast_dims.push_back(1LL); broadcast_dims.push_back(last_dim_size); - xla::ComputationDataHandle input_broadcast = - builder->Reshape(input, broadcast_dims); + xla::XlaOp input_broadcast = builder->Reshape(input, broadcast_dims); broadcast_dims[broadcast_dims.size() - 2] = last_dim_size; xla::PrimitiveType element_type; @@ -74,7 +72,7 @@ xla::StatusOr CreateDiagonal( DataTypeToPrimitiveType(ctx->input_type(0), &element_type)); auto broadcast_shape = xla::ShapeUtil::MakeShape(element_type, broadcast_dims); - xla::ComputationDataHandle zeros = Zeros(builder, broadcast_shape); + xla::XlaOp zeros = Zeros(builder, broadcast_shape); input_broadcast = builder->Add(input_broadcast, zeros); return builder->Select(mask, input_broadcast, zeros); @@ -85,7 +83,7 @@ class DiagOp : public XlaOpKernel { explicit DiagOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); OP_REQUIRES(ctx, ctx->num_inputs() >= 1, errors::InvalidArgument("Diag op must have at an input")); @@ -96,7 +94,7 @@ class DiagOp : public XlaOpKernel { errors::InvalidArgument("Expected 1 <= dims, got shape ", input_shape.DebugString())); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); // Picture: // tf.diag([1, 2, 3, 4]) ==> [[1, 0, 0, 0] @@ -112,7 +110,7 @@ class DiagOp : public XlaOpKernel { auto diag_or_status = CreateDiagonal(input, size, /*other_dims=*/{}, ctx, builder); OP_REQUIRES_OK(ctx, diag_or_status.status()); - xla::ComputationDataHandle diag = diag_or_status.ValueOrDie(); + xla::XlaOp diag = diag_or_status.ValueOrDie(); // Reshapes to the final shape. 
std::vector new_dims(dims.size() * 2); @@ -131,7 +129,7 @@ class DiagPartOp : public XlaOpKernel { explicit DiagPartOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); const TensorShape input_shape = ctx->InputShape(0); auto dims = input_shape.dim_sizes(); @@ -158,7 +156,7 @@ class DiagPartOp : public XlaOpKernel { new_dims.push_back(dims[i]); } - xla::ComputationDataHandle diag = ctx->Input(0); + xla::XlaOp diag = ctx->Input(0); // TODO(b/30878775): use Slice with strides when supported, in place of // the Pad -> Reshape -> Slice. @@ -199,7 +197,7 @@ class MatrixDiagOp : public XlaOpKernel { explicit MatrixDiagOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); OP_REQUIRES(ctx, ctx->num_inputs() >= 1, errors::InvalidArgument("MatrixDiag op must have at an input")); @@ -210,7 +208,7 @@ class MatrixDiagOp : public XlaOpKernel { errors::InvalidArgument("Expected 1 <= dims, got shape ", input_shape.DebugString())); - xla::ComputationDataHandle diag = ctx->Input(0); + xla::XlaOp diag = ctx->Input(0); int last_dim = dims.size() - 1; int64 last_dim_size = input_shape.dim_size(last_dim); @@ -232,7 +230,7 @@ class MatrixDiagPartOp : public XlaOpKernel { explicit MatrixDiagPartOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); const TensorShape input_shape = ctx->InputShape(0); auto dims = input_shape.dim_sizes(); @@ -241,7 +239,7 @@ class MatrixDiagPartOp : public XlaOpKernel { errors::InvalidArgument("Expected 2 <= dims, got shape ", input_shape.DebugString())); - xla::ComputationDataHandle diag = ctx->Input(0); + xla::XlaOp diag = 
ctx->Input(0); int last_dim = dims.size() - 1; int64 last_dim_size = dims[last_dim]; diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc index 800ef5ab98d70ad822c6efffb33db28b46ae50fe..0419de78b2ee83fd395e8bf23444fde84f30bba2 100644 --- a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/compiler/tf2xla/type_util.h" @@ -57,7 +57,7 @@ class DynamicUpdateSliceOp : public XlaOpKernel { input_shape.DebugString(), "; update shape is ", update_shape.DebugString())); - xla::ComputationDataHandle result = ctx->builder()->DynamicUpdateSlice( + xla::XlaOp result = ctx->builder()->DynamicUpdateSlice( ctx->Input(0), ctx->Input(1), ctx->Input(2)); ctx->SetOutput(0, result); } diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc index f2cd21ffb9ce88747c04f3c71e66dadeb1faf0f9..dd4a16908779508380b36f43ce2306ff2f5fb8c4 100644 --- a/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/dynamic_stitch_op.cc @@ -56,7 +56,7 @@ class DynamicStitchOp : public XlaOpKernel { std::vector indices_input; OP_REQUIRES_OK(ctx, ctx->ConstantInputList("indices", &indices_input)); - std::vector data; + std::vector data; std::vector data_shapes; OP_REQUIRES_OK(ctx, ctx->InputList("data", &data, &data_shapes)); @@ -136,7 +136,7 @@ class DynamicStitchOp : public XlaOpKernel { // Look up all the children expressions that represent the data // 
inputs. - std::vector input(indices.size()); + std::vector input(indices.size()); for (int input_num = 0; input_num < indices.size(); input_num++) { TensorShape new_shape; // first reshaped dimension is the number of indices for this input. @@ -166,7 +166,7 @@ class DynamicStitchOp : public XlaOpKernel { for (int d = indices0_shape.dims(); d < data0_shape.dims(); d++) { slice_limit[1 + d - indices0_shape.dims()] = data0_shape.dim_size(d); } - std::vector to_concat(number_of_indices); + std::vector to_concat(number_of_indices); for (int index_num = 0; index_num < number_of_indices; index_num++) { const auto& expression = input[src_input_vector[index_num]]; // Take the appropriate slice of data. diff --git a/tensorflow/compiler/tf2xla/kernels/elu_op.cc b/tensorflow/compiler/tf2xla/kernels/elu_op.cc index 2fd27c5ca7e87c8b387d9d0854b787d30e7f7b6f..ed7462c16615f7f63a174e29843c2a1675c17058 100644 --- a/tensorflow/compiler/tf2xla/kernels/elu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/elu_op.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" @@ -32,7 +32,7 @@ class EluOp : public XlaOpKernel { explicit EluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} // Computes the max of the scalar input x and 0. 
void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const auto zero = XlaHelpers::Zero(b, input_type(0)); const auto one = XlaHelpers::One(b, input_type(0)); const auto pred = b->Gt(ctx->Input(0), zero); @@ -47,7 +47,7 @@ class EluGradOp : public XlaOpKernel { // Return the lhs (incoming gradient) if the rhs (input feature) > 0, // otherwise return lhs * (1 + rhs). void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const auto zero = XlaHelpers::Zero(b, input_type(0)); const auto one = XlaHelpers::One(b, input_type(0)); const auto grad = ctx->Input(0); @@ -66,7 +66,7 @@ class SeluOp : public XlaOpKernel { explicit SeluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} // Computes the max of the scalar input x and 0. void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const auto zero = XlaHelpers::Zero(b, input_type(0)); const auto one = XlaHelpers::One(b, input_type(0)); const auto scale = XlaHelpers::FloatLiteral(b, input_type(0), @@ -86,9 +86,8 @@ class SeluGradOp : public XlaOpKernel { // Return the lhs (incoming gradient) if the rhs (input feature) > 0, // otherwise return lhs * (1 + rhs). 
void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const auto zero = XlaHelpers::Zero(b, input_type(0)); - const auto one = XlaHelpers::One(b, input_type(0)); const auto scale = XlaHelpers::FloatLiteral(b, input_type(0), 1.0507009873554804934193349852946); const auto scale_alpha = XlaHelpers::FloatLiteral(b, input_type(0), diff --git a/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc b/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc index b2970eae20a3fb71f06619f476a49d41b22bca56..6df01cabbf1d98c0299bfd808bcc6db6223c4777 100644 --- a/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/extract_image_patches_op.cc @@ -93,7 +93,7 @@ class ExtractImagePatchesOp : public XlaOpKernel { input_shape.DebugString())); const int64 depth = input_shape.dim_size(feature_dim); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); // The following code is equivalent to: // eye = np.eye(kH * kW * D).reshape([kH, kW, D, kH * kW * kD]) @@ -110,7 +110,7 @@ class ExtractImagePatchesOp : public XlaOpKernel { // Builds an identity matrix as a broadcast equality of iotas. 
// iota = np.arange(np.prod(ksize), depth) // filter = np.equal(np.reshape(iota, [-1, 1]), iota).astype(np.float32) - xla::ComputationDataHandle iota; + xla::XlaOp iota; TF_CHECK_OK(XlaHelpers::Iota(builder, DataType::DT_INT32, kernel_size * depth, &iota)); @@ -147,7 +147,7 @@ class ExtractImagePatchesOp : public XlaOpKernel { &padding[i].first, &padding[i].second)); } - xla::ComputationDataHandle conv = + xla::XlaOp conv = builder->ConvGeneralDilated(ctx->Input(0), filter, window_strides, padding, lhs_dilation, rhs_dilation, dims); ctx->SetOutput(0, conv); diff --git a/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc index 99470d70e709ddb5593c5eaae061bb897befc168..8f0de0a524c908b598c1a2165a462275346ad137 100644 --- a/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc @@ -44,23 +44,20 @@ void CpuNudge(const float min, const float max, const float quant_min, } // An XLA version of CpuNudge(). 
-void XlaNudge(xla::ComputationBuilder* b, const DataType data_type, - const xla::ComputationDataHandle& min, - const xla::ComputationDataHandle& max, +void XlaNudge(xla::XlaBuilder* b, const DataType data_type, + const xla::XlaOp& min, const xla::XlaOp& max, const float quant_min_value, const float quant_max_value, - xla::ComputationDataHandle* nudged_min, - xla::ComputationDataHandle* nudged_max, - xla::ComputationDataHandle* scale) { + xla::XlaOp* nudged_min, xla::XlaOp* nudged_max, + xla::XlaOp* scale) { *scale = b->Div(b->Sub(max, min), XlaHelpers::FloatLiteral(b, data_type, quant_max_value - quant_min_value)); - xla::ComputationDataHandle quant_min = + xla::XlaOp quant_min = XlaHelpers::FloatLiteral(b, data_type, quant_min_value); - xla::ComputationDataHandle zero_point_from_min = - b->Sub(quant_min, b->Div(min, *scale)); - xla::ComputationDataHandle quant_max = + xla::XlaOp zero_point_from_min = b->Sub(quant_min, b->Div(min, *scale)); + xla::XlaOp quant_max = XlaHelpers::FloatLiteral(b, data_type, quant_max_value); - xla::ComputationDataHandle nudged_zero_point = + xla::XlaOp nudged_zero_point = b->Select(b->Le(zero_point_from_min, quant_min), quant_min, b->Select(b->Ge(zero_point_from_min, quant_max), quant_max, b->Round(zero_point_from_min))); @@ -68,22 +65,18 @@ void XlaNudge(xla::ComputationBuilder* b, const DataType data_type, *nudged_max = b->Mul(b->Sub(quant_max, nudged_zero_point), *scale); } -xla::ComputationDataHandle Quantize( - xla::ComputationBuilder* b, const xla::ComputationDataHandle& input, - const DataType data_type, - const xla::ComputationDataHandle& nudged_input_min, - const xla::ComputationDataHandle& nudged_input_max, - const xla::ComputationDataHandle& input_scale) { - xla::ComputationDataHandle one = XlaHelpers::FloatLiteral(b, data_type, 1.0f); - xla::ComputationDataHandle inv_scale = b->Div(one, input_scale); - xla::ComputationDataHandle half = - XlaHelpers::FloatLiteral(b, data_type, 0.5f); - - xla::ComputationDataHandle clamped = 
- b->Clamp(nudged_input_min, input, nudged_input_max); - xla::ComputationDataHandle clamped_shifted = - b->Sub(clamped, nudged_input_min); - xla::ComputationDataHandle rounded = +xla::XlaOp Quantize(xla::XlaBuilder* b, const xla::XlaOp& input, + const DataType data_type, + const xla::XlaOp& nudged_input_min, + const xla::XlaOp& nudged_input_max, + const xla::XlaOp& input_scale) { + xla::XlaOp one = XlaHelpers::FloatLiteral(b, data_type, 1.0f); + xla::XlaOp inv_scale = b->Div(one, input_scale); + xla::XlaOp half = XlaHelpers::FloatLiteral(b, data_type, 0.5f); + + xla::XlaOp clamped = b->Clamp(nudged_input_min, input, nudged_input_max); + xla::XlaOp clamped_shifted = b->Sub(clamped, nudged_input_min); + xla::XlaOp rounded = b->Floor(b->Add(b->Mul(clamped_shifted, inv_scale), half)); return b->Add(b->Mul(rounded, input_scale), nudged_input_min); } @@ -111,18 +104,18 @@ class FakeQuantWithMinMaxArgsOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); const DataType data_type = ctx->input_type(0); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle nudged_input_min = + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp nudged_input_min = XlaHelpers::FloatLiteral(b, data_type, nudged_input_min_); - xla::ComputationDataHandle nudged_input_max = + xla::XlaOp nudged_input_max = XlaHelpers::FloatLiteral(b, data_type, nudged_input_max_); - xla::ComputationDataHandle input_scale = + xla::XlaOp input_scale = XlaHelpers::FloatLiteral(b, data_type, input_scale_); - xla::ComputationDataHandle output = Quantize( - b, input, data_type, nudged_input_min, nudged_input_max, input_scale); + xla::XlaOp output = Quantize(b, input, data_type, nudged_input_min, + nudged_input_max, input_scale); ctx->SetOutput(0, output); } @@ -159,23 +152,22 @@ class FakeQuantWithMinMaxArgsGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - 
xla::ComputationDataHandle gradient = ctx->Input(0); + xla::XlaOp gradient = ctx->Input(0); const TensorShape gradient_shape = ctx->InputShape(0); - xla::ComputationDataHandle input = ctx->Input(1); + xla::XlaOp input = ctx->Input(1); const DataType data_type = ctx->input_type(1); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle nudged_input_min = + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp nudged_input_min = XlaHelpers::FloatLiteral(b, data_type, nudged_input_min_); - xla::ComputationDataHandle nudged_input_max = + xla::XlaOp nudged_input_max = XlaHelpers::FloatLiteral(b, data_type, nudged_input_max_); - xla::ComputationDataHandle between_nudged_min_max = + xla::XlaOp between_nudged_min_max = b->And(b->Le(nudged_input_min, input), b->Le(input, nudged_input_max)); - xla::ComputationDataHandle zeroes = b->Broadcast( - XlaHelpers::Zero(b, data_type), gradient_shape.dim_sizes()); - xla::ComputationDataHandle output = - b->Select(between_nudged_min_max, gradient, zeroes); + xla::XlaOp zeroes = b->Broadcast(XlaHelpers::Zero(b, data_type), + gradient_shape.dim_sizes()); + xla::XlaOp output = b->Select(between_nudged_min_max, gradient, zeroes); ctx->SetOutput(0, output); } @@ -204,18 +196,18 @@ class FakeQuantWithMinMaxVarsOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); const DataType data_type = ctx->input_type(0); - xla::ComputationDataHandle input_min = ctx->Input(1); - xla::ComputationDataHandle input_max = ctx->Input(2); + xla::XlaOp input_min = ctx->Input(1); + xla::XlaOp input_max = ctx->Input(2); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle nudged_input_min, nudged_input_max, input_scale; + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp nudged_input_min, nudged_input_max, input_scale; XlaNudge(b, data_type, input_min, input_max, quant_min_, quant_max_, &nudged_input_min, 
&nudged_input_max, &input_scale); - xla::ComputationDataHandle output = Quantize( - b, input, data_type, nudged_input_min, nudged_input_max, input_scale); + xla::XlaOp output = Quantize(b, input, data_type, nudged_input_min, + nudged_input_max, input_scale); ctx->SetOutput(0, output); } @@ -243,47 +235,43 @@ class FakeQuantWithMinMaxVarsGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle gradient = ctx->Input(0); + xla::XlaOp gradient = ctx->Input(0); const TensorShape gradient_shape = ctx->InputShape(0); - xla::ComputationDataHandle input = ctx->Input(1); + xla::XlaOp input = ctx->Input(1); const DataType data_type = ctx->input_type(1); const DataType accumulation_type = XlaHelpers::SumAccumulationType(data_type); - xla::ComputationDataHandle input_min = ctx->Input(2); - xla::ComputationDataHandle input_max = ctx->Input(3); + xla::XlaOp input_min = ctx->Input(2); + xla::XlaOp input_max = ctx->Input(3); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle nudged_input_min, nudged_input_max, input_scale; + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp nudged_input_min, nudged_input_max, input_scale; XlaNudge(b, data_type, input_min, input_max, quant_min_, quant_max_, &nudged_input_min, &nudged_input_max, &input_scale); - xla::ComputationDataHandle between_nudged_min_max = + xla::XlaOp between_nudged_min_max = b->And(b->Le(nudged_input_min, input), b->Le(input, nudged_input_max)); - xla::ComputationDataHandle zero = XlaHelpers::Zero(b, data_type); - xla::ComputationDataHandle zeroes = - b->Broadcast(zero, gradient_shape.dim_sizes()); - xla::ComputationDataHandle output0 = - b->Select(between_nudged_min_max, gradient, zeroes); + xla::XlaOp zero = XlaHelpers::Zero(b, data_type); + xla::XlaOp zeroes = b->Broadcast(zero, gradient_shape.dim_sizes()); + xla::XlaOp output0 = b->Select(between_nudged_min_max, gradient, zeroes); ctx->SetOutput(0, output0); - xla::ComputationDataHandle 
below_min = b->Lt(input, nudged_input_min); - xla::ComputationDataHandle select1 = b->Select(below_min, gradient, zeroes); - xla::ComputationDataHandle reduce1 = b->ReduceAll( + xla::XlaOp below_min = b->Lt(input, nudged_input_min); + xla::XlaOp select1 = b->Select(below_min, gradient, zeroes); + xla::XlaOp reduce1 = b->ReduceAll( XlaHelpers::ConvertElementType(b, select1, accumulation_type), XlaHelpers::Zero(b, accumulation_type), *ctx->GetOrCreateAdd(accumulation_type)); - xla::ComputationDataHandle output1 = - XlaHelpers::ConvertElementType(b, reduce1, data_type); + xla::XlaOp output1 = XlaHelpers::ConvertElementType(b, reduce1, data_type); ctx->SetOutput(1, output1); - xla::ComputationDataHandle above_max = b->Gt(input, nudged_input_max); - xla::ComputationDataHandle select2 = b->Select(above_max, gradient, zeroes); - xla::ComputationDataHandle reduce2 = b->ReduceAll( + xla::XlaOp above_max = b->Gt(input, nudged_input_max); + xla::XlaOp select2 = b->Select(above_max, gradient, zeroes); + xla::XlaOp reduce2 = b->ReduceAll( XlaHelpers::ConvertElementType(b, select2, accumulation_type), XlaHelpers::Zero(b, accumulation_type), *ctx->GetOrCreateAdd(accumulation_type)); - xla::ComputationDataHandle output2 = - XlaHelpers::ConvertElementType(b, reduce2, data_type); + xla::XlaOp output2 = XlaHelpers::ConvertElementType(b, reduce2, data_type); ctx->SetOutput(2, output2); } diff --git a/tensorflow/compiler/tf2xla/kernels/fft_ops.cc b/tensorflow/compiler/tf2xla/kernels/fft_ops.cc index a4f3c1c3ad9a928e0552c388a25ed9fcb08edabb..933924cad1c7cac2879bd4720cb21ffc33c23f50 100644 --- a/tensorflow/compiler/tf2xla/kernels/fft_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/fft_ops.cc @@ -62,9 +62,8 @@ class GenericFftOp : public XlaOpKernel { } } - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle fft = - b->Fft(ctx->Input(0), fft_type_, fft_length); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp fft = b->Fft(ctx->Input(0), fft_type_, fft_length); 
ctx->SetOutput(0, fft); } @@ -82,9 +81,11 @@ class FFTOp : public GenericFftOp { explicit FFTOp(OpKernelConstruction* ctx) : GenericFftOp(ctx, /*fft_type=*/FftType::FFT, /*fft_rank=*/FFTRank) {} }; -REGISTER_XLA_OP(Name("FFT"), FFTOp<1>); -REGISTER_XLA_OP(Name("FFT2D"), FFTOp<2>); -REGISTER_XLA_OP(Name("FFT3D"), FFTOp<3>); +REGISTER_XLA_OP(Name("FFT").TypeConstraint("Tcomplex", DT_COMPLEX64), FFTOp<1>); +REGISTER_XLA_OP(Name("FFT2D").TypeConstraint("Tcomplex", DT_COMPLEX64), + FFTOp<2>); +REGISTER_XLA_OP(Name("FFT3D").TypeConstraint("Tcomplex", DT_COMPLEX64), + FFTOp<3>); template class IFFTOp : public GenericFftOp { @@ -92,9 +93,12 @@ class IFFTOp : public GenericFftOp { explicit IFFTOp(OpKernelConstruction* ctx) : GenericFftOp(ctx, /*fft_type=*/FftType::IFFT, /*fft_rank=*/FFTRank) {} }; -REGISTER_XLA_OP(Name("IFFT"), IFFTOp<1>); -REGISTER_XLA_OP(Name("IFFT2D"), IFFTOp<2>); -REGISTER_XLA_OP(Name("IFFT3D"), IFFTOp<3>); +REGISTER_XLA_OP(Name("IFFT").TypeConstraint("Tcomplex", DT_COMPLEX64), + IFFTOp<1>); +REGISTER_XLA_OP(Name("IFFT2D").TypeConstraint("Tcomplex", DT_COMPLEX64), + IFFTOp<2>); +REGISTER_XLA_OP(Name("IFFT3D").TypeConstraint("Tcomplex", DT_COMPLEX64), + IFFTOp<3>); template class RFFTOp : public GenericFftOp { diff --git a/tensorflow/compiler/tf2xla/kernels/fill_op.cc b/tensorflow/compiler/tf2xla/kernels/fill_op.cc index eaa13b8dfacce9aaca42ce5fcdfa467ce7fa7b7f..e4467a0fb138ed7919af62ed032c0f5abee3e4f6 100644 --- a/tensorflow/compiler/tf2xla/kernels/fill_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/fill_op.cc @@ -48,7 +48,7 @@ class FillOp : public XlaOpKernel { 0, {dims_shape.num_elements()}, &dims_literal)); // Convert the dims literal into a vector that we can pass to - // ComputationBuilder. + // XlaBuilder. 
std::vector broadcast; broadcast.reserve(dims_literal.shape().dimensions(0)); for (int i = 0; i < dims_literal.shape().dimensions(0); ++i) { @@ -56,7 +56,7 @@ class FillOp : public XlaOpKernel { } // Look up the value input, reshaping to a scalar if it was a // 'legacy' scalar (secretly a vector). - xla::ComputationDataHandle data = ctx->Input(1); + xla::XlaOp data = ctx->Input(1); if (value_shape.dims() > 0) { CHECK_EQ(value_shape.dims(), 1); data = ctx->builder()->Reshape(data, {}); diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc index 0b79cb0916ee8a7d0e26c5dc12557639336f8ab1..d13e25bcddae16d0cd630403219657121b80868d 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc @@ -26,13 +26,11 @@ limitations under the License. namespace tensorflow { -Status XlaGather(const xla::ComputationDataHandle& input, - const TensorShape& input_shape, - const xla::ComputationDataHandle& indices, - const TensorShape& indices_shape, int64 axis, - bool indices_are_nd, DataType dtype, DataType index_type, - xla::ComputationBuilder* builder, - xla::ComputationDataHandle* gather_output) { +Status XlaGather(const xla::XlaOp& input, const TensorShape& input_shape, + const xla::XlaOp& indices, const TensorShape& indices_shape, + int64 axis, bool indices_are_nd, DataType dtype, + DataType index_type, xla::XlaBuilder* builder, + xla::XlaOp* gather_output) { // There is no deep reason why we need this precondition, but this is the only // combination that is used and tested today. 
CHECK(!indices_are_nd || axis == 0); @@ -153,7 +151,7 @@ class GatherOp : public XlaOpKernel { explicit GatherOp(OpKernelConstruction* context) : XlaOpKernel(context) {} void Compile(XlaOpKernelContext* context) override { - xla::ComputationBuilder* builder = context->builder(); + xla::XlaBuilder* builder = context->builder(); auto input = context->Input(0); auto input_shape = context->InputShape(0); auto indices = context->Input(1); @@ -182,7 +180,7 @@ class GatherOp : public XlaOpKernel { OP_REQUIRES(context, index_type == DT_INT32 || index_type == DT_INT64, errors::InvalidArgument("indices must be int32 or int64")); - xla::ComputationDataHandle gather; + xla::XlaOp gather; OP_REQUIRES_OK( context, XlaGather(input, input_shape, indices, indices_shape, axis, /*indices_are_nd=*/false, input_type(0), index_type, @@ -220,10 +218,10 @@ class GatherNdOp : public XlaOpKernel { indices_shape.dim_size(indices_shape.dims() - 1), " vs. ", params_shape.dims())); - xla::ComputationBuilder* builder = context->builder(); + xla::XlaBuilder* builder = context->builder(); auto params = context->Input(0); auto indices = context->Input(1); - xla::ComputationDataHandle gather; + xla::XlaOp gather; OP_REQUIRES_OK(context, XlaGather(params, params_shape, indices, indices_shape, /*axis=*/0, /*indices_are_nd=*/true, params_type, diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h index f9376f0eabdc0f0c565eb4b9f86425de96b5aa22..d898e43b858bac706d524c7c271f48b1b5fa258f 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/util/bcast.h" @@ -33,13 +33,11 @@ namespace tensorflow { // If `indices_are_nd` is true, the last dimension of `indices` are treated as // a multidimensional index values. Otherwise, `indices` is treated as a tensor // of scalar indices. -Status XlaGather(const xla::ComputationDataHandle& input, - const TensorShape& input_shape, - const xla::ComputationDataHandle& indices, - const TensorShape& indices_shape, int64 axis, - bool indices_are_nd, DataType dtype, DataType index_type, - xla::ComputationBuilder* builder, - xla::ComputationDataHandle* gather_output); +Status XlaGather(const xla::XlaOp& input, const TensorShape& input_shape, + const xla::XlaOp& indices, const TensorShape& indices_shape, + int64 axis, bool indices_are_nd, DataType dtype, + DataType index_type, xla::XlaBuilder* builder, + xla::XlaOp* gather_output); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.cc b/tensorflow/compiler/tf2xla/kernels/if_op.cc index eefbe55c815d80a608bdf62d454a69d722adb158..8b9b026643cf35216a2082dfcce9270c017bd14f 100644 --- a/tensorflow/compiler/tf2xla/kernels/if_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/if_op.cc @@ -37,7 +37,7 @@ XlaIfOp::XlaIfOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { // TODO(b/35949885): There is duplication here with the handling of the // while_op. Refactor the common code out/rework. 
void XlaIfOp::Compile(XlaOpKernelContext* ctx) { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); OP_REQUIRES(ctx, cond_type_ == DT_BOOL, errors::InvalidArgument( @@ -48,7 +48,7 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { VLOG(1) << "Building If: " << input_types_.size() << " inputs"; - std::vector inputs(input_types_.size()); + std::vector inputs(input_types_.size()); std::vector arguments(input_types_.size()); for (int i = 0; i < input_types_.size(); ++i) { XlaCompiler::Argument& arg = arguments[i]; @@ -175,19 +175,19 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { "Mismatch in resource of then and else branch for resource ", i)); } - xla::ComputationDataHandle outputs = + xla::XlaOp outputs = b->Conditional(ctx->Input(0), b->Tuple(inputs), *then_result.computation, b->Tuple(inputs), *else_result.computation); // Sets non-variable outputs. for (int i = 0; i < output_types_.size(); ++i) { if (ctx->input_type(i) != DT_RESOURCE) { - xla::ComputationDataHandle output_handle = b->GetTupleElement(outputs, i); + xla::XlaOp output_handle = b->GetTupleElement(outputs, i); if (VLOG_IS_ON(2)) { LOG(INFO) << "Setting output " << i; auto shape_or = b->GetShape(output_handle); if (shape_or.ok()) { LOG(INFO) << "Shape for output " << i << ": " - << xla::ShapeUtil::HumanString(*shape_or.ValueOrDie()); + << xla::ShapeUtil::HumanString(shape_or.ValueOrDie()); } else { LOG(INFO) << "Shape unknown for output " << i; } diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc index 5eeda79a935e8194a596d322b52add27846d378c..1568b33679963c1a6630525f60560180d40b8d53 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc @@ -23,10 +23,9 @@ namespace { // Converts 'input' from RGB format to HSV format. // 'shape' is the shape of the red/green/blue tensors. 
-std::array RGBToHSV( - XlaOpKernelContext* ctx, xla::ComputationBuilder* b, - const std::array& rgb, DataType dtype, - const TensorShape& shape) { +std::array RGBToHSV(XlaOpKernelContext* ctx, xla::XlaBuilder* b, + const std::array& rgb, + DataType dtype, const TensorShape& shape) { auto zero = XlaHelpers::Zero(b, dtype); auto one = XlaHelpers::One(b, dtype); @@ -54,12 +53,12 @@ std::array RGBToHSV( } // Converts 'input' from HSV format to RGB format. -std::array HSVToRGB( - xla::ComputationBuilder* b, - const std::array& hsv, DataType dtype) { - xla::ComputationDataHandle hue = hsv[0]; - xla::ComputationDataHandle saturation = hsv[1]; - xla::ComputationDataHandle value = hsv[2]; +std::array HSVToRGB(xla::XlaBuilder* b, + const std::array& hsv, + DataType dtype) { + xla::XlaOp hue = hsv[0]; + xla::XlaOp saturation = hsv[1]; + xla::XlaOp value = hsv[2]; auto zero = XlaHelpers::Zero(b, dtype); auto one = XlaHelpers::FloatLiteral(b, dtype, 1.0); auto two = XlaHelpers::FloatLiteral(b, dtype, 2.0); @@ -95,16 +94,16 @@ class RGBToHSVOp : public XlaOpKernel { errors::FailedPrecondition("input must have 3 channels but input has ", channels, " channels.")); - xla::ComputationBuilder* b = context->builder(); - xla::ComputationDataHandle input = context->Input(0); + xla::XlaBuilder* b = context->builder(); + xla::XlaOp input = context->Input(0); - xla::ComputationDataHandle red = + xla::XlaOp red = b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle green = + xla::XlaOp green = b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle blue = + xla::XlaOp blue = b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1, /*dimno=*/channel_dim); TensorShape channel_shape = input_shape; @@ -133,15 +132,15 @@ class HSVToRGBOp : public XlaOpKernel { errors::FailedPrecondition("input must have 3 channels but input has ", 
channels, " channels.")); - xla::ComputationBuilder* b = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle hue = + xla::XlaBuilder* b = context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp hue = b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle saturation = + xla::XlaOp saturation = b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle value = + xla::XlaOp value = b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1, /*dimno=*/channel_dim); @@ -174,9 +173,9 @@ class AdjustContrastOpV2 : public XlaOpKernel { errors::InvalidArgument("contrast_factor must be scalar: ", factor_shape.DebugString())); - xla::ComputationBuilder* b = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle factor = context->Input(1); + xla::XlaBuilder* b = context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp factor = context->Input(1); DataType type = context->input_type(0); @@ -221,19 +220,19 @@ class AdjustSaturationOp : public XlaOpKernel { errors::InvalidArgument("input must have 3 channels but instead has ", channels, " channels.")); - xla::ComputationBuilder* b = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle scale = context->Input(1); + xla::XlaBuilder* b = context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp scale = context->Input(1); DataType type = context->input_type(0); - xla::ComputationDataHandle red = + xla::XlaOp red = b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle green = + xla::XlaOp green = b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1, /*dimno=*/channel_dim); - 
xla::ComputationDataHandle blue = + xla::XlaOp blue = b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1, /*dimno=*/channel_dim); TensorShape channel_shape = input_shape; @@ -271,19 +270,19 @@ class AdjustHueOp : public XlaOpKernel { errors::InvalidArgument("input must have 3 channels but instead has ", channels, " channels.")); - xla::ComputationBuilder* b = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle delta = context->Input(1); + xla::XlaBuilder* b = context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp delta = context->Input(1); DataType type = context->input_type(0); - xla::ComputationDataHandle red = + xla::XlaOp red = b->SliceInDim(input, /*start_index=*/0, /*limit_index=*/1, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle green = + xla::XlaOp green = b->SliceInDim(input, /*start_index=*/1, /*limit_index=*/2, /*stride=*/1, /*dimno=*/channel_dim); - xla::ComputationDataHandle blue = + xla::XlaOp blue = b->SliceInDim(input, /*start_index=*/2, /*limit_index=*/3, /*stride=*/1, /*dimno=*/channel_dim); TensorShape channel_shape = input_shape; diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc index f36b3f594826c27b7866d956c855aa3638db9cb4..9058cbc74762576c7e6f8ec1b2b0f6b247ac0502 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc @@ -99,9 +99,9 @@ ResizeConvolutionDims ComputeResizeConvolutionParameters( return dims; } -xla::ComputationDataHandle MakeBilinearResizeKernel( - xla::ComputationBuilder* builder, gtl::ArraySlice kernel_size, - int64 channels) { +xla::XlaOp MakeBilinearResizeKernel(xla::XlaBuilder* builder, + gtl::ArraySlice kernel_size, + int64 channels) { // Form a 2D convolution kernel like: // 1 2 3 2 1 // 2 4 6 4 2 @@ -120,7 +120,7 @@ xla::ComputationDataHandle 
MakeBilinearResizeKernel( return kernel; }; - xla::ComputationDataHandle channels_iota; + xla::XlaOp channels_iota; // DT_INT32 Iota will always return status::OK(). TF_CHECK_OK( XlaHelpers::Iota(builder, DataType::DT_INT32, channels, &channels_iota)); @@ -139,10 +139,12 @@ xla::ComputationDataHandle MakeBilinearResizeKernel( /*broadcast_dimensions=*/{0}); } -xla::ComputationDataHandle ResizeUsingDilationAndConvolution( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& input, - const int num_spatial_dims, std::vector in_size, - std::vector out_size, const int64 channels) { +xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder, + const xla::XlaOp& input, + const int num_spatial_dims, + std::vector in_size, + std::vector out_size, + const int64 channels) { // Picture for a 1x3 to 1x4 resize: // stride = 2, kernel size = 3 // Input: @@ -168,9 +170,9 @@ xla::ComputationDataHandle ResizeUsingDilationAndConvolution( ResizeConvolutionDims dims = ComputeResizeConvolutionParameters(in_size, out_size); - xla::ComputationDataHandle kernel = + xla::XlaOp kernel = MakeBilinearResizeKernel(builder, dims.kernel_size, channels); - xla::ComputationDataHandle output = builder->ConvGeneralDilated( + xla::XlaOp output = builder->ConvGeneralDilated( input, kernel, dims.stride, /*padding=*/ {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, @@ -189,10 +191,12 @@ xla::ComputationDataHandle ResizeUsingDilationAndConvolution( return output; } -xla::ComputationDataHandle ResizeUsingDilationAndConvolutionGradOp( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& grad, - const int num_spatial_dims, std::vector in_size, - std::vector grad_size, const int64 channels) { +xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(xla::XlaBuilder* builder, + const xla::XlaOp& grad, + const int num_spatial_dims, + std::vector in_size, + std::vector grad_size, + const int64 channels) { ResizeConvolutionDims dims = 
ComputeResizeConvolutionParameters(in_size, grad_size); @@ -210,7 +214,7 @@ xla::ComputationDataHandle ResizeUsingDilationAndConvolutionGradOp( } dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims); dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1); - xla::ComputationDataHandle kernel = + xla::XlaOp kernel = MakeBilinearResizeKernel(builder, dims.kernel_size, channels); // Broadcast the input kernel where the forward op expanded from a size == 1 @@ -223,7 +227,7 @@ xla::ComputationDataHandle ResizeUsingDilationAndConvolutionGradOp( } } - xla::ComputationDataHandle output = builder->ConvGeneralDilated( + xla::XlaOp output = builder->ConvGeneralDilated( grad, kernel, /*window_strides=*/dims.kernel_size, /*padding=*/ {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, @@ -258,7 +262,7 @@ class ResizeBilinearOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); TensorShape input_shape = ctx->InputShape(0); OP_REQUIRES(ctx, input_shape.dims() == 4, @@ -283,7 +287,7 @@ class ResizeBilinearOp : public XlaOpKernel { const int num_spatial_dims = 2; - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); // If in_size[i] > 1 and out_size[i] == 1, slice out the first input in // dimension i. @@ -318,7 +322,7 @@ class ResizeBilinearOp : public XlaOpKernel { // from image of size axb -> cxd is same as resizing axb -> exf -> cxd. // // This makes the convolutions kernels smaller and the operation faster. 
- xla::ComputationDataHandle output = input; + xla::XlaOp output = input; while (in_size != out_size) { if (in_size[0] != 1 && in_size[1] != 1) { std::vector k = { @@ -369,7 +373,7 @@ class ResizeBilinearGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); TensorShape input_shape = ctx->InputShape(1); OP_REQUIRES(ctx, input_shape.dims() == 4, @@ -406,9 +410,9 @@ class ResizeBilinearGradOp : public XlaOpKernel { const int num_spatial_dims = 2; - xla::ComputationDataHandle grad = ctx->Input(0); + xla::XlaOp grad = ctx->Input(0); - xla::ComputationDataHandle output = grad; + xla::XlaOp output = grad; while (in_size != grad_size) { if (in_size[0] != 1 && in_size[1] != 1) { std::vector k = { diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops.cc b/tensorflow/compiler/tf2xla/kernels/index_ops.cc index 7bf4b435f526afa93d8a218b191928acb932cd6b..36eb4c75454ed82804c40b82e5dbaec2eef0a719 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops.cc @@ -61,10 +61,10 @@ void XlaArgMinMaxOp::Compile(XlaOpKernelContext* ctx) { DataType index_type = output_type(0); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp input = ctx->Input(0); - xla::ComputationDataHandle output; + xla::XlaOp output; if (is_min_) { OP_REQUIRES_OK(ctx, XlaHelpers::ArgMin(b, ctx, input, input_shape, input_type(0), diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc index b1f3c3c298ce0cadf38b9bda715761fe7e2896d7..2c2d88486fda99d2380382a3e2f633f5bdc7478c 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc @@ -71,10 +71,10 @@ class ArgMaxCustomCallOp : public XlaOpKernel { OP_REQUIRES(ctx, 
XlaContext::Get(ctx).allow_cpu_custom_calls(), errors::InvalidArgument( "ArgMax implementation requires a CustomCall on CPU")); - xla::ComputationBuilder& b = *ctx->builder(); + xla::XlaBuilder& b = *ctx->builder(); // XLA passes to the function, so it is not included here. - std::vector args; + std::vector args; args.push_back(ctx->Input(0)); args.push_back(b.ConstantLiteral( *xla::Literal::CreateR1(input_shape.dim_sizes()))); @@ -91,7 +91,7 @@ class ArgMaxCustomCallOp : public XlaOpKernel { // Tell XLA to call the custom code, defined in // index_ops_kernel_argmax_float_1d.cc. - xla::ComputationDataHandle output; + xla::XlaOp output; switch (input_shape.dims()) { case 1: output = b.CustomCall("argmax_float_1d_xla_impl", args, xla_shape); diff --git a/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc b/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc index c177f08d9c4687bb13b98a4328bb3960519799c4..1decf7d72d72bb697477e7f841ced2a1a0d5fbe9 100644 --- a/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/no_op.h" @@ -33,7 +33,7 @@ class L2LossOp : public XlaOpKernel { std::iota(dims.begin(), dims.end(), 0); DataType dtype = ctx->input_type(0); - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); // output = sum(t ** 2) / 2 const DataType accumulation_type = XlaHelpers::SumAccumulationType(dtype); diff --git a/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc index 1cfee3070f384af0a7441a9c860c530dd1b42187..39fbf98a6274918840e9e351470f04c2d80c5d01 100644 --- a/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc @@ -38,8 +38,8 @@ class LRNOp : public XlaOpKernel { OP_REQUIRES(ctx, in_shape.dims() == 4, errors::InvalidArgument("in must be 4-dimensional")); - xla::ComputationBuilder* builder = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); // sqr_sum[a, b, c, d] = // sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) @@ -111,10 +111,10 @@ class LRNGradOp : public XlaOpKernel { "input_grads, input_image, and out_image should have the same " "shape")); - xla::ComputationBuilder* builder = ctx->builder(); - xla::ComputationDataHandle in_grads = ctx->Input(0); - xla::ComputationDataHandle in_image = ctx->Input(1); - xla::ComputationDataHandle out_image = ctx->Input(2); + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp in_grads = ctx->Input(0); + xla::XlaOp in_image = ctx->Input(1); + xla::XlaOp out_image 
= ctx->Input(2); // This code is ported from tensorflow/core/kernels/lrn_op.cc. In Python // pseudo-code, the Eigen code does this for each spatial position: @@ -166,7 +166,7 @@ class LRNGradOp : public XlaOpKernel { auto dy_reduced = XlaHelpers::ConvertElementType(builder, dy_reduce, input_type(0)); - xla::ComputationDataHandle gradients = builder->Add( + xla::XlaOp gradients = builder->Add( builder->Mul(in_image, dy_reduced), builder->Mul(in_grads, builder->Pow(norm, builder->ConstantR0(-beta_)))); diff --git a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc index 886baf8115243a22b7255a3961c914d4cf6c2ed5..6949b296f4b9afe4a0c9152c763a9ad233b9f595 100644 --- a/tensorflow/compiler/tf2xla/kernels/matmul_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/matmul_op.cc @@ -66,8 +66,8 @@ class MatMulOp : public XlaOpKernel { a_shape.DebugString(), ", In[1]: ", b_shape.DebugString())); - xla::ComputationDataHandle a = ctx->Input(0); - xla::ComputationDataHandle b = ctx->Input(1); + xla::XlaOp a = ctx->Input(0); + xla::XlaOp b = ctx->Input(1); if (is_sparse_) { if (a_type_ == DT_BFLOAT16) { a = ctx->builder()->ConvertElementType(a, xla::F32); diff --git a/tensorflow/compiler/tf2xla/kernels/matrix_band_part_op.cc b/tensorflow/compiler/tf2xla/kernels/matrix_band_part_op.cc index faa415a97b053b4b11d015fefcd430210b98118a..fbd5dc0fdad4483aadbe9bc263cc1f7a034cee09 100644 --- a/tensorflow/compiler/tf2xla/kernels/matrix_band_part_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/matrix_band_part_op.cc @@ -44,10 +44,10 @@ class MatrixBandPartOp : public XlaOpKernel { errors::InvalidArgument("num_upper must be scalar, got shape ", num_upper_in_shape.DebugString())); - xla::ComputationBuilder* builder = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle num_lower = context->Input(1); - xla::ComputationDataHandle num_upper = context->Input(2); + xla::XlaBuilder* builder = 
context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp num_lower = context->Input(1); + xla::XlaOp num_upper = context->Input(2); DataType input_type = context->input_type(0); DataType index_type = context->input_type(1); @@ -58,10 +58,10 @@ class MatrixBandPartOp : public XlaOpKernel { // Compute 'offset', which is how many diagonals we are above/below the // diagonal. - xla::ComputationDataHandle iota_m; + xla::XlaOp iota_m; OP_REQUIRES_OK(context, XlaHelpers::Iota(builder, index_type, m, &iota_m)); - xla::ComputationDataHandle iota_n; + xla::XlaOp iota_n; OP_REQUIRES_OK(context, XlaHelpers::Iota(builder, index_type, n, &iota_n)); auto offset = builder->Sub(builder->Broadcast(iota_n, {m}), iota_m, diff --git a/tensorflow/compiler/tf2xla/kernels/matrix_set_diag_op.cc b/tensorflow/compiler/tf2xla/kernels/matrix_set_diag_op.cc index b2940bdcff75a087c914fdad0cb2426276e41aff..db53f6fef8d6bf901c8281f50791ca6766c46efd 100644 --- a/tensorflow/compiler/tf2xla/kernels/matrix_set_diag_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/matrix_set_diag_op.cc @@ -54,16 +54,16 @@ class MatrixSetDiagOp : public XlaOpKernel { input_shape.DebugString(), " and diagonal shape: ", diag_shape.DebugString())); - xla::ComputationBuilder* builder = context->builder(); - xla::ComputationDataHandle input = context->Input(0); - xla::ComputationDataHandle diag = context->Input(1); + xla::XlaBuilder* builder = context->builder(); + xla::XlaOp input = context->Input(0); + xla::XlaOp diag = context->Input(1); auto zero = XlaHelpers::Zero(builder, context->input_type(0)); // Create an indicator tensor that is true only on the diagonal. 
- xla::ComputationDataHandle iota_m; + xla::XlaOp iota_m; OP_REQUIRES_OK(context, XlaHelpers::Iota(builder, DT_INT32, m, &iota_m)); - xla::ComputationDataHandle iota_n; + xla::XlaOp iota_n; OP_REQUIRES_OK(context, XlaHelpers::Iota(builder, DT_INT32, n, &iota_n)); auto indicator = builder->Eq(iota_m, builder->Broadcast(iota_n, {m}), diff --git a/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc b/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc index 05a36a031ad73be289604da1b7e56203ff12fbf5..7e9de3ef9b245c113cc143128fe58e7e017a361c 100644 --- a/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/mirror_pad_op.cc @@ -25,10 +25,11 @@ class MirrorPadOp : public XlaOpKernel { public: explicit MirrorPadOp(OpKernelConstruction* context) : XlaOpKernel(context) {} - xla::StatusOr DoMirrorPad( - const xla::ComputationDataHandle& t, const xla::Shape& original_shape, - const xla::Literal& pad_literal, xla::ComputationBuilder* b) { - xla::ComputationDataHandle accum = t; + xla::StatusOr DoMirrorPad(const xla::XlaOp& t, + const xla::Shape& original_shape, + const xla::Literal& pad_literal, + xla::XlaBuilder* b) { + xla::XlaOp accum = t; for (int64 dimno = xla::ShapeUtil::Rank(original_shape) - 1; dimno >= 0; --dimno) { auto t_rev = b->Rev(accum, {dimno}); @@ -76,12 +77,12 @@ class MirrorPadOp : public XlaOpKernel { OP_REQUIRES_OK( ctx, ctx->ConstantInputReshaped(1, {fixed_dims, 2}, &pad_literal)); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); auto in0 = ctx->Input(0); - xla::StatusOr> in0_shape = b->GetShape(in0); + xla::StatusOr in0_shape = b->GetShape(in0); OP_REQUIRES(ctx, in0_shape.ok(), in0_shape.status()); - xla::StatusOr accum_status = - DoMirrorPad(in0, *in0_shape.ValueOrDie(), pad_literal, b); + xla::StatusOr accum_status = + DoMirrorPad(in0, in0_shape.ValueOrDie(), pad_literal, b); OP_REQUIRES_OK(ctx, accum_status.status()); diff --git 
a/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc b/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc index 9f7c9913802d311895479b914b66553e135aa426..cac2eea96eeed723b2a63bc9193070cad04b005d 100644 --- a/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/one_hot_op.cc @@ -62,7 +62,7 @@ class OneHotOp : public XlaOpKernel { ctx, depth >= 0, errors::InvalidArgument("depth must be non-negative, got: ", depth)); - xla::ComputationDataHandle one_hot; + xla::XlaOp one_hot; OP_REQUIRES_OK( ctx, XlaHelpers::OneHot(ctx->builder(), depth, axis, input_type(0), indices_shape, ctx->Input(0), ctx->Input(2), diff --git a/tensorflow/compiler/tf2xla/kernels/pack_op.cc b/tensorflow/compiler/tf2xla/kernels/pack_op.cc index a4318e29d2532faf1f0cc6bb9418d29c2df20cd4..aecaabb6dcf46bdd6ae3da929448d6370acb989b 100644 --- a/tensorflow/compiler/tf2xla/kernels/pack_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/pack_op.cc @@ -43,7 +43,7 @@ class PackOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - std::vector values; + std::vector values; std::vector shapes; OP_REQUIRES_OK(ctx, ctx->InputList("values", &values, &shapes)); const int num = values.size(); @@ -69,7 +69,7 @@ class PackOp : public XlaOpKernel { -expanded_num_dims, ", ", expanded_num_dims, ")")); - std::vector reshaped_inputs(num); + std::vector reshaped_inputs(num); TensorShape child_shape(shapes[0]); child_shape.InsertDim(axis, 1); diff --git a/tensorflow/compiler/tf2xla/kernels/pad_op.cc b/tensorflow/compiler/tf2xla/kernels/pad_op.cc index 791351637aee61c5fdd911dd8a48959990514395..7c95475e7b1f02183e44f73f116a4aeb25f05c09 100644 --- a/tensorflow/compiler/tf2xla/kernels/pad_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/pad_op.cc @@ -70,7 +70,7 @@ class PadOp : public XlaOpKernel { } // PadV2 added a "constant_values" input that indicates the pad value. 
- xla::ComputationDataHandle constant_values; + xla::XlaOp constant_values; if (ctx->num_inputs() == 3) { OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(ctx->InputShape(2)), errors::InvalidArgument("constant_values must be a scalar.")); diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc index 5f635dd1bc6122cfcac8163baafd95b13f157715..f8e7b48a0fd94835964aea033ad33523150067b4 100644 --- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc @@ -66,15 +66,15 @@ class PoolingOp : public XlaOpKernel { int num_dims() const { return num_spatial_dims_ + 2; } // Method that builds an initial value to use in reductions. - virtual xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) = 0; + virtual xla::XlaOp InitValue(xla::XlaBuilder* b) = 0; // The reduction operation to apply to each window. - virtual const xla::Computation* Reduction(XlaOpKernelContext* ctx) = 0; + virtual const xla::XlaComputation* Reduction(XlaOpKernelContext* ctx) = 0; // A post-processing operation to apply on the outputs of the ReduceWindow. 
- virtual xla::ComputationDataHandle PostProcessOutput( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& output, - DataType dtype, const TensorShape& input_shape) = 0; + virtual xla::XlaOp PostProcessOutput(XlaOpKernelContext* ctx, + const xla::XlaOp& output, DataType dtype, + const TensorShape& input_shape) = 0; void Compile(XlaOpKernelContext* ctx) override { std::vector ksize = ksize_; @@ -110,7 +110,7 @@ class PoolingOp : public XlaOpKernel { " operator must have ", num_dims(), " dimensions")); - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); auto input = XlaHelpers::ConvertElementType(b, ctx->Input(0), reduction_type_); auto reduce = ctx->builder()->ReduceWindow( @@ -135,17 +135,17 @@ class MaxPoolOp : public PoolingOp { : PoolingOp(ctx, /*num_spatial_dims=*/num_spatial_dims, /*reduction_type=*/ctx->input_type(0)) {} - xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) override { + xla::XlaOp InitValue(xla::XlaBuilder* b) override { return XlaHelpers::MinValue(b, reduction_type_); } - const xla::Computation* Reduction(XlaOpKernelContext* ctx) override { + const xla::XlaComputation* Reduction(XlaOpKernelContext* ctx) override { return ctx->GetOrCreateMax(reduction_type_); } - xla::ComputationDataHandle PostProcessOutput( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& output, - DataType dtype, const TensorShape& input_shape) override { + xla::XlaOp PostProcessOutput(XlaOpKernelContext* ctx, + const xla::XlaOp& output, DataType dtype, + const TensorShape& input_shape) override { return output; } }; @@ -176,9 +176,9 @@ REGISTER_XLA_OP(Name("MaxPool3D"), MaxPool3DOp); // Common computation shared between AvgPool and AvgPoolGrad. Divide each // element of an image by the count of elements that contributed to that // element during pooling. 
-static xla::ComputationDataHandle AvgPoolDivideByCount( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& output, - DataType dtype, const TensorShape& input_shape, xla::Padding padding, +static xla::XlaOp AvgPoolDivideByCount( + XlaOpKernelContext* ctx, const xla::XlaOp& output, DataType dtype, + const TensorShape& input_shape, xla::Padding padding, const std::vector& ksize, const std::vector& stride, int num_spatial_dims, TensorFormat data_format) { if (padding == xla::Padding::kValid) { @@ -234,17 +234,17 @@ class AvgPoolOp : public PoolingOp { /*reduction_type=*/ XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} - xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) override { + xla::XlaOp InitValue(xla::XlaBuilder* b) override { return XlaHelpers::Zero(b, reduction_type_); } - const xla::Computation* Reduction(XlaOpKernelContext* ctx) override { + const xla::XlaComputation* Reduction(XlaOpKernelContext* ctx) override { return ctx->GetOrCreateAdd(reduction_type_); } - xla::ComputationDataHandle PostProcessOutput( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& output, - DataType dtype, const TensorShape& input_shape) override { + xla::XlaOp PostProcessOutput(XlaOpKernelContext* ctx, + const xla::XlaOp& output, DataType dtype, + const TensorShape& input_shape) override { return AvgPoolDivideByCount(ctx, output, dtype, input_shape, padding_, ksize_, stride_, num_spatial_dims_, data_format_); @@ -344,11 +344,10 @@ class MaxPoolGradOp : public XlaOpKernel { xla::PrimitiveType element_type; OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(input_type(2), &element_type)); - xla::ComputationDataHandle init_value = - XlaHelpers::Zero(ctx->builder(), input_type(2)); + xla::XlaOp init_value = XlaHelpers::Zero(ctx->builder(), input_type(2)); auto select = CreateScalarGeComputation(element_type, ctx->builder()); auto scatter = CreateScalarAddComputation(element_type, ctx->builder()); - xla::ComputationDataHandle gradients = 
ctx->builder()->SelectAndScatter( + xla::XlaOp gradients = ctx->builder()->SelectAndScatter( input, select, ksize_, stride_, xla_padding, out_backprop, init_value, scatter); @@ -462,7 +461,7 @@ class AvgPoolGradOp : public XlaOpKernel { // The input gradients are computed by a convolution of the output gradients // and the filter, with some appropriate padding. See the comment at the top // of conv_grad_ops.h for details. - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); auto out_backprop = ctx->Input(1); auto dtype = input_type(1); xla::Padding xla_padding = diff --git a/tensorflow/compiler/tf2xla/kernels/quantize_and_dequantize_op.cc b/tensorflow/compiler/tf2xla/kernels/quantize_and_dequantize_op.cc index 4171e076ff6d9dd4f809454377620324d1fe5ae4..661cd5923e1023eaf89a6bc4f56fcc362c8bcfb6 100644 --- a/tensorflow/compiler/tf2xla/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/quantize_and_dequantize_op.cc @@ -35,7 +35,7 @@ class QuantizeAndDequantizeOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); const DataType data_type = ctx->input_type(0); // Comments taken from semantics description at @@ -46,8 +46,8 @@ class QuantizeAndDequantizeOp : public XlaOpKernel { // m = max(abs(input_min), abs(input_max)) if range_given is true, // m = max(abs(min_elem(input)), // abs(max_elem(input))) otherwise. 
- xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle input_min, input_max; + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp input_min, input_max; if (range_given_) { double input_min_value, input_max_value; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsFloatScalar(1, &input_min_value)); @@ -55,14 +55,14 @@ class QuantizeAndDequantizeOp : public XlaOpKernel { input_min = XlaHelpers::FloatLiteral(b, data_type, input_min_value); input_max = XlaHelpers::FloatLiteral(b, data_type, input_max_value); } else { - const xla::Computation* fmax = ctx->GetOrCreateMax(data_type); - const xla::Computation* fmin = ctx->GetOrCreateMin(data_type); + const xla::XlaComputation* fmax = ctx->GetOrCreateMax(data_type); + const xla::XlaComputation* fmin = ctx->GetOrCreateMin(data_type); input_min = b->ReduceAll(input, XlaHelpers::MaxValue(b, data_type), *fmin); input_max = b->ReduceAll(input, XlaHelpers::MinValue(b, data_type), *fmax); } - xla::ComputationDataHandle m = b->Max(b->Abs(input_min), b->Abs(input_max)); + xla::XlaOp m = b->Max(b->Abs(input_min), b->Abs(input_max)); // Next, we choose our fixed-point quantization buckets, [min_fixed, // max_fixed]. If signed_input is true, this is @@ -85,7 +85,7 @@ class QuantizeAndDequantizeOp : public XlaOpKernel { // From this we compute our scaling factor, s: // // s = (max_fixed - min_fixed) / (2 * m). - xla::ComputationDataHandle s = + xla::XlaOp s = b->Div(XlaHelpers::FloatLiteral(b, data_type, max_fixed - min_fixed), b->Mul(XlaHelpers::FloatLiteral(b, data_type, 2.0), m)); @@ -93,7 +93,7 @@ class QuantizeAndDequantizeOp : public XlaOpKernel { // e is transformed into e': // // e' = (e * s).round_to_nearest() / s. 
- xla::ComputationDataHandle result = b->Div(b->Round(b->Mul(input, s)), s); + xla::XlaOp result = b->Div(b->Round(b->Mul(input, s)), s); ctx->SetOutput(0, result); } diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc index c0994c434bca5174eaee7b9e63e10432d9c2ed8d..5f5bd586376ab368e443671ac8a5de23a5fd604b 100644 --- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc @@ -41,9 +41,9 @@ class RandomUniformOp : public XlaOpKernel { xla::Shape xla_shape; OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype, shape, &xla_shape)); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle result = b->RngUniform( - XlaHelpers::Zero(b, dtype), XlaHelpers::One(b, dtype), xla_shape); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp result = b->RngUniform(XlaHelpers::Zero(b, dtype), + XlaHelpers::One(b, dtype), xla_shape); ctx->SetOutput(0, result); } @@ -100,11 +100,11 @@ class RandomStandardNormalOp : public XlaOpKernel { xla::Shape xla_shape; OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype, shape, &xla_shape)); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); // Normal distribution with a mean of 0 and a standard deviation of 1: - xla::ComputationDataHandle result = b->RngNormal( - XlaHelpers::Zero(b, dtype), XlaHelpers::One(b, dtype), xla_shape); + xla::XlaOp result = b->RngNormal(XlaHelpers::Zero(b, dtype), + XlaHelpers::One(b, dtype), xla_shape); ctx->SetOutput(0, result); } @@ -130,19 +130,18 @@ class TruncatedNormalOp : public XlaOpKernel { xla::Shape xla_element_shape = xla::ShapeUtil::MakeShape(xla_shape.element_type(), {}); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle mean = XlaHelpers::Zero(b, dtype); - xla::ComputationDataHandle stddev = XlaHelpers::One(b, dtype); - xla::ComputationDataHandle candidate = - b->RngNormal(mean, stddev, xla_shape); + xla::XlaBuilder* b = 
ctx->builder(); + xla::XlaOp mean = XlaHelpers::Zero(b, dtype); + xla::XlaOp stddev = XlaHelpers::One(b, dtype); + xla::XlaOp candidate = b->RngNormal(mean, stddev, xla_shape); - auto two_sd = [dtype](bool negate, xla::ComputationBuilder* b) { + auto two_sd = [dtype](bool negate, xla::XlaBuilder* b) { return XlaHelpers::FloatLiteral(b, dtype, negate ? -2.0 : 2.0); }; - auto out_of_range_mask = [two_sd](xla::ComputationDataHandle candidate, - xla::ComputationBuilder* b) { - xla::ComputationDataHandle too_large = b->Gt(candidate, two_sd(false, b)); - xla::ComputationDataHandle too_small = b->Lt(candidate, two_sd(true, b)); + auto out_of_range_mask = [two_sd](xla::XlaOp candidate, + xla::XlaBuilder* b) { + xla::XlaOp too_large = b->Gt(candidate, two_sd(false, b)); + xla::XlaOp too_small = b->Lt(candidate, two_sd(true, b)); return b->Or(too_large, too_small); }; @@ -152,35 +151,32 @@ class TruncatedNormalOp : public XlaOpKernel { // out_of_range_mask := candidate < mean-2*sd || candidate > mean+2*sd // candidate = select(out_of_range_mask, rng_normal(), candidate) // } - std::unique_ptr test_builder = + std::unique_ptr test_builder = b->CreateSubBuilder("truncated_normal_test"); { auto* b = test_builder.get(); - xla::ComputationDataHandle candidate = - b->Parameter(0, xla_shape, "candidate"); - xla::ComputationDataHandle oor_mask = out_of_range_mask(candidate, b); + xla::XlaOp candidate = b->Parameter(0, xla_shape, "candidate"); + out_of_range_mask(candidate, b); OP_REQUIRES_OK(ctx, Any(out_of_range_mask(candidate, b), b).status()); } - std::unique_ptr body_builder = + std::unique_ptr body_builder = b->CreateSubBuilder("truncated_normal_body"); { auto* b = body_builder.get(); - xla::ComputationDataHandle candidate = - b->Parameter(0, xla_shape, "candidate"); - xla::ComputationDataHandle to_resample = out_of_range_mask(candidate, b); - xla::ComputationDataHandle mean = XlaHelpers::Zero(b, dtype); - xla::ComputationDataHandle stddev = XlaHelpers::One(b, dtype); + 
xla::XlaOp candidate = b->Parameter(0, xla_shape, "candidate"); + xla::XlaOp to_resample = out_of_range_mask(candidate, b); + xla::XlaOp mean = XlaHelpers::Zero(b, dtype); + xla::XlaOp stddev = XlaHelpers::One(b, dtype); b->Select(to_resample, b->RngNormal(mean, stddev, xla_shape), candidate); } - xla::StatusOr test_computation = test_builder->Build(); + xla::StatusOr test_computation = test_builder->Build(); OP_REQUIRES_OK(ctx, test_computation.status()); - xla::StatusOr body_computation = body_builder->Build(); + xla::StatusOr body_computation = body_builder->Build(); OP_REQUIRES_OK(ctx, body_computation.status()); - xla::ComputationDataHandle result = - b->While(test_computation.ValueOrDie(), body_computation.ValueOrDie(), - candidate); + xla::XlaOp result = b->While(test_computation.ValueOrDie(), + body_computation.ValueOrDie(), candidate); ctx->SetOutput(0, result); } diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc index cb144bea9e429b7c8bcc3d07f688ed6a254c3be0..08894489ac77bbbe4ddb067c06a6d031a537697d 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc @@ -19,7 +19,6 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" @@ -65,7 +64,7 @@ class ReduceWindowOp : public XlaOpKernel { "rank (", padding_high_.size(), " vs. ", rank, ")")); - xla::ComputationBuilder* builder = context->builder(); + xla::XlaBuilder* builder = context->builder(); // Build the reducer function. 
XlaCompiler::Argument reducer_arg; @@ -95,15 +94,15 @@ class ReduceWindowOp : public XlaOpKernel { xla::ShapeUtil::HumanString(reducer.xla_output_shape))); // Wraps the reducer in a computation that unpacks the output tuple. - xla::Computation wrapper; + xla::XlaComputation wrapper; { - std::unique_ptr cb = + std::unique_ptr cb = builder->CreateSubBuilder("wrapper"); auto x = cb->Parameter(0, scalar_shape, "x"); auto y = cb->Parameter(1, scalar_shape, "y"); auto outputs = cb->Call(*reducer.computation, {x, y}); cb->GetTupleElement(outputs, 0); - xla::StatusOr result = cb->Build(); + xla::StatusOr result = cb->Build(); OP_REQUIRES_OK(context, result.status()); wrapper = std::move(result.ValueOrDie()); } @@ -113,7 +112,7 @@ class ReduceWindowOp : public XlaOpKernel { padding[i] = {padding_low_[i], padding_high_[i]}; } - xla::ComputationDataHandle output = builder->ReduceWindowWithGeneralPadding( + xla::XlaOp output = builder->ReduceWindowWithGeneralPadding( context->Input(0), context->Input(1), wrapper, window_dimensions_, window_strides_, padding); context->SetOutput(0, output); diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc index 812d258cd1677e18ef49952044126c76a2f55b19..0f425637795e9633a8e36f921000ee2f5e25813a 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc @@ -30,13 +30,11 @@ class SumOp : public XlaReductionOp { explicit SumOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx, XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return XlaHelpers::Zero(builder, reduction_type_); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void 
BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Add(scalar_lhs, scalar_rhs); } }; @@ -49,14 +47,12 @@ class ProdOp : public XlaReductionOp { : XlaReductionOp(ctx, XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return XlaHelpers::One(builder, reduction_type_); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Mul(scalar_lhs, scalar_rhs); } }; @@ -69,14 +65,12 @@ class MinOp : public XlaReductionOp { explicit MinOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx, ctx->input_type(0)) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return XlaHelpers::MaxValue(builder, reduction_type_); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Min(scalar_lhs, scalar_rhs); } }; @@ -88,14 +82,12 @@ class MaxOp : public XlaReductionOp { explicit MaxOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx, ctx->input_type(0)) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return XlaHelpers::MinValue(builder, reduction_type_); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& 
scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Max(scalar_lhs, scalar_rhs); } }; @@ -108,20 +100,17 @@ class MeanOp : public XlaReductionOp { : XlaReductionOp(ctx, XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return XlaHelpers::Zero(builder, reduction_type_); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Add(scalar_lhs, scalar_rhs); } - xla::ComputationDataHandle BuildFinalizer( - xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& reduce_output, - int64 num_elements_reduced) override { + xla::XlaOp BuildFinalizer(xla::XlaBuilder* builder, + const xla::XlaOp& reduce_output, + int64 num_elements_reduced) override { auto divisor = XlaHelpers::IntegerLiteral(builder, input_type(0), num_elements_reduced); return builder->Div(reduce_output, divisor); @@ -136,14 +125,12 @@ class AllOp : public XlaReductionOp { explicit AllOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx, ctx->input_type(0)) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return builder->ConstantR0(true); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->And(scalar_lhs, scalar_rhs); } }; @@ -155,14 +142,12 @@ class AnyOp : public 
XlaReductionOp { explicit AnyOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx, ctx->input_type(0)) {} - xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) override { + xla::XlaOp InitialValue(xla::XlaBuilder* builder) override { return builder->ConstantR0(false); } - void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) override { + void BuildReducer(xla::XlaBuilder* builder, const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) override { builder->Or(scalar_lhs, scalar_rhs); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.h b/tensorflow/compiler/tf2xla/kernels/reduction_ops.h index f3181f0dadc2d3f45abb145e009e2663c10490f0..2ecfb854a1c8625524d4f1199af3927edd204926 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.h +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.h @@ -19,7 +19,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_TF2XLA_KERNELS_REDUCTION_OPS_H_ #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { @@ -28,35 +28,33 @@ namespace tensorflow { // to override: description is a textual description of the mapped // function; InitialValue constructs the base case for the reduction; // BuildReducer adds the implementation of the reduction lambda to a -// xla::ComputationBuilder and BuildFinalizer adds the +// xla::XlaBuilder and BuildFinalizer adds the // implementation of the finalizer lambda (if there is one) to a -// xla::ComputationBuilder. +// xla::XlaBuilder. class XlaReductionOp : public XlaOpKernel { public: XlaReductionOp(OpKernelConstruction* ctx, DataType reduction_type); ~XlaReductionOp() override {} // Return the base case for the reduction. 
- virtual xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder) = 0; + virtual xla::XlaOp InitialValue(xla::XlaBuilder* builder) = 0; // Implement the (scalar,scalar)->scalar lambda that should be // applied to each pair of elements to be reduced. The desired // computation should be added to 'builder' and // '(scalar_lhs,scalar_rhs)' are the function's inputs. - virtual void BuildReducer(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& scalar_lhs, - const xla::ComputationDataHandle& scalar_rhs) = 0; + virtual void BuildReducer(xla::XlaBuilder* builder, + const xla::XlaOp& scalar_lhs, + const xla::XlaOp& scalar_rhs) = 0; // Applies a transformation to the output of the reduction. The desired // computation should be added to 'builder'. Argument 'reduce_output' is the // output of the reduction. 'num_elements_reduced' is the number of elements // that contributed to the reduction. Returns the transformed reduction // output, Defaults to returning 'reduce_output' unchanged. - virtual xla::ComputationDataHandle BuildFinalizer( - xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& reduce_output, - int64 num_elements_reduced); + virtual xla::XlaOp BuildFinalizer(xla::XlaBuilder* builder, + const xla::XlaOp& reduce_output, + int64 num_elements_reduced); void Compile(XlaOpKernelContext* ctx) override; diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc index 64fe765ae9a945c58ea60bc157b1520c83b0d8e7..4fd5bfd03999a7f8b7bb081cc4b03aa1434d4c3d 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc @@ -35,10 +35,9 @@ XlaReductionOp::XlaReductionOp(OpKernelConstruction* ctx, // Unless BuildFinalizer is overridden the reduction has no // finalizer. 
-xla::ComputationDataHandle XlaReductionOp::BuildFinalizer( - xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& reduce_output, - int64 num_elements_reduced) { +xla::XlaOp XlaReductionOp::BuildFinalizer(xla::XlaBuilder* builder, + const xla::XlaOp& reduce_output, + int64 num_elements_reduced) { return reduce_output; } @@ -96,9 +95,9 @@ void XlaReductionOp::Compile(XlaOpKernelContext* ctx) { string desc = ctx->op_kernel().name(); - xla::ComputationBuilder* const b = ctx->builder(); + xla::XlaBuilder* const b = ctx->builder(); // Construct the builder for the reduction lambda. - xla::ComputationBuilder r(b->client(), strings::StrCat(desc, "-reduction")); + xla::XlaBuilder r(strings::StrCat(desc, "-reduction")); xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(reduction_type_, &type)); @@ -110,7 +109,7 @@ void XlaReductionOp::Compile(XlaOpKernelContext* ctx) { auto ry = r.Parameter(1, xla::ShapeUtil::MakeShape(type, {}), "y"); // Call virtual method to build the reduction lambda. BuildReducer(&r, rx, ry); - xla::Computation reduction_computation = r.Build().ConsumeValueOrDie(); + xla::XlaComputation reduction_computation = r.Build().ConsumeValueOrDie(); auto reduce = b->Reduce(data, initial, reduction_computation, xla_axes); auto deconverted = XlaHelpers::ConvertElementType(b, reduce, input_type(0)); diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index 12a35529992e6160566046dd28f9321c88afec91..ba7d484d53d7258edaa5bc42fa116cf16e94835b 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" @@ -32,7 +32,7 @@ class ReluOp : public XlaOpKernel { explicit ReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} // Computes the max of the scalar input x and 0. void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); auto zero = XlaHelpers::Zero(builder, input_type(0)); ctx->SetOutput(0, builder->Max(zero, ctx->Input(0))); } @@ -43,7 +43,7 @@ class Relu6Op : public XlaOpKernel { explicit Relu6Op(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} // Clamp the scalar input between 0 and 6. void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); auto zero = XlaHelpers::Zero(builder, input_type(0)); auto six = XlaHelpers::IntegerLiteral(builder, input_type(0), 6); ctx->SetOutput(0, builder->Clamp(zero, ctx->Input(0), six)); @@ -56,7 +56,7 @@ class ReluGradOp : public XlaOpKernel { // Return the lhs (incoming gradient) if the rhs (input feature) > 0, // otherwise return 0. void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const TensorShape shape = ctx->InputShape(0); const auto zero = b->Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); @@ -71,7 +71,7 @@ class Relu6GradOp : public XlaOpKernel { // Return the lhs (incoming gradient) if the rhs (input feature) > 0, // otherwise return 0. 
void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const TensorShape shape = ctx->InputShape(0); const auto zero = b->Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); diff --git a/tensorflow/compiler/tf2xla/kernels/retval_op.cc b/tensorflow/compiler/tf2xla/kernels/retval_op.cc index c283e3b02c2676785952e3e17bffa671b0dabc1e..70547290eaed169599764a5d66185dde85345863 100644 --- a/tensorflow/compiler/tf2xla/kernels/retval_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/retval_op.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" @@ -45,7 +45,7 @@ class RetvalOp : public XlaOpKernel { // compilation. OP_REQUIRES_OK(ctx, frame->SetRetval(index_, input)); } else { - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); const TensorShape input_shape = ctx->InputShape(0); auto is_constant = ctx->builder()->IsConstant(input); diff --git a/tensorflow/compiler/tf2xla/kernels/reverse_op.cc b/tensorflow/compiler/tf2xla/kernels/reverse_op.cc index e51d386926763ecbb5a943dfb6f872e78901dc69..2872a3c4d49d0d269aa3d216887a5c32cd51f1c3 100644 --- a/tensorflow/compiler/tf2xla/kernels/reverse_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reverse_op.cc @@ -48,7 +48,7 @@ class ReverseOp : public XlaOpKernel { ctx->SetOutput(0, ctx->Input(0)); return; } - // ComputationBuilder::Rev() requires concrete values for dimensions arg. + // XlaBuilder::Rev() requires concrete values for dimensions arg. 
xla::Literal lax; OP_REQUIRES_OK(ctx, ctx->ConstantInputReshaped(1, {x_shape.dims()}, &lax)); std::vector revdims(x_shape.dims()); @@ -90,7 +90,7 @@ class ReverseV2Op : public XlaOpKernel { ctx->SetOutput(0, ctx->Input(0)); return; } - // ComputationBuilder::Rev() requires concrete values for dimensions arg. + // XlaBuilder::Rev() requires concrete values for dimensions arg. std::vector axes; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &axes)); diff --git a/tensorflow/compiler/tf2xla/kernels/reverse_sequence_op.cc b/tensorflow/compiler/tf2xla/kernels/reverse_sequence_op.cc index 6bc5d3adb091cd238974c5b69b7a2f8fe639cc68..0ed4c4707df71cf5f56ccfe0af506916f04bcdb5 100644 --- a/tensorflow/compiler/tf2xla/kernels/reverse_sequence_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reverse_sequence_op.cc @@ -54,7 +54,7 @@ class ReverseSequenceOp : public XlaOpKernel { "), ", "(", seq_lens_shape.num_elements(), " vs. ", input_shape.dim_size(batch_dim_))); - xla::ComputationBuilder* builder = context->builder(); + xla::XlaBuilder* builder = context->builder(); const auto input = context->Input(0); const auto seq_lens = context->Input(1); @@ -155,7 +155,7 @@ class ReverseSequenceOp : public XlaOpKernel { auto output = builder->GetTupleElement(loop_output, 2); // Mask out elements after the sequence length. 
- xla::ComputationDataHandle iota; + xla::XlaOp iota; OP_REQUIRES_OK( context, XlaHelpers::Iota(builder, seq_lens_type, max_seq_len, &iota)); std::vector dims(input_shape.dims(), 1); diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index 4cfa28a0ce3d7d1f24196ef6ef2775f840b2bcf1..1819fb543317eed15b2fe0518d74aba5c564697d 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -74,7 +74,7 @@ class ScanOp : public XlaOpKernel { return; } - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); std::vector window_strides(input_shape.dims(), 1); std::vector window_dims(input_shape.dims(), 1); @@ -91,8 +91,8 @@ class ScanOp : public XlaOpKernel { std::swap(padding[axis].first, padding[axis].second); } - xla::ComputationDataHandle init; - const xla::Computation* reducer; + xla::XlaOp init; + const xla::XlaComputation* reducer; if (sum_) { init = XlaHelpers::Zero(builder, dtype); reducer = ctx->GetOrCreateAdd(dtype); diff --git a/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc index 8433a29c4e203cac726ee6bf7f67a863447326ed..f2c63b4f9083ad3c7dd7cf318dc22def1e99fa9f 100644 --- a/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc @@ -102,7 +102,7 @@ class ScatterNdOp : public XlaOpKernel { OP_REQUIRES_OK(context, ValidateUpdateShape(buffer_shape, indices_shape, updates_shape)); - xla::ComputationBuilder* builder = context->builder(); + xla::XlaBuilder* builder = context->builder(); auto buffer = builder->Broadcast(XlaHelpers::Zero(builder, dtype), buffer_shape.dim_sizes()); auto indices = context->Input(0); diff --git a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc index 
498342a98881df0c6ff50007eacc1d5ef6196b57..664078ca16c6d5d4b57c4a8c661ad0848f30dd7d 100644 --- a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc @@ -17,7 +17,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" namespace tensorflow { namespace { @@ -62,16 +62,16 @@ class UnsortedSegmentSum : public XlaOpKernel { d, " differs ", data_shape.dim_size(d), " vs. ", indices_shape.dim_size(d))); } - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); TensorShape buffer_shape = data_shape; buffer_shape.RemoveDimRange(0, indices_shape.dims()); buffer_shape.InsertDim(0, num_segments); auto buffer = builder->Broadcast(XlaHelpers::Zero(builder, dtype_), buffer_shape.dim_sizes()); - auto combiner = - [](xla::ComputationDataHandle a, xla::ComputationDataHandle b, - xla::ComputationBuilder* builder) { return builder->Add(a, b); }; + auto combiner = [](xla::XlaOp a, xla::XlaOp b, xla::XlaBuilder* builder) { + return builder->Add(a, b); + }; auto result = XlaScatter(buffer, /*updates=*/data, indices, /*indices_are_vectors=*/false, combiner, builder); diff --git a/tensorflow/compiler/tf2xla/kernels/select_op.cc b/tensorflow/compiler/tf2xla/kernels/select_op.cc index 8081d3c41c436324c21858124121fecfac71cefa..f9f48164d63492b057d4950abfc2ca6153e44870 100644 --- a/tensorflow/compiler/tf2xla/kernels/select_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/select_op.cc @@ -40,7 +40,7 @@ class SelectOp : public XlaOpKernel { "'then' and 'else' must have the same size. but received: ", then_shape.DebugString(), " vs. 
", else_shape.DebugString())); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); auto cond_handle = ctx->Input(0); auto then_handle = ctx->Input(1); diff --git a/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc b/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc index d079b89861817a5639ac72b5ee49d76cb4506ae8..9ce01d0d44509bbcbea18afdb4210a675834bb6d 100644 --- a/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/sendrecv_ops.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" diff --git a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc index 463788b8b461c370a8e7ab4d79a94fc0143b8b45..bbf5ee8b12186a582666121b1df5d8b7d881863e 100644 --- a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc @@ -43,8 +43,8 @@ class SoftmaxOp : public XlaOpKernel { const DataType type = input_type(0); auto logits = ctx->Input(0); - xla::ComputationBuilder* const b = ctx->builder(); - const xla::Computation& max_func = *ctx->GetOrCreateMax(type); + xla::XlaBuilder* const b = ctx->builder(); + const xla::XlaComputation& max_func = *ctx->GetOrCreateMax(type); // Find the max in each batch, resulting in a tensor of shape [batch] auto logits_max = @@ -76,16 +76,15 @@ class SoftmaxOp : public XlaOpKernel { REGISTER_XLA_OP(Name("Softmax"), SoftmaxOp); REGISTER_XLA_OP(Name("LogSoftmax"), SoftmaxOp); -std::pair -CrossEntropyWithLogits(XlaOpKernelContext* ctx, 
DataType type, - const xla::ComputationDataHandle& logits, - const xla::ComputationDataHandle& labels) { - const xla::Computation& max_func = *ctx->GetOrCreateMax(type); +std::pair CrossEntropyWithLogits( + XlaOpKernelContext* ctx, DataType type, const xla::XlaOp& logits, + const xla::XlaOp& labels) { + const xla::XlaComputation& max_func = *ctx->GetOrCreateMax(type); const int kBatchDim = 0; const int kClassDim = 1; - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); // Find the max in each batch, resulting in a tensor of shape [batch] auto logits_max = b->Reduce(logits, XlaHelpers::MinValue(b, type), max_func, {kClassDim}); @@ -123,7 +122,7 @@ CrossEntropyWithLogits(XlaOpKernelContext* ctx, DataType type, // backprop: prob - labels, where // prob = exp(logits - max_logits) / sum(exp(logits - max_logits)) // (where the division broadcasts along the batch dimension) - xla::ComputationDataHandle backprop = + xla::XlaOp backprop = b->Sub(b->Div(exp_shifted_logits, sum_exp, {kBatchDim}), labels); return {loss, backprop}; } @@ -150,7 +149,7 @@ class SoftmaxXentWithLogitsOp : public XlaOpKernel { auto logits = ctx->Input(0); auto labels = ctx->Input(1); - xla::ComputationDataHandle loss, backprop; + xla::XlaOp loss, backprop; std::tie(loss, backprop) = CrossEntropyWithLogits(ctx, type, logits, labels); ctx->SetOutput(0, loss); @@ -191,10 +190,10 @@ class SparseSoftmaxXentWithLogitsOp : public XlaOpKernel { DataType logits_type = input_type(0); DataType indices_type = input_type(1); - xla::ComputationDataHandle indices = ctx->Input(1); + xla::XlaOp indices = ctx->Input(1); - xla::ComputationBuilder* builder = ctx->builder(); - xla::ComputationDataHandle labels; + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp labels; OP_REQUIRES_OK(ctx, XlaHelpers::OneHot( builder, depth, /*axis=*/1, input_type(1), labels_shape, @@ -207,7 +206,7 @@ class SparseSoftmaxXentWithLogitsOp : public XlaOpKernel { // Builds a vector of {batch_size} that 
is 0 if the index is in range, or // NaN otherwise; then add that vector to the labels to force out-of-range // values to NaNs. - xla::ComputationDataHandle nan_or_zero = builder->Select( + xla::XlaOp nan_or_zero = builder->Select( builder->And( builder->Le(XlaHelpers::Zero(builder, indices_type), indices), builder->Lt(indices, XlaHelpers::IntegerLiteral( @@ -218,7 +217,7 @@ class SparseSoftmaxXentWithLogitsOp : public XlaOpKernel { {batch_size})); labels = builder->Add(labels, nan_or_zero, {0}); - xla::ComputationDataHandle loss, backprop; + xla::XlaOp loss, backprop; std::tie(loss, backprop) = CrossEntropyWithLogits(ctx, logits_type, ctx->Input(0), labels); ctx->SetOutput(0, loss); diff --git a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc index 01b46e160d1f1f10a43faf7ca35afb42dfde6e33..ec077924b5b5af4a573c86c8d9aeb8623bd7f801 100644 --- a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc @@ -20,9 +20,8 @@ limitations under the License. namespace tensorflow { namespace { -void SpaceToBatch(XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, DataType input_dtype, - const TensorShape& input_tensor_shape, +void SpaceToBatch(XlaOpKernelContext* ctx, const xla::XlaOp& input, + DataType input_dtype, const TensorShape& input_tensor_shape, gtl::ArraySlice block_shape, const xla::Literal& paddings) { const int input_rank = input_tensor_shape.dims(); @@ -46,7 +45,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, ", 2] instead of ", xla::ShapeUtil::HumanString(paddings.shape()))); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); // 1. Zero-pad the start and end of dimensions `[1, ..., M]` of the // input according to `paddings` to produce `padded` of shape `padded_shape`. 
@@ -73,7 +72,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, errors::InvalidArgument( "The product of the block dimensions must be positive")); - xla::ComputationDataHandle padded = + xla::XlaOp padded = b->Pad(input, XlaHelpers::Zero(b, input_dtype), padding_config); // 2. Reshape `padded` to `reshaped_padded` of shape: @@ -101,8 +100,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, std::copy(remainder_shape.begin(), remainder_shape.end(), reshaped_padded_shape.begin() + 1 + 2 * block_rank); - xla::ComputationDataHandle reshaped_padded = - b->Reshape(padded, reshaped_padded_shape); + xla::XlaOp reshaped_padded = b->Reshape(padded, reshaped_padded_shape); // 3. Permute dimensions of `reshaped_padded` to produce // `permuted_reshaped_padded` of shape: @@ -121,7 +119,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, permutation[block_rank] = 0; std::iota(permutation.begin() + 1 + block_rank * 2, permutation.end(), 1 + block_rank * 2); - xla::ComputationDataHandle permuted_reshaped_padded = + xla::XlaOp permuted_reshaped_padded = b->Transpose(reshaped_padded, permutation); // 4. 
Reshape `permuted_reshaped_padded` to flatten `block_shape` into the @@ -142,8 +140,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, std::copy(remainder_shape.begin(), remainder_shape.end(), output_shape.begin() + 1 + block_rank); - xla::ComputationDataHandle output = - b->Reshape(permuted_reshaped_padded, output_shape); + xla::XlaOp output = b->Reshape(permuted_reshaped_padded, output_shape); ctx->SetOutput(0, output); } diff --git a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc index 806fda632cde64c1b37ae3b9199028d6b6b0a215..4c5886ee2a0f63d609f79fc690f457d93e284e3e 100644 --- a/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/spacetodepth_op.cc @@ -50,8 +50,8 @@ class SpaceToDepthOp : public XlaOpKernel { const gtl::InlinedVector input_shape = input_tensor_shape.dim_sizes(); - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp input = ctx->Input(0); int feature_dim = GetTensorFeatureDimIndex(input_rank, data_format_); int num_spatial_dims = GetTensorSpatialDims(input_rank, data_format_); @@ -135,7 +135,7 @@ class SpaceToDepthOp : public XlaOpKernel { // input_shape[1] / block_size_, block_size_, // input_shape[2] / block_size_, block_size_, // depth] - xla::ComputationDataHandle reshaped = b->Reshape(input, reshaped_shape); + xla::XlaOp reshaped = b->Reshape(input, reshaped_shape); // 2. Permute dimensions of `reshaped` to produce // `permuted_reshaped` of shape: @@ -145,8 +145,7 @@ class SpaceToDepthOp : public XlaOpKernel { // input_shape[2] / block_size_, // block_size_, block_size_, // depth] - xla::ComputationDataHandle permuted_reshaped = - b->Transpose(reshaped, transpose_order); + xla::XlaOp permuted_reshaped = b->Transpose(reshaped, transpose_order); // 3. 
Reshape `permuted_reshaped` to flatten `block_shape` into the // batch dimension, producing an output tensor of shape: @@ -156,8 +155,7 @@ class SpaceToDepthOp : public XlaOpKernel { // input_shape[2] / block_size_, // block_size_ * block_size_ * depth] // - xla::ComputationDataHandle output = - b->Reshape(permuted_reshaped, output_shape); + xla::XlaOp output = b->Reshape(permuted_reshaped, output_shape); ctx->SetOutput(0, output); } diff --git a/tensorflow/compiler/tf2xla/kernels/split_op.cc b/tensorflow/compiler/tf2xla/kernels/split_op.cc index 43c15e753805352875034dfd2c70a2a1ed9a4114..8958b2e7701e62d802e37a895c14b662ecf9786a 100644 --- a/tensorflow/compiler/tf2xla/kernels/split_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/split_op.cc @@ -124,7 +124,7 @@ class SplitVOp : public XlaOpKernel { input_shape.dims(), "), but got ", split_dim_orig)); - xla::ComputationDataHandle input = ctx->Input(0); + xla::XlaOp input = ctx->Input(0); OP_REQUIRES(ctx, input_shape.dims() > 0, errors::InvalidArgument("Can't split a 0 dimensional input")); diff --git a/tensorflow/compiler/tf2xla/kernels/stack_ops.cc b/tensorflow/compiler/tf2xla/kernels/stack_ops.cc index 1a78c7ab9be701d3d02285ed21604f0f856b3f1f..0fb05a2be7b1034d6c2e864643b69647d622ede7 100644 --- a/tensorflow/compiler/tf2xla/kernels/stack_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/stack_ops.cc @@ -38,13 +38,13 @@ limitations under the License. 
namespace tensorflow { namespace { -Status GetStackShape(xla::ComputationBuilder* builder, XlaResource* resource, +Status GetStackShape(xla::XlaBuilder* builder, XlaResource* resource, TensorShape* stack_shape) { auto shape_or_status = builder->GetShape(resource->value()); if (!shape_or_status.ok()) { return shape_or_status.status(); } - xla::Shape shape = *shape_or_status.ValueOrDie(); + xla::Shape shape = shape_or_status.ValueOrDie(); TF_RET_CHECK(xla::ShapeUtil::IsTuple(shape)); return XLAShapeToTensorShape(xla::ShapeUtil::GetTupleElementShape(shape, 0), stack_shape); @@ -60,9 +60,8 @@ Status GetStackShape(xla::ComputationBuilder* builder, XlaResource* resource, // // TODO(phawkins): consider changing the API of the stack operators to // allow an optional element shape at stack construction time. -Status MaybeInitializeStack(xla::ComputationBuilder* builder, - XlaResource* resource, DataType dtype, - const TensorShape& elem_shape) { +Status MaybeInitializeStack(xla::XlaBuilder* builder, XlaResource* resource, + DataType dtype, const TensorShape& elem_shape) { if (resource->type() != dtype) { return errors::InvalidArgument( "Stack dtype is ", DataTypeString(resource->type()), @@ -75,8 +74,6 @@ Status MaybeInitializeStack(xla::ComputationBuilder* builder, if (!resource->initialized()) { // Stack has not been initialized. - xla::ComputationDataHandle zero = - XlaHelpers::Zero(builder, resource->type()); TF_RETURN_IF_ERROR(resource->SetTypeAndShape(dtype, elem_shape)); TF_RETURN_IF_ERROR(resource->SetZeroValue(builder)); } else { @@ -111,7 +108,7 @@ class StackOp : public XlaOpKernel { // We defer initializing the Stack resource until we see the first push. // Otherwise we do not know the shape of the stack elements. 
- xla::ComputationDataHandle value; + xla::XlaOp value; XlaContext& xc = XlaContext::Get(ctx); XlaResource* resource; string name = strings::StrCat("Stack: ", stack_name_); @@ -138,7 +135,7 @@ class StackPushOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); TensorShape elem_shape = ctx->InputShape(1); XlaResource* resource; @@ -147,9 +144,9 @@ class StackPushOp : public XlaOpKernel { // Initializes the Stack, if the element shape was not already known. OP_REQUIRES_OK(ctx, MaybeInitializeStack(b, resource, dtype_, elem_shape)); - xla::ComputationDataHandle ta = b->GetTupleElement(resource->value(), 0); - xla::ComputationDataHandle index = b->GetTupleElement(resource->value(), 1); - xla::ComputationDataHandle value = ctx->Input(1); + xla::XlaOp ta = b->GetTupleElement(resource->value(), 0); + xla::XlaOp index = b->GetTupleElement(resource->value(), 1); + xla::XlaOp value = ctx->Input(1); // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0]. 
auto start_indices = @@ -184,7 +181,7 @@ class StackPopOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); @@ -199,9 +196,9 @@ class StackPopOp : public XlaOpKernel { TensorShape stack_shape; OP_REQUIRES_OK(ctx, GetStackShape(b, resource, &stack_shape)); - xla::ComputationDataHandle state = resource->value(); - xla::ComputationDataHandle ta = b->GetTupleElement(state, 0); - xla::ComputationDataHandle index = b->GetTupleElement(state, 1); + xla::XlaOp state = resource->value(); + xla::XlaOp ta = b->GetTupleElement(state, 0); + xla::XlaOp index = b->GetTupleElement(state, 1); index = b->Sub(index, b->ConstantR0(1)); OP_REQUIRES_OK(ctx, resource->SetValue(b->Tuple({ta, index}))); @@ -216,8 +213,7 @@ class StackPopOp : public XlaOpKernel { // TODO(phawkins): We don't check the index is in bounds --- there is no // error mechanism in XLA. - xla::ComputationDataHandle read = - b->DynamicSlice(ta, start_indices, slice_shape); + xla::XlaOp read = b->DynamicSlice(ta, start_indices, slice_shape); // Remove the leading '1' dimension. std::vector value_shape(slice_shape.begin() + 1, slice_shape.end()); diff --git a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc index 5bb773d97fc5ce90dabceeefd5c29d916597f5ff..a99d4ddc7c4956f7144512a9bdf6f4c2eb0f944f 100644 --- a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc @@ -30,9 +30,8 @@ namespace tensorflow { namespace { // Rotates a 32-bit integer 'v' left by 'distance' bits. 
-xla::ComputationDataHandle RotateLeftS32(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& v, - int distance) { +xla::XlaOp RotateLeftS32(xla::XlaBuilder* builder, const xla::XlaOp& v, + int distance) { return builder->Or( builder->ShiftLeft(v, builder->ConstantR0(distance)), builder->ShiftRightLogical(v, builder->ConstantR0(32 - distance))); @@ -40,25 +39,24 @@ xla::ComputationDataHandle RotateLeftS32(xla::ComputationBuilder* builder, // TODO(b/65209188): add a primitive XOR to XLA and call it here, rather than // building XOR out of other bitwise operators. -xla::ComputationDataHandle BitwiseXor(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& y) { +xla::XlaOp BitwiseXor(xla::XlaBuilder* builder, const xla::XlaOp& x, + const xla::XlaOp& y) { return builder->Or(builder->And(x, builder->Not(y)), builder->And(builder->Not(x), y)); } -using ThreeFry2x32State = std::array; +using ThreeFry2x32State = std::array; // Implements the ThreeFry counter-based PRNG algorithm. // Salmon et al. SC 2011. Parallel random numbers: as easy as 1, 2, 3. // http://www.thesalmons.org/john/random123/papers/random123sc11.pdf -ThreeFry2x32State ThreeFry2x32(xla::ComputationBuilder* builder, +ThreeFry2x32State ThreeFry2x32(xla::XlaBuilder* builder, ThreeFry2x32State input, ThreeFry2x32State key) { // Rotation distances specified by the Threefry2x32 algorithm. constexpr std::array rotations = {13, 15, 26, 6, 17, 29, 16, 24}; ThreeFry2x32State x; - std::array ks; + std::array ks; // 0x1BD11BDA is a parity constant specified by the ThreeFry2x32 algorithm. 
ks[2] = builder->ConstantR0(0x1BD11BDA); for (int i = 0; i < 2; ++i) { @@ -121,10 +119,9 @@ ThreeFry2x32State ThreeFry2x32(xla::ComputationBuilder* builder, // Returns a tensor of 'shape' random values uniformly distributed in the range // [minval, maxval) -xla::ComputationDataHandle RandomUniform(xla::ComputationBuilder* builder, - const xla::ComputationDataHandle& seed, - const TensorShape& shape, - double minval, double maxval) { +xla::XlaOp RandomUniform(xla::XlaBuilder* builder, const xla::XlaOp& seed, + const TensorShape& shape, double minval, + double maxval) { // Split the seed into two 32-bit scalars to form a key. auto seed0 = builder->Reshape(builder->Slice(seed, {0}, {1}, {1}), {}); auto seed1 = builder->Reshape(builder->Slice(seed, {1}, {2}, {1}), {}); @@ -178,9 +175,8 @@ xla::ComputationDataHandle RandomUniform(xla::ComputationBuilder* builder, // p = sum_{i=1}^n gq[i]*w^i // } // return p*x -xla::ComputationDataHandle ErfInvF32(xla::ComputationBuilder* b, - const xla::ComputationDataHandle& x, - const TensorShape& shape) { +xla::XlaOp ErfInvF32(xla::XlaBuilder* b, const xla::XlaOp& x, + const TensorShape& shape) { constexpr int kDegree = 9; constexpr std::array w_less_than_5_constants = { 2.81022636e-08f, 3.43273939e-07f, -3.5233877e-06f, @@ -220,7 +216,7 @@ class StatelessRandomUniformOp : public XlaOpKernel { : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); TensorShape shape; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape)); @@ -229,7 +225,7 @@ class StatelessRandomUniformOp : public XlaOpKernel { OP_REQUIRES(ctx, seed_shape.dims() == 1 && seed_shape.dim_size(0) == 2, errors::InvalidArgument("seed must have shape [2], not ", seed_shape.DebugString())); - xla::ComputationDataHandle seed = ctx->Input(1); + xla::XlaOp seed = ctx->Input(1); ctx->SetOutput(0, RandomUniform(builder, seed, shape, 0.0, 1.0)); } @@ -257,9 +253,10 
@@ class StatelessRandomNormalOp : public XlaOpKernel { OP_REQUIRES(ctx, seed_shape == TensorShape({2}), errors::InvalidArgument("seed must have shape [2], not ", seed_shape.DebugString())); - xla::ComputationDataHandle seed = ctx->Input(1); - xla::ComputationBuilder* builder = ctx->builder(); - auto uniform = RandomUniform(builder, seed, shape, -1.0, 1.0); + xla::XlaOp seed = ctx->Input(1); + xla::XlaBuilder* builder = ctx->builder(); + auto uniform = + RandomUniform(builder, seed, shape, std::nextafter(-1.0f, 0.0f), 1.0); // Convert uniform distribution to normal distribution by computing // sqrt(2) * erfinv(x) auto normal = builder->Mul(builder->ConstantR0(std::sqrt(2.0)), diff --git a/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc b/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc index 6204aa4e27000fddec7f5b82b2198d37956f6aba..55254c746e5ebaf6b468c24ab59b968bf0d6260b 100644 --- a/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/strided_slice_op.cc @@ -90,7 +90,7 @@ class StridedSliceOp : public XlaOpKernel { } } - xla::ComputationDataHandle slice = ctx->Input(0); + xla::XlaOp slice = ctx->Input(0); if (!dimensions_to_reverse.empty()) { slice = ctx->builder()->Rev(slice, dimensions_to_reverse); } @@ -168,7 +168,7 @@ class StridedSliceGradOp : public XlaOpKernel { auto zero = XlaHelpers::Zero(ctx->builder(), ctx->expected_output_dtype(0)); - xla::ComputationDataHandle grad = ctx->Input(4); + xla::XlaOp grad = ctx->Input(4); // Undo any new/shrink axes. 
grad = ctx->builder()->Reshape(grad, processing_shape.dim_sizes()); @@ -255,7 +255,7 @@ class StridedSliceAssignOp : public XlaOpKernel { &strides_tensor)); TensorShape lhs_shape; - xla::ComputationDataHandle lhs; + xla::XlaOp lhs; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, dtype_, &lhs_shape, &lhs)); const TensorShape rhs_shape = ctx->InputShape(4); @@ -284,7 +284,7 @@ class StridedSliceAssignOp : public XlaOpKernel { " does not match r-value shape ", rhs_shape.DebugString(), ". Automatic broadcasting not yet implemented.")); - xla::ComputationDataHandle rhs = ctx->Input(4); + xla::XlaOp rhs = ctx->Input(4); gtl::InlinedVector dimensions_to_reverse; gtl::InlinedVector slice_begin, slice_dims; diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc index 000b50af6bd86b7268c016865fb0856c16053ece..9adee78a1fd1fb9a12afae83197425c328b5fe7e 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc @@ -47,7 +47,7 @@ namespace { // the TensorArray with elements of `elem_shape`. For both initialized and // uninitialized TensorArrays, checks that the tensor has a type compatible with // 'dtype' and shape compatible with 'elem_shape'. 
-Status MaybeInitializeTensorArray(xla::ComputationBuilder* builder, +Status MaybeInitializeTensorArray(xla::XlaBuilder* builder, XlaResource* resource, DataType dtype, const TensorShape& elem_shape) { if (resource->kind() != XlaResource::kTensorArray) { @@ -64,9 +64,6 @@ Status MaybeInitializeTensorArray(xla::ComputationBuilder* builder, << resource->name() << " size " << resource->tensor_array_size(); if (!resource->initialized()) { - xla::ComputationDataHandle zero = - XlaHelpers::Zero(builder, resource->type()); - TF_RETURN_IF_ERROR(resource->SetTypeAndShape(dtype, elem_shape)); TF_RETURN_IF_ERROR(resource->SetZeroValue(builder)); } else { @@ -77,7 +74,7 @@ Status MaybeInitializeTensorArray(xla::ComputationBuilder* builder, } TensorShape shape; TF_RETURN_IF_ERROR( - XLAShapeToTensorShape(*shape_or_status.ValueOrDie(), &shape)); + XLAShapeToTensorShape(shape_or_status.ValueOrDie(), &shape)); TensorShape ta_shape; ta_shape.AddDim(resource->tensor_array_size()); @@ -114,23 +111,21 @@ Status CheckTensorArrayIsInitialized(const string& op_name, } Status GetTensorArrayShape(const XlaResource* resource, - xla::ComputationBuilder* builder, - TensorShape* shape) { + xla::XlaBuilder* builder, TensorShape* shape) { *shape = resource->shape(); shape->InsertDim(0, resource->tensor_array_size()); return Status::OK(); } -// Like ComputationBuilder::DynamicUpdateSlice, but adds 'update' to the +// Like XlaBuilder::DynamicUpdateSlice, but adds 'update' to the // relevant slice of 'operand'. 
-xla::ComputationDataHandle DynamicAddSlice( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& operand, - const xla::ComputationDataHandle& update, - const gtl::ArraySlice& update_dims, - const xla::ComputationDataHandle& start_indices) { - xla::ComputationDataHandle current = +xla::XlaOp DynamicAddSlice(xla::XlaBuilder* builder, const xla::XlaOp& operand, + const xla::XlaOp& update, + const gtl::ArraySlice& update_dims, + const xla::XlaOp& start_indices) { + xla::XlaOp current = builder->DynamicSlice(operand, start_indices, update_dims); - xla::ComputationDataHandle sum = builder->Add(current, update); + xla::XlaOp sum = builder->Add(current, update); return builder->DynamicUpdateSlice(operand, sum, start_indices); } @@ -155,18 +150,18 @@ class TensorArrayOp : public XlaOpKernel { OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument("TensorArray size must be >= 0")); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); // Initializes the TensorArray value if we know the element shape. // Otherwise, defer initialization to the first write. 
- xla::ComputationDataHandle value; + xla::XlaOp value; TensorShape shape; if (element_shape_.IsFullyDefined()) { CHECK(element_shape_.AsTensorShape(&shape)); TensorShape ta_shape; ta_shape.AddDim(size); ta_shape.AppendShape(shape); - xla::ComputationDataHandle zero = XlaHelpers::Zero(b, dtype_); + xla::XlaOp zero = XlaHelpers::Zero(b, dtype_); value = b->Broadcast(zero, ta_shape.dim_sizes()); } @@ -202,7 +197,7 @@ class TensorArrayWriteOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); TensorShape elem_shape = ctx->InputShape(2); @@ -213,10 +208,10 @@ class TensorArrayWriteOp : public XlaOpKernel { OP_REQUIRES_OK(ctx, MaybeInitializeTensorArray(b, resource, dtype_, elem_shape)); - xla::ComputationDataHandle ta = resource->value(); - xla::ComputationDataHandle index = ctx->Input(1); - xla::ComputationDataHandle value = ctx->Input(2); - xla::ComputationDataHandle flow = ctx->Input(3); + xla::XlaOp ta = resource->value(); + xla::XlaOp index = ctx->Input(1); + xla::XlaOp value = ctx->Input(2); + xla::XlaOp flow = ctx->Input(3); // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0]. 
auto start_indices = @@ -227,7 +222,7 @@ class TensorArrayWriteOp : public XlaOpKernel { slice_shape.InsertDim(0, 1LL); auto update = b->Reshape(value, slice_shape.dim_sizes()); - xla::ComputationDataHandle written = + xla::XlaOp written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), start_indices); OP_REQUIRES_OK(ctx, resource->SetValue(written)); @@ -249,7 +244,7 @@ class TensorArrayReadOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); @@ -259,8 +254,8 @@ class TensorArrayReadOp : public XlaOpKernel { TensorShape ta_shape; OP_REQUIRES_OK(ctx, GetTensorArrayShape(resource, b, &ta_shape)); - xla::ComputationDataHandle ta = resource->value(); - xla::ComputationDataHandle index = ctx->Input(1); + xla::XlaOp ta = resource->value(); + xla::XlaOp index = ctx->Input(1); // start_indices of the DynamicSlice are [index, 0, 0, ..., 0]. auto start_indices = @@ -270,8 +265,7 @@ class TensorArrayReadOp : public XlaOpKernel { auto slice_shape = ta_shape.dim_sizes(); slice_shape[0] = 1LL; - xla::ComputationDataHandle read = - b->DynamicSlice(ta, start_indices, slice_shape); + xla::XlaOp read = b->DynamicSlice(ta, start_indices, slice_shape); // Remove the leading '1' dimension. 
std::vector value_shape(slice_shape.begin() + 1, slice_shape.end()); @@ -293,7 +287,7 @@ class TensorArrayGatherOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); @@ -309,7 +303,7 @@ class TensorArrayGatherOp : public XlaOpKernel { auto indices = ctx->Input(1); DataType index_type = ctx->input_type(1); - xla::ComputationDataHandle ta = resource->value(); + xla::XlaOp ta = resource->value(); // Look for the case where the gather takes a simple slice from the // tensor array (0, 1, 2, 3, 4, ..., N) @@ -337,7 +331,7 @@ class TensorArrayGatherOp : public XlaOpKernel { } } - xla::ComputationDataHandle gather; + xla::XlaOp gather; OP_REQUIRES_OK( ctx, XlaGather(ta, ta_shape, indices, indices_shape, /*axis=*/0, @@ -360,7 +354,7 @@ class TensorArrayScatterOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); const TensorShape value_shape = ctx->InputShape(2); @@ -375,11 +369,11 @@ class TensorArrayScatterOp : public XlaOpKernel { OP_REQUIRES(ctx, indices_shape.dims() >= 1, errors::InvalidArgument("indices must be rank 1")); const int num_indices = indices_shape.dim_size(0); - const xla::ComputationDataHandle indices = ctx->Input(1); + const xla::XlaOp indices = ctx->Input(1); - xla::ComputationDataHandle ta = resource->value(); - const xla::ComputationDataHandle value = ctx->Input(2); - const xla::ComputationDataHandle flow = ctx->Input(3); + xla::XlaOp ta = resource->value(); + const xla::XlaOp value = ctx->Input(2); + const xla::XlaOp flow = ctx->Input(3); // Look for the case where the scatter is for each sub-tensor in order. The // tensor array implementation allows for this to be a straight addition. 
@@ -443,7 +437,7 @@ class TensorArrayConcatOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); @@ -453,7 +447,7 @@ class TensorArrayConcatOp : public XlaOpKernel { TensorShape ta_shape; OP_REQUIRES_OK(ctx, GetTensorArrayShape(resource, b, &ta_shape)); - xla::ComputationDataHandle ta = resource->value(); + xla::XlaOp ta = resource->value(); auto ta_dims = ta_shape.dim_sizes(); std::vector shape(ta_dims.begin() + 1, ta_dims.end()); @@ -503,12 +497,12 @@ class TensorArraySplitOp : public XlaOpKernel { TensorShape elem_shape = value_shape; elem_shape.set_dim(0, length); - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); OP_REQUIRES_OK(ctx, MaybeInitializeTensorArray(b, resource, dtype_, elem_shape)); - xla::ComputationDataHandle ta = resource->value(); + xla::XlaOp ta = resource->value(); TensorShape ta_shape; ta_shape.AddDim(resource->tensor_array_size()); @@ -520,8 +514,8 @@ class TensorArraySplitOp : public XlaOpKernel { "TensorArray's size is not equal to the size of lengths (", lengths.size(), " vs. 
", resource->tensor_array_size(), ")")); - const xla::ComputationDataHandle value = ctx->Input(1); - const xla::ComputationDataHandle flow = ctx->Input(3); + const xla::XlaOp value = ctx->Input(1); + const xla::XlaOp flow = ctx->Input(3); OP_REQUIRES(ctx, value_shape.num_elements() == ta_shape.num_elements(), errors::InvalidArgument("mismatched element count ", @@ -569,7 +563,7 @@ class TensorArrayGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(0, &resource)); diff --git a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc index 9aefcd4fc7f94a1dba1c56273c55d0b98fbbfaf2..e91075196bd8414939888e22b5483ad637487af6 100644 --- a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc @@ -112,7 +112,7 @@ class TileOp : public XlaOpKernel { flattened.push_back(i); flattened.push_back(i + output_shape.size()); } - xla::ComputationDataHandle output = + xla::XlaOp output = ctx->builder()->Reshape(broadcasted, flattened, output_shape); ctx->SetOutput(0, output); diff --git a/tensorflow/compiler/tf2xla/kernels/training_ops.cc b/tensorflow/compiler/tf2xla/kernels/training_ops.cc index f750f7003be288461f5f10455e58932d1b4e4524..34caefa050c0d58f5f7bad557286b6ed64b996ad 100644 --- a/tensorflow/compiler/tf2xla/kernels/training_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/training_ops.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" @@ -30,8 +30,8 @@ class ResourceApplyGradientDescent : public XlaOpKernel { explicit ResourceApplyGradientDescent(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle handle; - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaOp handle; + xla::XlaBuilder* b = ctx->builder(); DataType type = ctx->input_type(1); TensorShape var_shape; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &var_shape, &handle)); @@ -63,12 +63,12 @@ class ResourceApplyMomentum : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); DataType type = ctx->input_type(2); TensorShape var_shape, accum_shape; - xla::ComputationDataHandle var, accum; + xla::XlaOp var, accum; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &var_shape, &var)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, type, &accum_shape, &accum)); @@ -93,9 +93,9 @@ class ResourceApplyMomentum : public XlaOpKernel { errors::InvalidArgument("momentum is not a scalar: ", momentum_shape.DebugString())); - xla::ComputationDataHandle lr = ctx->Input(2); - xla::ComputationDataHandle grad = ctx->Input(3); - xla::ComputationDataHandle momentum = ctx->Input(4); + xla::XlaOp lr = ctx->Input(2); + xla::XlaOp grad = ctx->Input(3); + xla::XlaOp momentum = ctx->Input(4); accum = b->Add(b->Mul(accum, momentum), grad); if (use_nesterov_) { @@ -121,12 +121,12 @@ class ResourceApplyAdagrad : public XlaOpKernel { 
explicit ResourceApplyAdagrad(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); DataType type = ctx->input_type(2); TensorShape var_shape, accum_shape; - xla::ComputationDataHandle var, accum; + xla::XlaOp var, accum; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &var_shape, &var)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, type, &accum_shape, &accum)); @@ -146,8 +146,8 @@ class ResourceApplyAdagrad : public XlaOpKernel { "var and grad do not have the same shape", var_shape.DebugString(), " ", grad_shape.DebugString())); - xla::ComputationDataHandle lr = ctx->Input(2); - xla::ComputationDataHandle grad = ctx->Input(3); + xla::XlaOp lr = ctx->Input(2); + xla::XlaOp grad = ctx->Input(3); accum = b->Add(accum, b->Pow(grad, XlaHelpers::FloatLiteral(b, type, 2.0))); var = b->Sub( @@ -168,7 +168,7 @@ class ResourceApplyAdam : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { TensorShape var_shape, m_shape, v_shape; - xla::ComputationDataHandle var, m, v; + xla::XlaOp var, m, v; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, dtype_, &var_shape, &var)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, dtype_, &m_shape, &m)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(2, dtype_, &v_shape, &v)); @@ -213,25 +213,25 @@ class ResourceApplyAdam : public XlaOpKernel { "var and grad do not have the same shape", var_shape.DebugString(), " ", grad_shape.DebugString())); - xla::ComputationDataHandle beta1_power = ctx->Input(3); - xla::ComputationDataHandle beta2_power = ctx->Input(4); - xla::ComputationDataHandle lr = ctx->Input(5); - xla::ComputationDataHandle beta1 = ctx->Input(6); - xla::ComputationDataHandle beta2 = ctx->Input(7); - xla::ComputationDataHandle epsilon = ctx->Input(8); - xla::ComputationDataHandle grad = ctx->Input(9); + xla::XlaOp beta1_power = ctx->Input(3); + xla::XlaOp beta2_power = ctx->Input(4); + 
xla::XlaOp lr = ctx->Input(5); + xla::XlaOp beta1 = ctx->Input(6); + xla::XlaOp beta2 = ctx->Input(7); + xla::XlaOp epsilon = ctx->Input(8); + xla::XlaOp grad = ctx->Input(9); // alpha <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) // m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t // v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t // variable <- variable - alpha * m_t / (sqrt(v_t) + epsilon) - xla::ComputationBuilder* b = ctx->builder(); - xla::ComputationDataHandle half = XlaHelpers::FloatLiteral(b, dtype_, 0.5); - xla::ComputationDataHandle one = XlaHelpers::FloatLiteral(b, dtype_, 1.0); - xla::ComputationDataHandle two = XlaHelpers::FloatLiteral(b, dtype_, 2.0); + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp half = XlaHelpers::FloatLiteral(b, dtype_, 0.5); + xla::XlaOp one = XlaHelpers::FloatLiteral(b, dtype_, 1.0); + xla::XlaOp two = XlaHelpers::FloatLiteral(b, dtype_, 2.0); - xla::ComputationDataHandle alpha = + xla::XlaOp alpha = b->Div(b->Mul(lr, b->Pow(b->Sub(one, beta2_power), half)), b->Sub(one, beta1_power)); m = b->Add(m, b->Mul(b->Sub(grad, m), b->Sub(one, beta1))); @@ -255,12 +255,12 @@ class ResourceApplyRMSProp : public XlaOpKernel { explicit ResourceApplyRMSProp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); DataType type = ctx->input_type(3); TensorShape var_shape, ms_shape, mom_shape; - xla::ComputationDataHandle var, ms, mom; + xla::XlaOp var, ms, mom; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &var_shape, &var)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, type, &ms_shape, &ms)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(2, type, &mom_shape, &mom)); @@ -297,11 +297,11 @@ class ResourceApplyRMSProp : public XlaOpKernel { "var and grad do not have the same shape", var_shape.DebugString(), " ", grad_shape.DebugString())); - xla::ComputationDataHandle lr = ctx->Input(3); - 
xla::ComputationDataHandle rho = ctx->Input(4); - xla::ComputationDataHandle momentum = ctx->Input(5); - xla::ComputationDataHandle epsilon = ctx->Input(6); - xla::ComputationDataHandle grad = ctx->Input(7); + xla::XlaOp lr = ctx->Input(3); + xla::XlaOp rho = ctx->Input(4); + xla::XlaOp momentum = ctx->Input(5); + xla::XlaOp epsilon = ctx->Input(6); + xla::XlaOp grad = ctx->Input(7); // ms <- rho * ms_{t-1} + (1-rho) * grad * grad // mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) @@ -320,16 +320,16 @@ class ResourceApplyRMSProp : public XlaOpKernel { // ms <- grad**2 (1 - rho) + ms * rho // // Which is the equation listed above. - xla::ComputationDataHandle new_ms = b->Add( + xla::XlaOp new_ms = b->Add( ms, b->Mul(b->Sub(b->Pow(grad, XlaHelpers::FloatLiteral(b, type, 2.0)), ms), b->Sub(XlaHelpers::FloatLiteral(b, type, 1.0), rho))); - xla::ComputationDataHandle new_mom = + xla::XlaOp new_mom = b->Add(b->Mul(mom, momentum), b->Mul(b->Mul(grad, lr), b->Pow(b->Add(new_ms, epsilon), XlaHelpers::FloatLiteral(b, type, -0.5)))); - xla::ComputationDataHandle new_var = b->Sub(var, new_mom); + xla::XlaOp new_var = b->Sub(var, new_mom); OP_REQUIRES_OK(ctx, ctx->AssignVariable(0, type, new_var)); OP_REQUIRES_OK(ctx, ctx->AssignVariable(1, type, new_ms)); @@ -341,10 +341,10 @@ REGISTER_XLA_OP(Name("ResourceApplyRMSProp").TypeConstraint("T", kFloatTypes), void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype, bool has_l2_shrinkage) { - xla::ComputationBuilder* b = ctx->builder(); + xla::XlaBuilder* b = ctx->builder(); TensorShape var_shape, accum_shape, linear_shape; - xla::ComputationDataHandle var, accum, linear; + xla::XlaOp var, accum, linear; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, dtype, &var_shape, &var)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, dtype, &accum_shape, &accum)); OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(2, dtype, &linear_shape, &linear)); @@ -399,12 +399,12 @@ void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype, 
errors::InvalidArgument("lr_power is not a scalar: ", lr_power_shape.DebugString())); - xla::ComputationDataHandle grad = ctx->Input(3); - xla::ComputationDataHandle lr = ctx->Input(4); - xla::ComputationDataHandle l1 = ctx->Input(5); - xla::ComputationDataHandle l2 = ctx->Input(6); - xla::ComputationDataHandle l2_shrinkage; - xla::ComputationDataHandle lr_power; + xla::XlaOp grad = ctx->Input(3); + xla::XlaOp lr = ctx->Input(4); + xla::XlaOp l1 = ctx->Input(5); + xla::XlaOp l2 = ctx->Input(6); + xla::XlaOp l2_shrinkage; + xla::XlaOp lr_power; if (has_l2_shrinkage) { l2_shrinkage = ctx->Input(7); lr_power = ctx->Input(8); @@ -421,26 +421,23 @@ void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype, // var = (linear_clipped - linear) / quadratic // accum = new_accum - xla::ComputationDataHandle two = XlaHelpers::FloatLiteral(b, dtype, 2.0); - xla::ComputationDataHandle grad_to_use; + xla::XlaOp two = XlaHelpers::FloatLiteral(b, dtype, 2.0); + xla::XlaOp grad_to_use; if (has_l2_shrinkage) { grad_to_use = b->Add(grad, b->Mul(two, b->Mul(l2_shrinkage, var))); } else { grad_to_use = grad; } - xla::ComputationDataHandle new_accum = - b->Add(accum, b->Pow(grad_to_use, two)); - xla::ComputationDataHandle new_accum_lr_pow = - b->Pow(new_accum, b->Neg(lr_power)); - xla::ComputationDataHandle accum_lr_pow = b->Pow(accum, b->Neg(lr_power)); + xla::XlaOp new_accum = b->Add(accum, b->Pow(grad_to_use, two)); + xla::XlaOp new_accum_lr_pow = b->Pow(new_accum, b->Neg(lr_power)); + xla::XlaOp accum_lr_pow = b->Pow(accum, b->Neg(lr_power)); linear = b->Add( linear, b->Sub(grad_to_use, b->Mul(b->Div(b->Sub(new_accum_lr_pow, accum_lr_pow), lr), var))); - xla::ComputationDataHandle linear_clipped = b->Clamp(b->Neg(l1), linear, l1); - xla::ComputationDataHandle quadratic = - b->Add(b->Div(new_accum_lr_pow, lr), b->Mul(two, l2)); + xla::XlaOp linear_clipped = b->Clamp(b->Neg(l1), linear, l1); + xla::XlaOp quadratic = b->Add(b->Div(new_accum_lr_pow, lr), b->Mul(two, l2)); var = 
b->Div(b->Sub(linear_clipped, linear), quadratic); accum = new_accum; diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 7cb47f908d4ff43f455f1e77c53cd3cc956579ee..a4f50f52ebe8b1ed7df862996d64e135ea1d0ac5 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/kernel_def_builder.h" namespace tensorflow { @@ -33,9 +33,9 @@ namespace { public: \ explicit NAME##Op(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} \ void Compile(XlaOpKernelContext* ctx) { \ - xla::ComputationBuilder* b = ctx->builder(); \ - xla::ComputationDataHandle x = ctx->Input(0); \ - xla::ComputationDataHandle y = COMPUTATION; \ + xla::XlaBuilder* b = ctx->builder(); \ + xla::XlaOp x = ctx->Input(0); \ + xla::XlaOp y = COMPUTATION; \ ctx->SetOutput(0, y); \ } \ }; \ @@ -124,9 +124,8 @@ XLAJIT_MAKE_UNARY(Neg, b->Neg(x)); // Implements Banker's rounding: numbers that are equidistant between two // integers are rounded towards even. -static xla::ComputationDataHandle Round(xla::ComputationBuilder* b, - DataType dtype, - const xla::ComputationDataHandle& x) { +static xla::XlaOp Round(xla::XlaBuilder* b, DataType dtype, + const xla::XlaOp& x) { auto half = XlaHelpers::FloatLiteral(b, dtype, 0.5); auto one = XlaHelpers::FloatLiteral(b, dtype, 1.0); auto two = XlaHelpers::FloatLiteral(b, dtype, 2.0); @@ -148,9 +147,8 @@ XLAJIT_MAKE_UNARY(Rsqrt, b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5))); // Expresses sigmoid as a rescaled tanh: sigmoid(x) == (tanh(x/2) + 1) / 2. 
-static xla::ComputationDataHandle Sigmoid(xla::ComputationBuilder* b, - DataType dtype, - const xla::ComputationDataHandle& x) { +static xla::XlaOp Sigmoid(xla::XlaBuilder* b, DataType dtype, + const xla::XlaOp& x) { auto half = XlaHelpers::FloatLiteral(b, dtype, 0.5); return b->Add(half, b->Mul(half, b->Tanh(b->Mul(half, x)))); } @@ -162,20 +160,18 @@ XLAJIT_MAKE_UNARY(Sinh, b->Mul(b->Sub(b->Exp(x), b->Exp(b->Neg(x))), XlaHelpers::FloatLiteral(b, input_type(0), 0.5))); -static xla::ComputationDataHandle Softplus( - xla::ComputationBuilder* b, DataType dtype, - const xla::ComputationDataHandle& features) { - xla::ComputationDataHandle threshold = - b->Add(b->Log(XlaHelpers::Epsilon(b, dtype)), - XlaHelpers::FloatLiteral(b, dtype, 2.0)); +static xla::XlaOp Softplus(xla::XlaBuilder* b, DataType dtype, + const xla::XlaOp& features) { + xla::XlaOp threshold = b->Add(b->Log(XlaHelpers::Epsilon(b, dtype)), + XlaHelpers::FloatLiteral(b, dtype, 2.0)); // Value above which exp(x) may overflow, but softplus(x) == x // is within machine epsilon. - xla::ComputationDataHandle too_large = b->Gt(features, b->Neg(threshold)); + xla::XlaOp too_large = b->Gt(features, b->Neg(threshold)); // Value below which exp(x) may underflow, but softplus(x) == exp(x) // is within machine epsilon. 
- xla::ComputationDataHandle too_small = b->Lt(features, threshold); - xla::ComputationDataHandle features_exp = b->Exp(features); - xla::ComputationDataHandle output = b->Select( + xla::XlaOp too_small = b->Lt(features, threshold); + xla::XlaOp features_exp = b->Exp(features); + xla::XlaOp output = b->Select( too_large, features, b->Select(too_small, features_exp, b->Log(b->Add(features_exp, XlaHelpers::One(b, dtype))))); diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc index 71173f5aead47702f0ed9e95b827a6fefd9b7efd..6109db8e89e5ee67e0635d26e258bfe7cb70a15d 100644 --- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/types.h" @@ -48,7 +48,7 @@ class ReadVariableOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle handle; + xla::XlaOp handle; OP_REQUIRES_OK( ctx, ctx->ReadVariableInput(0, dtype_, /*shape=*/nullptr, &handle)); ctx->SetOutput(0, handle); @@ -74,7 +74,7 @@ class AssignAddVariableOp : public XlaOpKernel { explicit AssignAddVariableOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { DataType type = ctx->input_type(1); - xla::ComputationDataHandle handle; + xla::XlaOp handle; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, /*shape=*/nullptr, &handle)); handle = ctx->builder()->Add(handle, ctx->Input(1)); @@ -90,7 +90,7 @@ class AssignSubVariableOp : 
public XlaOpKernel { explicit AssignSubVariableOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { DataType type = ctx->input_type(1); - xla::ComputationDataHandle handle; + xla::XlaOp handle; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, /*shape=*/nullptr, &handle)); handle = ctx->builder()->Sub(handle, ctx->Input(1)); @@ -105,19 +105,19 @@ class ResourceGatherOp : public XlaOpKernel { public: explicit ResourceGatherOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); DataType type = ctx->expected_output_dtype(0); TensorShape resource_shape; - xla::ComputationDataHandle resource_handle; + xla::XlaOp resource_handle; OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &resource_shape, &resource_handle)); auto indices = ctx->Input(1); auto indices_shape = ctx->InputShape(1); DataType index_type = ctx->input_type(1); - xla::ComputationDataHandle gather; + xla::XlaOp gather; OP_REQUIRES_OK( ctx, XlaGather(resource_handle, resource_shape, indices, indices_shape, /*axis=*/0, /*indices_are_nd=*/false, type, index_type, diff --git a/tensorflow/compiler/tf2xla/kernels/while_op.cc b/tensorflow/compiler/tf2xla/kernels/while_op.cc index 0ff1b65ae9179d506e453f98097cd88083eb2be7..5467c5d9946846ff9f14ce9c5aac9e2be4b9d6ab 100644 --- a/tensorflow/compiler/tf2xla/kernels/while_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/while_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" @@ -101,7 +101,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { ctx, MakeXlaCompilerArgumentsFromInputs( ctx, &arguments, &has_uninitialized_vars, &has_tensor_arrays)); - xla::ComputationBuilder* builder = ctx->builder(); + xla::XlaBuilder* builder = ctx->builder(); XlaCompiler* compiler = ctx->compiler(); VLOG(1) << "Compiling body"; @@ -234,7 +234,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { xla::ShapeUtil::HumanString(cond.xla_output_shape))); int num_inputs = body.input_mapping.size(); - std::vector inputs(num_inputs); + std::vector inputs(num_inputs); for (int i = 0; i < num_inputs; ++i) { int input_num = body.input_mapping[i]; if (ctx->input_type(input_num) == DT_RESOURCE) { @@ -246,24 +246,24 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { } } - xla::ComputationDataHandle init = builder->Tuple(inputs); + xla::XlaOp init = builder->Tuple(inputs); VLOG(1) << "Building while loop"; // Wraps the condition in a computation that unpacks the output tuple. 
- xla::Computation cond_wrapper; + xla::XlaComputation cond_wrapper; { - std::unique_ptr cb = + std::unique_ptr cb = builder->CreateSubBuilder("cond_wrapper"); auto inputs = cb->Parameter(0, cond_input_shape, "inputs"); auto outputs = cb->Call(*cond.computation, {inputs}); cb->GetTupleElement(outputs, 0); - xla::StatusOr result = cb->Build(); + xla::StatusOr result = cb->Build(); OP_REQUIRES_OK(ctx, result.status()); cond_wrapper = std::move(result.ValueOrDie()); } - xla::ComputationDataHandle while_result = + xla::XlaOp while_result = builder->While(cond_wrapper, *body.computation, init); // Sets non-variable outputs. diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index 12fdfb605d667bf2cc96e79e84954b89229a7340..04ad3694a0c0df9d43c706d428c3b8715e5ff8ca 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -25,8 +25,8 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -44,8 +44,8 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -62,9 +62,9 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", 
"//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -82,8 +82,8 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -101,9 +101,9 @@ xla_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", @@ -122,8 +122,8 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) @@ -161,8 +161,8 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) diff --git 
a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc index 798f0fa78055e800038e8bf41b4f410b670be7dd..526694d5a0c7124e1696f34b516f3b202462bc19 100644 --- a/tensorflow/compiler/tf2xla/lib/batch_dot.cc +++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc @@ -25,24 +25,22 @@ limitations under the License. namespace tensorflow { -xla::StatusOr BatchDot( - xla::ComputationBuilder* builder, xla::ComputationDataHandle x, - xla::ComputationDataHandle y, bool transpose_x, bool transpose_y, - bool conjugate_x, bool conjugate_y) { - TF_ASSIGN_OR_RETURN(std::unique_ptr x_shape, - builder->GetShape(x)); - TF_ASSIGN_OR_RETURN(std::unique_ptr y_shape, - builder->GetShape(y)); +xla::StatusOr BatchDot(xla::XlaBuilder* builder, xla::XlaOp x, + xla::XlaOp y, bool transpose_x, + bool transpose_y, bool conjugate_x, + bool conjugate_y) { + TF_ASSIGN_OR_RETURN(xla::Shape x_shape, builder->GetShape(x)); + TF_ASSIGN_OR_RETURN(xla::Shape y_shape, builder->GetShape(y)); // Check that both tensors have the same number of dimensions. There must be // at least two (the batch dimensions can be empty). - if (xla::ShapeUtil::Rank(*x_shape) != xla::ShapeUtil::Rank(*y_shape)) { + if (xla::ShapeUtil::Rank(x_shape) != xla::ShapeUtil::Rank(y_shape)) { return errors::InvalidArgument( "Arguments to BatchedDot have different ranks: ", - xla::ShapeUtil::HumanString(*x_shape), " vs. ", - xla::ShapeUtil::HumanString(*y_shape)); + xla::ShapeUtil::HumanString(x_shape), " vs. ", + xla::ShapeUtil::HumanString(y_shape)); } - const int ndims = xla::ShapeUtil::Rank(*x_shape); + const int ndims = xla::ShapeUtil::Rank(x_shape); if (ndims < 2) { return errors::InvalidArgument( "Arguments to BatchedDot must have rank >= 2: ", ndims); @@ -52,46 +50,46 @@ xla::StatusOr BatchDot( // valid. 
std::vector batch_dimension_numbers; for (int i = 0; i < ndims - 2; ++i) { - if (x_shape->dimensions(i) != y_shape->dimensions(i)) { + if (x_shape.dimensions(i) != y_shape.dimensions(i)) { return errors::InvalidArgument( "Dimension ", i, " of inputs to BatchedDot must be equal: ", - xla::ShapeUtil::HumanString(*x_shape), " vs ", - xla::ShapeUtil::HumanString(*y_shape)); + xla::ShapeUtil::HumanString(x_shape), " vs ", + xla::ShapeUtil::HumanString(y_shape)); } batch_dimension_numbers.push_back(i); } int x_inner_dim = transpose_x ? (ndims - 2) : (ndims - 1); int y_inner_dim = transpose_y ? (ndims - 1) : (ndims - 2); - if (x_shape->dimensions(x_inner_dim) != y_shape->dimensions(y_inner_dim)) { + if (x_shape.dimensions(x_inner_dim) != y_shape.dimensions(y_inner_dim)) { return errors::InvalidArgument( "Dimensions ", x_inner_dim, " and ", y_inner_dim, " of arguments to BatchedDot must be equal: ", - xla::ShapeUtil::HumanString(*x_shape), " transpose: ", transpose_x, - " vs. ", xla::ShapeUtil::HumanString(*y_shape), + xla::ShapeUtil::HumanString(x_shape), " transpose: ", transpose_x, + " vs. ", xla::ShapeUtil::HumanString(y_shape), " transpose: ", transpose_y); } // Check for zero lhs/rhs dim size. - if (xla::ShapeUtil::HasZeroElements(*x_shape) || - xla::ShapeUtil::HasZeroElements(*y_shape)) { + if (xla::ShapeUtil::HasZeroElements(x_shape) || + xla::ShapeUtil::HasZeroElements(y_shape)) { std::vector dimensions(batch_dimension_numbers.size()); for (int i = 0; i < batch_dimension_numbers.size(); ++i) { - dimensions[i] = x_shape->dimensions(batch_dimension_numbers[i]); + dimensions[i] = x_shape.dimensions(batch_dimension_numbers[i]); } int x_outer_dim = transpose_x ? (ndims - 1) : (ndims - 2); int y_outer_dim = transpose_y ? 
(ndims - 2) : (ndims - 1); - dimensions.push_back(x_shape->dimensions(x_outer_dim)); - dimensions.push_back(y_shape->dimensions(y_outer_dim)); + dimensions.push_back(x_shape.dimensions(x_outer_dim)); + dimensions.push_back(y_shape.dimensions(y_outer_dim)); return builder->Broadcast( - builder->ConstantLiteral(xla::Literal::Zero(x_shape->element_type())), + builder->ConstantLiteral(xla::Literal::Zero(x_shape.element_type())), dimensions); } - if (x_shape->element_type() == xla::C64 && conjugate_x) { + if (x_shape.element_type() == xla::C64 && conjugate_x) { x = builder->Conj(x); } - if (y_shape->element_type() == xla::C64 && conjugate_y) { + if (y_shape.element_type() == xla::C64 && conjugate_y) { y = builder->Conj(y); } diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.h b/tensorflow/compiler/tf2xla/lib/batch_dot.h index b230e885f10f45a78cdd6e455da3ba55ce589b96..1acc72033b05e73b0f5f88907df20cde5cfffbf0 100644 --- a/tensorflow/compiler/tf2xla/lib/batch_dot.h +++ b/tensorflow/compiler/tf2xla/lib/batch_dot.h @@ -16,8 +16,8 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_BATCH_DOT_H_ #define TENSORFLOW_COMPILER_TF2XLA_LIB_BATCH_DOT_H_ -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" namespace tensorflow { @@ -43,10 +43,10 @@ namespace tensorflow { // It is computed as: // // output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) -xla::StatusOr BatchDot( - xla::ComputationBuilder* builder, xla::ComputationDataHandle x, - xla::ComputationDataHandle y, bool transpose_x, bool transpose_y, - bool conjugate_x = false, bool conjugate_y = false); +xla::StatusOr BatchDot(xla::XlaBuilder* builder, xla::XlaOp x, + xla::XlaOp y, bool transpose_x, + bool transpose_y, bool conjugate_x = false, + bool conjugate_y = false); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.cc b/tensorflow/compiler/tf2xla/lib/cholesky.cc index 203365e2ab07e0da1abfac5452a8ec41a4ddf406..83e73827862ca26a1a51bed72ab87768854c1e71 100644 --- a/tensorflow/compiler/tf2xla/lib/cholesky.cc +++ b/tensorflow/compiler/tf2xla/lib/cholesky.cc @@ -47,23 +47,21 @@ namespace { // l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], row_t)) / // l[..., j, j] // return l -xla::StatusOr CholeskyUnblocked( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a) { - TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, - builder->GetShape(a)); - const int n_dims = xla::ShapeUtil::Rank(*a_shape); - const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); - gtl::ArraySlice major_dims(xla::AsInt64Slice(a_shape->dimensions()), +xla::StatusOr CholeskyUnblocked(xla::XlaBuilder* builder, + const xla::XlaOp& a) { + TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a)); + const int n_dims = xla::ShapeUtil::Rank(a_shape); + const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1); + 
gtl::ArraySlice major_dims(xla::AsInt64Slice(a_shape.dimensions()), /*pos=*/0, /*len=*/n_dims - 2); - xla::ComputationDataHandle l = Zeros(builder, *a_shape); + xla::XlaOp l = Zeros(builder, a_shape); // Construct the for loop body to iterate over rows. - auto body_fn = [&](xla::ComputationDataHandle i, - gtl::ArraySlice loop_vars, - xla::ComputationBuilder* body_builder) - -> xla::StatusOr> { + auto body_fn = [&](xla::XlaOp i, gtl::ArraySlice loop_vars, + xla::XlaBuilder* body_builder) + -> xla::StatusOr> { xla::Shape col_shape; xla::Shape row_shape; for (int64 d : major_dims) { @@ -72,12 +70,12 @@ xla::StatusOr CholeskyUnblocked( } row_shape.add_dimensions(1); row_shape.add_dimensions(n); - row_shape.set_element_type(a_shape->element_type()); + row_shape.set_element_type(a_shape.element_type()); auto mask_zeros_row = Zeros(body_builder, row_shape); col_shape.add_dimensions(n); col_shape.add_dimensions(1); - col_shape.set_element_type(a_shape->element_type()); + col_shape.set_element_type(a_shape.element_type()); auto mask_zeros_col = Zeros(body_builder, col_shape); std::vector mask_vector(n); @@ -101,7 +99,7 @@ xla::StatusOr CholeskyUnblocked( TF_ASSIGN_OR_RETURN(auto a_ii, DynamicSliceInMinorDims(body_builder, body_a, {i, i}, {1, 1})); // np.dot(row, np.swapaxes(row, -1, -2)) - xla::ComputationDataHandle diag_dot; + xla::XlaOp diag_dot; TF_ASSIGN_OR_RETURN(diag_dot, BatchDot(body_builder, row, row, /*transpose_x=*/false, /*transpose_y=*/true)); @@ -109,7 +107,7 @@ xla::StatusOr CholeskyUnblocked( // np.swapaxes(row, -1, -2))) auto l_ii = body_builder->Pow( body_builder->Sub(a_ii, diag_dot), - FloatLiteral(body_builder, a_shape->element_type(), 0.5)); + FloatLiteral(body_builder, a_shape.element_type(), 0.5)); // a[..., i+1:, i] auto ip1 = body_builder->Add(i, body_builder->ConstantR0(1)); @@ -140,7 +138,7 @@ xla::StatusOr CholeskyUnblocked( TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims( body_builder, body_l, l_ii, {i, i})); - return 
std::vector{body_a, body_l}; + return std::vector{body_a, body_l}; }; TF_ASSIGN_OR_RETURN( @@ -152,22 +150,20 @@ xla::StatusOr CholeskyUnblocked( } // namespace -xla::StatusOr Cholesky( - xla::ComputationBuilder* builder, xla::ComputationDataHandle a, - int64 block_size) { - TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, - builder->GetShape(a)); - const int ndims = xla::ShapeUtil::Rank(*a_shape); +xla::StatusOr Cholesky(xla::XlaBuilder* builder, xla::XlaOp a, + int64 block_size) { + TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a)); + const int ndims = xla::ShapeUtil::Rank(a_shape); if (ndims < 2) { return errors::InvalidArgument( "Arguments to Cholesky must have rank >= 2: ", ndims); } - const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); - if (n != xla::ShapeUtil::GetDimension(*a_shape, -2)) { + const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1); + if (n != xla::ShapeUtil::GetDimension(a_shape, -2)) { return errors::InvalidArgument( "Arguments to Cholesky must be square matrices: ", - xla::ShapeUtil::HumanString(*a_shape)); + xla::ShapeUtil::HumanString(a_shape)); } if (block_size < 1) { @@ -179,7 +175,7 @@ xla::StatusOr Cholesky( // Algorithm 1 from // Haidar, Azzam, et al. "High-performance Cholesky factorization for GPU-only // execution." Proceedings of General Purpose GPUs. ACM, 2017. - xla::ComputationDataHandle l = Zeros(builder, *a_shape); + xla::XlaOp l = Zeros(builder, a_shape); for (int64 i = 0; i < n; i += block_size) { int64 k = std::min(block_size, n - i); if (i > 0) { diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.h b/tensorflow/compiler/tf2xla/lib/cholesky.h index 17da8d8b22d107701ce768ac945c1404df6d47e8..20fca7969ece2729a44933fd3ef3f87230ab6cad 100644 --- a/tensorflow/compiler/tf2xla/lib/cholesky.h +++ b/tensorflow/compiler/tf2xla/lib/cholesky.h @@ -16,8 +16,8 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_ #define TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_ -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" namespace tensorflow { @@ -30,9 +30,8 @@ namespace tensorflow { // TODO(phawkins): check for negative values on the diagonal and return an // error, instead of silently yielding NaNs. // TODO(znado): handle the complex Hermitian case -xla::StatusOr Cholesky( - xla::ComputationBuilder* builder, xla::ComputationDataHandle a, - int64 block_size = 256); +xla::StatusOr Cholesky(xla::XlaBuilder* builder, xla::XlaOp a, + int64 block_size = 256); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc index 45699233ea8b2a75e3850098250307b95546cc28..d5a27abb2585f699ae2719cb8a6b9a829263389e 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.cc +++ b/tensorflow/compiler/tf2xla/lib/scatter.cc @@ -30,24 +30,19 @@ limitations under the License. 
namespace tensorflow { -xla::StatusOr XlaScatter( - const xla::ComputationDataHandle& buffer, - const xla::ComputationDataHandle& updates, - const xla::ComputationDataHandle& indices, bool indices_are_vectors, - const std::function& combiner, - xla::ComputationBuilder* builder) { - TF_ASSIGN_OR_RETURN(std::unique_ptr buffer_shape, - builder->GetShape(buffer)); - TF_ASSIGN_OR_RETURN(std::unique_ptr updates_shape, - builder->GetShape(updates)); - TF_ASSIGN_OR_RETURN(std::unique_ptr indices_shape, - builder->GetShape(indices)); +xla::StatusOr XlaScatter( + const xla::XlaOp& buffer, const xla::XlaOp& updates, + const xla::XlaOp& indices, bool indices_are_vectors, + const std::function& + combiner, + xla::XlaBuilder* builder) { + TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer)); + TF_RETURN_IF_ERROR(builder->GetShape(updates).status()); + TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices)); gtl::ArraySlice indices_dims = - xla::AsInt64Slice(indices_shape->dimensions()); + xla::AsInt64Slice(indices_shape.dimensions()); gtl::ArraySlice buffer_dims = - xla::AsInt64Slice(buffer_shape->dimensions()); + xla::AsInt64Slice(buffer_shape.dimensions()); // If the indices are N-dimensional, the minor dimension of indices contains // the indices to update. Otherwise the indices are all scalars. 
@@ -55,12 +50,12 @@ xla::StatusOr XlaScatter( if (indices_are_vectors) { TF_RET_CHECK(!indices_dims.empty()); num_index_dims = indices_dims.back(); - if (num_index_dims > xla::ShapeUtil::Rank(*buffer_shape)) { + if (num_index_dims > xla::ShapeUtil::Rank(buffer_shape)) { return errors::InvalidArgument( "The size of the minor dimension of the indices (shape: ", - xla::ShapeUtil::HumanString(*indices_shape), + xla::ShapeUtil::HumanString(indices_shape), ") must be <= the rank of the buffer (shape: ", - xla::ShapeUtil::HumanString(*buffer_shape), ")"); + xla::ShapeUtil::HumanString(buffer_shape), ")"); } indices_dims.pop_back(); } @@ -78,10 +73,10 @@ xla::StatusOr XlaScatter( // If any of the indexed dimensions are zero in the buffer, the update cannot // succeed since it updates a slice of size 1. for (int64 i = 0; i < num_index_dims; ++i) { - if (xla::ShapeUtil::GetDimension(*buffer_shape, i) == 0) { - return errors::InvalidArgument( - "Scatter dimension ", i, " is of size zero in tensor with shape ", - xla::ShapeUtil::HumanString(*buffer_shape)); + if (xla::ShapeUtil::GetDimension(buffer_shape, i) == 0) { + return errors::InvalidArgument("Scatter dimension ", i, + " is of size zero in tensor with shape ", + xla::ShapeUtil::HumanString(buffer_shape)); } } @@ -111,18 +106,17 @@ xla::StatusOr XlaScatter( // index = dynamic-slice(indices, i) // update = dynamic-slice(updates, i) // buffer = dynamic-update-slice(buffer, update, index) - auto body_fn = [&](xla::ComputationDataHandle i, - gtl::ArraySlice loop_vars, - xla::ComputationBuilder* body_builder) { + auto body_fn = [&](xla::XlaOp i, gtl::ArraySlice loop_vars, + xla::XlaBuilder* body_builder) { auto indices = loop_vars[0]; auto updates = loop_vars[1]; auto buffer = loop_vars[2]; auto zero_index = body_builder->ConstantLiteral( - xla::Literal::Zero(indices_shape->element_type())); + xla::Literal::Zero(indices_shape.element_type())); // Slice the i-th index from the indices array. 
- xla::ComputationDataHandle index; + xla::XlaOp index; auto indices_offset = body_builder->Reshape(i, {1}); if (indices_are_vectors) { indices_offset = body_builder->Pad(indices_offset, zero_index, @@ -180,12 +174,12 @@ xla::StatusOr XlaScatter( // Apply the update. buffer = body_builder->DynamicUpdateSlice(buffer, update, index); - return std::vector{indices, updates, buffer}; + return std::vector{indices, updates, buffer}; }; - TF_ASSIGN_OR_RETURN( - auto outputs, XlaForEachIndex(num_indices, indices_shape->element_type(), - body_fn, init, "scatter", builder)); + TF_ASSIGN_OR_RETURN(auto outputs, + XlaForEachIndex(num_indices, indices_shape.element_type(), + body_fn, init, "scatter", builder)); return outputs[2]; } diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h index 41e6d3b195ebf90662c7b9b42c53fcb0133ab29e..87309e10ede320a81d173cd0a64492f88a2c7376 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.h +++ b/tensorflow/compiler/tf2xla/lib/scatter.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/statusor.h" namespace tensorflow { @@ -39,14 +39,12 @@ namespace tensorflow { // If a `combiner` is provided, updates are combined with the existing values in // the buffer using the combiner function. Otherwise, the updates replace the // existing values. The order of updates is implementation-defined. 
-xla::StatusOr XlaScatter( - const xla::ComputationDataHandle& buffer, - const xla::ComputationDataHandle& updates, - const xla::ComputationDataHandle& indices, bool indices_are_vectors, - const std::function& combiner, - xla::ComputationBuilder* builder); +xla::StatusOr XlaScatter( + const xla::XlaOp& buffer, const xla::XlaOp& updates, + const xla::XlaOp& indices, bool indices_are_vectors, + const std::function& + combiner, + xla::XlaBuilder* builder); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve.cc b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc index 9bf5821b54abe3994085ad72043ff143077824c5..b4503601f94baa5a595a64c9fc81bc92d9980ac6 100644 --- a/tensorflow/compiler/tf2xla/lib/triangular_solve.cc +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,21 +29,20 @@ limitations under the License. 
namespace tensorflow { -xla::StatusOr TriangularSolve( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, - xla::ComputationDataHandle b, bool left_side, bool lower, bool transpose_a, - bool conjugate_a, int64 block_size) { - TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, - builder->GetShape(a)); - TF_ASSIGN_OR_RETURN(std::unique_ptr b_shape, - builder->GetShape(b)); - if (xla::ShapeUtil::Rank(*a_shape) != xla::ShapeUtil::Rank(*b_shape)) { +xla::StatusOr TriangularSolve(xla::XlaBuilder* builder, + const xla::XlaOp& a, xla::XlaOp b, + bool left_side, bool lower, + bool transpose_a, bool conjugate_a, + int64 block_size) { + TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a)); + TF_ASSIGN_OR_RETURN(xla::Shape b_shape, builder->GetShape(b)); + if (xla::ShapeUtil::Rank(a_shape) != xla::ShapeUtil::Rank(b_shape)) { return errors::InvalidArgument( "Arguments to TriangularSolve have different ranks: ", - xla::ShapeUtil::HumanString(*a_shape), " vs. ", - xla::ShapeUtil::HumanString(*b_shape)); + xla::ShapeUtil::HumanString(a_shape), " vs. ", + xla::ShapeUtil::HumanString(b_shape)); } - const int ndims = xla::ShapeUtil::Rank(*a_shape); + const int ndims = xla::ShapeUtil::Rank(a_shape); if (ndims < 2) { return errors::InvalidArgument( "Arguments to TriangularSolve must have rank >= 2: ", ndims); @@ -51,30 +50,30 @@ xla::StatusOr TriangularSolve( // The batch dimensions must be equal. 
std::vector batch_dimensions; for (int i = 0; i < ndims - 2; ++i) { - int64 a_size = a_shape->dimensions(i); - int64 b_size = b_shape->dimensions(i); + int64 a_size = a_shape.dimensions(i); + int64 b_size = b_shape.dimensions(i); if (a_size != b_size) { return errors::InvalidArgument( "Batch dimensions of arguments to TriangularSolve must be equal: ", - xla::ShapeUtil::HumanString(*a_shape), " vs ", - xla::ShapeUtil::HumanString(*b_shape)); + xla::ShapeUtil::HumanString(a_shape), " vs ", + xla::ShapeUtil::HumanString(b_shape)); } batch_dimensions.push_back(a_size); } - if (xla::ShapeUtil::GetDimension(*a_shape, -1) != - xla::ShapeUtil::GetDimension(*a_shape, -2)) { + if (xla::ShapeUtil::GetDimension(a_shape, -1) != + xla::ShapeUtil::GetDimension(a_shape, -2)) { return errors::InvalidArgument( "The 'a' arguments to TriangularSolve must be square matrices: ", - xla::ShapeUtil::HumanString(*a_shape)); + xla::ShapeUtil::HumanString(a_shape)); } - const int64 m = xla::ShapeUtil::GetDimension(*b_shape, -2); - const int64 n = xla::ShapeUtil::GetDimension(*b_shape, -1); - if ((left_side ? m : n) != xla::ShapeUtil::GetDimension(*a_shape, -1)) { + const int64 m = xla::ShapeUtil::GetDimension(b_shape, -2); + const int64 n = xla::ShapeUtil::GetDimension(b_shape, -1); + if ((left_side ? m : n) != xla::ShapeUtil::GetDimension(a_shape, -1)) { return errors::InvalidArgument( "Arguments to TriangularSolve have incompatible matrix shapes: ", - xla::ShapeUtil::HumanString(*a_shape), " vs ", - xla::ShapeUtil::HumanString(*b_shape)); + xla::ShapeUtil::HumanString(a_shape), " vs ", + xla::ShapeUtil::HumanString(b_shape)); } if (block_size < 1) { @@ -83,26 +82,18 @@ xla::StatusOr TriangularSolve( block_size); } - // Applies a complex conjugation operation if `a` is complex and `conjugate_a` - // is true, otherwise returns its argument. 
- auto maybe_conj = [&](xla::ComputationBuilder* builder, - xla::ComputationDataHandle x) { - auto perform_conj = a_shape->element_type() == xla::C64 && conjugate_a; - return perform_conj ? builder->Conj(x) : x; - }; - - std::map base_computations; + std::map base_computations; auto get_base_triangular_solve = - [&](int k) -> xla::StatusOr { - xla::Computation& computation = base_computations[k]; + [&](int k) -> xla::StatusOr { + xla::XlaComputation& computation = base_computations[k]; if (computation.IsNull()) { - std::unique_ptr sub = builder->CreateSubBuilder( + std::unique_ptr sub = builder->CreateSubBuilder( tensorflow::strings::StrCat("trsm_base_", k)); auto a_param = sub->Parameter( 0, xla::ShapeUtil::MakeShape( - b_shape->element_type(), + b_shape.element_type(), PrependMajorDims(sub.get(), batch_dimensions, {k, k})), "a"); @@ -115,20 +106,25 @@ xla::StatusOr TriangularSolve( auto b_param = sub->Parameter( 1, xla::ShapeUtil::MakeShape( - b_shape->element_type(), + b_shape.element_type(), PrependMajorDims(sub.get(), batch_dimensions, b_lastd)), "b"); - // We use a left-looking subroutine on the block diagonal in some common - // cases, while falling back to a recursive call in unsupported cases. The - // left-looking subroutine is written with a While loop and so yields much - // faster compile times. Moreover, the left-looking variant can give - // higher performance on smaller (sub)problems. + // We use a left-looking or right-looking subroutine on the block diagonal + // in the lower=true cases, while falling back to a recursive call in + // others. The left-looking and right-looking subroutines are written with + // a While loop and so yields much faster compile times. Moreover, they + // can give higher performance on smaller (sub)problems. 
if (left_side && lower) { TF_RETURN_IF_ERROR(TriangularSolveLeftLooking(sub.get(), a_param, b_param, transpose_a, conjugate_a) .status()); + } else if (!left_side && lower) { + TF_RETURN_IF_ERROR(TriangularSolveRightLooking(sub.get(), a_param, + b_param, transpose_a, + conjugate_a) + .status()); } else { TF_RETURN_IF_ERROR(TriangularSolve(sub.get(), a_param, b_param, left_side, lower, transpose_a, @@ -142,7 +138,7 @@ xla::StatusOr TriangularSolve( return &computation; }; - xla::ComputationDataHandle output = Zeros(builder, *b_shape); + xla::XlaOp output = Zeros(builder, b_shape); // Right-looking blocked triangular solve. // For an explanation of the algorithm, see the TRSM discussion in: @@ -165,13 +161,15 @@ xla::StatusOr TriangularSolve( SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); TF_ASSIGN_OR_RETURN(auto b_slice, SliceInMinorDims(builder, b, {0, i}, {m, i + k})); - xla::ComputationDataHandle update; + xla::XlaOp update; if (k > 1) { - TF_ASSIGN_OR_RETURN(xla::Computation * solve, + TF_ASSIGN_OR_RETURN(xla::XlaComputation * solve, get_base_triangular_solve(k)); update = builder->Call(*solve, {a_slice, b_slice}); } else { - update = builder->Div(b_slice, maybe_conj(builder, a_slice)); + TF_ASSIGN_OR_RETURN(auto a_slice_conj, + MaybeConjugate(builder, a_slice, conjugate_a)); + update = builder->Div(b_slice, a_slice_conj); } TF_ASSIGN_OR_RETURN( output, UpdateSliceInMinorDims(builder, output, update, {0, i})); @@ -181,7 +179,7 @@ xla::StatusOr TriangularSolve( // a_slice_2 = T(a_slice_2) if transpose_a else a_slice_2 // b[..., :, i+k:] -= np.matmul(output[..., :, i:i+k], a_slice_2) if (i + k < n) { - xla::ComputationDataHandle a_slice_2; + xla::XlaOp a_slice_2; if (lower) { TF_ASSIGN_OR_RETURN( a_slice_2, SliceInMinorDims(builder, a, {i + k, i}, {n, i + k})); @@ -215,13 +213,15 @@ xla::StatusOr TriangularSolve( SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); TF_ASSIGN_OR_RETURN(auto b_slice, SliceInMinorDims(builder, b, {i, 0}, {i + k, n})); - 
xla::ComputationDataHandle update; + xla::XlaOp update; if (k > 1) { - TF_ASSIGN_OR_RETURN(xla::Computation * solve, + TF_ASSIGN_OR_RETURN(xla::XlaComputation * solve, get_base_triangular_solve(k)); update = builder->Call(*solve, {a_slice, b_slice}); } else { - update = builder->Div(b_slice, maybe_conj(builder, a_slice)); + TF_ASSIGN_OR_RETURN(auto a_slice_conj, + MaybeConjugate(builder, a_slice, conjugate_a)); + update = builder->Div(b_slice, a_slice_conj); } TF_ASSIGN_OR_RETURN( output, UpdateSliceInMinorDims(builder, output, update, {i, 0})); @@ -231,7 +231,7 @@ xla::StatusOr TriangularSolve( // a_slice_2 = T(a_slice_2) if transpose_a else a_slice_2 // b[..., i+k:, :] -= np.matmul(a_slice_2, output[..., i:i+k, :]) if (i + k < m) { - xla::ComputationDataHandle a_slice_2; + xla::XlaOp a_slice_2; if (lower) { TF_ASSIGN_OR_RETURN( a_slice_2, SliceInMinorDims(builder, a, {i + k, i}, {m, i + k})); @@ -264,13 +264,15 @@ xla::StatusOr TriangularSolve( SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); TF_ASSIGN_OR_RETURN(auto b_slice, SliceInMinorDims(builder, b, {0, i}, {m, i + k})); - xla::ComputationDataHandle update; + xla::XlaOp update; if (k > 1) { - TF_ASSIGN_OR_RETURN(xla::Computation * solve, + TF_ASSIGN_OR_RETURN(xla::XlaComputation * solve, get_base_triangular_solve(k)); update = builder->Call(*solve, {a_slice, b_slice}); } else { - update = builder->Div(b_slice, maybe_conj(builder, a_slice)); + TF_ASSIGN_OR_RETURN(auto a_slice_conj, + MaybeConjugate(builder, a_slice, conjugate_a)); + update = builder->Div(b_slice, a_slice_conj); } TF_ASSIGN_OR_RETURN( output, UpdateSliceInMinorDims(builder, output, update, {0, i})); @@ -280,7 +282,7 @@ xla::StatusOr TriangularSolve( // a_slice_2 = T(a_slice_2) if transpose_a else a_slice_2 // b[..., :, :i] -= np.matmul(out[..., :, i:i+k], a_slice_2) if (i - k >= 0) { - xla::ComputationDataHandle a_slice_2; + xla::XlaOp a_slice_2; if (lower) { TF_ASSIGN_OR_RETURN(a_slice_2, SliceInMinorDims(builder, a, {i, 0}, {i + k, 
i})); @@ -314,13 +316,15 @@ xla::StatusOr TriangularSolve( SliceInMinorDims(builder, a, {i, i}, {i + k, i + k})); TF_ASSIGN_OR_RETURN(auto b_slice, SliceInMinorDims(builder, b, {i, 0}, {i + k, n})); - xla::ComputationDataHandle update; + xla::XlaOp update; if (k > 1) { - TF_ASSIGN_OR_RETURN(xla::Computation * solve, + TF_ASSIGN_OR_RETURN(xla::XlaComputation * solve, get_base_triangular_solve(k)); update = builder->Call(*solve, {a_slice, b_slice}); } else { - update = builder->Div(b_slice, maybe_conj(builder, a_slice)); + TF_ASSIGN_OR_RETURN(auto a_slice_conj, + MaybeConjugate(builder, a_slice, conjugate_a)); + update = builder->Div(b_slice, a_slice_conj); } TF_ASSIGN_OR_RETURN( output, UpdateSliceInMinorDims(builder, output, update, {i, 0})); @@ -330,7 +334,7 @@ xla::StatusOr TriangularSolve( // a_slice_2 = T(a_slice_2) if transpose_a else a_slice_2 // b[..., :i, :] -= np.matmul(a_slice_2, out[..., i:i+k, :]) if (i - k >= 0) { - xla::ComputationDataHandle a_slice_2; + xla::XlaOp a_slice_2; if (lower) { TF_ASSIGN_OR_RETURN(a_slice_2, SliceInMinorDims(builder, a, {i, 0}, {i + k, i})); @@ -356,29 +360,23 @@ xla::StatusOr TriangularSolve( return output; } -xla::StatusOr TriangularSolveLeftLooking( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, - const xla::ComputationDataHandle& b, bool transpose_a, bool conjugate_a) { - TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, - builder->GetShape(a)); - TF_ASSIGN_OR_RETURN(std::unique_ptr b_shape, - builder->GetShape(b)); - const int64 m = xla::ShapeUtil::GetDimension(*b_shape, -2); - const int64 n = xla::ShapeUtil::GetDimension(*b_shape, -1); - const int64 ndims = xla::ShapeUtil::Rank(*a_shape); +xla::StatusOr TriangularSolveLeftLooking(xla::XlaBuilder* builder, + const xla::XlaOp& a, + const xla::XlaOp& b, + bool transpose_a, + bool conjugate_a) { + TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a)); + TF_ASSIGN_OR_RETURN(xla::Shape b_shape, builder->GetShape(b)); + const int64 m = 
xla::ShapeUtil::GetDimension(b_shape, -2); + const int64 n = xla::ShapeUtil::GetDimension(b_shape, -1); + const int64 ndims = xla::ShapeUtil::Rank(a_shape); std::vector batch_dimensions; for (int i = 0; i < ndims - 2; ++i) { - int64 a_size = a_shape->dimensions(i); + int64 a_size = a_shape.dimensions(i); batch_dimensions.push_back(a_size); } - auto maybe_conj = [&](xla::ComputationBuilder* builder, - xla::ComputationDataHandle x) { - auto perform_conj = a_shape->element_type() == xla::C64 && conjugate_a; - return perform_conj ? builder->Conj(x) : x; - }; - // The main computation is performed in a While loop. // Allocate the output and set its first or last row, @@ -387,14 +385,16 @@ xla::StatusOr TriangularSolveLeftLooking( // output[..., m-1:, :] = b[..., m-1:, :] / a[..., m-1:, m-1:] // else: // output[..., :1, :] = b[..., :1, :] / a[..., :1, :1] - xla::ComputationDataHandle output = Zeros(builder, *b_shape); + xla::XlaOp output = Zeros(builder, b_shape); { auto i = transpose_a ? m - 1 : 0; TF_ASSIGN_OR_RETURN(auto a_slice, SliceInMinorDims(builder, a, {i, i}, {i + 1, i + 1})); TF_ASSIGN_OR_RETURN(auto b_slice, SliceInMinorDims(builder, b, {i, 0}, {i + 1, n})); - auto update = builder->Div(b_slice, maybe_conj(builder, a_slice)); + TF_ASSIGN_OR_RETURN(auto a_slice_conj, + MaybeConjugate(builder, a_slice, conjugate_a)); + auto update = builder->Div(b_slice, a_slice_conj); TF_ASSIGN_OR_RETURN( output, UpdateSliceInMinorDims(builder, output, update, {i, 0})); } @@ -408,11 +408,11 @@ xla::StatusOr TriangularSolveLeftLooking( // The loop iteration counter is a scalar, incremented each iteration. xla::ShapeUtil::MakeShape(xla::S32, {}), // The output has the shape of b, with one row updated each iteration. - *b_shape, + b_shape, // The coefficient matrix a is a loop invariant. - *a_shape, + a_shape, // The right-hand-side matrix b is a loop invariant. 
- *b_shape}; + b_shape}; xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes); auto init_i = builder->ConstantR0(transpose_a ? m - 2 : 1); auto init = builder->Tuple({init_i, output, a, b}); @@ -421,7 +421,7 @@ xla::StatusOr TriangularSolveLeftLooking( // def cond_fun(loop_carry): // i, output, a, b = loop_carry // return i >= 0 if transpose_a else i < m - std::unique_ptr condb = + std::unique_ptr condb = builder->CreateSubBuilder("TriangularSolveLeftLookingWhileCond"); { auto i = condb->GetTupleElement( @@ -451,7 +451,7 @@ xla::StatusOr TriangularSolveLeftLooking( // return (i + 1, output, a, b) // We have to do some extra FLOPs propagating zeros in the matrix multiply // because we can't have the size of its arguments depend on the loop counter. - std::unique_ptr bodyb = + std::unique_ptr bodyb = builder->CreateSubBuilder("TriangularSolveLeftLookingWhileBody"); { auto input_tuple = bodyb->Parameter(0, tuple_shape, @@ -475,7 +475,7 @@ xla::StatusOr TriangularSolveLeftLooking( // But since we can't have intermediate array sizes depend on the loop // counter, we instead exploit the fact that we initialized the output to // all zeros and use that as zero-padding (doing unnecessary FLOPs). 
- xla::ComputationDataHandle a_row; + xla::XlaOp a_row; if (transpose_a) { TF_ASSIGN_OR_RETURN(a_row, DynamicSliceInMinorDims(bodyb.get(), body_a, {zero, i}, {m, 1})); @@ -496,7 +496,9 @@ xla::StatusOr TriangularSolveLeftLooking( // body_out[..., i:i+1, :] = result_row / a[..., i:i+1, i:i+1] TF_ASSIGN_OR_RETURN(auto a_elt, DynamicSliceInMinorDims(bodyb.get(), body_a, {i, i}, {1, 1})); - auto div_result = bodyb->Div(result_row, maybe_conj(bodyb.get(), a_elt)); + TF_ASSIGN_OR_RETURN(auto a_elt_conj, + MaybeConjugate(bodyb.get(), a_elt, conjugate_a)); + auto div_result = bodyb->Div(result_row, a_elt_conj); TF_ASSIGN_OR_RETURN(body_out, DynamicUpdateSliceInMinorDims(bodyb.get(), body_out, div_result, {i, zero})); @@ -516,4 +518,130 @@ xla::StatusOr TriangularSolveLeftLooking( return builder->GetTupleElement(triangular_solve_left_looking_while, 1); } +xla::StatusOr TriangularSolveRightLooking(xla::XlaBuilder* builder, + const xla::XlaOp& a, + const xla::XlaOp& b, + bool transpose_a, + bool conjugate_a) { + TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a)); + TF_ASSIGN_OR_RETURN(xla::Shape b_shape, builder->GetShape(b)); + const int64 m = xla::ShapeUtil::GetDimension(b_shape, -2); + const int64 n = xla::ShapeUtil::GetDimension(b_shape, -1); + const int64 ndims = xla::ShapeUtil::Rank(a_shape); + + std::vector batch_dimensions; + for (int i = 0; i < ndims - 2; ++i) { + int64 a_size = a_shape.dimensions(i); + batch_dimensions.push_back(a_size); + } + + // The main computation is performed in a While loop. + xla::XlaOp output = Zeros(builder, b_shape); + + // Construct the initial loop carry tuple, + // if transpose_a: + // init = (0, output, a, b) + // else: + // init = (n-1, output, a, b) + std::vector tuple_shapes = { + // The loop iteration counter is a scalar, incremented each iteration. + xla::ShapeUtil::MakeShape(xla::S32, {}), + // The output has the shape of b, with one row updated each iteration. 
+ b_shape, + // The coefficient matrix a is a loop invariant. + a_shape, + // The right-hand-side matrix b is a loop invariant. + b_shape}; + xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes); + auto init_i = builder->ConstantR0(transpose_a ? 0 : n - 1); + auto init = builder->Tuple({init_i, output, a, b}); + + // Construct the loop condition function, + // def cond_fun(loop_carry): + // i, output, a, b = loop_carry + // return i < n if transpose_a else i >= 0 + std::unique_ptr condb = + builder->CreateSubBuilder("TriangularSolveRightLookingWhileCond"); + { + auto i = condb->GetTupleElement( + condb->Parameter(0, tuple_shape, + "TriangularSolveRightLookingWhileTuple"), + 0); + if (transpose_a) { + condb->Lt(i, condb->ConstantR0(n)); + } else { + condb->Ge(i, condb->ConstantR0(0)); + } + } + TF_ASSIGN_OR_RETURN(auto cond, condb->Build()); + + // Construct the loop body function, + // def body_fun(loop_carry): + // i, output, a, b = loop_carry + // if transpose_a: + // a_row = np.swapaxes(a[..., :, i:i+1], -1 -2) + // else: + // a_row = a[..., :, i:i+1] + // result_row = b[..., :, i:i+1] - np.matmul(output, a_row) + // output[..., :, i:i+1] = result_row / a[..., i:i+1, i:i+1] + // if transpose_a: + // return (i - 1, output, a, b) + // else: + // return (i + 1, output, a, b) + // We have to do some extra FLOPs propagating zeros in the matrix multiply + // because we can't have the size of its arguments depend on the loop counter. 
+ std::unique_ptr bodyb = + builder->CreateSubBuilder("TriangularSolveRightLookingWhileBody"); + { + auto input_tuple = bodyb->Parameter( + 0, tuple_shape, "TriangularSolveRightLookingWhileTuple"); + + // i, output, a, b = loop_carry + auto i = bodyb->GetTupleElement(input_tuple, 0); + auto body_out = bodyb->GetTupleElement(input_tuple, 1); + auto body_a = bodyb->GetTupleElement(input_tuple, 2); + auto body_b = bodyb->GetTupleElement(input_tuple, 3); + auto zero = bodyb->ConstantR0(0); + + // We'd like to implement b[..., :, i:i+1] - np.matmul(output, a[..., :, + // i:i+1]) But since we can't have intermediate array sizes depend on the + // loop counter, we instead exploit the fact that we initialized the output + // to all zeros and use that as zero-padding (doing unnecessary FLOPs). + TF_ASSIGN_OR_RETURN(auto b_update, BatchDot(bodyb.get(), body_out, body_a, + /*transpose_x=*/false, + /*transpose_y=*/transpose_a, + /*conjugate_x=*/false, + /*conjugate_y=*/conjugate_a)); + // result = b - np.matmul(output, a) + auto result = bodyb->Sub(body_b, b_update); + // result_row = result[..., :, i:i+1] + TF_ASSIGN_OR_RETURN( + auto result_row, + DynamicSliceInMinorDims(bodyb.get(), result, {zero, i}, {m, 1})); + + // body_out[..., :, i:i+1] = result_row / a[..., i:i+1, i:i+1] + TF_ASSIGN_OR_RETURN(auto a_ii, DynamicSliceInMinorDims(bodyb.get(), body_a, + {i, i}, {1, 1})); + TF_ASSIGN_OR_RETURN(auto a_ii_conj, + MaybeConjugate(bodyb.get(), a_ii, conjugate_a)); + auto div_result = bodyb->Div(result_row, a_ii_conj); + TF_ASSIGN_OR_RETURN(body_out, + DynamicUpdateSliceInMinorDims(bodyb.get(), body_out, + div_result, {zero, i})); + + // if transpose_a: + // return (i + 1, body_out, a, b) + // else: + // return (i - 1, body_out, a, b) + auto next_i = bodyb->Add(i, bodyb->ConstantR0(transpose_a ? 
1 : -1)); + bodyb->Tuple({next_i, body_out, body_a, body_b}); + } + TF_ASSIGN_OR_RETURN(auto body, bodyb->Build()); + + // Construct the While loop and return the result, + // return while_loop(cond_fun, body_fun, init)[1] + auto triangular_solve_left_looking_while = builder->While(cond, body, init); + return builder->GetTupleElement(triangular_solve_left_looking_while, 1); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve.h b/tensorflow/compiler/tf2xla/lib/triangular_solve.h index e32223bfdddda800b1fd4de3e4f0c8061e0f81d8..540c26b2473df9e7885f4e549b3e516a3d8a0d43 100644 --- a/tensorflow/compiler/tf2xla/lib/triangular_solve.h +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_TRIANGULAR_SOLVE_H_ #define TENSORFLOW_COMPILER_TF2XLA_LIB_TRIANGULAR_SOLVE_H_ -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" namespace tensorflow { @@ -57,14 +57,23 @@ namespace tensorflow { // // Uses a blocked algorithm if `block_size` is > 1; if block_size == 1 then no // blocking is used. 
-xla::StatusOr TriangularSolve( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, - xla::ComputationDataHandle b, bool left_side, bool lower, bool transpose_a, - bool conjugate_a, int64 block_size = 256); +xla::StatusOr TriangularSolve(xla::XlaBuilder* builder, + const xla::XlaOp& a, xla::XlaOp b, + bool left_side, bool lower, + bool transpose_a, bool conjugate_a, + int64 block_size = 256); -xla::StatusOr TriangularSolveLeftLooking( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a, - const xla::ComputationDataHandle& b, bool transpose_a, bool conjugate_a); +xla::StatusOr TriangularSolveLeftLooking(xla::XlaBuilder* builder, + const xla::XlaOp& a, + const xla::XlaOp& b, + bool transpose_a, + bool conjugate_a); + +xla::StatusOr TriangularSolveRightLooking(xla::XlaBuilder* builder, + const xla::XlaOp& a, + const xla::XlaOp& b, + bool transpose_a, + bool conjugate_a); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc b/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc index 661707062916263fd0d5d935ce41698a7655df02..87ea4763f7c2357ae179b68ade3715b24c46432f 100644 --- a/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc +++ b/tensorflow/compiler/tf2xla/lib/triangular_solve_test.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test.h" @@ -80,9 +80,9 @@ xla::Array2D AValsFull() { } XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLower(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsRight(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -102,9 +102,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLower(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsRight(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -124,9 +124,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsUpper(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsRight(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -146,9 +146,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; 
+ xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsUpper(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsRight(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -168,9 +168,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLower(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -191,9 +191,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLower(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -214,9 +214,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsUpper(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -237,9 +237,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsUpper(), 0, "a", &builder, &a); auto 
b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolve(&builder, a, b, @@ -260,9 +260,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) { } XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLowerComplex(), 0, "a", &builder, &a); auto b_data = @@ -288,9 +288,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) { } XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsUpperComplex(), 0, "a", &builder, &a); auto b_data = @@ -318,9 +318,9 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) { } XLA_TEST_F(TriangularSolveLeftLookingTest, Simple) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsLower(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolveLeftLooking(&builder, a, b, @@ -340,9 +340,9 @@ XLA_TEST_F(TriangularSolveLeftLookingTest, Simple) { } XLA_TEST_F(TriangularSolveLeftLookingTest, NonzeroUpperTriangle) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b; + xla::XlaOp a, b; auto a_data = CreateR2Parameter(AValsFull(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter(BValsLeft(), 1, "b", &builder, &b); auto result = TriangularSolveLeftLooking(&builder, a, b, diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc index 
31d823ca336039f691f2c16e37028c0de98b1ee5..d9ff7e6259f3fbab8957394bff5c5670a67dd0eb 100644 --- a/tensorflow/compiler/tf2xla/lib/util.cc +++ b/tensorflow/compiler/tf2xla/lib/util.cc @@ -27,15 +27,14 @@ limitations under the License. namespace tensorflow { -xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, - const xla::Shape& shape) { +xla::XlaOp Zeros(xla::XlaBuilder* builder, const xla::Shape& shape) { return builder->Broadcast( builder->ConstantLiteral(xla::Literal::Zero(shape.element_type())), xla::AsInt64Slice(shape.dimensions())); } -xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, - xla::PrimitiveType type, double value) { +xla::XlaOp FloatLiteral(xla::XlaBuilder* builder, xla::PrimitiveType type, + double value) { switch (type) { case xla::F16: return builder->ConstantR0(static_cast(value)); @@ -57,9 +56,8 @@ xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, } } -xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, - xla::PrimitiveType type, - int64 value) { +xla::XlaOp IntegerLiteral(xla::XlaBuilder* builder, xla::PrimitiveType type, + int64 value) { xla::Literal literal; switch (type) { case xla::U8: @@ -112,17 +110,18 @@ xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, return builder->ConstantLiteral(literal); } -xla::StatusOr SliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - gtl::ArraySlice start, gtl::ArraySlice end) { +xla::StatusOr SliceInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x, + gtl::ArraySlice start, + gtl::ArraySlice end) { TF_RET_CHECK(start.size() == end.size()); int64 n_minor_dims = start.size(); - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); + const int64 n_dims = xla::ShapeUtil::Rank(shape); TF_RET_CHECK(n_minor_dims <= n_dims); - 
gtl::ArraySlice major_dims(xla::AsInt64Slice(shape->dimensions()), + gtl::ArraySlice major_dims(xla::AsInt64Slice(shape.dimensions()), /*pos=*/0, /*len=*/n_dims - n_minor_dims); @@ -140,7 +139,7 @@ xla::StatusOr SliceInMinorDims( return builder->Slice(x, padded_start, padded_end, strides); } -std::vector PrependMajorDims(xla::ComputationBuilder* builder, +std::vector PrependMajorDims(xla::XlaBuilder* builder, const gtl::ArraySlice& major_dims, const gtl::ArraySlice& indices) { std::vector output(indices.size() + major_dims.size()); @@ -149,16 +148,16 @@ std::vector PrependMajorDims(xla::ComputationBuilder* builder, return output; } -xla::StatusOr DynamicSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const std::vector& starts, +xla::StatusOr DynamicSliceInMinorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, + const std::vector& starts, const gtl::ArraySlice& sizes) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(shape); int64 n_minor_dims = starts.size(); TF_RET_CHECK(n_minor_dims == sizes.size()); TF_RET_CHECK(n_minor_dims <= n_dims); - gtl::ArraySlice major_dims(xla::AsInt64Slice(shape->dimensions()), + gtl::ArraySlice major_dims(xla::AsInt64Slice(shape.dimensions()), /*pos=*/0, /*len=*/n_dims - sizes.size()); TF_ASSIGN_OR_RETURN(auto padded_starts, @@ -167,27 +166,29 @@ xla::StatusOr DynamicSliceInMinorDims( return builder->DynamicSlice(x, padded_starts, padded_sizes); } -xla::StatusOr UpdateSlice( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, gtl::ArraySlice start) { +xla::StatusOr UpdateSlice(xla::XlaBuilder* builder, + const xla::XlaOp& x, + const xla::XlaOp& update, + gtl::ArraySlice start) { // TODO(phawkins): make int64 work on all backends, remove the 
int32 cast. std::vector start_as_int32(start.begin(), start.end()); auto start_constant = builder->ConstantR1(start_as_int32); - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); - TF_ASSIGN_OR_RETURN(std::unique_ptr start_constant_shape, + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(shape); + TF_ASSIGN_OR_RETURN(xla::Shape start_constant_shape, builder->GetShape(start_constant)); const int64 start_length = - xla::ShapeUtil::GetDimension(*start_constant_shape, -1); + xla::ShapeUtil::GetDimension(start_constant_shape, -1); TF_RET_CHECK(start_length == n_dims); return builder->DynamicUpdateSlice(x, update, start_constant); } -xla::StatusOr UpdateSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, gtl::ArraySlice start) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); +xla::StatusOr UpdateSliceInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x, + const xla::XlaOp& update, + gtl::ArraySlice start) { + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(shape); const int64 n_minor_dims = start.size(); TF_RET_CHECK(n_minor_dims <= n_dims); std::vector padded_start(n_dims, 0); @@ -196,22 +197,21 @@ xla::StatusOr UpdateSliceInMinorDims( return UpdateSlice(builder, x, update, padded_start); } -xla::StatusOr DynamicUpdateSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, - const std::vector& starts) { +xla::StatusOr DynamicUpdateSliceInMinorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, const xla::XlaOp& update, + const std::vector& starts) { TF_ASSIGN_OR_RETURN(auto padded_starts, PrependZerosInMajorDims(builder, x, starts)); return 
builder->DynamicUpdateSlice(x, update, padded_starts); } -xla::StatusOr PrependZerosInMajorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const std::vector& starts) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); +xla::StatusOr PrependZerosInMajorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, + const std::vector& starts) { + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(shape); auto zero = builder->Reshape(builder->ConstantR0(0), {1}); - std::vector padded_starts(n_dims, zero); + std::vector padded_starts(n_dims, zero); for (int i = 0; i < starts.size(); ++i) { padded_starts[n_dims - starts.size() + i] = builder->Reshape(starts[i], {1}); @@ -219,10 +219,10 @@ xla::StatusOr PrependZerosInMajorDims( return builder->ConcatInDim(padded_starts, 0); } -xla::StatusOr TransposeInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(x)); - const int64 n_dims = xla::ShapeUtil::Rank(*shape); +xla::StatusOr TransposeInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x) { + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + const int64 n_dims = xla::ShapeUtil::Rank(shape); TF_RET_CHECK(n_dims >= 2); std::vector permutation(n_dims); std::iota(permutation.begin(), permutation.end(), 0); @@ -230,4 +230,11 @@ xla::StatusOr TransposeInMinorDims( return builder->Transpose(x, permutation); } +xla::StatusOr MaybeConjugate(xla::XlaBuilder* builder, + const xla::XlaOp& x, bool conjugate) { + TF_ASSIGN_OR_RETURN(xla::Shape shape, builder->GetShape(x)); + auto perform_conj = shape.element_type() == xla::C64 && conjugate; + return perform_conj ? 
builder->Conj(x) : x; +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h index b684123f1363cff9e6ac4314cc3a8ae7630cbdf3..3c120a2548576d6ad46870583ca65beea63507a3 100644 --- a/tensorflow/compiler/tf2xla/lib/util.h +++ b/tensorflow/compiler/tf2xla/lib/util.h @@ -16,75 +16,79 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_UTIL_H_ #define TENSORFLOW_COMPILER_TF2XLA_LIB_UTIL_H_ -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/lib/gtl/array_slice.h" namespace tensorflow { // Returns a zero-filled tensor with shape `shape`. -xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, - const xla::Shape& shape); +xla::XlaOp Zeros(xla::XlaBuilder* builder, const xla::Shape& shape); // Returns a floating point scalar constant of 'type' with 'value'. // If 'type' is complex, returns a real value with zero imaginary component. -xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, - xla::PrimitiveType type, double value); +xla::XlaOp FloatLiteral(xla::XlaBuilder* builder, xla::PrimitiveType type, + double value); // Makes a 1D tensor [0, ..., x, y] from two tensors x and y with zeros // prepended until the array is length n_dims. -xla::ComputationDataHandle PrependZerosInMajorDims( - xla::ComputationBuilder* builder, - gtl::ArraySlice starts); +xla::XlaOp PrependZerosInMajorDims(xla::XlaBuilder* builder, + gtl::ArraySlice starts); // Returns a integer scalar constant of 'type' with 'value'. // If 'type' is complex, returns a real value with zero imaginary component. 
-xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, - xla::PrimitiveType type, int64 value); +xla::XlaOp IntegerLiteral(xla::XlaBuilder* builder, xla::PrimitiveType type, + int64 value); // Builds a vector of zeros of length rank(x) with the last two values being // those in `starts`. -xla::StatusOr PrependZerosInMajorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const std::vector& starts); +xla::StatusOr PrependZerosInMajorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, + const std::vector& starts); // Performs a slice in the minor dimensions of a Tensor. -xla::StatusOr SliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - gtl::ArraySlice start, gtl::ArraySlice end); +xla::StatusOr SliceInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x, + gtl::ArraySlice start, + gtl::ArraySlice end); // Builds a 1-d vector out of a concatenation of `major_dims` and `starts`. -std::vector PrependMajorDims(xla::ComputationBuilder* builder, +std::vector PrependMajorDims(xla::XlaBuilder* builder, const gtl::ArraySlice& major_dims, const gtl::ArraySlice& indices); // Performs a dynamic slice in the minor dimensions of a Tensor. 
-xla::StatusOr DynamicSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const std::vector& starts, - const gtl::ArraySlice& sizes); +xla::StatusOr DynamicSliceInMinorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, + const std::vector& starts, const gtl::ArraySlice& sizes); // Updates a slice of 'x', i.e., // x[start[0], ..., start[n]] = update -xla::StatusOr UpdateSlice( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, gtl::ArraySlice start); +xla::StatusOr UpdateSlice(xla::XlaBuilder* builder, + const xla::XlaOp& x, + const xla::XlaOp& update, + gtl::ArraySlice start); // Updates a slice of 'x', where 'start' contains a list of minor dimensions: // x[..., start[0], ..., start[n]] = update -xla::StatusOr UpdateSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, gtl::ArraySlice start); +xla::StatusOr UpdateSliceInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x, + const xla::XlaOp& update, + gtl::ArraySlice start); -xla::StatusOr DynamicUpdateSliceInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, - const xla::ComputationDataHandle& update, - const std::vector& starts); +xla::StatusOr DynamicUpdateSliceInMinorDims( + xla::XlaBuilder* builder, const xla::XlaOp& x, const xla::XlaOp& update, + const std::vector& starts); // Transposes a stack of matrices `x` by swapping the last two dimensions. -xla::StatusOr TransposeInMinorDims( - xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x); +xla::StatusOr TransposeInMinorDims(xla::XlaBuilder* builder, + const xla::XlaOp& x); + +// Applies a complex conjugation operation if `a` is complex and `conjugate_a` +// is true, otherwise returns its argument. 
+xla::StatusOr MaybeConjugate(xla::XlaBuilder* builder, + const xla::XlaOp& x, bool conjugate); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/util_test.cc b/tensorflow/compiler/tf2xla/lib/util_test.cc index b6bd33af2e42a4ab93a22528fd49ef53c46bb479..265b39402c832f8c810a74f281563b05afdf2b1b 100644 --- a/tensorflow/compiler/tf2xla/lib/util_test.cc +++ b/tensorflow/compiler/tf2xla/lib/util_test.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/lib/batch_dot.h" #include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test.h" @@ -65,9 +64,9 @@ xla::Array3D BatchedAValsFull() { } XLA_TEST_F(UtilTest, Simple2dLookup) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, x, y; + xla::XlaOp a, x, y; auto a_data = CreateR2Parameter(BValsRight(), 0, "a", &builder, &a); auto x_data = CreateR0Parameter(2, 1, "x", &builder, &x); auto y_data = CreateR0Parameter(1, 2, "y", &builder, &y); @@ -80,9 +79,9 @@ XLA_TEST_F(UtilTest, Simple2dLookup) { } XLA_TEST_F(UtilTest, Simple3dLookup) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, index; + xla::XlaOp a, index; auto a_data = CreateR3Parameter(BatchedAValsFull(), 0, "a", &builder, &a); auto index_data = CreateR0Parameter(1, 1, "index", &builder, &index); @@ -97,9 +96,9 @@ XLA_TEST_F(UtilTest, Simple3dLookup) { } XLA_TEST_F(UtilTest, SimpleSliceUpdate) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); - xla::ComputationDataHandle a, b, x, y; + xla::XlaOp a, b, x, y; auto a_data = CreateR2Parameter(AValsFull(), 0, "a", &builder, &a); auto b_data = CreateR2Parameter({{9, 1, -10}}, 1, "b", &builder, 
&b); auto x_data = CreateR0Parameter(2, 2, "x", &builder, &x); @@ -117,11 +116,11 @@ XLA_TEST_F(UtilTest, SimpleSliceUpdate) { } XLA_TEST_F(UtilTest, RowBatchDot) { - xla::ComputationBuilder builder(client_, TestName()); + xla::XlaBuilder builder(TestName()); int n = 4; - xla::ComputationDataHandle a, row, index; + xla::XlaOp a, row, index; auto a_data = CreateR3Parameter(BatchedAValsFull(), 0, "a", &builder, &a); auto row_data = CreateR3Parameter({{{9, 1, 0, 0}}, {{2, 4, 0, 0}}}, 1, diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.cc b/tensorflow/compiler/tf2xla/lib/while_loop.cc index 495d9c60780b0a728e8dbfb4537d33d92b4bb5b7..09ce594930efc0af47306590d76b322ac730f80f 100644 --- a/tensorflow/compiler/tf2xla/lib/while_loop.cc +++ b/tensorflow/compiler/tf2xla/lib/while_loop.cc @@ -20,24 +20,24 @@ limitations under the License. namespace tensorflow { -xla::StatusOr> XlaWhileLoop( +xla::StatusOr> XlaWhileLoop( const LoopConditionFunction& condition_function, const LoopBodyFunction& body_function, - gtl::ArraySlice initial_values, - StringPiece name, xla::ComputationBuilder* builder) { + gtl::ArraySlice initial_values, StringPiece name, + xla::XlaBuilder* builder) { int arity = initial_values.size(); std::vector var_shapes; var_shapes.reserve(arity); - for (const xla::ComputationDataHandle& input : initial_values) { + for (const xla::XlaOp& input : initial_values) { TF_ASSIGN_OR_RETURN(auto shape, builder->GetShape(input)); - var_shapes.push_back(std::move(*shape)); + var_shapes.push_back(std::move(shape)); } xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(var_shapes); // Unpacks a tuple into its component parts. 
- auto unpack_tuple = [](xla::ComputationDataHandle tuple, int arity, - xla::ComputationBuilder* builder) { - std::vector elements(arity); + auto unpack_tuple = [](xla::XlaOp tuple, int arity, + xla::XlaBuilder* builder) { + std::vector elements(arity); for (int i = 0; i < arity; ++i) { elements[i] = builder->GetTupleElement(tuple, i); } @@ -45,20 +45,20 @@ xla::StatusOr> XlaWhileLoop( }; // Build the condition. - std::unique_ptr cond_builder = + std::unique_ptr cond_builder = builder->CreateSubBuilder(strings::StrCat(name, "_condition")); { auto parameter = cond_builder->Parameter(0, tuple_shape, "parameter"); - TF_ASSIGN_OR_RETURN( - auto result, + TF_RETURN_IF_ERROR( condition_function(unpack_tuple(parameter, arity, cond_builder.get()), - cond_builder.get())); + cond_builder.get()) + .status()); } TF_ASSIGN_OR_RETURN(auto cond, cond_builder->Build()); // Build the body. - std::unique_ptr body_builder = + std::unique_ptr body_builder = builder->CreateSubBuilder(strings::StrCat(name, "_body")); { auto parameter = body_builder->Parameter(0, tuple_shape, "parameter"); @@ -78,38 +78,38 @@ xla::StatusOr> XlaWhileLoop( return unpack_tuple(outputs, arity, builder); } -xla::StatusOr> XlaForEachIndex( +xla::StatusOr> XlaForEachIndex( int64 num_iterations, xla::PrimitiveType num_iterations_type, const ForEachIndexBodyFunction& body_function, - gtl::ArraySlice initial_values, - StringPiece name, xla::ComputationBuilder* builder) { - auto while_cond_fn = [&](gtl::ArraySlice values, - xla::ComputationBuilder* cond_builder) - -> xla::StatusOr { + gtl::ArraySlice initial_values, StringPiece name, + xla::XlaBuilder* builder) { + auto while_cond_fn = + [&](gtl::ArraySlice values, + xla::XlaBuilder* cond_builder) -> xla::StatusOr { return cond_builder->Lt( values[0], IntegerLiteral(cond_builder, num_iterations_type, num_iterations)); }; - auto while_body_fn = [&](gtl::ArraySlice values, - xla::ComputationBuilder* body_builder) - -> xla::StatusOr> { - xla::ComputationDataHandle 
iteration = values[0]; + auto while_body_fn = [&](gtl::ArraySlice values, + xla::XlaBuilder* body_builder) + -> xla::StatusOr> { + xla::XlaOp iteration = values[0]; - std::vector updated_values; + std::vector updated_values; updated_values.reserve(values.size()); updated_values.push_back(body_builder->Add( iteration, body_builder->ConstantLiteral(xla::Literal::One(num_iterations_type)))); values.remove_prefix(1); - TF_ASSIGN_OR_RETURN(std::vector body_outputs, + TF_ASSIGN_OR_RETURN(std::vector body_outputs, body_function(iteration, values, body_builder)); updated_values.insert(updated_values.end(), body_outputs.begin(), body_outputs.end()); return updated_values; }; - std::vector values; + std::vector values; values.reserve(initial_values.size() + 1); values.push_back( builder->ConstantLiteral(xla::Literal::Zero(num_iterations_type))); diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.h b/tensorflow/compiler/tf2xla/lib/while_loop.h index 2e67a0c99b6deb65fa16ab2dec1727f5cb5fcb92..5b6684c995889efbb1378c7ac4903548891d090a 100644 --- a/tensorflow/compiler/tf2xla/lib/while_loop.h +++ b/tensorflow/compiler/tf2xla/lib/while_loop.h @@ -19,8 +19,8 @@ limitations under the License. #include #include -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -29,14 +29,14 @@ namespace tensorflow { // Function that builds a loop condition. Takes as input a sequence of input // values, and returns a boolean value representing if the condition succeeds. 
-typedef std::function( - gtl::ArraySlice, xla::ComputationBuilder*)> +typedef std::function(gtl::ArraySlice, + xla::XlaBuilder*)> LoopConditionFunction; // Function that builds a loop body. Takes as input a sequence of input values // and returns a sequence of output values. -typedef std::function>( - gtl::ArraySlice, xla::ComputationBuilder*)> +typedef std::function>( + gtl::ArraySlice, xla::XlaBuilder*)> LoopBodyFunction; // Helper function for building an XLA while loop, where the values carried by @@ -47,27 +47,26 @@ typedef std::function>( // init: (a, b, c) // ) // 'name' is a descriptive name for the loop. -xla::StatusOr> XlaWhileLoop( +xla::StatusOr> XlaWhileLoop( const LoopConditionFunction& condition_function, const LoopBodyFunction& body_function, - gtl::ArraySlice initial_values, - StringPiece name, xla::ComputationBuilder* builder); + gtl::ArraySlice initial_values, StringPiece name, + xla::XlaBuilder* builder); // Builds an XLA loop that repeats a computation `num_iterations` times. // // The body function (ForEachIndexBodyFunction) takes as input a pair of // (current iteration number, loop-carried values), and returns an updated // vector of the loop-carried values. 
-typedef std::function>( - xla::ComputationDataHandle, gtl::ArraySlice, - xla::ComputationBuilder*)> +typedef std::function>( + xla::XlaOp, gtl::ArraySlice, xla::XlaBuilder*)> ForEachIndexBodyFunction; -xla::StatusOr> XlaForEachIndex( +xla::StatusOr> XlaForEachIndex( int64 num_iterations, xla::PrimitiveType num_iterations_type, const ForEachIndexBodyFunction& body_function, - gtl::ArraySlice initial_values, - StringPiece name, xla::ComputationBuilder* builder); + gtl::ArraySlice initial_values, StringPiece name, + xla::XlaBuilder* builder); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc index 6051d7dffd7493d8cffb07c1b5d10500e7e75522..3a08aa8cf4f5cea6210cc9470d57c3387445ea6e 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.cc +++ b/tensorflow/compiler/tf2xla/tf2xla.cc @@ -251,7 +251,7 @@ Status CreateXlaArgs(const Graph& graph, // Converts the TensorFlow graph into an XLA computation, by executing the // graph symbolically, with each op building up the XLA HLO. Status ConvertGraphToXla(std::unique_ptr graph, xla::Client* client, - xla::Computation* computation) { + xla::XlaComputation* computation) { XlaOpRegistry::RegisterCompilationKernels(); for (Node* node : graph->nodes()) { node->set_assigned_device_name( @@ -303,7 +303,7 @@ Status ConvertGraphToXla(std::unique_ptr graph, xla::Client* client, } // InitGraph creates a graph based on the graph_def, that may then be converted -// to an xla::Computation via ConvertGraphToXla. +// to an xla::XlaComputation via ConvertGraphToXla. // // The graph is rewritten with _Arg and _Retval nodes, representing the inputs // and outputs of the function that will be compiled. 
Each feed id causes a new @@ -348,7 +348,7 @@ Status InitGraph(const GraphDef& graph_def, const tf2xla::Config& config, Status ConvertGraphDefToXla(const GraphDef& graph_def, const tf2xla::Config& config, xla::Client* client, - xla::Computation* computation) { + xla::XlaComputation* computation) { std::unique_ptr graph; TF_RETURN_IF_ERROR(InitGraph(graph_def, config, &graph)); TF_RETURN_IF_ERROR(ConvertGraphToXla(std::move(graph), client, computation)); diff --git a/tensorflow/compiler/tf2xla/tf2xla.h b/tensorflow/compiler/tf2xla/tf2xla.h index 473c431b12d441c652f1d0d6c11c5e87836ab36d..d02fc56c5b8f58f0e4cfe1779ad34fe3b79324c7 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.h +++ b/tensorflow/compiler/tf2xla/tf2xla.h @@ -18,21 +18,21 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/tf2xla.pb.h" #include "tensorflow/compiler/xla/client/client.h" -#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/core/framework/graph.pb.h" namespace tensorflow { -// Converts a tensorflow::GraphDef into an xla::Computation. The given `config` -// specifies the portion of the graph to convert, via feeds and fetches. Each -// feed is a positional input argument for the generated computation, while each -// fetch is a positional output argument. +// Converts a tensorflow::GraphDef into an xla::XlaComputation. The given +// `config` specifies the portion of the graph to convert, via feeds and +// fetches. Each feed is a positional input argument for the generated +// computation, while each fetch is a positional output argument. // // The computation is built in the context of the given `client`, which may // subsequently be used to compile or execute the computation. 
Status ConvertGraphDefToXla(const GraphDef& graph_def, const tf2xla::Config& config, xla::Client* client, - xla::Computation* computation); + xla::XlaComputation* computation); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/tf2xla_test.cc b/tensorflow/compiler/tf2xla/tf2xla_test.cc index b813668a9edd3a704a9dca1eaa588c1eced6ac31..84c133ffabe20dbdaa4d5a64e035efb5e4c4c44b 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_test.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_test.cc @@ -69,7 +69,7 @@ TEST(ConvertGraphDefToXla, Sum) { tf2xla::Config config = SumConfig(); xla::LocalClient* client = xla::ClientLibrary::LocalClientOrDie(); - xla::Computation computation; + xla::XlaComputation computation; TF_EXPECT_OK(ConvertGraphDefToXla(graph_def, config, client, &computation)); // Set up arguments. diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index 7ec85aa3cdec622cae509f45c5ba7740222025f9..9203e8d9e607e99ad738350a1c3f2b9e900df179 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -232,7 +232,7 @@ Status PruneGraphDefInto(const tf2xla::Config& config, const GraphDef& in, // Push input nodes of the currently visited node to name_queue. for (const string& in_edge : map_entry.second->input()) { auto id = ParseTensorName(in_edge); - const string node_name = id.first.ToString(); + const string node_name = std::string(id.first); if (feed_tensors.find(std::make_pair(node_name, id.second)) == feed_tensors.end()) { name_queue.push(node_name); diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc index fcb0a4e63814b4afc114bdaea312a92dd8396a2e..fe7ec633eca2504faf6cbb2f5fd7f59780ab7976 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/sharding_util.h" #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/platform/mem.h" @@ -108,7 +109,7 @@ void XlaCompilationDevice::Compute(OpKernel* op_kernel, // If no sharding metadata is found, XLA is free to use whatever device it // wants. In practice this usually has the effect of placing things on device // 0. - xla::ScopedShardingAssignment assign_sharding(b, op_sharding); + xla::XlaScopedShardingAssignment assign_sharding(b, op_sharding); op_kernel->Compute(context); b->ClearOpMetadata(); @@ -126,9 +127,7 @@ Status XlaCompilationDevice::MakeTensorFromProto( XlaExpression::XlaExpression() = default; -void XlaExpression::set_handle(const xla::ComputationDataHandle& h) { - handle_ = h; -} +void XlaExpression::set_handle(const xla::XlaOp& h) { handle_ = h; } void XlaExpression::set_constant_value(Tensor value) { has_constant_value_ = true; diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.h b/tensorflow/compiler/tf2xla/xla_compilation_device.h index 0243ee332fbdca0fe5e28b1a7d9530df4417f807..d0b9e34e162f3412cd6662a2e2bbfe3df213c4c2 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.h +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/tf2xla/xla_resource.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/framework/device_base.h" @@ -69,7 +69,7 @@ class XlaCompilationDevice : public LocalDevice { // A XlaExpression wraps an XLA computation. 
Each Tensor on an // XlaCompilationDevice contains an XlaExpression, and the shape of the Tensor -// matches the shape of the subcomputation in the ComputationDataHandle. Each +// matches the shape of the subcomputation in the XlaOp. Each // expression is either a constant, or a function of previously-compiled // expressions. class XlaExpression { @@ -78,8 +78,8 @@ class XlaExpression { // handle() stores the XLA handle of the computation that the // expression represents. - void set_handle(const xla::ComputationDataHandle& h); - const xla::ComputationDataHandle& handle() const { return handle_; } + void set_handle(const xla::XlaOp& h); + const xla::XlaOp& handle() const { return handle_; } void set_constant_value(Tensor value); bool has_constant_value() const { return has_constant_value_; } @@ -90,7 +90,7 @@ class XlaExpression { private: // The XLA handle of the expression's computation. - xla::ComputationDataHandle handle_; + xla::XlaOp handle_; // If this expression is a constant with a known value, 'constant_value' is a // host-memory Tensor containing the value. 
Used to avoid invoking XLA for diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index c0e996768491a6315c21021ce874b8a11557de6e..3d1946c332b0f903b710a19fbb79fc9923e89c43 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -339,11 +339,11 @@ Status BuildComputation( const std::vector& arg_cores, const std::vector& retvals, const std::vector>& resources, - bool return_updated_values_for_all_resources, - xla::ComputationBuilder* builder, xla::Computation* computation, - int* num_computation_outputs, int* num_nonconst_outputs, + bool return_updated_values_for_all_resources, xla::XlaBuilder* builder, + xla::XlaComputation* computation, int* num_computation_outputs, + int* num_nonconst_outputs, std::vector* resource_updates) { - std::vector elems; + std::vector elems; elems.reserve(retvals.size()); for (const XlaExpression& retval : retvals) { if (!retval.has_constant_value()) { @@ -376,14 +376,12 @@ Status BuildComputation( const XlaCompiler::Argument& arg = args[resource->arg_num()]; const int core = arg_cores[resource->arg_num()]; DCHECK_LT(resource->arg_num(), arg_cores.size()); - bool modified = - resource->value().handle() != resource->initial_value().handle(); + bool modified = resource->value() != resource->initial_value(); // TensorArray gradients were modified if their values changed or there are // any newly created gradients. for (const auto& grad : resource->tensor_array_gradients()) { modified = modified || - grad.second->value().handle() != - grad.second->initial_value().handle() || + grad.second->value() != grad.second->initial_value() || arg.tensor_array_gradients.count(grad.first) == 0; } if (return_updated_values_for_all_resources || modified) { @@ -398,11 +396,11 @@ Status BuildComputation( } // Request that the value be returned on a specific core. 
- xla::ScopedShardingAssignment assign_sharding( + xla::XlaScopedShardingAssignment assign_sharding( builder, core == -1 ? tensorflow::gtl::optional() : xla::sharding_builder::AssignDevice(core)); - xla::ComputationDataHandle handle; + xla::XlaOp handle; TF_RETURN_IF_ERROR(resource->Pack(&handle, builder)); // Since we can't change the sharding metadata of as this point, @@ -421,7 +419,7 @@ Status BuildComputation( builder->Tuple(elems); builder->ClearOpMetadata(); - xla::StatusOr computation_status = builder->Build(); + xla::StatusOr computation_status = builder->Build(); if (!computation_status.ok()) { return computation_status.status(); } @@ -435,7 +433,7 @@ Status BuildComputation( // `args` are the arguments to the computation. Status XlaCompiler::BuildArguments( const Graph& graph, const std::vector& args, - bool use_tuple_arg, xla::ComputationBuilder* builder, XlaContext* context, + bool use_tuple_arg, xla::XlaBuilder* builder, XlaContext* context, std::vector* arg_cores, std::vector* arg_expressions, std::vector* input_mapping, std::vector* input_shapes, bool is_entry_computation) { @@ -461,8 +459,7 @@ Status XlaCompiler::BuildArguments( // alias. XlaResource* resource; TF_RETURN_IF_ERROR(context->CreateResource( - arg.resource_kind, i, arg.name, arg.type, arg.shape, - xla::ComputationDataHandle(), + arg.resource_kind, i, arg.name, arg.type, arg.shape, xla::XlaOp(), /*tensor_array_size=*/arg.tensor_array_size, /*tensor_array_gradients=*/arg.tensor_array_gradients, &resource)); arg_expression.set_resource(resource); @@ -531,9 +528,9 @@ Status XlaCompiler::BuildArguments( builder->SetOpMetadata(arg_metadata); // Build parameter handles for non-constant arguments. 
- std::vector arg_handles(input_mapping->size()); + std::vector arg_handles(input_mapping->size()); if (use_tuple_arg) { - xla::ComputationDataHandle tuple; + xla::XlaOp tuple; if (is_entry_computation) { xla::OpSharding tuple_sharding; tuple_sharding.set_type(xla::OpSharding::Type::OpSharding_Type_TUPLE); @@ -544,15 +541,15 @@ Status XlaCompiler::BuildArguments( core == -1 ? xla::sharding_builder::AssignDevice(root_device) : xla::sharding_builder::AssignDevice(core); } - xla::ScopedShardingAssignment assign_tuple_sharding(builder, - tuple_sharding); + xla::XlaScopedShardingAssignment assign_tuple_sharding(builder, + tuple_sharding); tuple = builder->Parameter(0, (*input_shapes)[0], "arg_tuple"); } else { tuple = builder->Parameter(0, (*input_shapes)[0], "arg_tuple"); } for (std::vector::size_type i = 0; i < input_mapping->size(); ++i) { const int core = (*arg_cores)[input_mapping->at(i)]; - xla::ScopedShardingAssignment assign_sharding( + xla::XlaScopedShardingAssignment assign_sharding( builder, core == -1 ? tensorflow::gtl::optional() : xla::sharding_builder::AssignDevice(core)); arg_handles[i] = builder->GetTupleElement(tuple, i); @@ -560,7 +557,7 @@ Status XlaCompiler::BuildArguments( } else { for (std::vector::size_type i = 0; i < input_mapping->size(); ++i) { const int core = (*arg_cores)[input_mapping->at(i)]; - xla::ScopedShardingAssignment assign_sharding( + xla::XlaScopedShardingAssignment assign_sharding( builder, core == -1 ? 
tensorflow::gtl::optional() : xla::sharding_builder::AssignDevice(core)); arg_handles[i] = @@ -647,7 +644,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, std::unique_ptr graph, const std::vector& args, CompilationResult* result) { - VLOG(1) << "Executing graph symbolically to populate ComputationBuilder."; + VLOG(1) << "Executing graph symbolically to populate XlaBuilder."; if (VLOG_IS_ON(2)) { VLOG(2) << "XlaCompiler::CompileGraph: " @@ -663,7 +660,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, TF_RETURN_IF_ERROR( FunctionalizeControlFlow(graph.get(), local_flib_def_.get())); - xla::ComputationBuilder builder(client(), name); + xla::XlaBuilder builder(name); XlaContext* context = new XlaContext(this, &builder, options_.allow_cpu_custom_calls, options.resolve_compile_time_constants, @@ -683,7 +680,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, int num_nonconst_outputs; int num_computation_outputs; - result->computation = std::make_shared(); + result->computation = std::make_shared(); TF_RETURN_IF_ERROR(BuildComputation( args, arg_cores, context->retvals(), context->resources(), options.return_updated_values_for_all_resources, &builder, @@ -814,7 +811,7 @@ Status XlaCompiler::SetHostToDeviceMetadata( } Status XlaCompiler::GetHostComputeControlDependency( - const string& host_compute_name, xla::ComputationDataHandle* handle) { + const string& host_compute_name, xla::XlaOp* handle) { const auto iter = host_compute_control_output_.find(host_compute_name); if (iter == host_compute_control_output_.end()) { return errors::InvalidArgument( @@ -827,7 +824,7 @@ Status XlaCompiler::GetHostComputeControlDependency( } Status XlaCompiler::SetHostComputeControlDependency( - const string& host_compute_name, const xla::ComputationDataHandle& handle) { + const string& host_compute_name, const xla::XlaOp& handle) { if (host_compute_control_output_.find(host_compute_name) != 
host_compute_control_output_.end()) { return errors::InvalidArgument( diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 8f564f35ec81765e8998513dfd4805d221200c6c..ca6cd822ef4effd48dbc3cc18d35d6642f303df1 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -227,7 +227,7 @@ class XlaCompiler { std::vector resource_updates; // The XLA computation built from the tensorflow subgraph. - std::shared_ptr computation; + std::shared_ptr computation; }; struct Options { @@ -281,7 +281,7 @@ class XlaCompiler { const NameAttrList& fn_name_attrs, std::vector args, CompilationResult* result); - // Compiles a tensorflow::Graph into an xla::Computation. + // Compiles a tensorflow::Graph into an xla::XlaComputation. // Similar to CompileFunction, but takes a Graph as input rather than a // function. Status CompileGraph(const CompileOptions& options, string const& name, @@ -290,7 +290,7 @@ class XlaCompiler { CompilationResult* result); // Compiles a single Op, given by an OpKernelContext, into an - // xla::Computation. Similar to CompileFunction but takes a single Op as + // xla::XlaComputation. Similar to CompileFunction but takes a single Op as // input. Status CompileSingleOp(const CompileOptions& options, string const& name, OpKernelContext* ctx, @@ -337,10 +337,9 @@ class XlaCompiler { // a given HostCompute Op as long as the names are unique within the // compilation. 
Status GetHostComputeControlDependency(const string& host_compute_name, - xla::ComputationDataHandle* handle); - Status SetHostComputeControlDependency( - const string& host_compute_name, - const xla::ComputationDataHandle& handle); + xla::XlaOp* handle); + Status SetHostComputeControlDependency(const string& host_compute_name, + const xla::XlaOp& handle); const Options& options() const { return options_; } xla::Client* client() const { return options_.client; } @@ -358,7 +357,7 @@ class XlaCompiler { // `args` are the arguments to the computation. Status BuildArguments(const Graph& graph, const std::vector& args, - bool use_tuple_arg, xla::ComputationBuilder* builder, + bool use_tuple_arg, xla::XlaBuilder* builder, XlaContext* context, std::vector* arg_cores, std::vector* arg_expressions, std::vector* input_mapping, @@ -408,8 +407,7 @@ class XlaCompiler { std::unordered_map host_compute_sends_; std::unordered_map host_compute_recvs_; - std::unordered_map - host_compute_control_output_; + std::unordered_map host_compute_control_output_; TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); }; diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 096dc7160bfc0a3a751f33e7d646471ebea56070..6b8918b26179735a4518a422fed024fa534122f5 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -164,7 +164,6 @@ REGISTER_XLA_OP(Name("DummyDuplicateOp").Device(DEVICE_CPU_XLA_JIT), REGISTER_XLA_OP(Name("DummyDuplicateOp").Device(DEVICE_GPU_XLA_JIT), DummyDuplicateOp); - // Tests compilation and execution of an empty graph. 
TEST_F(XlaCompilerTest, EmptyReturnValues) { XlaCompiler compiler(DefaultOptions()); @@ -433,21 +432,26 @@ TEST_F(XlaCompilerTest, DeterministicCompilation) { } for (int64 i = 1; i < test_count; ++i) { - auto m1 = - results[i - 1].computation->Snapshot().ValueOrDie()->entry().requests(); - auto m2 = - results[i].computation->Snapshot().ValueOrDie()->entry().requests(); - // Check if every entry is the same. - for (auto& entry1 : m1) { - int64 key = entry1.first; - auto value1 = entry1.second; - auto entry2 = m2.find(key); - auto value2 = entry2->second; - EXPECT_TRUE(entry2 != m2.end()); - string str1, str2; - value1.AppendToString(&str1); - value2.AppendToString(&str2); - EXPECT_EQ(str1, str2); + const auto& m1 = results[i - 1].computation->proto(); + const auto& m2 = results[i].computation->proto(); + ASSERT_EQ(m1.computations_size(), m2.computations_size()); + // Check if every hlo computation is the same. + for (int k = 0; k < m1.computations_size(); k++) { + const auto& c1 = m1.computations(k); + const auto& c2 = m2.computations(k); + ASSERT_EQ(c1.instructions_size(), c2.instructions_size()); + for (int j = 0; j < c1.instructions_size(); j++) { + auto instr1 = c1.instructions(j); + auto instr2 = c2.instructions(j); + instr1.clear_name(); + instr2.clear_name(); + // The names of instructions were uniquified by the XlaBuilder, the rest + // of the fields should be identical. + string str1, str2; + instr1.AppendPartialToString(&str1); + instr2.AppendPartialToString(&str2); + EXPECT_EQ(str1, str2); + } } } } diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index 8423921086fec1cf534cf613102fc3839035cb85..3dd2d183f3a538786856dd8d92c5886b1cc237d8 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -25,7 +25,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -63,7 +63,7 @@ void XlaContext::set_args(std::vector args) { } XlaContext::XlaContext( - XlaCompiler* compiler, xla::ComputationBuilder* builder, + XlaCompiler* compiler, xla::XlaBuilder* builder, bool allow_cpu_custom_calls, bool resolve_compile_time_constants, const std::function* variable_representation_shape_fn) @@ -78,7 +78,7 @@ string XlaContext::DebugString() { return "TLA JIT context"; } // This is called by the Retval Op to associate a computed value // with a specific return value of the subgraph. void XlaContext::AddRetval(int retval_index, DataType type, - const xla::ComputationDataHandle& handle) { + const xla::XlaOp& handle) { VLOG(1) << "Added retval index " << retval_index << " to XLA computation"; // Add the return value to the list being built up. 
if (retvals_.size() <= retval_index) { @@ -104,13 +104,12 @@ Status XlaContext::AddConstRetval(int retval_index, DataType dtype, return Status::OK(); } -xla::ComputationBuilder* XlaContext::builder() { return builder_; } +xla::XlaBuilder* XlaContext::builder() { return builder_; } Status XlaContext::CreateResource( XlaResource::Kind kind, int arg_num, string name, DataType type, - TensorShape shape, const xla::ComputationDataHandle& handle, - int64 tensor_array_size, const std::set& tensor_array_gradients, - XlaResource** resource) { + TensorShape shape, const xla::XlaOp& handle, int64 tensor_array_size, + const std::set& tensor_array_gradients, XlaResource** resource) { resources_.emplace_back( new XlaResource(kind, arg_num, std::move(name), type, std::move(shape), handle, tensor_array_size, tensor_array_gradients)); @@ -123,11 +122,11 @@ TensorShape XlaContext::VariableRepresentationShape(const TensorShape& shape, return (*variable_representation_shape_fn_)(shape, type); } -const xla::Computation* XlaContext::GetOrCreateMax(const DataType type) { +const xla::XlaComputation* XlaContext::GetOrCreateMax(const DataType type) { return LookupOrCreate(type, &max_func_, [this, type] { const string type_string = DataTypeString(type); VLOG(1) << "Building Max() for " << type_string; - xla::ComputationBuilder b(builder()->client(), "max<" + type_string + ">"); + xla::XlaBuilder b("max<" + type_string + ">"); xla::PrimitiveType xla_type; TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); @@ -137,11 +136,11 @@ const xla::Computation* XlaContext::GetOrCreateMax(const DataType type) { }); } -const xla::Computation* XlaContext::GetOrCreateMin(const DataType type) { +const xla::XlaComputation* XlaContext::GetOrCreateMin(const DataType type) { return LookupOrCreate(type, &min_func_, [this, type] { const string type_string = DataTypeString(type); VLOG(1) << "Building Min() for " << type_string; - 
xla::ComputationBuilder b(builder()->client(), "min<" + type_string + ">"); + xla::XlaBuilder b("min<" + type_string + ">"); xla::PrimitiveType xla_type; TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); @@ -151,11 +150,11 @@ const xla::Computation* XlaContext::GetOrCreateMin(const DataType type) { }); } -const xla::Computation* XlaContext::GetOrCreateAdd(const DataType type) { +const xla::XlaComputation* XlaContext::GetOrCreateAdd(const DataType type) { return LookupOrCreate(type, &add_func_, [this, type] { const string type_string = DataTypeString(type); VLOG(1) << "Building Add() for " << type_string; - xla::ComputationBuilder b(builder()->client(), "add<" + type_string + ">"); + xla::XlaBuilder b("add<" + type_string + ">"); xla::PrimitiveType xla_type; TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); @@ -165,11 +164,11 @@ const xla::Computation* XlaContext::GetOrCreateAdd(const DataType type) { }); } -const xla::Computation* XlaContext::GetOrCreateMul(const DataType type) { +const xla::XlaComputation* XlaContext::GetOrCreateMul(const DataType type) { return LookupOrCreate(type, &mul_func_, [this, type] { const string type_string = DataTypeString(type); VLOG(1) << "Building Mul() for " << type_string; - xla::ComputationBuilder b(builder()->client(), "mul<" + type_string + ">"); + xla::XlaBuilder b("mul<" + type_string + ">"); xla::PrimitiveType xla_type; TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = b.Parameter(0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); @@ -179,9 +178,9 @@ const xla::Computation* XlaContext::GetOrCreateMul(const DataType type) { }); } -const xla::Computation* XlaContext::LookupOrCreate( +const xla::XlaComputation* XlaContext::LookupOrCreate( DataType type, ComputationMap* out, - const std::function& create) { + const std::function& create) { { const auto& entry = 
(*out)[type]; if (!entry.IsNull()) { diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h index 00fbaba37c542954f690b310a184cff985a05156..1136ffe5073a8e7fd3c27d6ec7050cb1f8307584 100644 --- a/tensorflow/compiler/tf2xla/xla_context.h +++ b/tensorflow/compiler/tf2xla/xla_context.h @@ -22,8 +22,8 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -43,7 +43,7 @@ class XlaContext : public ResourceBase { static XlaContext& Get(const XlaOpKernelContext* ctx); // Creates a new XlaContext. - XlaContext(XlaCompiler* compiler, xla::ComputationBuilder* builder, + XlaContext(XlaCompiler* compiler, xla::XlaBuilder* builder, bool allow_cpu_custom_calls, bool resolve_compile_time_constants, const std::function* variable_representation_shape_fn); @@ -53,9 +53,8 @@ class XlaContext : public ResourceBase { XlaCompiler* compiler() const { return compiler_; } - // Returns the ComputationBuilder that Ops use for compiling new - // expressions. - xla::ComputationBuilder* builder(); + // Returns the XlaBuilder that Ops use for compiling new expressions. + xla::XlaBuilder* builder(); bool allow_cpu_custom_calls() const { return allow_cpu_custom_calls_; } @@ -66,8 +65,7 @@ class XlaContext : public ResourceBase { // This is called by the Retval Op to associate a computed value // with a specific return value of the subgraph. 
- void AddRetval(int retval_index, DataType type, - const xla::ComputationDataHandle& handle); + void AddRetval(int retval_index, DataType type, const xla::XlaOp& handle); // As for Retval, but for return values that are compile-time constants. Status AddConstRetval(int retval_index, DataType dtype, @@ -79,8 +77,7 @@ class XlaContext : public ResourceBase { // Fails if the resource already exists. Status CreateResource(XlaResource::Kind kind, int arg_num, string name, DataType type, TensorShape shape, - const xla::ComputationDataHandle& handle, - int64 tensor_array_size, + const xla::XlaOp& handle, int64 tensor_array_size, const std::set& tensor_array_gradients, XlaResource** resource); @@ -96,22 +93,22 @@ class XlaContext : public ResourceBase { // Get an XLA lambda to compute Max. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateMax(const DataType type); + const xla::XlaComputation* GetOrCreateMax(const DataType type); // Get an XLA lambda to compute Min. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateMin(const DataType type); + const xla::XlaComputation* GetOrCreateMin(const DataType type); // Get an XLA lambda to compute Add. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateAdd(const DataType type); + const xla::XlaComputation* GetOrCreateAdd(const DataType type); // Get an XLA lambda to compute Mul. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. 
- const xla::Computation* GetOrCreateMul(const DataType type); + const xla::XlaComputation* GetOrCreateMul(const DataType type); // The name of the XlaContext resource during symbolic graph execution. static const char kXlaContextResourceName[]; @@ -119,9 +116,8 @@ class XlaContext : public ResourceBase { private: XlaCompiler* const compiler_; - // The ComputationBuilder used to construct the subgraph's compiled - // representation. - xla::ComputationBuilder* builder_; + // The XlaBuilder used to construct the subgraph's compiled representation. + xla::XlaBuilder* builder_; // Allow ops to emit CustomCall operations for CPU. const bool allow_cpu_custom_calls_; @@ -146,14 +142,14 @@ class XlaContext : public ResourceBase { variable_representation_shape_fn_; // Cache of prebuilt computations indexed by their type. - using ComputationMap = std::map; + using ComputationMap = std::map; // Finds the value for the given type in out map if it already // exists or makes a new value with create function and keeps it the // map. The returned value != nullptr and is owned by the map. - const xla::Computation* LookupOrCreate( + const xla::XlaComputation* LookupOrCreate( DataType type, ComputationMap* out, - const std::function& create); + const std::function& create); // Cached computation to compute Max of two elements, specialized by type. ComputationMap max_func_; diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 62a5114837e07f35134ad99e28880d6a9233a213..f1594193af09c7193f03b4685d3a7d4510d654dd 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -22,7 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" @@ -32,13 +32,12 @@ namespace tensorflow { namespace { -Status ArgMinMax(xla::ComputationBuilder* builder, XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, - const TensorShape& input_shape, DataType input_type, - DataType output_type, int axis, bool is_min, - xla::ComputationDataHandle* argminmax) { - xla::ComputationDataHandle init_value; - const xla::Computation* reducer; +Status ArgMinMax(xla::XlaBuilder* builder, XlaOpKernelContext* ctx, + const xla::XlaOp& input, const TensorShape& input_shape, + DataType input_type, DataType output_type, int axis, + bool is_min, xla::XlaOp* argminmax) { + xla::XlaOp init_value; + const xla::XlaComputation* reducer; if (is_min) { init_value = XlaHelpers::MaxValue(builder, input_type); reducer = ctx->GetOrCreateMin(input_type); @@ -50,13 +49,13 @@ Status ArgMinMax(xla::ComputationBuilder* builder, XlaOpKernelContext* ctx, xla::PrimitiveType xla_output_type; TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(output_type, &xla_output_type)); - xla::ComputationDataHandle input_max = builder->Reduce( - input, init_value, *reducer, /*dimensions_to_reduce=*/{axis}); + xla::XlaOp input_max = builder->Reduce(input, init_value, *reducer, + /*dimensions_to_reduce=*/{axis}); std::vector broadcast_dims(input_shape.dims() - 1); std::iota(broadcast_dims.begin(), broadcast_dims.begin() + axis, 0); std::iota(broadcast_dims.begin() + axis, broadcast_dims.end(), axis + 1); // Compute a mask that has 1s for elements equal to the maximum. 
- xla::ComputationDataHandle partial_mask = builder->ConvertElementType( + xla::XlaOp partial_mask = builder->ConvertElementType( builder->Eq(input, input_max, broadcast_dims), xla_output_type); // In order to make identity elements for a bitwise And, we: @@ -65,23 +64,23 @@ Status ArgMinMax(xla::ComputationBuilder* builder, XlaOpKernelContext* ctx, // 0xFF...F int32 bits_in_type = xla::ShapeUtil::ByteSizeOfPrimitiveType(xla_output_type) * 8 - 1; - xla::ComputationDataHandle shift_amount = + xla::XlaOp shift_amount = XlaHelpers::IntegerLiteral(builder, output_type, bits_in_type); - xla::ComputationDataHandle full_mask = builder->ShiftRightArithmetic( + xla::XlaOp full_mask = builder->ShiftRightArithmetic( builder->ShiftLeft(partial_mask, shift_amount), shift_amount); // And with the vector [0, 1, 2, ...] to convert each 0xFF...F into its // index. - xla::ComputationDataHandle iota; + xla::XlaOp iota; const int64 axis_size = input_shape.dim_size(axis); TF_RETURN_IF_ERROR(XlaHelpers::Iota(builder, output_type, axis_size, &iota)); - xla::ComputationDataHandle product = + xla::XlaOp product = builder->And(full_mask, iota, /*broadcast_dimensions=*/{axis}); // If there are multiple maximum elements, choose the one with the highest // index. 
- xla::ComputationDataHandle output = + xla::XlaOp output = builder->Reduce(product, XlaHelpers::MinValue(builder, output_type), *ctx->GetOrCreateMax(output_type), /*dimensions_to_reduce=*/{axis}); @@ -91,36 +90,31 @@ Status ArgMinMax(xla::ComputationBuilder* builder, XlaOpKernelContext* ctx, } // namespace -xla::ComputationDataHandle XlaHelpers::MinValue(xla::ComputationBuilder* b, - DataType data_type) { +xla::XlaOp XlaHelpers::MinValue(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return b->ConstantLiteral(xla::Literal::MinValue(type)); } -xla::ComputationDataHandle XlaHelpers::MaxValue(xla::ComputationBuilder* b, - DataType data_type) { +xla::XlaOp XlaHelpers::MaxValue(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return b->ConstantLiteral(xla::Literal::MaxValue(type)); } -xla::ComputationDataHandle XlaHelpers::Zero(xla::ComputationBuilder* b, - DataType data_type) { +xla::XlaOp XlaHelpers::Zero(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return b->ConstantLiteral(xla::Literal::Zero(type)); } -xla::ComputationDataHandle XlaHelpers::One(xla::ComputationBuilder* b, - DataType data_type) { +xla::XlaOp XlaHelpers::One(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return b->ConstantLiteral(xla::Literal::One(type)); } -xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b, - DataType data_type) { +xla::XlaOp XlaHelpers::Epsilon(xla::XlaBuilder* b, DataType data_type) { switch (data_type) { case DT_HALF: return b->ConstantR0( @@ -137,16 +131,15 @@ xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b, } } -xla::ComputationDataHandle XlaHelpers::IntegerLiteral( - xla::ComputationBuilder* b, DataType data_type, int64 value) { 
+xla::XlaOp XlaHelpers::IntegerLiteral(xla::XlaBuilder* b, DataType data_type, + int64 value) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return ::tensorflow::IntegerLiteral(b, type, value); } -xla::ComputationDataHandle XlaHelpers::FloatLiteral(xla::ComputationBuilder* b, - DataType data_type, - double value) { +xla::XlaOp XlaHelpers::FloatLiteral(xla::XlaBuilder* b, DataType data_type, + double value) { xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return ::tensorflow::FloatLiteral(b, type, value); @@ -183,28 +176,24 @@ static Tensor MakeLinspaceTensor(const TensorShape& shape, int64 depth) { return linspace; } -Status XlaHelpers::ArgMax(xla::ComputationBuilder* builder, - XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, +Status XlaHelpers::ArgMax(xla::XlaBuilder* builder, XlaOpKernelContext* ctx, + const xla::XlaOp& input, const TensorShape& input_shape, DataType input_type, - DataType output_type, int axis, - xla::ComputationDataHandle* argmax) { + DataType output_type, int axis, xla::XlaOp* argmax) { return ArgMinMax(builder, ctx, input, input_shape, input_type, output_type, axis, /*is_min=*/false, argmax); } -Status XlaHelpers::ArgMin(xla::ComputationBuilder* builder, - XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, +Status XlaHelpers::ArgMin(xla::XlaBuilder* builder, XlaOpKernelContext* ctx, + const xla::XlaOp& input, const TensorShape& input_shape, DataType input_type, - DataType output_type, int axis, - xla::ComputationDataHandle* argmin) { + DataType output_type, int axis, xla::XlaOp* argmin) { return ArgMinMax(builder, ctx, input, input_shape, input_type, output_type, axis, /*is_min=*/true, argmin); } -Status XlaHelpers::Iota(xla::ComputationBuilder* builder, DataType dtype, - int64 size, xla::ComputationDataHandle* iota) { +Status XlaHelpers::Iota(xla::XlaBuilder* builder, DataType dtype, int64 size, + xla::XlaOp* iota) { TensorShape 
linspace_shape({size}); Tensor linspace; switch (dtype) { @@ -227,13 +216,10 @@ Status XlaHelpers::Iota(xla::ComputationBuilder* builder, DataType dtype, return Status::OK(); } -Status XlaHelpers::OneHot(xla::ComputationBuilder* builder, int64 depth, - int axis, DataType index_type, - const TensorShape& indices_shape, - const xla::ComputationDataHandle& indices, - const xla::ComputationDataHandle& on_value, - const xla::ComputationDataHandle& off_value, - xla::ComputationDataHandle* one_hot) { +Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis, + DataType index_type, const TensorShape& indices_shape, + const xla::XlaOp& indices, const xla::XlaOp& on_value, + const xla::XlaOp& off_value, xla::XlaOp* one_hot) { const int indices_dims = indices_shape.dims(); const int output_dims = indices_dims + 1; @@ -267,7 +253,7 @@ Status XlaHelpers::OneHot(xla::ComputationBuilder* builder, int64 depth, std::vector broadcast_dims(indices_shape.dims()); std::iota(broadcast_dims.begin(), broadcast_dims.begin() + axis, 0); std::iota(broadcast_dims.begin() + axis, broadcast_dims.end(), axis + 1); - xla::ComputationDataHandle one_hot_bool = builder->Eq( + xla::XlaOp one_hot_bool = builder->Eq( indices, builder->ConstantLiteral(linspace_literal), broadcast_dims); // Selects the user-provided off_value and on_value values. 
@@ -278,16 +264,15 @@ Status XlaHelpers::OneHot(xla::ComputationBuilder* builder, int64 depth, } DataType XlaHelpers::SumAccumulationType(const DataType& dtype) { - if (dtype == DT_BFLOAT16) { + if (dtype == DT_BFLOAT16 || dtype == DT_HALF) { return DT_FLOAT; } return dtype; } -xla::ComputationDataHandle XlaHelpers::ConvertElementType( - xla::ComputationBuilder* const builder, - const xla::ComputationDataHandle& operand, - const DataType new_element_type) { +xla::XlaOp XlaHelpers::ConvertElementType(xla::XlaBuilder* const builder, + const xla::XlaOp& operand, + const DataType new_element_type) { xla::PrimitiveType convert_to; TF_CHECK_OK(DataTypeToPrimitiveType(new_element_type, &convert_to)); return builder->ConvertElementType(operand, convert_to); diff --git a/tensorflow/compiler/tf2xla/xla_helpers.h b/tensorflow/compiler/tf2xla/xla_helpers.h index 68ab93b64a5fa87ad99e0f44d84f6473fc8bbebd..c3fdc5252e74363fe289eeabb2cb0d68298ee291 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.h +++ b/tensorflow/compiler/tf2xla/xla_helpers.h @@ -19,7 +19,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_ #include "tensorflow/compiler/tf2xla/xla_context.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -30,41 +30,34 @@ class XlaHelpers { public: // Returns a handle representing the minimum value of a scalar // element of data_type. - static xla::ComputationDataHandle MinValue(xla::ComputationBuilder* b, - DataType data_type); + static xla::XlaOp MinValue(xla::XlaBuilder* b, DataType data_type); // Returns a handle representing the maximum value of a scalar // element of data_type. 
- static xla::ComputationDataHandle MaxValue(xla::ComputationBuilder* b, - DataType data_type); + static xla::XlaOp MaxValue(xla::XlaBuilder* b, DataType data_type); // Returns a handle representing the zero value of a scalar // element of data_type. - static xla::ComputationDataHandle Zero(xla::ComputationBuilder* b, - DataType data_type); + static xla::XlaOp Zero(xla::XlaBuilder* b, DataType data_type); // Returns a handle representing the one value of a scalar // element of data_type. - static xla::ComputationDataHandle One(xla::ComputationBuilder* b, - DataType data_type); + static xla::XlaOp One(xla::XlaBuilder* b, DataType data_type); // Returns the machine epsilon for floating-point type `data_type`, i.e., // the difference between 1.0 and the next representable value. - static xla::ComputationDataHandle Epsilon(xla::ComputationBuilder* b, - DataType data_type); + static xla::XlaOp Epsilon(xla::XlaBuilder* b, DataType data_type); // Returns a handle representing the given value of an integer scalar // element of data_type. // Note that unlike One and Zero, does not work on boolean types. - static xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* b, - DataType data_type, - int64 value); + static xla::XlaOp IntegerLiteral(xla::XlaBuilder* b, DataType data_type, + int64 value); // Returns a handle representing the given value of a floating-point scalar // element of data_type. - static xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* b, - DataType data_type, - double value); + static xla::XlaOp FloatLiteral(xla::XlaBuilder* b, DataType data_type, + double value); // Reshapes literal 'input' to have 'shape'. Both the original shape and // 'shape' must contain the same number of elements. @@ -75,38 +68,32 @@ class XlaHelpers { // Sets `argmax` to the argmax of `input` along `axis`. `input_shape` and // `input_dtype` are the shape and dtype of `input` respectively, and // `output_type` is the dtype to use for `argmax`. 
- static Status ArgMax(xla::ComputationBuilder* builder, - XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, - const TensorShape& input_shape, DataType input_type, - DataType output_type, int axis, - xla::ComputationDataHandle* argmax); + static Status ArgMax(xla::XlaBuilder* builder, XlaOpKernelContext* ctx, + const xla::XlaOp& input, const TensorShape& input_shape, + DataType input_type, DataType output_type, int axis, + xla::XlaOp* argmax); // Sets `argmin` to the argmin of `input` along `axis`. `input_shape` and // `input_dtype` are the shape and dtype of `input` respectively, and // `output_type` is the dtype to use for `argmin`. - static Status ArgMin(xla::ComputationBuilder* builder, - XlaOpKernelContext* ctx, - const xla::ComputationDataHandle& input, - const TensorShape& input_shape, DataType input_type, - DataType output_type, int axis, - xla::ComputationDataHandle* argmin); + static Status ArgMin(xla::XlaBuilder* builder, XlaOpKernelContext* ctx, + const xla::XlaOp& input, const TensorShape& input_shape, + DataType input_type, DataType output_type, int axis, + xla::XlaOp* argmin); // Sets *iota to a rank 1 tensor with values [0, 1, 2, ...] of `dtype`. - static Status Iota(xla::ComputationBuilder* builder, DataType dtype, - int64 size, xla::ComputationDataHandle* iota); + static Status Iota(xla::XlaBuilder* builder, DataType dtype, int64 size, + xla::XlaOp* iota); // Converts `indices` into a one-hot representation. `depth` is the size // of the new axis to add. `axis` is the position at which to add the new // axis. `indices_shape` is the shape of `indices`. `on_value` and // `off_value` represent the values to use for the on and off positions, // respectively. 
- static Status OneHot(xla::ComputationBuilder* builder, int64 depth, int axis, + static Status OneHot(xla::XlaBuilder* builder, int64 depth, int axis, DataType index_type, const TensorShape& indices_shape, - const xla::ComputationDataHandle& indices, - const xla::ComputationDataHandle& on_value, - const xla::ComputationDataHandle& off_value, - xla::ComputationDataHandle* one_hot); + const xla::XlaOp& indices, const xla::XlaOp& on_value, + const xla::XlaOp& off_value, xla::XlaOp* one_hot); // Certain DataTypes should use increased precision DataTypes when performing // reductions. This function remaps a given DataType to a higher precision @@ -115,10 +102,9 @@ class XlaHelpers { // A helper for creating a ConvertElementType xla op given a DataType rather // than the xla::PrimitiveType. - static xla::ComputationDataHandle ConvertElementType( - xla::ComputationBuilder* const builder, - const xla::ComputationDataHandle& operand, - const DataType new_element_type); + static xla::XlaOp ConvertElementType(xla::XlaBuilder* const builder, + const xla::XlaOp& operand, + const DataType new_element_type); }; } // end namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc index 1fe6e69ff2dc838152032ac3d7b21de41684c6f6..9e17756b27733e2453ea1688d13e1d718c25cfc8 100644 --- a/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc +++ b/tensorflow/compiler/tf2xla/xla_jit_compiled_cpu_function.cc @@ -112,10 +112,10 @@ void CollectNames(const T& entries, std::vector* nonempty_names, XlaJitCompiledCpuFunction::Compile( const GraphDef& graph_def, const tf2xla::Config& config, const xla::ExecutableBuildOptions& build_options) { - // Convert the graph_def into an xla::Computation. + // Convert the graph_def into an xla::XlaComputation. 
TF_ASSIGN_OR_RETURN(xla::LocalClient * client, xla::ClientLibrary::GetOrCreateLocalClient()); - xla::Computation computation; + xla::XlaComputation computation; TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToXla(graph_def, config, client, &computation)); diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index c4bb90d58755f16672ca7c6a6738065be6330485..2b65f4d5d5936e062e5351a0723544191ffe2dfa 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -30,7 +30,7 @@ bool XlaOpKernelContext::ValidateInputsAreSameShape(OpKernel* op) { return context_->ValidateInputsAreSameShape(op); } -xla::ComputationBuilder* XlaOpKernelContext::builder() const { +xla::XlaBuilder* XlaOpKernelContext::builder() const { return XlaContext::Get(this).builder(); } @@ -38,9 +38,9 @@ xla::ComputationBuilder* XlaOpKernelContext::builder() const { static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor) { const XlaExpression* expression = reinterpret_cast(tensor.tensor_data().data()); - CHECK(expression->handle().handle() != 0 || + CHECK(expression->handle().builder() != nullptr || expression->resource() != nullptr); - VLOG(1) << "Fetched T" << expression->handle().handle(); + VLOG(1) << "Fetched T" << expression->handle(); return expression; } @@ -48,20 +48,18 @@ static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor) { static XlaExpression* CastExpressionFromUninitializedTensor(Tensor* tensor) { const XlaExpression* expression = reinterpret_cast(tensor->tensor_data().data()); - CHECK_EQ(expression->handle().handle(), 0); + CHECK_EQ(expression->handle().builder(), nullptr); return const_cast(expression); } -// Retrieves the ComputationDataHandle from an input Tensor to an Op. This -// computation was constructed by an Op that executed previously and -// created the output Tensor using CreateOutputTensorFromComputation -// or CreateConstantOutputTensor. 
-static const xla::ComputationDataHandle& GetComputationFromTensor( - const Tensor& tensor) { +// Retrieves the XlaOp from an input Tensor to an Op. This computation was +// constructed by an Op that executed previously and created the output Tensor +// using CreateOutputTensorFromComputation or CreateConstantOutputTensor. +static const xla::XlaOp& GetComputationFromTensor(const Tensor& tensor) { return CastExpressionFromTensor(tensor)->handle(); } -const xla::ComputationDataHandle& XlaOpKernelContext::Input(int index) { +const xla::XlaOp& XlaOpKernelContext::Input(int index) { return GetComputationFromTensor(context_->input(index)); } @@ -106,7 +104,7 @@ Status XlaOpKernelContext::ConstantInputReshaped( return HostTensorToLiteral(temp, constant_literal); } - xla::ComputationDataHandle handle = expression->handle(); + xla::XlaOp handle = expression->handle(); if (new_shape != tensor.shape()) { // Reshape the handle to the desired shape. handle = builder()->Reshape(handle, new_shape.dim_sizes()); @@ -141,8 +139,17 @@ Status XlaOpKernelContext::ConstantInputReshaped( } // Ask the XLA compiler to evaluate the data handle to a literal. 
+ xla::StatusOr constant_graph = + builder()->BuildConstantSubGraph(handle); + if (!constant_graph.ok()) { + return errors::Internal( + "Error getting a compile-time constant graph for ", + context_->op_kernel().name(), " input ", index, + ".\nError: ", constant_graph.status().error_message()); + } xla::StatusOr> computed = - builder()->ComputeConstant(handle, &layout); + compiler()->client()->ComputeConstant(constant_graph.ValueOrDie(), + &layout); if (!computed.ok()) { return errors::Internal("Error evaluating ", context_->op_kernel().name(), " input ", index, @@ -260,9 +267,9 @@ Status XlaOpKernelContext::ConstantInputAsShape(int index, TensorShape* shape) { return Status::OK(); } -Status XlaOpKernelContext::InputList( - StringPiece name, std::vector* handles, - std::vector* shapes) { +Status XlaOpKernelContext::InputList(StringPiece name, + std::vector* handles, + std::vector* shapes) { OpInputList inputs; TF_RETURN_IF_ERROR(context_->input_list(name, &inputs)); handles->clear(); @@ -285,9 +292,9 @@ Status XlaOpKernelContext::ConstantInputList( return Status::OK(); } -Status XlaOpKernelContext::ReadVariableInput( - int index, DataType type, TensorShape* shape, - xla::ComputationDataHandle* value) { +Status XlaOpKernelContext::ReadVariableInput(int index, DataType type, + TensorShape* shape, + xla::XlaOp* value) { const Tensor& tensor = context_->input(index); const XlaExpression* expression = CastExpressionFromTensor(tensor); XlaResource* variable = expression->resource(); @@ -334,8 +341,7 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, return Status::OK(); } -void XlaOpKernelContext::SetOutput(int index, - const xla::ComputationDataHandle& handle) { +void XlaOpKernelContext::SetOutput(int index, const xla::XlaOp& handle) { // Makes the host Tensor that will refer to the expression. 
Tensor* output = nullptr; auto shape = builder()->GetShape(handle); @@ -349,7 +355,7 @@ void XlaOpKernelContext::SetOutput(int index, // corresponds. TensorShape tensor_shape; OP_REQUIRES_OK(context_, - XLAShapeToTensorShape(*shape.ValueOrDie(), &tensor_shape)); + XLAShapeToTensorShape(shape.ValueOrDie(), &tensor_shape)); OP_REQUIRES_OK(context_, context_->allocate_output(index, tensor_shape, &output)); @@ -364,8 +370,8 @@ void XlaOpKernelContext::SetConstantOutput(int index, const Tensor& constant) { xla::Literal literal; OP_REQUIRES_OK(context_, HostTensorToLiteral(constant, &literal)); - xla::ComputationDataHandle handle = builder()->ConstantLiteral(literal); - CHECK_NE(handle.handle(), 0); + xla::XlaOp handle = builder()->ConstantLiteral(literal); + CHECK_NE(handle.builder(), nullptr); // Make the Tensor that will refer to the expression. Tensor* output = nullptr; @@ -386,8 +392,7 @@ void XlaOpKernelContext::SetInvalidOutput(int index) { OP_REQUIRES_OK(context_, context_->allocate_output(index, TensorShape({}), &output)); XlaExpression* expression = CastExpressionFromUninitializedTensor(output); - xla::ComputationDataHandle handle; - handle.set_handle(0); + xla::XlaOp handle; expression->set_handle(handle); } @@ -410,8 +415,8 @@ Status XlaOpKernelContext::GetResourceInput(int index, XlaResource** resource) { } Status XlaOpKernelContext::AssignVariable(int input_index, DataType type, - xla::ComputationDataHandle handle) { - TF_RET_CHECK(handle.handle() != 0); + xla::XlaOp handle) { + TF_RET_CHECK(handle.builder() != nullptr); const XlaExpression* expression = CastExpressionFromTensor(context_->input(input_index)); @@ -425,7 +430,7 @@ Status XlaOpKernelContext::AssignVariable(int input_index, DataType type, } TensorShape shape; TF_RETURN_IF_ERROR( - XLAShapeToTensorShape(*shape_or_status.ValueOrDie(), &shape)); + XLAShapeToTensorShape(shape_or_status.ValueOrDie(), &shape)); TF_RETURN_IF_ERROR(variable->SetTypeAndShape(type, shape)); @@ -457,22 +462,22 @@ void 
XlaOpKernelContext::CtxFailureWithWarning(const char* file, int line, context_->CtxFailureWithWarning(file, line, s); } -const xla::Computation* XlaOpKernelContext::GetOrCreateMax( +const xla::XlaComputation* XlaOpKernelContext::GetOrCreateMax( const DataType type) { return XlaContext::Get(context_).GetOrCreateMax(type); } -const xla::Computation* XlaOpKernelContext::GetOrCreateMin( +const xla::XlaComputation* XlaOpKernelContext::GetOrCreateMin( const DataType type) { return XlaContext::Get(context_).GetOrCreateMin(type); } -const xla::Computation* XlaOpKernelContext::GetOrCreateAdd( +const xla::XlaComputation* XlaOpKernelContext::GetOrCreateAdd( const DataType type) { return XlaContext::Get(context_).GetOrCreateAdd(type); } -const xla::Computation* XlaOpKernelContext::GetOrCreateMul( +const xla::XlaComputation* XlaOpKernelContext::GetOrCreateMul( const DataType type) { return XlaContext::Get(context_).GetOrCreateMul(type); } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 4e4b97e0cec8d16b9b5686a779b1285906765dbd..667dc262ca03ca716ffbf015a78fc14c7a8b7c1a 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_TF2XLA_XLA_OP_KERNEL_H_ #include "tensorflow/compiler/tf2xla/xla_compiler.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/platform/macros.h" @@ -58,8 +58,8 @@ class XlaOpKernelContext { public: explicit XlaOpKernelContext(OpKernelContext* context); - // Returns the XLA ComputationBuilder containing the output of compilation. - xla::ComputationBuilder* builder() const; + // Returns the XLA XlaBuilder containing the output of compilation. 
+ xla::XlaBuilder* builder() const; // Inputs @@ -72,10 +72,10 @@ class XlaOpKernelContext { // Returns the shape of input 'index'. TensorShape InputShape(int index); - // Returns input 'index' as a ComputationDataHandle. Unlike + // Returns input 'index' as a XlaOp. Unlike // OpKernelContext::Input returns a symbolic value rather than a concrete // Tensor. - const xla::ComputationDataHandle& Input(int index); + const xla::XlaOp& Input(int index); // Returns true if all inputs are the same shape, otherwise sets the // status to a non-OK value and returns false. @@ -85,8 +85,7 @@ class XlaOpKernelContext { // Returns the named list-valued immutable input in "list", as // defined in the OpDef. If the named output is not list-valued, // returns a one-element list. - Status InputList(StringPiece name, - std::vector* handles, + Status InputList(StringPiece name, std::vector* handles, std::vector* shapes); // Helper methods for constant inputs. @@ -132,10 +131,10 @@ class XlaOpKernelContext { return context_->expected_output_dtype(index); } - // Sets output 'index' to the ComputationDataHandle 'handle'. + // Sets output 'index' to the XlaOp 'handle'. // All outputs should be set using SetOutput and SetConstantOutput, not // via the underlying OpKernelContext. - void SetOutput(int index, const xla::ComputationDataHandle& handle); + void SetOutput(int index, const xla::XlaOp& handle); // Sets output 'index' to compile-time constant 'host_tensor', where // 'host_tensor' is a tensor in host memory. It is preferable to use @@ -168,14 +167,13 @@ class XlaOpKernelContext { // variable. Returns an error if the variable has not been initialized, or if // its type does not match `type`. Status ReadVariableInput(int index, DataType type, TensorShape* shape, - xla::ComputationDataHandle* value); + xla::XlaOp* value); // Assigns the value `handle` to the variable referenced by input // `input_index`. The variable must be of `type`. 
Returns an error if the // variable has been initialized with a different type or with a // different shape. - Status AssignVariable(int input_index, DataType type, - xla::ComputationDataHandle handle); + Status AssignVariable(int input_index, DataType type, xla::XlaOp handle); // Helper routines for the OP_REQUIRES macros void CtxFailure(const Status& s); @@ -205,22 +203,22 @@ class XlaOpKernelContext { // Gets an XLA lambda to compute Max. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateMax(const DataType type); + const xla::XlaComputation* GetOrCreateMax(const DataType type); // Gets an XLA lambda to compute Min. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateMin(const DataType type); + const xla::XlaComputation* GetOrCreateMin(const DataType type); // Gets an XLA lambda to compute Add. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. - const xla::Computation* GetOrCreateAdd(const DataType type); + const xla::XlaComputation* GetOrCreateAdd(const DataType type); // Gets an XLA lambda to compute Mul. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. 
- const xla::Computation* GetOrCreateMul(const DataType type); + const xla::XlaComputation* GetOrCreateMul(const DataType type); private: OpKernelContext* const context_; diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index bbe808595d958346bd55bf8419306bf3de4cd1d0..e309cb1e34db7f8430c2494c03aed41652b7a167 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -311,7 +311,7 @@ XlaOpRegistry& XlaOpRegistry::Instance() { XlaOpRegistrationBuilder::XlaOpRegistrationBuilder(StringPiece name) { registration_.reset(new XlaOpRegistry::OpRegistration); - registration_->name = name.ToString(); + registration_->name = std::string(name); } XlaOpRegistrationBuilder XlaOpRegistrationBuilder::Name(StringPiece name) { @@ -323,14 +323,14 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::Device( gtl::ArraySlice devices) { registration_->has_device_whitelist = true; for (StringPiece device : devices) { - registration_->device_whitelist.insert(device.ToString()); + registration_->device_whitelist.insert(std::string(device)); } return *this; } XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::Device(StringPiece device) { registration_->has_device_whitelist = true; - registration_->device_whitelist.insert(device.ToString()); + registration_->device_whitelist.insert(std::string(device)); return *this; } @@ -347,7 +347,7 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::AllowResourceTypes() { XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::TypeConstraint( StringPiece attr_name, DataType allowed) { std::set& types = - registration_->type_constraints[attr_name.ToString()]; + registration_->type_constraints[std::string(attr_name)]; types.insert(allowed); return *this; } @@ -355,7 +355,7 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::TypeConstraint( XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::TypeConstraint( StringPiece attr_name, 
gtl::ArraySlice allowed) { std::set& types = - registration_->type_constraints[attr_name.ToString()]; + registration_->type_constraints[std::string(attr_name)]; for (DataType t : allowed) { types.insert(t); } @@ -364,7 +364,7 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::TypeConstraint( XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::CompileTimeConstInput( StringPiece input_name) { - registration_->compile_time_constant_inputs.insert(input_name.ToString()); + registration_->compile_time_constant_inputs.insert(std::string(input_name)); return *this; } @@ -394,7 +394,7 @@ XlaBackendRegistrar::XlaBackendRegistrar( StringPiece name, gtl::ArraySlice types, XlaOpRegistry::BackendOpFilter op_filter) { XlaOpRegistry& registry = XlaOpRegistry::Instance(); - registry.RegisterBackend(name.ToString(), types, op_filter); + registry.RegisterBackend(std::string(name), types, op_filter); } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc index c2075b44b82ba279d1246ec6bfcf305d12c418a6..540c65c597f20d5bb26494e56c09ff2187cfb0db 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.cc +++ b/tensorflow/compiler/tf2xla/xla_resource.cc @@ -26,8 +26,7 @@ limitations under the License. 
namespace tensorflow { XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, - TensorShape shape, - const xla::ComputationDataHandle& initial_value, + TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, const std::set& tensor_array_gradients) : kind_(kind), @@ -41,11 +40,10 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, CHECK(kind_ != kInvalid); for (const string& gradient : tensor_array_gradients) { - tensor_array_gradients_[gradient].reset( - new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1, - /*name=*/strings::StrCat("TensorArrayGrad: ", name_), - type_, shape_, xla::ComputationDataHandle(), - tensor_array_size_, /*tensor_array_gradients=*/{})); + tensor_array_gradients_[gradient].reset(new XlaResource( + /*kind=*/kTensorArray, /*arg_num=*/-1, + /*name=*/strings::StrCat("TensorArrayGrad: ", name_), type_, shape_, + xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{})); } } @@ -73,7 +71,7 @@ Status XlaResource::SetTypeAndShape(DataType type, const TensorShape& shape) { return Status::OK(); } -Status XlaResource::SetValue(const xla::ComputationDataHandle& value) { +Status XlaResource::SetValue(const xla::XlaOp& value) { if (type_ == DT_INVALID) { return errors::InvalidArgument( "Resource '", name_, @@ -83,7 +81,7 @@ Status XlaResource::SetValue(const xla::ComputationDataHandle& value) { return Status::OK(); } -Status XlaResource::SetZeroValue(xla::ComputationBuilder* builder) { +Status XlaResource::SetZeroValue(xla::XlaBuilder* builder) { if (type_ == DT_INVALID) { return errors::InvalidArgument( "Resource '", name_, @@ -121,9 +119,9 @@ Status XlaResource::SetZeroValue(xla::ComputationBuilder* builder) { return Status::OK(); } -Status XlaResource::GetOrCreateTensorArrayGradient( - const string& source, xla::ComputationBuilder* builder, - XlaResource** gradient_out) { +Status XlaResource::GetOrCreateTensorArrayGradient(const string& source, + xla::XlaBuilder* builder, 
+ XlaResource** gradient_out) { VLOG(2) << "Gradient lookup for resource: " << name_ << " gradient: " << source; TF_RET_CHECK(kind_ == kTensorArray); @@ -132,7 +130,7 @@ Status XlaResource::GetOrCreateTensorArrayGradient( TensorShape ta_shape; ta_shape.AddDim(tensor_array_size_); ta_shape.AppendShape(shape_); - xla::ComputationDataHandle gradient_value = builder->Broadcast( + xla::XlaOp gradient_value = builder->Broadcast( XlaHelpers::Zero(builder, type_), ta_shape.dim_sizes()); gradient.reset( new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1, @@ -144,13 +142,12 @@ Status XlaResource::GetOrCreateTensorArrayGradient( return Status::OK(); } -Status XlaResource::Pack(xla::ComputationDataHandle* pack, - xla::ComputationBuilder* builder) const { +Status XlaResource::Pack(xla::XlaOp* pack, xla::XlaBuilder* builder) const { if (tensor_array_gradients_.empty()) { *pack = value_; } else { TF_RET_CHECK(kind_ == kTensorArray); - std::vector elems; + std::vector elems; elems.push_back(value_); for (const auto& gradient : tensor_array_gradients_) { elems.push_back(gradient.second->value_); @@ -161,8 +158,8 @@ Status XlaResource::Pack(xla::ComputationDataHandle* pack, } Status XlaResource::SetFromPack(const std::set& gradient_sources, - const xla::ComputationDataHandle& pack, - xla::ComputationBuilder* builder) { + const xla::XlaOp& pack, + xla::XlaBuilder* builder) { if (gradient_sources.empty()) { if (!initialized()) { initial_value_ = pack; diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h index 1bb2c7274ecdf0954768fd96def51194e52deee8..9ce36d1aa7622334b2acfbe9aa85d7419c4772ed 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.h +++ b/tensorflow/compiler/tf2xla/xla_resource.h @@ -18,7 +18,7 @@ limitations under the License. 
#include -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" @@ -37,8 +37,7 @@ class XlaResource { }; XlaResource(Kind kind, int arg_num, string name, DataType type, - TensorShape shape, - const xla::ComputationDataHandle& initial_value, + TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, const std::set& tensor_array_gradients); @@ -69,16 +68,14 @@ class XlaResource { // this is the shape of each entry in the TensorArray/Stack. const TensorShape& shape() const { return shape_; } - const xla::ComputationDataHandle& value() const { return value_; } + const xla::XlaOp& value() const { return value_; } // Value of the resource at computation entry. Used to detect which // variables have new values that need to be written back. - const xla::ComputationDataHandle& initial_value() const { - return initial_value_; - } + const xla::XlaOp& initial_value() const { return initial_value_; } // A variable is initialized if it has a value. - bool initialized() const { return value_.handle() > 0; } + bool initialized() const { return value_.builder() != nullptr; } // Sets the type and shape of the resource. The type and shape of a resource // must not change once the variable has been initialized. @@ -86,17 +83,17 @@ class XlaResource { // Sets the current value of the resource. Returns an error if the type is not // set to a valid value. - Status SetValue(const xla::ComputationDataHandle& value); + Status SetValue(const xla::XlaOp& value); // Sets the current value of the resource to an all-zero value. - Status SetZeroValue(xla::ComputationBuilder* builder); + Status SetZeroValue(xla::XlaBuilder* builder); // Looks up the gradient for `source`, or creates it if it does not already // exist. 
The call target must be an initialized TensorArray resource. A // TensorArray can have multiple named gradients; see the operator // documentation for TensorArrayGradV3 for details. Status GetOrCreateTensorArrayGradient(const string& source, - xla::ComputationBuilder* builder, + xla::XlaBuilder* builder, XlaResource** gradient_out); // Packs a resource into a single XLA value `pack`, suitable for use as @@ -104,8 +101,7 @@ class XlaResource { // gradients, sets `*pack` to `value`. // For TensorArrays with gradients, packs the value and its gradient values in // a tuple; the gradients values are packed in order by source name. - Status Pack(xla::ComputationDataHandle* pack, - xla::ComputationBuilder* builder) const; + Status Pack(xla::XlaOp* pack, xla::XlaBuilder* builder) const; // Updates the resource with values from `pack`. If `gradient_sources` is // non-empty, treats `pack` as a tuple that represents a TensorArray and @@ -114,8 +110,7 @@ class XlaResource { // values. // Opposite of Pack(). 
Status SetFromPack(const std::set& gradient_sources, - const xla::ComputationDataHandle& pack, - xla::ComputationBuilder* builder); + const xla::XlaOp& pack, xla::XlaBuilder* builder); // TensorArray and Stack specific fields @@ -144,8 +139,8 @@ class XlaResource { DataType type_; TensorShape shape_; - xla::ComputationDataHandle value_; - xla::ComputationDataHandle initial_value_; + xla::XlaOp value_; + xla::XlaOp initial_value_; int64 tensor_array_size_ = -1; diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 1af9cb6d2ab15a33b56f1df0410f47d7e139a1ba..dbf14f32bc3e54a9b4f0e1fbc5d827e8708b73f7 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -99,6 +99,7 @@ cc_library( hdrs = ["service_interface.h"], visibility = [":friends"], deps = [ + ":xla_data_proto", ":xla_proto", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index 286d06d12ffca7410067f2d33398497576986807..aac3273d5fd144f3b737529b0833c9328b3d0e4d 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -106,6 +106,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:backend", "//tensorflow/compiler/xla/service:compiler", "//tensorflow/compiler/xla/service:device_memory_allocator", diff --git a/tensorflow/compiler/xla/client/computation.h b/tensorflow/compiler/xla/client/computation.h index a53fc9e9cf34704bd08ddb5bf062c1ec1107f5fb..9a1bcde76387297cb7f374b25baad1d5ec284859 100644 --- a/tensorflow/compiler/xla/client/computation.h +++ b/tensorflow/compiler/xla/client/computation.h @@ -30,6 +30,8 @@ namespace xla { // Wraps a ComputationHandle protobuf with a lifetime. 
Computation is // movable and not copyable to capture the same kind of unique // ownership that std::unique_ptr represents. +// +// TODO(b/74197823): Deprecated. Use XlaComputation instead. class Computation { public: // Creates a null Computation. diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 9431c2c459a564e3cf509d9dae16e71fc27ee2c0..ac1eb915cc52df94df71631a7e80de9095f7fafb 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -48,6 +48,8 @@ namespace xla { // deferred from being handled until Build() is called. // // Thread-compatible. +// +// TODO(b/74197823): Deprecated. Use XlaBuilder instead. class ComputationBuilder { public: // client: client in which to build the computation. diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index 59c4a53c05a45490a7c8e732840a4e70767c46c2..d49d959a6c8112d3701857a70cecb24701c7b6d9 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -22,8 +22,6 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", @@ -43,9 +41,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/client/xla_client:xla_computation", 
"//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.cc b/tensorflow/compiler/xla/client/lib/arithmetic.cc index 63df449e0b3bdd642d548319dd7d621ca2f59b1d..a1d34796ccfd86f2025eff0ecb51338eb6a9b1da 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.cc +++ b/tensorflow/compiler/xla/client/lib/arithmetic.cc @@ -17,7 +17,8 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" @@ -27,28 +28,6 @@ limitations under the License. namespace xla { namespace { -using InstructionGenerator = - ComputationDataHandle (*)(ComputationBuilder*, const ComputationDataHandle&, - const ComputationDataHandle&); - -Computation CreateScalarComputation(const string& name, PrimitiveType type, - ComputationBuilder* builder, - InstructionGenerator generator) { - std::unique_ptr b; - if (type == PRED) { - b = builder->CreateSubBuilder(name); - } else { - b = builder->CreateSubBuilder( - tensorflow::strings::StrCat(name, "_", PrimitiveType_Name(type))); - } - - const Shape scalar = ShapeUtil::MakeShape(type, {}); - auto lhs = b->Parameter(0, scalar, "lhs"); - auto rhs = b->Parameter(1, scalar, "rhs"); - generator(b.get(), lhs, rhs); - return b->BuildAndNoteError(); -} - using XlaOpGenerator = XlaOp (*)(XlaBuilder*, const XlaOp&, const XlaOp&); XlaComputation CreateScalarComputation(const string& name, PrimitiveType type, @@ -71,71 +50,6 @@ XlaComputation CreateScalarComputation(const string& name, PrimitiveType type, } // namespace -Computation CreateScalarAddComputation(PrimitiveType type, - ComputationBuilder* builder) { - return CreateScalarComputation( - "add", type, builder, - 
[](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Add(lhs, rhs); }); -} - -Computation CreateScalarMultiplyComputation(PrimitiveType type, - ComputationBuilder* builder) { - return CreateScalarComputation( - "mul", type, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Mul(lhs, rhs); }); -} - -Computation CreateScalarGeComputation(PrimitiveType type, - ComputationBuilder* builder) { - return CreateScalarComputation( - "ge", type, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Ge(lhs, rhs); }); -} - -Computation CreateScalarMaxComputation(PrimitiveType type, - ComputationBuilder* builder) { - return CreateScalarComputation( - "max", type, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Max(lhs, rhs); }); -} - -Computation CreateScalarMinComputation(PrimitiveType type, - ComputationBuilder* builder) { - return CreateScalarComputation( - "min", type, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Min(lhs, rhs); }); -} - -Computation CreateScalarAndComputation(ComputationBuilder* builder) { - return CreateScalarComputation( - "and", PRED, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->And(lhs, rhs); }); -} - -Computation CreateScalarOrComputation(ComputationBuilder* builder) { - return CreateScalarComputation( - "or", PRED, builder, - [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->Or(lhs, rhs); }); -} - -StatusOr Any(const ComputationDataHandle& predicates, - ComputationBuilder* builder) { - auto f = builder->ConstantR0(false); - Computation logical_or = CreateScalarOrComputation(builder); - 
TF_ASSIGN_OR_RETURN(std::unique_ptr predicates_shape, - builder->GetShape(predicates)); - std::vector all_dimensions(ShapeUtil::Rank(*predicates_shape)); - std::iota(all_dimensions.begin(), all_dimensions.end(), 0); - return builder->Reduce(predicates, f, logical_or, all_dimensions); -} - XlaComputation CreateScalarAddComputation(PrimitiveType type, XlaBuilder* builder) { return CreateScalarComputation( diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.h b/tensorflow/compiler/xla/client/lib/arithmetic.h index f4d3fc801590fedbb84ed3d6283e62f47c56d5c7..64b6b7d63353165e45bf12d35126a7eeef9e56e4 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.h +++ b/tensorflow/compiler/xla/client/lib/arithmetic.h @@ -18,83 +18,38 @@ limitations under the License. #include -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { -// Creates a scalar add computation and returns it. -Computation CreateScalarAddComputation(PrimitiveType type, - ComputationBuilder* builder); - -// Creates a scalar multiply computation and returns it. -Computation CreateScalarMultiplyComputation(PrimitiveType type, - ComputationBuilder* builder); - -// Creates a scalar ge computation and returns it. -Computation CreateScalarGeComputation(PrimitiveType type, - ComputationBuilder* builder); - -// Creates a scalar max computation and returns it. -Computation CreateScalarMaxComputation(PrimitiveType type, - ComputationBuilder* builder); - -// Creates a scalar min computation and returns it. -Computation CreateScalarMinComputation(PrimitiveType type, - ComputationBuilder* builder); - -// Creates a scalar logical AND computation and returns it. 
-Computation CreateScalarAndComputation(ComputationBuilder* builder); - -// Creates a scalar logical OR computation and returns it. -Computation CreateScalarOrComputation(ComputationBuilder* builder); - -// Returns whether any predicate in "predicates" is set. -// -// Note: if predicates is zero-sized, Any() vacuously returns false. -StatusOr Any(const ComputationDataHandle& predicates, - ComputationBuilder* builder); - -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// // Creates a scalar add computation and returns it. XlaComputation CreateScalarAddComputation(PrimitiveType type, XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// + // Creates a scalar multiply computation and returns it. XlaComputation CreateScalarMultiplyComputation(PrimitiveType type, XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// + // Creates a scalar ge computation and returns it. XlaComputation CreateScalarGeComputation(PrimitiveType type, XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// + // Creates a scalar max computation and returns it. XlaComputation CreateScalarMaxComputation(PrimitiveType type, XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// + // Creates a scalar min computation and returns it. XlaComputation CreateScalarMinComputation(PrimitiveType type, XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// + // Creates a scalar logical AND computation and returns it. XlaComputation CreateScalarAndComputation(XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// // Creates a scalar logical OR computation and returns it. XlaComputation CreateScalarOrComputation(XlaBuilder* builder); -// TODO(b/74197823): This is a part of a NOT YET ready refactor. -// // Returns whether any predicate in "predicates" is set. 
// // Note: if predicates is zero-sized, Any() vacuously returns false. diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index 311dc4bdd72cfd7999e83a26e11614d6ca005bce..9cd87f74735ff50df8a3382723c7d045ff6c9e52 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -15,8 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/lib/testing.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -46,16 +45,14 @@ int64 DataSizeOfShape(const Shape& shape) { return total_size; } -// Create a ComputationDataHandle for an op what generates fake data with the -// given shape. -ComputationDataHandle BuildFakeDataOpOnDevice(const Shape& shape, - ComputationBuilder* builder) { +// Creates a XlaOp for an op what generates fake data with the given shape. 
+XlaOp BuildFakeDataOpOnDevice(const Shape& shape, XlaBuilder* builder) { if (ShapeUtil::IsArray(shape)) { return builder->Broadcast( builder->ConstantLiteral(Literal::One(shape.element_type())), AsInt64Slice(shape.dimensions())); } - std::vector parts; + std::vector parts; for (const Shape& s : shape.tuple_shapes()) { parts.push_back(BuildFakeDataOpOnDevice(s, builder)); } @@ -64,11 +61,10 @@ ComputationDataHandle BuildFakeDataOpOnDevice(const Shape& shape, std::unique_ptr MakeFakeDataViaDeviceOrDie(const Shape& shape, Client* client) { - ComputationBuilder b( - client, + XlaBuilder b( tensorflow::strings::StrCat("make_fake_", ShapeUtil::HumanString(shape))); BuildFakeDataOpOnDevice(shape, &b); - Computation computation = b.Build().ConsumeValueOrDie(); + XlaComputation computation = b.Build().ConsumeValueOrDie(); auto execution_options = CreateDefaultExecutionOptions(); *execution_options.mutable_shape_with_output_layout() = shape; diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h index 1dc2622972d5fd3da6991d70b800cc3fd5a638f4..9e06141b1f13d24cd033b72e31ee3a0442fe6a37 100644 --- a/tensorflow/compiler/xla/client/lib/testing.h +++ b/tensorflow/compiler/xla/client/lib/testing.h @@ -20,7 +20,6 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/client/client.h" -#include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 1c1270590375ab54e5d7b56344db1b2d40e5b89c..1acc6f86860e526b5ff737c45041a863f21da145 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -51,27 +51,49 @@ LocalExecutable::LocalExecutable(std::unique_ptr executable, tensorflow::Status LocalExecutable::ValidateExecutionOptions( const tensorflow::gtl::ArraySlice arguments, const ExecutableRunOptions& run_options, const Backend& backend) { - const ComputationLayout& computation_layout = - executable_->module_config().entry_computation_layout(); + const ComputationLayout& host_computation_layout = + executable_->module_config().host_entry_computation_layout(); + const ComputationLayout& device_computation_layout = + executable_->module_config().device_entry_computation_layout(); // Check argument number, shapes, and layouts. 
- if (arguments.size() != computation_layout.parameter_count()) { + if (arguments.size() != host_computation_layout.parameter_count()) { return InvalidArgument( "invalid number of arguments for computation: expected %d, got %zu", - computation_layout.parameter_count(), arguments.size()); + host_computation_layout.parameter_count(), arguments.size()); + } + if (arguments.size() != device_computation_layout.parameter_count()) { + return InvalidArgument( + "invalid number of arguments for computation: expected %d, got %zu", + device_computation_layout.parameter_count(), arguments.size()); } for (int i = 0; i < arguments.size(); ++i) { - if (!computation_layout.parameter_layout(i).MatchesLayoutInShape( + if (!host_computation_layout.parameter_layout(i).MatchesLayoutInShape( arguments[i]->on_host_shape())) { return InvalidParameterArgument( executable_.get(), i, - "Argument does not match shape or layout of computation parameter " + "Argument does not match host shape or layout of computation " + "parameter " "%d: want %s, got %s", i, - ShapeUtil::HumanString(computation_layout.parameter_layout(i).shape()) + ShapeUtil::HumanString( + host_computation_layout.parameter_layout(i).shape()) .c_str(), ShapeUtil::HumanString(arguments[i]->on_host_shape()).c_str()); } + if (!device_computation_layout.parameter_layout(i).MatchesLayoutInShape( + arguments[i]->on_device_shape())) { + return InvalidParameterArgument( + executable_.get(), i, + "Argument does not match device shape or layout of computation " + "parameter " + "%d: want %s, got %s", + i, + ShapeUtil::HumanString( + device_computation_layout.parameter_layout(i).shape()) + .c_str(), + ShapeUtil::HumanString(arguments[i]->on_device_shape()).c_str()); + } } if (run_options.stream() != nullptr) { diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index f306c520ede0014be52d1b952849c8894b092baf..d8fd7a5623d1fecdcff6851aa3e3538822fb50da 100644 --- 
a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/executable_build_options.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/executable_run_options.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" @@ -42,15 +43,6 @@ class LocalExecutable { const tensorflow::gtl::ArraySlice arguments, ExecutableRunOptions run_options); - // Return the layout (contained in a shape) of the result produced by the - // computation. - const Shape& result_layout() const { - return executable_->module_config() - .entry_computation_layout() - .result_layout() - .shape(); - } - // Return the options used to build the executable. const ExecutableBuildOptions& build_options() const { return build_options_; } diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index fdc4bbdd8b162b7115788e267c2a53e73c186123..c6f8f6766e9d0156d0c68306af214443f584a9fe 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -465,4 +466,25 @@ std::ostream& operator<<(std::ostream& out, const Layout& layout) { return out; } +/*static*/ size_t LayoutUtil::Hash(const Layout& layout) { + using tensorflow::hash; + using tensorflow::Hash64Combine; + + size_t hash_value = hash()(layout.format()); + + for (int64 minor_to_major : layout.minor_to_major()) { + hash_value = Hash64Combine(hash_value, hash()(minor_to_major)); + } + + for (int64 padded_dim : layout.padded_dimensions()) { + hash_value = Hash64Combine(hash_value, hash()(padded_dim)); + } + + hash_value = + Hash64Combine(hash_value, hash()(layout.padding_value())); + hash_value = Hash64Combine(hash_value, layout.max_sparse_elements()); + + return hash_value; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index 6c54eb2201b66a4a0c5695bceb14bb2367133935..6cec7501015e2dff6b5e56e20b793a5458618501 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -195,6 +195,9 @@ class LayoutUtil { static bool AreDimensionsConsecutive(const Layout& layout, tensorflow::gtl::ArraySlice dims); + // Compute a hash for `layout`. + static size_t Hash(const Layout& layout); + private: TF_DISALLOW_COPY_AND_ASSIGN(LayoutUtil); }; diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index bb6dd4f9098aefc1c2bbb1b1c41b3cee856b67de..b3b5e34ba220c7e9bf1cefef4b27baa6faee2c20 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" @@ -2148,6 +2149,27 @@ string Literal::GetR1U8AsString() const { return LiteralView(literal, view_root); } +size_t Literal::Hash() const { + using tensorflow::Hash64; + using tensorflow::Hash64Combine; + + size_t hash_value = ShapeUtil::Hash(shape()); + + ShapeUtil::ForEachSubshape( + shape(), [&](const Shape& subshape, const ShapeIndex& index) { + if (ShapeUtil::IsTuple(subshape)) { + return; + } + + CHECK(LayoutUtil::IsDense(subshape.layout())); + hash_value = Hash64Combine( + hash_value, Hash64(static_cast(untyped_data(index)), + size_bytes(index))); + }); + + return hash_value; +} + LiteralView::LiteralView(const Literal& literal, const ShapeIndex& view_root) { shape_ = ShapeUtil::GetSubshape(literal.shape(), view_root); pieces_ = ShapeTree(shape_); diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 8aa19222dc4b9175ec72128dfdad448f65c23e91..c6bd03bf21ac8dc88e96856cffe02c758e7b996d 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -74,6 +74,10 @@ class Literal { Literal(const Literal& other) = delete; Literal& operator=(const Literal& other) = delete; Literal(Literal&& other); + // 'allocate_arrays' indicates whether to allocate memory for the arrays in + // the shape. If false, buffer pointers inside of the Literal::Pieces are set + // to nullptr. + Literal(const Shape& shape, bool allocate_arrays); Literal& operator=(Literal&& other); // Literals are equal if they have compatible shapes and the same data @@ -282,7 +286,7 @@ class Literal { // Creates a new value that has the equivalent value as this literal, but // conforms to new_layout; e.g. 
a literal matrix that was in {0, 1} - // minor-to-major dimension layout can be re-layed-out as {1, 0} + // minor-to-major dimension layout can be re-laid-out as {1, 0} // minor-to-major dimension layout and the value in the cell at any given // logical index (i0, i1) will be the same. // @@ -658,12 +662,11 @@ class Literal { // LayoutUtil::MaxSparseElements(SetSubshape(shape(), index).layout()). int64 sparse_element_count() const; - protected: - // 'allocate_arrays' indicates whether to allocate memory for the arrays in - // the shape. If false, buffer pointers inside of the Literal::Pieces are set - // to nullptr. - Literal(const Shape& shape, bool allocate_arrays); + // Compute a hash for this literal. This literal must not be a sparse tensor + // or a tuple containing a sparse tensor. + size_t Hash() const; + protected: // Internal template helper for the Literal::CopySliceFrom(), matching its // arguments one by one. template diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index ecb87bd8893276fbb9ecffaa0f8a3233d2e0043f..932cce943f7c046a85984e6e5ed6b59dae371473 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -49,9 +49,10 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:executable_build_options", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/core:framework_lite", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 
7102f467373edc0e12eeb66bce865ecca82bf484..df262c97bfcd91a5c2921a36ecb8f8a6172cffe6 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/python/local_computation_builder.h" #include "tensorflow/compiler/xla/executable_run_options.h" +#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/platform/default/thread_annotations.h" @@ -104,25 +105,25 @@ static StatusOr ToBuffer(LocalClient* client, } /* static */ -LocalShapedBuffer* LocalShapedBuffer::FromLiteral( +StatusOr LocalShapedBuffer::FromLiteral( const Literal& argument, const tensorflow::gtl::optional& shape_with_layout) { LocalClient* client = GetOrCreateLocalClient(); - ScopedShapedBuffer buf = [&] { + StatusOr buf = [&] { if (shape_with_layout) { std::unique_ptr relaid = argument.Relayout(shape_with_layout.value()); - return ToBuffer(client, /*device_ordinal=*/0, *relaid) - .ConsumeValueOrDie(); + return ToBuffer(client, /*device_ordinal=*/0, *relaid); } - return ToBuffer(client, /*device_ordinal=*/0, argument).ConsumeValueOrDie(); + return ToBuffer(client, /*device_ordinal=*/0, argument); }(); - return new LocalShapedBuffer(std::move(buf)); + TF_RETURN_IF_ERROR(buf.status()); + return new LocalShapedBuffer(std::move(buf).ValueOrDie()); } -std::unique_ptr LocalShapedBuffer::ToLiteral() const { +StatusOr> LocalShapedBuffer::ToLiteral() const { LocalClient* client = GetOrCreateLocalClient(); - return client->ShapedBufferToLiteral(*shaped_buffer()).ConsumeValueOrDie(); + return client->ShapedBufferToLiteral(*shaped_buffer()); } CompiledLocalComputation::CompiledLocalComputation( @@ -248,7 +249,7 @@ LocalShapedBuffer* CompiledLocalComputation::ExecuteWithShapedBuffers( return new LocalShapedBuffer(std::move(result_buffer)); } -LocalComputation::LocalComputation(Computation computation) 
+LocalComputation::LocalComputation(XlaComputation computation) : computation_(std::move(computation)) {} StatusOr LocalComputation::Compile( @@ -271,7 +272,7 @@ StatusOr LocalComputation::Compile( return new CompiledLocalComputation(std::move(local_executable)); } -const Computation& LocalComputation::computation() const { +const XlaComputation& LocalComputation::computation() const { return computation_; } @@ -281,8 +282,12 @@ StatusOr LocalComputation::GetReturnValueShape() const { return std::move(*program_shape.mutable_result()); } +LocalOp::LocalOp(const XlaOp& op) : op_(op) {} + +const XlaOp& LocalOp::op() const { return op_; } + LocalComputationBuilder::LocalComputationBuilder(const string& computation_name) - : builder_(GetOrCreateLocalClient(), computation_name) {} + : builder_(computation_name) {} void LocalComputationBuilder::SetOpMetadata(const OpMetadata& metadata) { builder_.SetOpMetadata(metadata); @@ -291,19 +296,21 @@ void LocalComputationBuilder::SetOpMetadata(const OpMetadata& metadata) { void LocalComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); } StatusOr LocalComputationBuilder::Build() { - TF_ASSIGN_OR_RETURN(Computation computation, builder_.Build()); + TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.Build()); return new LocalComputation(std::move(computation)); } -ComputationDataHandle LocalComputationBuilder::Parameter(int64 parameter_number, - const Shape& shape, - const string& name) { +LocalOp LocalComputationBuilder::Parameter(int64 parameter_number, + const Shape& shape, + const string& name) { return builder_.Parameter(parameter_number, shape, name); } std::unique_ptr LocalComputationBuilder::GetShape( - const ComputationDataHandle& operand) { - return builder_.GetShape(operand).ConsumeValueOrDie(); + const LocalOp& operand) { + auto result = MakeUnique(); + *result = builder_.GetShape(operand.op()).ValueOrDie(); + return result; } StatusOr LocalComputationBuilder::GetReturnValueShape() { @@ -311,222 
+318,236 @@ StatusOr LocalComputationBuilder::GetReturnValueShape() { return program_shape.result(); } -ComputationDataHandle LocalComputationBuilder::Infeed(const Shape& shape) { +LocalOp LocalComputationBuilder::Infeed(const Shape& shape) { return builder_.Infeed(shape); } -void LocalComputationBuilder::Outfeed(const ComputationDataHandle& operand, +void LocalComputationBuilder::Outfeed(const LocalOp& operand, const Shape& shape, const string& outfeed_config) { - builder_.Outfeed(operand, shape, outfeed_config); + builder_.Outfeed(operand.op(), shape, outfeed_config); } -ComputationDataHandle LocalComputationBuilder::ConstantLiteral( - const Literal& literal) { +LocalOp LocalComputationBuilder::ConstantLiteral(const Literal& literal) { return builder_.ConstantLiteral(literal); } -ComputationDataHandle LocalComputationBuilder::Broadcast( - const ComputationDataHandle& operand, +LocalOp LocalComputationBuilder::Broadcast( + const LocalOp& operand, tensorflow::gtl::ArraySlice broadcast_sizes) { - return builder_.Broadcast(operand, broadcast_sizes); + return builder_.Broadcast(operand.op(), broadcast_sizes); } -ComputationDataHandle LocalComputationBuilder::Pad( - const ComputationDataHandle& operand, - const ComputationDataHandle& padding_value, - const PaddingConfig& padding_config) { - return builder_.Pad(operand, padding_value, padding_config); +LocalOp LocalComputationBuilder::Pad(const LocalOp& operand, + const LocalOp& padding_value, + const PaddingConfig& padding_config) { + return builder_.Pad(operand.op(), padding_value.op(), padding_config); } -ComputationDataHandle LocalComputationBuilder::Reshape( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions, +LocalOp LocalComputationBuilder::Reshape( + const LocalOp& operand, tensorflow::gtl::ArraySlice dimensions, tensorflow::gtl::ArraySlice new_sizes) { - return builder_.Reshape(operand, dimensions, new_sizes); + return builder_.Reshape(operand.op(), dimensions, new_sizes); } 
-ComputationDataHandle LocalComputationBuilder::Collapse( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions) { - return builder_.Collapse(operand, dimensions); +LocalOp LocalComputationBuilder::Collapse( + const LocalOp& operand, tensorflow::gtl::ArraySlice dimensions) { + return builder_.Collapse(operand.op(), dimensions); } -ComputationDataHandle LocalComputationBuilder::CrossReplicaSum( - const ComputationDataHandle& operand) { - return builder_.CrossReplicaSum(operand); +LocalOp LocalComputationBuilder::CrossReplicaSum(const LocalOp& operand) { + return builder_.CrossReplicaSum(operand.op()); } -ComputationDataHandle LocalComputationBuilder::Slice( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice start_indices, +LocalOp LocalComputationBuilder::Slice( + const LocalOp& operand, tensorflow::gtl::ArraySlice start_indices, tensorflow::gtl::ArraySlice limit_indices, tensorflow::gtl::ArraySlice strides) { - return builder_.Slice(operand, start_indices, limit_indices, strides); + return builder_.Slice(operand.op(), start_indices, limit_indices, strides); } -ComputationDataHandle LocalComputationBuilder::SliceInDim( - const ComputationDataHandle& operand, int64 start_index, int64 limit_index, - int64 stride, int64 dimno) { - return builder_.SliceInDim(operand, start_index, limit_index, stride, dimno); +LocalOp LocalComputationBuilder::SliceInDim(const LocalOp& operand, + int64 start_index, + int64 limit_index, int64 stride, + int64 dimno) { + return builder_.SliceInDim(operand.op(), start_index, limit_index, stride, + dimno); } -ComputationDataHandle LocalComputationBuilder::DynamicSlice( - const ComputationDataHandle& operand, - const ComputationDataHandle& start_indices, +LocalOp LocalComputationBuilder::DynamicSlice( + const LocalOp& operand, const LocalOp& start_indices, tensorflow::gtl::ArraySlice slice_sizes) { - return builder_.DynamicSlice(operand, start_indices, slice_sizes); + return 
builder_.DynamicSlice(operand.op(), start_indices.op(), slice_sizes); } -ComputationDataHandle LocalComputationBuilder::DynamicUpdateSlice( - const ComputationDataHandle& operand, const ComputationDataHandle& update, - const ComputationDataHandle& start_indices) { - return builder_.DynamicUpdateSlice(operand, update, start_indices); +LocalOp LocalComputationBuilder::DynamicUpdateSlice( + const LocalOp& operand, const LocalOp& update, + const LocalOp& start_indices) { + return builder_.DynamicUpdateSlice(operand.op(), update.op(), + start_indices.op()); } -ComputationDataHandle LocalComputationBuilder::ConcatInDim( - tensorflow::gtl::ArraySlice operands, - int64 dimension) { - return builder_.ConcatInDim(operands, dimension); +LocalOp LocalComputationBuilder::ConcatInDim( + tensorflow::gtl::ArraySlice operands, int64 dimension) { + std::vector xla_ops; + xla_ops.reserve(operands.size()); + for (const auto& op : operands) { + xla_ops.push_back(op.op()); + } + return builder_.ConcatInDim(xla_ops, dimension); } -ComputationDataHandle -LocalComputationBuilder::SelectAndScatterWithGeneralPadding( - const ComputationDataHandle& operand, const LocalComputation& select, +LocalOp LocalComputationBuilder::SelectAndScatterWithGeneralPadding( + const LocalOp& operand, const LocalComputation& select, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding, - const ComputationDataHandle& source, - const ComputationDataHandle& init_value, const LocalComputation& scatter) { + const LocalOp& source, const LocalOp& init_value, + const LocalComputation& scatter) { return builder_.SelectAndScatterWithGeneralPadding( - operand, select.computation(), window_dimensions, window_strides, padding, - source, init_value, scatter.computation()); + operand.op(), select.computation(), window_dimensions, window_strides, + padding, source.op(), init_value.op(), scatter.computation()); } -ComputationDataHandle 
LocalComputationBuilder::Tuple( - tensorflow::gtl::ArraySlice elements) { - return builder_.Tuple(elements); +LocalOp LocalComputationBuilder::Tuple( + tensorflow::gtl::ArraySlice elements) { + std::vector xla_ops; + xla_ops.reserve(elements.size()); + for (const auto& op : elements) { + xla_ops.push_back(op.op()); + } + + return builder_.Tuple(xla_ops); } -ComputationDataHandle LocalComputationBuilder::GetTupleElement( - const ComputationDataHandle& tuple_data, int64 index) { - return builder_.GetTupleElement(tuple_data, index); +LocalOp LocalComputationBuilder::GetTupleElement(const LocalOp& tuple_data, + int64 index) { + return builder_.GetTupleElement(tuple_data.op(), index); } -ComputationDataHandle LocalComputationBuilder::Dot( - const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) { - return builder_.Dot(lhs, rhs); +LocalOp LocalComputationBuilder::Dot(const LocalOp& lhs, const LocalOp& rhs) { + return builder_.Dot(lhs.op(), rhs.op()); } -ComputationDataHandle LocalComputationBuilder::DotGeneral( - const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, +LocalOp LocalComputationBuilder::DotGeneral( + const LocalOp& lhs, const LocalOp& rhs, const DotDimensionNumbers& dimension_numbers) { - return builder_.DotGeneral(lhs, rhs, dimension_numbers); + return builder_.DotGeneral(lhs.op(), rhs.op(), dimension_numbers); } -ComputationDataHandle LocalComputationBuilder::ConvGeneralDilated( - const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, +LocalOp LocalComputationBuilder::ConvGeneralDilated( + const LocalOp& lhs, const LocalOp& rhs, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding, tensorflow::gtl::ArraySlice lhs_dilation, tensorflow::gtl::ArraySlice rhs_dilation, const ConvolutionDimensionNumbers& dimension_numbers) { - return builder_.ConvGeneralDilated(lhs, rhs, window_strides, padding, - lhs_dilation, rhs_dilation, + return builder_.ConvGeneralDilated(lhs.op(), rhs.op(), 
window_strides, + padding, lhs_dilation, rhs_dilation, dimension_numbers); } -ComputationDataHandle LocalComputationBuilder::ConvertElementType( - const ComputationDataHandle& operand, PrimitiveType new_element_type) { - return builder_.ConvertElementType(operand, new_element_type); +LocalOp LocalComputationBuilder::ConvertElementType( + const LocalOp& operand, PrimitiveType new_element_type) { + return builder_.ConvertElementType(operand.op(), new_element_type); } -ComputationDataHandle LocalComputationBuilder::Call( +LocalOp LocalComputationBuilder::Call( const LocalComputation& local_computation, - tensorflow::gtl::ArraySlice operands) { - return builder_.Call(local_computation.computation(), operands); + tensorflow::gtl::ArraySlice operands) { + std::vector xla_ops; + xla_ops.reserve(operands.size()); + for (const auto& op : operands) { + xla_ops.push_back(op.op()); + } + return builder_.Call(local_computation.computation(), xla_ops); } -ComputationDataHandle LocalComputationBuilder::Transpose( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice permutation) { - return builder_.Transpose(operand, permutation); +LocalOp LocalComputationBuilder::Transpose( + const LocalOp& operand, tensorflow::gtl::ArraySlice permutation) { + return builder_.Transpose(operand.op(), permutation); } -ComputationDataHandle LocalComputationBuilder::Rev( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions) { - return builder_.Rev(operand, dimensions); +LocalOp LocalComputationBuilder::Rev( + const LocalOp& operand, tensorflow::gtl::ArraySlice dimensions) { + return builder_.Rev(operand.op(), dimensions); } -ComputationDataHandle LocalComputationBuilder::Map( - tensorflow::gtl::ArraySlice operands, +LocalOp LocalComputationBuilder::Map( + tensorflow::gtl::ArraySlice operands, const LocalComputation& local_computation, tensorflow::gtl::ArraySlice dimensions, - tensorflow::gtl::ArraySlice static_operands) { - return builder_.Map(operands, 
local_computation.computation(), dimensions, - static_operands); + tensorflow::gtl::ArraySlice static_operands) { + std::vector xla_ops; + xla_ops.reserve(operands.size()); + for (const auto& op : operands) { + xla_ops.push_back(op.op()); + } + + std::vector static_xla_ops; + static_xla_ops.reserve(static_operands.size()); + for (const auto& op : static_operands) { + static_xla_ops.push_back(op.op()); + } + + return builder_.Map(xla_ops, local_computation.computation(), dimensions, + static_xla_ops); } -ComputationDataHandle LocalComputationBuilder::Reduce( - const ComputationDataHandle& operand, - const ComputationDataHandle& init_value, +LocalOp LocalComputationBuilder::Reduce( + const LocalOp& operand, const LocalOp& init_value, const LocalComputation& local_computation, tensorflow::gtl::ArraySlice dimensions_to_reduce) { - return builder_.Reduce(operand, init_value, local_computation.computation(), - dimensions_to_reduce); + return builder_.Reduce(operand.op(), init_value.op(), + local_computation.computation(), dimensions_to_reduce); } -ComputationDataHandle LocalComputationBuilder::ReduceWindowWithGeneralPadding( - const ComputationDataHandle& operand, - const ComputationDataHandle& init_value, +LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( + const LocalOp& operand, const LocalOp& init_value, const LocalComputation& local_computation, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice> padding) { return builder_.ReduceWindowWithGeneralPadding( - operand, init_value, local_computation.computation(), window_dimensions, - window_strides, padding); + operand.op(), init_value.op(), local_computation.computation(), + window_dimensions, window_strides, padding); } -ComputationDataHandle LocalComputationBuilder::RngNormal( - const ComputationDataHandle& mu, const ComputationDataHandle& sigma, - const Shape& shape) { - return builder_.RngNormal(mu, sigma, shape); +LocalOp 
LocalComputationBuilder::RngNormal(const LocalOp& mu, + const LocalOp& sigma, + const Shape& shape) { + return builder_.RngNormal(mu.op(), sigma.op(), shape); } -ComputationDataHandle LocalComputationBuilder::RngUniform( - const ComputationDataHandle& a, const ComputationDataHandle& b, - const Shape& shape) { - return builder_.RngUniform(a, b, shape); +LocalOp LocalComputationBuilder::RngUniform(const LocalOp& a, const LocalOp& b, + const Shape& shape) { + return builder_.RngUniform(a.op(), b.op(), shape); } -ComputationDataHandle LocalComputationBuilder::While( - const LocalComputation& condition, const LocalComputation& body, - const ComputationDataHandle& init) { - return builder_.While(condition.computation(), body.computation(), init); +LocalOp LocalComputationBuilder::While(const LocalComputation& condition, + const LocalComputation& body, + const LocalOp& init) { + return builder_.While(condition.computation(), body.computation(), init.op()); } -ComputationDataHandle LocalComputationBuilder::Conditional( - const ComputationDataHandle& predicate, - const ComputationDataHandle& true_operand, - const LocalComputation& true_computation, - const ComputationDataHandle& false_operand, +LocalOp LocalComputationBuilder::Conditional( + const LocalOp& predicate, const LocalOp& true_operand, + const LocalComputation& true_computation, const LocalOp& false_operand, const LocalComputation& false_computation) { - return builder_.Conditional(predicate, true_operand, - true_computation.computation(), false_operand, - false_computation.computation()); + return builder_.Conditional( + predicate.op(), true_operand.op(), true_computation.computation(), + false_operand.op(), false_computation.computation()); } -StatusOr LocalComputationBuilder::IsConstant( - const ComputationDataHandle& operand, int64 num_parameters) { - return builder_.IsConstant(operand, num_parameters); +StatusOr LocalComputationBuilder::IsConstant(const LocalOp& operand) { + return 
builder_.IsConstant(operand.op()); } -StatusOr> LocalComputationBuilder::ComputeConstant( - const ComputationDataHandle& operand, const Layout* output_layout, - tensorflow::gtl::ArraySlice parameters) { - return builder_.ComputeConstant(operand, output_layout, parameters); +StatusOr LocalComputationBuilder::BuildConstantSubGraph( + const LocalOp& operand) { + TF_ASSIGN_OR_RETURN(XlaComputation computation, + builder_.BuildConstantSubGraph(operand.op())); + return new LocalComputation(std::move(computation)); } #define _FORWARD(method_name, return_sig, args_sig, args) \ @@ -534,23 +555,19 @@ StatusOr> LocalComputationBuilder::ComputeConstant( return builder_.method_name args; \ } -#define _FORWARD_UNOP(method_name) \ - _FORWARD(method_name, ComputationDataHandle, \ - (const ComputationDataHandle& operand), (operand)) - -#define _FORWARD_BINOP(method_name) \ - _FORWARD( \ - method_name, ComputationDataHandle, \ - (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \ - tensorflow::gtl::ArraySlice broadcast_dimensions), \ - (lhs, rhs, broadcast_dimensions)) - -#define _FORWARD_TRIOP(method_name) \ - _FORWARD( \ - method_name, ComputationDataHandle, \ - (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \ - const ComputationDataHandle& ehs), \ - (lhs, rhs, ehs)) +#define _FORWARD_UNOP(method_name) \ + _FORWARD(method_name, LocalOp, (const LocalOp& operand), (operand.op())) + +#define _FORWARD_BINOP(method_name) \ + _FORWARD(method_name, LocalOp, \ + (const LocalOp& lhs, const LocalOp& rhs, \ + tensorflow::gtl::ArraySlice broadcast_dimensions), \ + (lhs.op(), rhs.op(), broadcast_dimensions)) + +#define _FORWARD_TRIOP(method_name) \ + _FORWARD(method_name, LocalOp, \ + (const LocalOp& lhs, const LocalOp& rhs, const LocalOp& ehs), \ + (lhs.op(), rhs.op(), ehs.op())) _FORWARD_TRIOP(Select) _FORWARD_TRIOP(Clamp) diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h 
b/tensorflow/compiler/xla/python/local_computation_builder.h index e1048909ab29c2147a37ed72844391400d99e90d..a06b85b4ea28c4f386598901138930eaaed12079 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -17,9 +17,10 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_COMPUTATION_BUILDER_H_ #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/executable_build_options.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -59,12 +60,14 @@ StatusOr > TransferFromOutfeedLocalReplica( // client. class LocalShapedBuffer { public: - static LocalShapedBuffer* FromLiteral( + static StatusOr FromLiteral( const Literal& argument, const tensorflow::gtl::optional& shape_with_layout); + LocalShapedBuffer(ScopedShapedBuffer shaped_buffer); const ScopedShapedBuffer* shaped_buffer() const; - std::unique_ptr ToLiteral() const; + + StatusOr > ToLiteral() const; private: ScopedShapedBuffer shaped_buffer_; @@ -95,25 +98,37 @@ class CompiledLocalComputation { std::unique_ptr executable_; }; -// Wraps a Computation produced by a LocalComputationBuilder. The +// Wraps a XlaComputation produced by a LocalComputationBuilder. The // Compile method compiles the computation to a (local) executable via // the client library's local client. This class is intended to be // made available to Python via SWIG. 
class LocalComputation { public: - LocalComputation(Computation computation); + LocalComputation(XlaComputation computation); StatusOr Compile( const std::vector& argument_shapes, const ExecutableBuildOptions* build_options); - const Computation& computation() const; + const XlaComputation& computation() const; // Returns the return-value shape for this computation. StatusOr GetReturnValueShape() const; private: - Computation computation_; + XlaComputation computation_; +}; + +// Wraps a XlaOp produced by a LocalComputationBuilder. This class is intended +// to be made available to Python via SWIG. +class LocalOp { + public: + LocalOp(const XlaOp& op); + + const XlaOp& op() const; + + private: + XlaOp op_; }; // Wraps the ComputationBuilder API in order to: @@ -133,166 +148,137 @@ class LocalComputationBuilder { // Returns an owned LocalComputation to the caller on success. StatusOr Build(); - ComputationDataHandle Parameter(int64 parameter_number, const Shape& shape, - const string& name); + LocalOp Parameter(int64 parameter_number, const Shape& shape, + const string& name); - std::unique_ptr GetShape(const ComputationDataHandle& operand); + std::unique_ptr GetShape(const LocalOp& operand); // Returns the shape of the current return value for the computation. 
StatusOr GetReturnValueShape(); - ComputationDataHandle Infeed(const Shape& shape); + LocalOp Infeed(const Shape& shape); - void Outfeed(const ComputationDataHandle& operand, const Shape& shape, + void Outfeed(const LocalOp& operand, const Shape& shape, const string& outfeed_config); - ComputationDataHandle ConstantLiteral(const Literal& literal); + LocalOp ConstantLiteral(const Literal& literal); - ComputationDataHandle Broadcast( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice broadcast_sizes); + LocalOp Broadcast(const LocalOp& operand, + tensorflow::gtl::ArraySlice broadcast_sizes); - ComputationDataHandle Pad(const ComputationDataHandle& operand, - const ComputationDataHandle& padding_value, - const PaddingConfig& padding_config); + LocalOp Pad(const LocalOp& operand, const LocalOp& padding_value, + const PaddingConfig& padding_config); - ComputationDataHandle Reshape(const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions, - tensorflow::gtl::ArraySlice new_sizes); + LocalOp Reshape(const LocalOp& operand, + tensorflow::gtl::ArraySlice dimensions, + tensorflow::gtl::ArraySlice new_sizes); - ComputationDataHandle Collapse(const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions); + LocalOp Collapse(const LocalOp& operand, + tensorflow::gtl::ArraySlice dimensions); - ComputationDataHandle CrossReplicaSum(const ComputationDataHandle& operand); + LocalOp CrossReplicaSum(const LocalOp& operand); - ComputationDataHandle Slice(const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice start_indices, - tensorflow::gtl::ArraySlice limit_indices, - tensorflow::gtl::ArraySlice strides); + LocalOp Slice(const LocalOp& operand, + tensorflow::gtl::ArraySlice start_indices, + tensorflow::gtl::ArraySlice limit_indices, + tensorflow::gtl::ArraySlice strides); - ComputationDataHandle SliceInDim(const ComputationDataHandle& operand, - int64 start_index, int64 limit_index, - int64 stride, int64 dimno); + 
LocalOp SliceInDim(const LocalOp& operand, int64 start_index, + int64 limit_index, int64 stride, int64 dimno); - ComputationDataHandle DynamicSlice( - const ComputationDataHandle& operand, - const ComputationDataHandle& start_indices, - tensorflow::gtl::ArraySlice slice_sizes); + LocalOp DynamicSlice(const LocalOp& operand, const LocalOp& start_indices, + tensorflow::gtl::ArraySlice slice_sizes); - ComputationDataHandle DynamicUpdateSlice( - const ComputationDataHandle& operand, const ComputationDataHandle& update, - const ComputationDataHandle& start_indices); + LocalOp DynamicUpdateSlice(const LocalOp& operand, const LocalOp& update, + const LocalOp& start_indices); - ComputationDataHandle ConcatInDim( - tensorflow::gtl::ArraySlice operands, - int64 dimension); + LocalOp ConcatInDim(tensorflow::gtl::ArraySlice operands, + int64 dimension); - ComputationDataHandle SelectAndScatterWithGeneralPadding( - const ComputationDataHandle& operand, const LocalComputation& select, + LocalOp SelectAndScatterWithGeneralPadding( + const LocalOp& operand, const LocalComputation& select, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice > padding, - const ComputationDataHandle& source, - const ComputationDataHandle& init_value, const LocalComputation& scatter); + const LocalOp& source, const LocalOp& init_value, + const LocalComputation& scatter); - ComputationDataHandle Tuple( - tensorflow::gtl::ArraySlice elements); + LocalOp Tuple(tensorflow::gtl::ArraySlice elements); - ComputationDataHandle GetTupleElement(const ComputationDataHandle& tuple_data, - int64 index); + LocalOp GetTupleElement(const LocalOp& tuple_data, int64 index); - ComputationDataHandle Dot(const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs); + LocalOp Dot(const LocalOp& lhs, const LocalOp& rhs); - ComputationDataHandle DotGeneral( - const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, - const 
DotDimensionNumbers& dimension_numbers); + LocalOp DotGeneral(const LocalOp& lhs, const LocalOp& rhs, + const DotDimensionNumbers& dimension_numbers); - ComputationDataHandle ConvGeneralDilated( - const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + LocalOp ConvGeneralDilated( + const LocalOp& lhs, const LocalOp& rhs, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice > padding, tensorflow::gtl::ArraySlice lhs_dilation, tensorflow::gtl::ArraySlice rhs_dilation, const ConvolutionDimensionNumbers& dimension_numbers); - ComputationDataHandle ConvertElementType(const ComputationDataHandle& operand, - PrimitiveType new_element_type); + LocalOp ConvertElementType(const LocalOp& operand, + PrimitiveType new_element_type); - ComputationDataHandle Call( - const LocalComputation& local_computation, - tensorflow::gtl::ArraySlice operands); + LocalOp Call(const LocalComputation& local_computation, + tensorflow::gtl::ArraySlice operands); - ComputationDataHandle Transpose( - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice permutation); + LocalOp Transpose(const LocalOp& operand, + tensorflow::gtl::ArraySlice permutation); - ComputationDataHandle Rev(const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dimensions); + LocalOp Rev(const LocalOp& operand, + tensorflow::gtl::ArraySlice dimensions); - ComputationDataHandle Map( - tensorflow::gtl::ArraySlice operands, - const LocalComputation& local_computation, - tensorflow::gtl::ArraySlice dimensions, - tensorflow::gtl::ArraySlice static_operands); + LocalOp Map(tensorflow::gtl::ArraySlice operands, + const LocalComputation& local_computation, + tensorflow::gtl::ArraySlice dimensions, + tensorflow::gtl::ArraySlice static_operands); - ComputationDataHandle Reduce( - const ComputationDataHandle& operand, - const ComputationDataHandle& init_value, - const LocalComputation& local_computation, - tensorflow::gtl::ArraySlice dimensions_to_reduce); + LocalOp 
Reduce(const LocalOp& operand, const LocalOp& init_value, + const LocalComputation& local_computation, + tensorflow::gtl::ArraySlice dimensions_to_reduce); - ComputationDataHandle ReduceWindowWithGeneralPadding( - const ComputationDataHandle& operand, - const ComputationDataHandle& init_value, + LocalOp ReduceWindowWithGeneralPadding( + const LocalOp& operand, const LocalOp& init_value, const LocalComputation& local_computation, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, tensorflow::gtl::ArraySlice > padding); - ComputationDataHandle RngNormal(const ComputationDataHandle& mu, - const ComputationDataHandle& sigma, - const Shape& shape); + LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma, + const Shape& shape); - ComputationDataHandle RngUniform(const ComputationDataHandle& a, - const ComputationDataHandle& b, - const Shape& shape); + LocalOp RngUniform(const LocalOp& a, const LocalOp& b, const Shape& shape); - ComputationDataHandle While(const LocalComputation& condition, - const LocalComputation& body, - const ComputationDataHandle& init); + LocalOp While(const LocalComputation& condition, const LocalComputation& body, + const LocalOp& init); - ComputationDataHandle Conditional(const ComputationDataHandle& predicate, - const ComputationDataHandle& true_operand, - const LocalComputation& true_computation, - const ComputationDataHandle& false_operand, - const LocalComputation& false_computation); + LocalOp Conditional(const LocalOp& predicate, const LocalOp& true_operand, + const LocalComputation& true_computation, + const LocalOp& false_operand, + const LocalComputation& false_computation); - StatusOr IsConstant(const ComputationDataHandle& operand, - int64 num_parameters); + StatusOr IsConstant(const LocalOp& operand); - StatusOr > ComputeConstant( - const ComputationDataHandle& operand, const Layout* output_layout, - tensorflow::gtl::ArraySlice parameters); + StatusOr BuildConstantSubGraph(const LocalOp& 
operand); #define _FORWARD(method_name, return_sig, args_sig) \ return_sig method_name args_sig; -#define _FORWARD_UNOP(method_name) \ - _FORWARD(method_name, ComputationDataHandle, \ - (const ComputationDataHandle& operand)) +#define _FORWARD_UNOP(method_name) \ + _FORWARD(method_name, LocalOp, (const LocalOp& operand)) -#define _FORWARD_BINOP(method_name) \ - _FORWARD( \ - method_name, ComputationDataHandle, \ - (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \ - tensorflow::gtl::ArraySlice broadcast_dimensions)) +#define _FORWARD_BINOP(method_name) \ + _FORWARD(method_name, LocalOp, \ + (const LocalOp& lhs, const LocalOp& rhs, \ + tensorflow::gtl::ArraySlice broadcast_dimensions)) -#define _FORWARD_TRIOP(method_name) \ - _FORWARD( \ - method_name, ComputationDataHandle, \ - (const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, \ - const ComputationDataHandle& ehs)) +#define _FORWARD_TRIOP(method_name) \ + _FORWARD(method_name, LocalOp, \ + (const LocalOp& lhs, const LocalOp& rhs, const LocalOp& ehs)) _FORWARD_TRIOP(Select) _FORWARD_TRIOP(Clamp) @@ -336,7 +322,7 @@ class LocalComputationBuilder { #undef _FORWARD_TRIOP private: - ComputationBuilder builder_; + XlaBuilder builder_; }; // Functions for freeing resources from the Python side. diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index ac792e8189bda9eda472e7d282db86ac988c57b9..04c56bbba95fbf3248df6c49700ff563c8b253c0 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -22,9 +22,8 @@ limitations under the License. 
// // C++ Python // -------------------------------------+--------------------------------------- -// ComputationDataHandle <-> int // ArraySlice <- sequence of int -// ArraySlice <- sequence of int +// ArraySlice <- sequence of LocalOp // Literal <-> (nested tuple of) numpy ndarray // std::vector <- sequence of (nested tuple of) ndarray // Shape -> pair holding (dtype, dimensions) @@ -91,12 +90,9 @@ limitations under the License. // One central reason for the Python-side indirection is that the // Python-side objects produced by the typemaps in this file are // further packaged up by xla_client before being passed on. For -// instance, xla_client wraps the long produced for a C++ -// ComputationDataHandle in a Python ComputationDataHandle proto, -// rather than exposing a raw long outside of the client. Similarly, -// the Python pair produced for a C++ Shape is further wrapped in a -// Python class (xla_client.Shape) so as not to expose the raw pair -// externally. +// instance, the Python pair produced for a C++ Shape is further +// wrapped in a Python class (xla_client.Shape) so as not to expose +// the raw pair externally. // // Other SWIG object wrappers (e.g. 
of LocalComputation) are further // wrapped by xla_client in order to set up a custom destructor that @@ -124,6 +120,7 @@ using namespace xla; using namespace xla::swig; namespace xla { + namespace swig { bool GetIntAttr(PyObject* o, const char* field, int64* result) { @@ -177,27 +174,25 @@ bool HandleStringAttribute(PyObject* o, tensorflow::ImportNumpy(); %} -// ComputationDataHandle - -%typemap(in) const ComputationDataHandle& (ComputationDataHandle temp) { - const int64 handle = numpy::PyIntOrPyLongToLong($input); - if (handle == -1 && PyErr_Occurred()) { +%typemap(out) StatusOr { + if ($1.ok()) { + auto* value = $1.ValueOrDie(); + { + auto* $1 = value; + $typemap(out, xla::swig::CompiledLocalComputation*) + } + } else { + PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); SWIG_fail; } - temp.set_handle(handle); - $1 = &temp; -} - -%typemap(out) ComputationDataHandle { - $result = numpy::LongToPyIntOrPyLong($1.handle()); } -%typemap(out) StatusOr { +%typemap(out) StatusOr { if ($1.ok()) { auto* value = $1.ValueOrDie(); { auto* $1 = value; - $typemap(out, xla::swig::CompiledLocalComputation*) + $typemap(out, xla::swig::LocalShapedBuffer*) } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); @@ -288,33 +283,23 @@ tensorflow::ImportNumpy(); $1 = temps; } -// ComputationDataHandle +// ArraySlice -%typemap(in) tensorflow::gtl::ArraySlice - (std::vector temps) { +%typemap(in) tensorflow::gtl::ArraySlice( + std::vector temps) { if (!PySequence_Check($input)) { PyErr_SetString(PyExc_TypeError, "Argument is not a sequence"); SWIG_fail; } const int size = PySequence_Size($input); - temps.resize(size); for (int i = 0; i < size; ++i) { PyObject* o = PySequence_GetItem($input, i); - PyObject* py_int = numpy::PyNumberToPyInt(o); - if (!py_int) { - PyErr_SetString( - PyExc_TypeError, - "Argument sequence element cannot be converted to int"); - SWIG_fail; - } - const int64 handle = numpy::PyIntOrPyLongToLong(py_int); - if (handle == 
-1 && PyErr_Occurred()) { - Py_DECREF(py_int); - Py_DECREF(o); + LocalOp* op; + if ((SWIG_ConvertPtr(o, (void**)&op, $descriptor(xla::swig::LocalOp*), + SWIG_POINTER_EXCEPTION)) == -1) { SWIG_fail; } - temps[i].set_handle(handle); - Py_DECREF(py_int); + temps.push_back(*op); Py_DECREF(o); } $1 = temps; @@ -921,6 +906,7 @@ tensorflow::ImportNumpy(); %unignore xla::swig::LocalComputation; %unignore xla::swig::LocalComputation::Compile; %unignore xla::swig::LocalComputation::GetReturnValueShape; +%unignore xla::swig::LocalOp; %unignore xla::swig::LocalComputationBuilder; %unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder; %unignore xla::swig::LocalComputationBuilder::Build; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index f6809b6b871d7e246dd43811c7e8c08378d53989..1d5b75d1bee2dcee3e448d0bcb72103b539efac6 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -335,20 +335,6 @@ def _wrap_shape(shape_info): return Shape.array_shape(dtype, dims) -def _wrap_data_handle(handle): - cdh = xla_data_pb2.ComputationDataHandle() - cdh.handle = handle - return cdh - - -def _unwrap_data_handle(handle_proto): - return handle_proto.handle - - -def _unwrap_data_handles(handle_protos): - return [_unwrap_data_handle(cdh) for cdh in handle_protos] - - def require_numpy_array_layout(value): if isinstance(value, tuple): return tuple(require_numpy_array_layout(x) for x in value) @@ -535,9 +521,9 @@ class ComputationBuilder(object): queue for subsequent use in the computation. Returns: - A ComputationDataHandle message. + A LocalOp. """ - return _wrap_data_handle(self._client.Infeed(shape)) + return self._client.Infeed(shape) def Outfeed(self, operand): """Enqueues an outfeed op onto the computation. 
@@ -545,9 +531,7 @@ class ComputationBuilder(object): Outfeed operations enqueue data, using the given operand, onto the XLA outfeed queue for subsequent dequeue via the client API. """ - self._client.Outfeed( - _unwrap_data_handle(operand), self.GetShape(operand), - ''.encode('utf-8')) + self._client.Outfeed(operand, self.GetShape(operand), ''.encode('utf-8')) def Constant(self, value): """Enqueues a constant op onto the computation. @@ -557,10 +541,10 @@ class ComputationBuilder(object): to one of the supported types. Returns: - A ComputationDataHandle message. + A LocalOp. """ value = require_numpy_array_layout(value) - return _wrap_data_handle(self._client.ConstantLiteral(value)) + return self._client.ConstantLiteral(value) def ConstantF32Scalar(self, value): """Convenience method to enqueue a scalar F32 constant op. @@ -569,7 +553,7 @@ class ComputationBuilder(object): value: a floating-point number. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.Constant(np.array(value, dtype=np.float32)) @@ -580,7 +564,7 @@ class ComputationBuilder(object): value: a floating-point number. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.Constant(np.array(value, dtype=np.float64)) @@ -591,7 +575,7 @@ class ComputationBuilder(object): value: a floating-point number. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.Constant(np.array(value, dtype=np.int32)) @@ -602,7 +586,7 @@ class ComputationBuilder(object): value: a floating-point number. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.Constant(np.array(value, dtype=np.int64)) @@ -613,7 +597,7 @@ class ComputationBuilder(object): value: a boolean value. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.Constant(np.array(value, dtype=np.bool)) @@ -629,15 +613,14 @@ class ComputationBuilder(object): parameters, use it for *all* parameters to avoid clashes. Returns: - A ComputationDataHandle message. 
+ A LocalOp. """ if name is None: name = '' if parameter_num is None: parameter_num = next(self._parameter_numbering) - return _wrap_data_handle( - self._client.Parameter(parameter_num, shape, name.encode('utf8'))) + return self._client.Parameter(parameter_num, shape, name.encode('utf8')) def ParameterFromNumpy(self, value, name=None, parameter_num=None): """Enqueues a Parameter op onto the computation. @@ -649,7 +632,7 @@ class ComputationBuilder(object): parameter_num: as in ParameterWithShape. Returns: - A ComputationDataHandle message. + A LocalOp. """ return self.ParameterWithShape( Shape.from_pyval(value), name=name, parameter_num=parameter_num) @@ -658,14 +641,13 @@ class ComputationBuilder(object): """Enqueues a broadcast operation onto the computation. Args: - operand: the operand ComputationDataHandle to broadcast. + operand: the operand LocalOp to broadcast. sizes: an iterable of broadcast sizes. Returns: - A ComputationDataHandle representing the added broadcast op. + A LocalOp representing the added broadcast op. """ - return _wrap_data_handle( - self._client.Broadcast(_unwrap_data_handle(operand), sizes)) + return self._client.Broadcast(operand, sizes) def Concatenate(self, operands, dimension): """Enqueues a concatenate operation onto the computation. @@ -675,10 +657,9 @@ class ComputationBuilder(object): dimension: the dimension in which to perform the concatenation. Returns: - A ComputationDataHandle representing the added concatenate op. + A LocalOp representing the added concatenate op. """ - return _wrap_data_handle( - self._client.ConcatInDim(_unwrap_data_handles(operands), dimension)) + return self._client.ConcatInDim(operands, dimension) def ConvertElementType(self, operand, new_element_type): """Enqueues an element type conversion operation onto the computation. @@ -688,14 +669,12 @@ class ComputationBuilder(object): new_element_type: the target primitive type. Returns: - A ComputationDataHandle representing the added conversion op. 
+ A LocalOp representing the added conversion op. """ - return _wrap_data_handle( - self._client.ConvertElementType( - _unwrap_data_handle(operand), new_element_type)) + return self._client.ConvertElementType(operand, new_element_type) def GetShape(self, operand): - return _wrap_shape(self._client.GetShape(_unwrap_data_handle(operand))) + return _wrap_shape(self._client.GetShape(operand)) def GetReturnValueShape(self): return _wrap_shape(self._client.GetReturnValueShape()) @@ -707,40 +686,35 @@ class ComputationBuilder(object): """Enqueues a Pad operation onto the computation. Args: - operand: ComputationDataHandle representing the array to pad. - padding_value: ComputationDataHandle representing the scalar pad value. + operand: LocalOp representing the array to pad. + padding_value: LocalOp representing the scalar pad value. padding_config: either an xla_data_pb2.PaddingConfig or a list of integer triples (edge_padding_low, edge_padding_high, interior_padding) representing the configuration of the padding operation. Returns: - A ComputationDataHandle representing the added Pad op. + A LocalOp representing the added Pad op. """ if not isinstance(padding_config, xla_data_pb2.PaddingConfig): padding_config = GetPaddingConfigFromTriples(padding_config) - return _wrap_data_handle( - self._client.Pad(_unwrap_data_handle(operand), - _unwrap_data_handle(padding_value), - padding_config)) + return self._client.Pad(operand, padding_value, padding_config) def Reshape(self, operand, dimensions, new_sizes): """Enqueues a reshape op onto the computation. Args: - operand: ComputationDataHandle representing the array to be reshaped. + operand: LocalOp representing the array to be reshaped. dimensions: sequence of integers encoding the order in which dimensions are collapsed or None, in which case dimensions are flattened in order. new_sizes: sequence of integers encoding the new dimension sizes (shape). Returns: - A ComputationDataHandle representing the added Reshape op. 
+ A LocalOp representing the added Reshape op. """ if dimensions is None: ndim = len(self.GetShape(operand).dimensions()) dimensions = tuple(range(ndim)) - return _wrap_data_handle( - self._client.Reshape( - _unwrap_data_handle(operand), dimensions, new_sizes)) + return self._client.Reshape(operand, dimensions, new_sizes) def CrossReplicaSum(self, operand): """CrossReplicaSum op. @@ -749,67 +723,56 @@ class ComputationBuilder(object): operand: the operand to sum across replica instances. Returns: - A ComputationDataHandle that has the sum of the value among all replicas. + A LocalOp that has the sum of the value among all replicas. """ - return _wrap_data_handle( - self._client.CrossReplicaSum(_unwrap_data_handle(operand))) + return self._client.CrossReplicaSum(operand) def Collapse(self, operand, dimensions): """Collapse op.""" - return _wrap_data_handle( - self._client.Collapse(_unwrap_data_handle(operand), dimensions)) + return self._client.Collapse(operand, dimensions) def Trans(self, operand): """Specialized matrix transpose op.""" - return _wrap_data_handle( - self._client.Transpose(_unwrap_data_handle(operand), [1, 0])) + return self._client.Transpose(operand, [1, 0]) def Transpose(self, operand, permutation): """Transpose op.""" - return _wrap_data_handle( - self._client.Transpose(_unwrap_data_handle(operand), permutation)) + return self._client.Transpose(operand, permutation) def Rev(self, operand, dimensions): """Rev op.""" - return _wrap_data_handle( - self._client.Rev(_unwrap_data_handle(operand), dimensions)) + return self._client.Rev(operand, dimensions) def Clamp(self, min, operand, max): # pylint: disable=redefined-builtin """Clamp op.""" - return _wrap_data_handle( - self._client.Clamp(_unwrap_data_handle(min), - _unwrap_data_handle(operand), - _unwrap_data_handle(max))) + return self._client.Clamp(min, operand, max) def SelectAndScatter(self, operand, select, window_dimensions, window_strides, padding, source, init_value, scatter): """Select and 
scatter op, used by the gradient of ReduceWindow. Args: - operand: ComputationDataHandle for array of dimension N and type T over + operand: LocalOp for array of dimension N and type T over which the windows slide. select: Computation of type (T, T) -> Pred to apply to the elements of each window to indicate which element is selected. window_dimensions: sequence of N integers for dimensions of the window. window_strides: sequence of N integers for the strides of the window. padding: PaddingType representing either 'SAME' or 'VALID ' padding. - source: ComputationDataHandle for array of type T with values to scatter. - init_value: ComputationDataHandle of scalar type T for initial out value. + source: LocalOp for array of type T with values to scatter. + init_value: LocalOp of scalar type T for initial out value. scatter: Computation of type (T, T) -> T to apply to each scatter source element with its destination element. Returns: - A ComputationDataHandle representing the added SelectAndScatter op. + A LocalOp representing the added SelectAndScatter op. """ pads = _convert_padding_type_to_pad_values( padding, self.GetShape(operand).dimensions(), window_dimensions, window_strides) - return _wrap_data_handle( - self._client.SelectAndScatterWithGeneralPadding( - _unwrap_data_handle(operand), select.c_local_computation, - window_dimensions, window_strides, pads, - _unwrap_data_handle(source), _unwrap_data_handle(init_value), - scatter.c_local_computation)) + return self._client.SelectAndScatterWithGeneralPadding( + operand, select.c_local_computation, window_dimensions, window_strides, + pads, source, init_value, scatter.c_local_computation) def Select(self, pred, on_true, on_false): """Element-wise selection op. @@ -817,17 +780,13 @@ class ComputationBuilder(object): Constructs an output array from elements of two input arrays, based on the values of a predicate array. 
""" - return _wrap_data_handle( - self._client.Select( - _unwrap_data_handle(pred), - _unwrap_data_handle(on_true), - _unwrap_data_handle(on_false))) + return self._client.Select(pred, on_true, on_false) def Slice(self, operand, start_indices, limit_indices, strides=None): """Enqueues a slice operation onto the computation. Args: - operand: ComputationDataHandle for the N dimensional array to be sliced. + operand: LocalOp for the N dimensional array to be sliced. start_indices: iterable of N integers containing the starting indices of the slice for each dimension. limit_indices: iterable of N integers containing the ending indices @@ -836,207 +795,177 @@ class ComputationBuilder(object): each dimension. Returns: - A ComputationDataHandle representing the added Slice op. + A LocalOp representing the added Slice op. """ if strides is None: start_indices = list(start_indices) strides = [1] * len(start_indices) - return _wrap_data_handle( - self._client.Slice( - _unwrap_data_handle(operand), start_indices, limit_indices, - strides)) + return self._client.Slice(operand, start_indices, limit_indices, strides) def SliceInDim(self, operand, start_index, limit_index, stride, dimno): """Enqueues a slice-in-dimension operation onto the computation. Args: - operand: ComputationDataHandle for the N dimensional array to be sliced. + operand: LocalOp for the N dimensional array to be sliced. start_index: an integer containing the start index of the slice. limit_index: an integer containing the end index of the slice. stride: an integer containing the stride size for the slice. dimno: an integer indicating the dimension along which to slice. Returns: - A ComputationDataHandle representing the added Slice op. + A LocalOp representing the added Slice op. 
""" - return _wrap_data_handle( - self._client.SliceInDim( - _unwrap_data_handle(operand), start_index, limit_index, stride, - dimno)) + return self._client.SliceInDim(operand, start_index, limit_index, stride, + dimno) def DynamicSlice(self, operand, start_indices, slice_sizes): """Enqueues a slice op with dynamic start indices onto the computation. Args: - operand: ComputationDataHandle for the N dimensional array to be sliced. - start_indices: ComputationDataHandle for the 1D array of N integers + operand: LocalOp for the N dimensional array to be sliced. + start_indices: LocalOp for the 1D array of N integers containing the starting indices of the slice. slice_sizes: iterable of N integers containing the slice sizes in each dimension. Returns: - A ComputationDataHandle representing the added DynamicSlice op. + A LocalOp representing the added DynamicSlice op. """ - return _wrap_data_handle( - self._client.DynamicSlice( - _unwrap_data_handle(operand), - _unwrap_data_handle(start_indices), - slice_sizes)) + return self._client.DynamicSlice(operand, start_indices, slice_sizes) def DynamicUpdateSlice(self, operand, update, start_indices): """Enqueues a dynamic update slice operation onto the computation. Args: - operand: ComputationDataHandle for the N dimensional array to be updated. + operand: LocalOp for the N dimensional array to be updated. update: N dimensional array comprising the slice update. start_indices: Rank-1 array of N integers comprising the starting indices of the slice along each dimension. Returns: - A ComputationDataHandle representing the added DynamicUpdateSlice op. + A LocalOp representing the added DynamicUpdateSlice op. """ - return _wrap_data_handle( - self._client.DynamicUpdateSlice( - _unwrap_data_handle(operand), - _unwrap_data_handle(update), - _unwrap_data_handle(start_indices))) + return self._client.DynamicUpdateSlice(operand, update, start_indices) def Tuple(self, *ops): """Enqueues a tuple operation onto the computation. 
Args: - ops: a sequence of tuple operands (each a ComputationDataHandle). + ops: a sequence of tuple operands (each a LocalOp). Returns: - A ComputationDataHandle representing the added Tuple op. + A LocalOp representing the added Tuple op. """ - return _wrap_data_handle(self._client.Tuple(_unwrap_data_handles(ops))) + return self._client.Tuple(ops) def GetTupleElement(self, tup, index): """Enqueues a 'get tuple element' operation onto the computation. Args: - tup: the tuple operand (a ComputationDataHandle). + tup: the tuple operand (a LocalOp). index: numeric index to select from the tuple. Returns: - A ComputationDataHandle representing the added GetTupleElement op. + A LocalOp representing the added GetTupleElement op. """ - return _wrap_data_handle( - self._client.GetTupleElement(_unwrap_data_handle(tup), index)) + return self._client.GetTupleElement(tup, index) def Call(self, computation_to_apply, operands): """Enqueues a call operation onto the computation. Args: computation_to_apply: a Computation object. - operands: an iterable of ComputationDataHandle. The number and types of + operands: an iterable of LocalOp. The number and types of operands must match the arity of computation_to_apply. Returns: - A ComputationDataHandle representing the added call op. + A LocalOp representing the added call op. """ - return _wrap_data_handle( - self._client.Call(computation_to_apply.c_local_computation, - _unwrap_data_handles(operands))) + return self._client.Call(computation_to_apply.c_local_computation, operands) def Map(self, operands, computation_to_apply, dimensions, static_operands=()): """Enqueues a map operation onto the computation. Args: - operands: an iterable of ComputationDataHandle. + operands: an iterable of LocalOp. computation_to_apply: a Computation object. dimensions: dimensions over which to apply map the function. static_operands: auxiliary arguments passed to the applied computation. 
Returns: - A ComputationDataHandle representing the added Map op. + A LocalOp representing the added Map op. """ - return _wrap_data_handle( - self._client.Map( - _unwrap_data_handles(operands), - computation_to_apply.c_local_computation, - dimensions, - _unwrap_data_handles(static_operands))) + return self._client.Map(operands, computation_to_apply.c_local_computation, + dimensions, static_operands) def Reduce(self, operand, init_value, computation_to_apply, dimensions): """Enqueues a reduction operation onto the computation. Args: - operand: reduction operand (ComputationDataHandle). - init_value: reduction initial value (ComputationDataHandle). + operand: reduction operand (LocalOp). + init_value: reduction initial value (LocalOp). computation_to_apply: a Computation object - binary reduction function. dimensions: sequence of dimensions (integers) to reduce on. Returns: - A ComputationDataHandle representing the added Reduce op. + A LocalOp representing the added Reduce op. """ - return _wrap_data_handle( - self._client.Reduce( - _unwrap_data_handle(operand), - _unwrap_data_handle(init_value), - computation_to_apply.c_local_computation, - dimensions)) + return self._client.Reduce(operand, init_value, + computation_to_apply.c_local_computation, + dimensions) def ReduceWindow(self, operand, init_value, computation_to_apply, window_dimensions, window_strides, padding): """Enqueues a windowed reduction operation onto the computation. Args: - operand: reduction operand (ComputationDataHandle). - init_value: reduction initial value (ComputationDataHandle). + operand: reduction operand (LocalOp). + init_value: reduction initial value (LocalOp). computation_to_apply: a binary reduction function (Computation). window_dimensions: dimensions of window (sequence of integers). window_strides: strides for window (sequence of integers). padding: PaddingType representing either 'SAME' or 'VALID' padding. Returns: - A ComputationDataHandle representing the added ReduceWindow op. 
+ A LocalOp representing the added ReduceWindow op. """ pads = _convert_padding_type_to_pad_values( padding, self.GetShape(operand).dimensions(), window_dimensions, window_strides) - return _wrap_data_handle( - self._client.ReduceWindowWithGeneralPadding( - _unwrap_data_handle(operand), - _unwrap_data_handle(init_value), - computation_to_apply.c_local_computation, - window_dimensions, window_strides, pads)) + return self._client.ReduceWindowWithGeneralPadding( + operand, init_value, computation_to_apply.c_local_computation, + window_dimensions, window_strides, pads) def RngNormal(self, mu, sigma, dims): """Enqueues an RngNormal operation onto the computation. Args: - mu: A ComputationDataHandle to an F32 scalar specifying the mean. - sigma: A ComputationDataHandle to an F32 scalar specifying the standard + mu: A LocalOp to an F32 scalar specifying the mean. + sigma: A LocalOp to an F32 scalar specifying the standard deviation. dims: A 1D array-like of nonnegative integers specifying the dimensions. - Returns: a ComputationDataHandle to the generated array of F32 values. + Returns: a LocalOp to the generated array of F32 values. """ shape = Shape.array_shape(self.GetShape(mu).element_type(), dims) - return _wrap_data_handle( - self._client.RngNormal( - _unwrap_data_handle(mu), _unwrap_data_handle(sigma), shape)) + return self._client.RngNormal(mu, sigma, shape) def RngUniform(self, a, b, dims): """Enqueues an RngUniform operation onto the computation. Args: - a: a ComputationDataHandle to an F32, S32, or U32 scalar (consistent with + a: a LocalOp to an F32, S32, or U32 scalar (consistent with the type of b) specifying the low end of the interval [a, b) over which values are generated. - b: a ComputationDataHandle to an F32, S32, or U32 scalar (consistent with + b: a LocalOp to an F32, S32, or U32 scalar (consistent with the type of a) specifying the high end of the interval [a, b) over which values are generated. 
dims: A 1D array-like of nonnegative integers specifying the dimensions. - Returns: a ComputationDataHandle to the generated array of values with the + Returns: a LocalOp to the generated array of values with the same numeric type (F32, S32, or U32) as the arguments a and b. """ shape = Shape.array_shape(self.GetShape(a).element_type(), dims) - return _wrap_data_handle( - self._client.RngUniform( - _unwrap_data_handle(a), _unwrap_data_handle(b), shape)) + return self._client.RngUniform(a, b, shape) def While(self, cond, body, init): """Enqueues a While operation onto the computation. @@ -1044,112 +973,105 @@ class ComputationBuilder(object): Args: cond: a Computation for the loop condition, which has type T -> PRED body: a Computation for the loop body, which has type T -> T - init: a ComputationDataHandle for the initial parameter, which has type T + init: a LocalOp for the initial parameter, which has type T - Returns: a ComputationDataHandle representing the While operation. + Returns: a LocalOp representing the While operation. """ - return _wrap_data_handle( - self._client.While(cond.c_local_computation, - body.c_local_computation, - _unwrap_data_handle(init))) + return self._client.While(cond.c_local_computation, + body.c_local_computation, init) def Conditional(self, pred, true_operand, true_computation, false_operand, false_computation): """Enqueues a Conditional operation onto the computation. Args: - predicate: a ComputationDataHandle to test, which has scalar type PRED - true_operand: a ComputationDataHandle of type T_0 + predicate: a LocalOp to test, which has scalar type PRED + true_operand: a LocalOp of type T_0 true_computation: a Computation to apply to true_operand, type T_0 -> S false_operand: a ComputationDatahandle of type T_1 false_computation: a Computation to apply to false_operand, type T_1 -> S - Returns: a ComputationDataHandle representing the Conditional operation. + Returns: a LocalOp representing the Conditional operation.
""" - return _wrap_data_handle( - self._client.Conditional( - _unwrap_data_handle(pred), _unwrap_data_handle(true_operand), - true_computation.c_local_computation, - _unwrap_data_handle(false_operand), - false_computation.c_local_computation)) + return self._client.Conditional( + pred, true_operand, true_computation.c_local_computation, false_operand, + false_computation.c_local_computation) - def IsConstant(self, operand, num_parameters=0): - """Enqueues an IsConstant operation onto the computation. + def IsConstant(self, operand): + """Checks whether the given operand is a compile-time constant. Args: operand: a ComputationDataHandle to test. - num_parameters: optional int, number of computation parameters to treat as - constant (default 0). Returns: bool indicating whether `operand` is a compile-time constant, - meaning its value does not depend on parameters with index greater than or - equal to `num_parameters`. + meaning its value does not depend on any parameters, or on stateful + operators such as `RngNormal` or `Infeed`. + """ + return self._client.IsConstant(operand) + + def BuildConstantSubGraph(self, operand): + """Builds a constant sub graph. + + Args: + operand: a LocalOp to test. + Returns: a LocalComputation that is rooted on the given `operand` which is a + compile-time constant. """ - return self._client.IsConstant(_unwrap_data_handle(operand), num_parameters) + return self._client.BuildConstantSubGraph(operand) def Dot(self, lhs, rhs): """Enqueues a dot operation onto the computation. Args: - lhs: ComputationDataHandle for the rank 1 or rank 2 left-hand-side array. - rhs: ComputationDataHandle for the rank 1 or rank 2 right-hand-side array. + lhs: LocalOp for the rank 1 or rank 2 left-hand-side array. + rhs: LocalOp for the rank 1 or rank 2 right-hand-side array. - Returns: a ComputationDataHandle representing the Dot operation. + Returns: a LocalOp representing the Dot operation.
""" - return _wrap_data_handle( - self._client.Dot(_unwrap_data_handle(lhs), _unwrap_data_handle(rhs))) + return self._client.Dot(lhs, rhs) def DotGeneral(self, lhs, rhs, dimension_numbers): """Enqueues a general dot operation onto the computation. Args: - lhs: ComputationDataHandle for the left-hand-side array. - rhs: ComputationDataHandle for the right-hand-side array. + lhs: LocalOp for the left-hand-side array. + rhs: LocalOp for the right-hand-side array. dimension_numbers: either an xla_data_pb2.DotDimensionNumbers or a nested tuple ((lhs_contract, rhs_contract), (lhs_batch, rhs_batch)) of lists of integers representing the dimensions to treat as contracting dimensions and batch dimensions on each input operand. - Returns: a ComputationDataHandle representing the DotGeneral operation. + Returns: a LocalOp representing the DotGeneral operation. """ if not isinstance(dimension_numbers, xla_data_pb2.DotDimensionNumbers): dimension_numbers = GetDotDimensionsFromLists(dimension_numbers) - return _wrap_data_handle( - self._client.DotGeneral( - _unwrap_data_handle(lhs), _unwrap_data_handle(rhs), - dimension_numbers)) + return self._client.DotGeneral(lhs, rhs, dimension_numbers) def Conv(self, lhs, rhs, window_strides, padding): """Enqueues a Conv operation onto the computation. Args: - lhs: ComputationDataHandle for the rank N+2 array of inputs. - rhs: ComputationDataHandle for the rank N+2 array of kernel weights. + lhs: LocalOp for the rank N+2 array of inputs. + rhs: LocalOp for the rank N+2 array of kernel weights. window_strides: length-N array-like of integer kernel strides. padding: PaddingType representing either 'SAME' or 'VALID' padding. - Returns: a ComputationDataHandle representing the Conv operation. + Returns: a LocalOp representing the Conv operation. 
""" pads = _convert_padding_type_to_pad_values( padding, self.GetShape(lhs).dimensions()[2:], self.GetShape(rhs).dimensions()[2:], window_strides) dimension_numbers = self._GetConvDimensionNumbers(len(window_strides)) - return _wrap_data_handle( - self._client.ConvGeneralDilated(_unwrap_data_handle(lhs), - _unwrap_data_handle(rhs), - window_strides, - pads, - (), - (), - dimension_numbers)) + return self._client.ConvGeneralDilated(lhs, rhs, window_strides, pads, (), + (), dimension_numbers) def ConvWithGeneralPadding(self, lhs, rhs, window_strides, padding, lhs_dilation, rhs_dilation): """Enqueues a ConvWithGeneralPadding operation onto the computation. Args: - lhs: ComputationDataHandle for the rank N+2 array of inputs. - rhs: ComputationDataHandle for the rank N+2 array of kernel weights. + lhs: LocalOp for the rank N+2 array of inputs. + rhs: LocalOp for the rank N+2 array of kernel weights. window_strides: length-N array-like of kernel strides. padding: length-N array-like of pairs of integers of (low, high) padding. lhs_dilation: length-N array-like of dilation factors. @@ -1159,14 +1081,9 @@ class ComputationBuilder(object): A ComputationdataHandle representing the added ConvWithGeneralPadding op. 
""" dimension_numbers = self._GetConvDimensionNumbers(len(window_strides)) - return _wrap_data_handle( - self._client.ConvGeneralDilated(_unwrap_data_handle(lhs), - _unwrap_data_handle(rhs), - window_strides, - padding, - lhs_dilation, - rhs_dilation, - dimension_numbers)) + return self._client.ConvGeneralDilated(lhs, rhs, window_strides, padding, + lhs_dilation, rhs_dilation, + dimension_numbers) def _GetConvDimensionNumbers(self, num_spatial_dims): """Create ConvolutionDimensionNumbers proto for convolutions.""" @@ -1196,15 +1113,14 @@ def _forward_methods_to_local_builder(): """Generate a forwarding method that wraps/unwraps data handles.""" def forward(self, *args, **kwargs): - unwrapped_args = [_unwrap_data_handle(arg) for arg in args] + arg_list = list(args) - if is_binop and len(unwrapped_args) < 3: - unwrapped_args.append(kwargs.get('broadcast_dimensions', ())) + if is_binop and len(arg_list) < 3: + arg_list.append(kwargs.get('broadcast_dimensions', ())) - return _wrap_data_handle( - target_method( - self._client, # pylint: disable=protected-access - *unwrapped_args)) + return target_method( + self._client, # pylint: disable=protected-access + *arg_list) return forward diff --git a/tensorflow/compiler/xla/rpc/BUILD b/tensorflow/compiler/xla/rpc/BUILD index 977f8637873a4b6555798f533010a28ff36e8679..0d56a9a477b15964ad45e798865aa8d2c7385073 100644 --- a/tensorflow/compiler/xla/rpc/BUILD +++ b/tensorflow/compiler/xla/rpc/BUILD @@ -55,7 +55,7 @@ tf_cc_test( deps = [ ":grpc_stub", "//tensorflow/compiler/xla/client", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/rpc/grpc_client_test.cc b/tensorflow/compiler/xla/rpc/grpc_client_test.cc index b559ee4b5a345dbb2cc481b571562a0a630b3294..10997c0719dfb80efc7b855c7888500caeb1591b 100644 --- 
a/tensorflow/compiler/xla/rpc/grpc_client_test.cc +++ b/tensorflow/compiler/xla/rpc/grpc_client_test.cc @@ -24,7 +24,7 @@ limitations under the License. #include "grpc++/security/credentials.h" #include "tensorflow/compiler/xla/client/client.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/rpc/grpc_stub.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/core/lib/io/path.h" @@ -84,7 +84,7 @@ TEST_F(GRPCClientTestBase, ItsAlive) { } TEST_F(GRPCClientTestBase, AxpyTenValues) { - ComputationBuilder builder(client_.get(), "axpy_10"); + XlaBuilder builder("axpy_10"); auto alpha = builder.ConstantR0(3.1415926535); auto x = builder.ConstantR1( {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); diff --git a/tensorflow/compiler/xla/rpc/grpc_service.cc b/tensorflow/compiler/xla/rpc/grpc_service.cc index 0b100bd108e239964483ed5ba279dff61bce0023..ffb72fc73c5bc1ad6e648fb3d772eb5749700dc0 100644 --- a/tensorflow/compiler/xla/rpc/grpc_service.cc +++ b/tensorflow/compiler/xla/rpc/grpc_service.cc @@ -75,6 +75,13 @@ namespace xla { [this, arg, result]() { return service_->Execute(arg, result); }); } +::grpc::Status GRPCService::ExecuteGraph(::grpc::ServerContext* /*context*/, + const ExecuteGraphRequest* arg, + ExecuteResponse* result) { + return DelegateRPC( + [this, arg, result]() { return service_->ExecuteGraph(arg, result); }); +} + ::grpc::Status GRPCService::ExecuteAsync(::grpc::ServerContext* context, const ExecuteAsyncRequest* arg, ExecuteAsyncResponse* result) { diff --git a/tensorflow/compiler/xla/rpc/grpc_service.h b/tensorflow/compiler/xla/rpc/grpc_service.h index fad74375bd59f7254d97c4adbc6b3d2f5fbf6b29..50f02796f2d45baf894841782cd96d8d51a5ba00 100644 --- a/tensorflow/compiler/xla/rpc/grpc_service.h +++ b/tensorflow/compiler/xla/rpc/grpc_service.h @@ -54,6 +54,10 @@ class GRPCService : public 
grpc::XlaService::Service { const ExecuteRequest* arg, ExecuteResponse* result) override; + ::grpc::Status ExecuteGraph(::grpc::ServerContext* context, + const ExecuteGraphRequest* arg, + ExecuteResponse* result) override; + ::grpc::Status ExecuteAsync(::grpc::ServerContext* context, const ExecuteAsyncRequest* arg, ExecuteAsyncResponse* result) override; diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index ed0da47681c7eff8120ed46b4088213b0bb24734..aa3a6261e0117c4c2e5c745d6851142b22a62a07 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -26,6 +26,7 @@ xla_proto_library( xla_proto_library( name = "hlo_proto", srcs = ["hlo.proto"], + visibility = ["//visibility:public"], deps = ["//tensorflow/compiler/xla:xla_data_proto"], ) @@ -200,7 +201,22 @@ tf_cc_test( cc_library( name = "hlo_evaluator", - srcs = ["hlo_evaluator.cc"], + srcs = [ + "hlo_evaluator.cc", + "hlo_evaluator_typed_visitor.h", + "hlo_evaluator_typed_visitor_bfloat16.cc", + "hlo_evaluator_typed_visitor_bool.cc", + "hlo_evaluator_typed_visitor_complex64.cc", + "hlo_evaluator_typed_visitor_double.cc", + "hlo_evaluator_typed_visitor_float.cc", + "hlo_evaluator_typed_visitor_half.cc", + "hlo_evaluator_typed_visitor_int32.cc", + "hlo_evaluator_typed_visitor_int64.cc", + "hlo_evaluator_typed_visitor_int8.cc", + "hlo_evaluator_typed_visitor_uint32.cc", + "hlo_evaluator_typed_visitor_uint64.cc", + "hlo_evaluator_typed_visitor_uint8.cc", + ], hdrs = ["hlo_evaluator.h"], deps = [ ":hlo", @@ -233,7 +249,7 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service:hlo_element_type_converter", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", @@ 
-370,6 +386,7 @@ tf_cc_test( ":hlo_matchers", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -780,6 +797,7 @@ cc_library( srcs = ["compiler.cc"], hdrs = ["compiler.h"], deps = [ + ":buffer_value", ":executable", ":hlo", ":hlo_module_config", @@ -1014,6 +1032,7 @@ tf_cc_test( srcs = ["buffer_assignment_test.cc"], deps = [ ":buffer_assignment", + ":buffer_value", ":call_graph", ":computation_tracker", ":copy_insertion", @@ -1095,6 +1114,7 @@ tf_cc_test( name = "heap_simulator_test", srcs = ["heap_simulator_test.cc"], deps = [ + ":buffer_value", ":heap_simulator", ":hlo", ":hlo_ordering", @@ -1163,6 +1183,7 @@ tf_cc_test( name = "hlo_scheduling_test", srcs = ["hlo_scheduling_test.cc"], deps = [ + ":buffer_value", ":hlo", ":hlo_ordering", ":hlo_scheduling", @@ -1665,10 +1686,10 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/client", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -1749,11 +1770,27 @@ tf_cc_test( ], ) +cc_library( + name = "buffer_value", + srcs = ["buffer_value.cc"], + hdrs = ["buffer_value.h"], + deps = [ + ":hlo", + ":hlo_proto", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + cc_library( name = "logical_buffer", srcs = ["logical_buffer.cc"], hdrs = ["logical_buffer.h"], deps = [ + ":buffer_value", 
":hlo", ":hlo_proto", "//tensorflow/compiler/xla:shape_util", @@ -1769,6 +1806,7 @@ cc_library( srcs = ["hlo_value.cc"], hdrs = ["hlo_value.h"], deps = [ + ":buffer_value", ":hlo", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", @@ -2066,6 +2104,7 @@ cc_library( hdrs = ["hlo_rematerialization.h"], deps = [ ":buffer_liveness", + ":buffer_value", ":call_graph", ":flatten_call_graph", ":hlo", @@ -2384,7 +2423,6 @@ tf_cc_test( srcs = ["hlo_tfgraph_builder_test.cc"], deps = [ ":hlo_tfgraph_builder", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:protos_all_cc", @@ -2421,7 +2459,6 @@ tf_cc_test( ":hlo_graph_dumper", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:xla_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep "//tensorflow/core:lib", @@ -2447,6 +2484,7 @@ tf_cc_test( srcs = ["transpose_folding_test.cc"], deps = [ ":hlo", + ":hlo_matchers", ":shape_inference", ":transpose_folding", "//tensorflow/compiler/xla:literal_util", @@ -2454,10 +2492,11 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service/gpu:ir_emission_utils", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -2491,6 +2530,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", + 
"//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -2688,6 +2728,33 @@ tf_cc_test( ], ) +cc_library( + name = "while_loop_constant_sinking", + srcs = ["while_loop_constant_sinking.cc"], + hdrs = ["while_loop_constant_sinking.h"], + deps = [ + ":hlo", + ":hlo_pass", + ":while_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "while_loop_constant_sinking_test", + srcs = ["while_loop_constant_sinking_test.cc"], + deps = [ + ":hlo_matchers", + ":while_loop_constant_sinking", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + "//tensorflow/core:test", + ], +) + cc_library( name = "despecializer", srcs = ["despecializer.cc"], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 8e785de68cb1fbe4ce9fd58a661bdc208725483b..4ec79a024463b5129cc8687235e673f9ea95959d 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -291,6 +291,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim, HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped); + StatusOr OptimizeDotOfGather(HloInstruction* dot); + // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. 
HloComputation* computation_;
@@ -912,6 +914,147 @@ StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper(
   return add_result;
 }
 
+// Rewrites dot(DynamicSlice(ctA), ctB) or dot(ctA, DynamicSlice(ctB)), where
+// ctA and ctB are constants and the slice keeps the whole contracting
+// dimension while selecting a single row/column, into
+// DynamicSlice(dot(ctA, ctB)). Returns the replacement instruction, or nullptr
+// if `dot` does not match this pattern.
+StatusOr<HloInstruction*> AlgebraicSimplifierVisitor::OptimizeDotOfGather(
+    HloInstruction* dot) {
+  const DotDimensionNumbers& dnums = dot->dot_dimension_numbers();
+  if (dnums.lhs_contracting_dimensions_size() != 1 ||
+      dnums.rhs_contracting_dimensions_size() != 1 ||
+      dnums.lhs_batch_dimensions_size() != 0 ||
+      dnums.rhs_batch_dimensions_size() != 0 ||
+      dot->shape().dimensions_size() != 2) {  // dot output 2D
+    VLOG(10) << "DotOfGather: Can only optimize 2D, non-batch dot operations.";
+    return nullptr;
+  }
+
+  // Optimize either dot(DS(ctA), ctB)) or dot(ctB, DS(ctA)).
+  // Currently a Gather is a DynamicSlice.
+  auto is_dynamic_slice_constant_combination =
+      [](HloInstruction* a, HloInstruction* b, int a_contracting_dimension) {
+        // First operand is a DynamicSlice(Constant).
+        if (a->opcode() != HloOpcode::kDynamicSlice) {
+          return false;
+        }
+        auto* dynamic_slice_op = a->operand(0);
+        if (dynamic_slice_op->opcode() != HloOpcode::kConstant) {
+          return false;
+        }
+        // Second operand is a Constant.
+        if (b->opcode() != HloOpcode::kConstant) {
+          return false;
+        }
+        // Both operands must be matrices; the indexing below assumes rank 2.
+        if (a->shape().dimensions_size() != 2 ||
+            b->shape().dimensions_size() != 2) {
+          return false;
+        }
+        // The DynamicSlice output is a vector.
+        const Shape& dynamic_slice_shape = a->shape();
+        if (dynamic_slice_shape.dimensions(1 - a_contracting_dimension) != 1) {
+          return false;
+        }
+        // Constant size is the same before and after slice in the contracting
+        // dimension, otherwise we either must precompute for all possible slice
+        // indices or dot is invalid.
+        const Shape& dynamic_slice_op_shape = dynamic_slice_op->shape();
+        if (dynamic_slice_op_shape.dimensions(a_contracting_dimension) !=
+            dynamic_slice_shape.dimensions(a_contracting_dimension)) {
+          return false;
+        }
+        return true;
+      };
+
+  HloInstruction* lhs = dot->mutable_operand(0);
+  HloInstruction* rhs = dot->mutable_operand(1);
+  int lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0);
+  int rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0);
+
+  if (!is_dynamic_slice_constant_combination(
+          lhs, rhs, /*a_contracting_dimension=*/lhs_contracting_dimension) &&
+      !is_dynamic_slice_constant_combination(
+          rhs, lhs, /*a_contracting_dimension=*/rhs_contracting_dimension)) {
+    VLOG(10) << "DotOfGather: Can only optimize dot(DS(ctA), ctB)) or "
+                "dot(ctB, DS(ctA)), where the two constants have equal "
+                "contracting dimensions.";
+    return nullptr;
+  }
+
+  // LHS is DynamicSlice:
+  // input: dot(DS(ctA), ctB))
+  // where DS(ctA) = DS({M x K}, {start, 0}, {1, K}) and ctB = {K x N}.
+  // => input dimensions: dot({1 x K}, {K x N}) => {1 x N}.
+  // output: DS(dot(ctA, ctB))
+  // => output dimensions: DS ({M x N}, {start, 0}, {1, N}) => {1 x N}.
+
+  // RHS is DynamicSlice:
+  // input: dot(ctA, DS(ctB))
+  // where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, start}, {K, 1}).
+  // => input dimensions: dot({M x K}, {K x 1}) => {M x 1}.
+  // output: DS(dot(ctA, ctB))
+  // => output dimensions: DS ({M x N}, {0, start}, {M, 1}) => {M x 1}.
+
+  bool lhs_is_dynamic_slice = lhs->opcode() == HloOpcode::kDynamicSlice;
+
+  // ctA:
+  HloInstruction* left_operand =
+      lhs_is_dynamic_slice ? lhs->mutable_operand(0) : lhs;
+  // ctB:
+  HloInstruction* right_operand =
+      lhs_is_dynamic_slice ? rhs : rhs->mutable_operand(0);
+  // Build ctA x ctB. The memoized dot keeps the element type of the original
+  // dot rather than assuming F32.
+  const int64 m =
+      left_operand->shape().dimensions(1 - lhs_contracting_dimension);
+  const int64 n =
+      right_operand->shape().dimensions(1 - rhs_contracting_dimension);
+  auto memoized_shape =
+      ShapeUtil::MakeShape(dot->shape().element_type(), {m, n});
+  auto* memoized_inst = computation_->AddInstruction(HloInstruction::CreateDot(
+      memoized_shape, left_operand, right_operand, dnums));
+  // Get pair {start, 0} or {0, start}.
+  HloInstruction* original_start_indices =
+      lhs_is_dynamic_slice ? lhs->mutable_operand(1) : rhs->mutable_operand(1);
+  // Position of start:
+  int index_of_non_zero_start = lhs_is_dynamic_slice
+                                    ? 1 - lhs_contracting_dimension
+                                    : 1 - rhs_contracting_dimension;
+  // Position of zero:
+  int index_of_zero_start = 1 - index_of_non_zero_start;
+
+  // Slice out start and 0 components and reorder if necessary.
+  auto indices_type = original_start_indices->shape().element_type();
+  Shape s_shape = ShapeUtil::MakeShape(indices_type, {1});
+  Shape d_shape = ShapeUtil::MakeShape(indices_type, {2});
+  HloInstruction* non_zero_start =
+      computation_->AddInstruction(HloInstruction::CreateSlice(
+          s_shape, original_start_indices, {index_of_non_zero_start},
+          {index_of_non_zero_start + 1}, {1}));
+  HloInstruction* zero_start =
+      computation_->AddInstruction(HloInstruction::CreateSlice(
+          s_shape, original_start_indices, {index_of_zero_start},
+          {index_of_zero_start + 1}, {1}));
+  HloInstruction* new_start_indices =
+      lhs_is_dynamic_slice
+          ? computation_->AddInstruction(HloInstruction::CreateConcatenate(
+                d_shape, {non_zero_start, zero_start}, 0))
+          : computation_->AddInstruction(HloInstruction::CreateConcatenate(
+                d_shape, {zero_start, non_zero_start}, 0));
+
+  // Build DynamicSlice(ctA x ctB).
+  const int64 new_slice_m = lhs_is_dynamic_slice ? 1 : m;
+  const int64 new_slice_n = lhs_is_dynamic_slice ? n : 1;
+  auto* memoized_lookup =
+      computation_->AddInstruction(HloInstruction::CreateDynamicSlice(
+          dot->shape(), memoized_inst, new_start_indices,
+          {new_slice_m, new_slice_n}));
+
+  return memoized_lookup;
+}
+
 Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   HloInstruction *lhs, *rhs;
   CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs))));
@@ -941,6 +1084,17 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
     return ReplaceInstruction(dot, dot_of_concat_optimized);
   }
 
+  // Simplify dot(ConstA, Gather(Index, ConstB)) to:
+  // Gather(Index, dot*(ConstA, ConstB)), where dot* is an appropriately
+  // batched version of dot.
+  TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_gather_optimized,
+                      OptimizeDotOfGather(dot));
+  if (dot_of_gather_optimized) {
+    VLOG(10) << "Replaced dot(constA, gather(i, constB)) with "
+                "gather(i, dot*(constA, constB))";
+    return ReplaceInstruction(dot, dot_of_gather_optimized);
+  }
+
   if (enable_dot_strength_reduction_ && !is_layout_sensitive_) {
     TF_ASSIGN_OR_RETURN(bool did_strength_reduction,
                         HandleDotStrengthReduction(dot));
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index d0c99bf818cd54b897ae9da6f9c46862254d64e5..4e082877c776c35bab499c805fef7632765a3ee1 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2963,5 +2963,208 @@ TEST_F(AlgebraicSimplifierTest, DynamicUpdateSliceZeroUpdate) {
 INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation,
                         DotOfConcatSimplificationTest,
                         ::testing::ValuesIn(kDotOfConcatTestSpecs));
+
+struct DotOfGatherTestSpec {
+  int64 m;
+  int64 k;
+  int64 n;
+  int s;      // start index for dynamic slice on the non-contracting dimension
+  int64 lcd;  // left contracting dimension
+  int64 rcd;  // right contracting dimension
+  bool neg;   // is negative testcase
+};
+
+class
DotOfGatherSimplificationTest
+    : public HloVerifiedTestBase,
+      public ::testing::WithParamInterface<DotOfGatherTestSpec> {};
+
+// input: dot(DS(ctA), ctB))
+// where DS(ctA) = DS({M x K}, {s, 0}, {1, K}) and ctB = {K x N}.
+// => input dimensions: dot({1 x K}, {K x N}) => {1 x N}.
+// output: DS(dot(ctA, ctB))
+// => output dimensions: DS ({M x N}, {s, 0}, {1, N}) => {1 x N}.
+TEST_P(DotOfGatherSimplificationTest, ConstantRHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfGatherTestSpec spec = GetParam();
+
+  // The slice start must index an existing row/column of the sliced operand.
+  ASSERT_LT(spec.s, spec.m);
+
+  // For negative tests, increase k of the dynamic slice argument to prevent the
+  // optimization (constants ctA, ctB must have equal contracting dimensions).
+  int64 k_increase = spec.neg ? 5 : 0;
+  int64 lhs_rows = (spec.lcd == 0) ? (spec.k + k_increase) : spec.m;
+  int64 lhs_cols = (spec.lcd == 0) ? spec.m : (spec.k + k_increase);
+  auto* lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows,
+          /*cols=*/lhs_cols)));
+
+  int32 start_row = (spec.lcd == 0) ? 0 : spec.s;
+  int32 start_col = (spec.lcd == 0) ? spec.s : 0;
+  const auto start_indices =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR1<int32>({start_row, start_col})));
+  int64 slice_row_size = (spec.lcd == 0) ? spec.k : 1;
+  int64 slice_col_size = (spec.lcd == 0) ? 1 : spec.k;
+  Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size});
+  auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice(
+      ds_shape, lhs, start_indices, {slice_row_size, slice_col_size}));
+
+  int64 rhs_rows = (spec.rcd == 0) ? spec.k : spec.n;
+  int64 rhs_cols = (spec.rcd == 0) ? spec.n : spec.k;
+  auto* rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows,
+          /*cols=*/rhs_cols)));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(spec.lcd);
+  dot_dnums.add_rhs_contracting_dimensions(spec.rcd);
+
+  int64 dot_row_size = 1;
+  int64 dot_col_size = spec.n;
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, ds, rhs, dot_dnums));
+
+  auto computation = module().AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module()));
+  ASSERT_TRUE(run_successful);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  // Only positive specs may be rewritten into DS(dot(ctA, ctB)).
+  if (spec.neg) {
+    EXPECT_NE(computation->root_instruction()->opcode(),
+              HloOpcode::kDynamicSlice);
+  } else {
+    EXPECT_THAT(computation->root_instruction(),
+                op::DynamicSlice(op::Dot(op::Constant(), op::Constant()),
+                                 op::Concatenate()));
+  }
+}
+
+// input: dot(ctA, DS(ctB))
+// where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, s}, {K, 1}).
+// => input dimensions: dot({M x K}, {K x 1}) => {M x 1}.
+// output: DS(dot(ctA, ctB))
+// => output dimensions: DS ({M x N}, {0, s}, {M, 1}) => {M x 1}.
+TEST_P(DotOfGatherSimplificationTest, ConstantLHS) {
+  HloComputation::Builder builder(TestName());
+
+  DotOfGatherTestSpec spec = GetParam();
+
+  // The slice start must index an existing row/column of the sliced operand.
+  ASSERT_LT(spec.s, spec.n);
+
+  int64 lhs_rows = (spec.lcd == 0) ? spec.k : spec.m;
+  int64 lhs_cols = (spec.lcd == 0) ? spec.m : spec.k;
+  auto* lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows,
+          /*cols=*/lhs_cols)));
+
+  // For negative tests, increase k of the dynamic slice argument to prevent the
+  // optimization (constants ctA, ctB must have equal contracting dimensions).
+  int64 k_increase = spec.neg ? 5 : 0;
+  int64 rhs_rows = (spec.rcd == 0) ? (spec.k + k_increase) : spec.n;
+  int64 rhs_cols = (spec.rcd == 0) ? spec.n : (spec.k + k_increase);
+  auto* rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR2F32Linspace(
+          /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows,
+          /*cols=*/rhs_cols)));
+
+  int32 start_row = (spec.rcd == 0) ? 0 : spec.s;
+  int32 start_col = (spec.rcd == 0) ? spec.s : 0;
+  const auto start_indices =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR1<int32>({start_row, start_col})));
+  int64 slice_row_size = (spec.rcd == 0) ? spec.k : 1;
+  int64 slice_col_size = (spec.rcd == 0) ? 1 : spec.k;
+  Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size});
+  auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice(
+      ds_shape, rhs, start_indices, {slice_row_size, slice_col_size}));
+
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_contracting_dimensions(spec.lcd);
+  dot_dnums.add_rhs_contracting_dimensions(spec.rcd);
+
+  int64 dot_row_size = spec.m;
+  int64 dot_col_size = 1;
+  Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size});
+  builder.AddInstruction(
+      HloInstruction::CreateDot(dot_shape, lhs, ds, dot_dnums));
+
+  auto computation = module().AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module()));
+  ASSERT_TRUE(run_successful);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape));
+
+  // Only positive specs may be rewritten into DS(dot(ctA, ctB)).
+  if (spec.neg) {
+    EXPECT_NE(computation->root_instruction()->opcode(),
+              HloOpcode::kDynamicSlice);
+  } else {
+    EXPECT_THAT(computation->root_instruction(),
+                op::DynamicSlice(op::Dot(op::Constant(), op::Constant()),
+                                 op::Concatenate()));
+  }
+}
+
+// Returns every positive spec followed by its copy with neg set to true.
+std::vector<DotOfGatherTestSpec> DotOfGatherPositiveNegativeTests() {
+  std::vector<DotOfGatherTestSpec> positives = {
+      // "Classical dot", i.e. matrix multiply:
+      {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/0,
+       /*neg=*/false},
+      {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/0,
+       /*neg=*/false},
+      {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/0,
+       /*neg=*/false},
+      // Note: testing for m=1 and n=1 is unnecessary, as this optimizes to
+      // dot(ct, ct) before DotOfGather optimization kicks in.
+      // Contract on rows:
+      {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/0,
+       /*neg=*/false},
+      {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/0,
+       /*neg=*/false},
+      {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/0,
+       /*neg=*/false},
+      // Reverse matrix multiply:
+      {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/1,
+       /*neg=*/false},
+      {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/1,
+       /*neg=*/false},
+      {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/1,
+       /*neg=*/false},
+      // Contract on columns:
+      {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/1,
+       /*neg=*/false},
+      {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/1,
+       /*neg=*/false},
+      {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/1,
+       /*neg=*/false},
+  };
+  std::vector<DotOfGatherTestSpec> all;
+  for (const DotOfGatherTestSpec& positive_test : positives) {
+    all.push_back(positive_test);
+    DotOfGatherTestSpec negative_test = positive_test;
+    negative_test.neg = true;
+    all.push_back(negative_test);
+  }
+  return all;
+}
+
+INSTANTIATE_TEST_CASE_P(
+    DotOfGatherSimplificationTestInstantiation, DotOfGatherSimplificationTest,
+    ::testing::ValuesIn(DotOfGatherPositiveNegativeTests()));
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
index 43ebe92c5ec1c945780f76ca4178a94f948a81b9..ed0746980f87ac2bea79c308644dc63769f9e309 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
@@ -33,7 +33,7 @@ BFloat16Propagation::BFloat16Propagation(
     const BFloat16Support* bfloat16_support)
     : bfloat16_support_(bfloat16_support) {}
 
-void BFloat16Propagation::DetermineAndMutateFusionComputationPrecision(
+void BFloat16Propagation::DetermineFusionComputationPrecision(
     HloInstruction* fusion) {
   CHECK_EQ(fusion->opcode(), HloOpcode::kFusion);
if (!bfloat16_support_->SupportsMixedPrecisions(*fusion)) {
@@ -48,15 +48,13 @@ void BFloat16Propagation::DetermineAndMutateFusionComputationPrecision(
 
   auto root = fusion->fused_instructions_computation()->root_instruction();
   // Adjust root's element types according to the fusion's output shape.
-  ShapeUtil::ForEachMutableSubshape(
-      root->mutable_shape(), [&](Shape* subshape, const ShapeIndex& index) {
-        if (subshape->element_type() != F32) {
+  ShapeUtil::ForEachSubshape(
+      root->shape(), [&](const Shape& subshape, const ShapeIndex& index) {
+        if (subshape.element_type() != F32) {
           return;
         }
-        if (ShapeUtil::GetSubshape(fusion->shape(), index).element_type() ==
-            BF16) {
-          subshape->set_element_type(BF16);
-          changed_ = true;
+        if (OutputTypeAfterChange(fusion, index) == BF16) {
+          AddToOrRemoveFromBF16ChangeSet(root, index, BF16);
           VLOG(2) << "Fused root " << root->ToString() << " at shape index "
                   << index << " changed to BF16 precision for fusion "
                   << fusion->ToString();
@@ -67,13 +65,110 @@ void BFloat16Propagation::DetermineAndMutateFusionComputationPrecision(
   auto insts =
       fusion->fused_instructions_computation()->MakeInstructionPostOrder();
   for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) {
-    DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false);
+    DetermineInstructionPrecision(*inst_it, /*skip_parameters=*/false);
   }
-  computations_visited_in_mutation_pass_.insert(
+  computations_visited_in_backward_pass_.insert(
       fusion->fused_instructions_computation());
+
+  RevertIfFusionInternalBF16Changes(fusion);
+}
+
+// Drops pending BF16 changes inside `fusion`'s fused computation for
+// instructions whose operands carry no pending change, unless the instruction
+// aliases a root buffer that is itself being changed. For nested fusions,
+// pending changes on fused parameters are dropped when the matching operand
+// has none, and the nested fusion is then processed recursively.
+void BFloat16Propagation::RevertIfFusionInternalBF16Changes(
+    HloInstruction* fusion) {
+  // True if `inst` has at least one pending BF16 change recorded.
+  auto has_changes = [this](HloInstruction* inst) {
+    auto it = changes_to_bf16_.find(inst);
+    return it != changes_to_bf16_.end() && !it->second.empty();
+  };
+
+  auto root = fusion->fused_instructions_computation()->root_instruction();
+  // Collect the values behind every root output index pending a BF16 change.
+  tensorflow::gtl::FlatSet<const HloValue*> changed_root_buffers;
+
+  auto root_changes_it = changes_to_bf16_.find(root);
+  if (root_changes_it != changes_to_bf16_.end()) {
+    for (const auto& index : root_changes_it->second) {
+      for (const HloValue* value :
+           dataflow_->GetValueSet(root, index).values()) {
+        changed_root_buffers.insert(value);
+      }
+    }
+  }
+
+  // True if any F32 output of `inst` shares a value with a changed root buffer.
+  auto aliases_changed_root_buffer =
+      [this, &changed_root_buffers](const HloInstruction* inst) {
+        bool aliasing = false;
+        ShapeUtil::ForEachSubshape(
+            inst->shape(), [&](const Shape& subshape, const ShapeIndex& index) {
+              if (aliasing) {
+                // Skip if aliasing is already found.
+                return;
+              }
+              // Only F32 buffers are considered for changing to BF16 in this
+              // pass.
+              if (subshape.element_type() != F32) {
+                return;
+              }
+              for (const HloValue* value :
+                   dataflow_->GetValueSet(inst, index).values()) {
+                if (ContainsKey(changed_root_buffers, value)) {
+                  aliasing = true;
+                  break;
+                }
+              }
+            });
+        return aliasing;
+      };
+
+  for (auto inst :
+       fusion->fused_instructions_computation()->MakeInstructionPostOrder()) {
+    if (inst->opcode() == HloOpcode::kParameter) {
+      continue;
+    }
+    if (aliases_changed_root_buffer(inst)) {
+      continue;
+    }
+    if (inst->opcode() == HloOpcode::kFusion) {
+      bool parameter_reverted = false;
+      for (int64 i = 0; i < inst->operand_count(); ++i) {
+        if (has_changes(inst->mutable_operand(i))) {
+          // Changes on the operand have not been reverted.
+          continue;
+        }
+        auto* fused_parameter = inst->fused_parameter(i);
+        if (has_changes(fused_parameter)) {
+          changes_to_bf16_.erase(fused_parameter);
+          parameter_reverted = true;
+        }
+      }
+      if (parameter_reverted) {
+        RevertIfFusionInternalBF16Changes(inst);
+      }
+    }
+    if (!has_changes(inst)) {
+      continue;
+    }
+    // Revert only when no operand of `inst` has a pending change.
+    bool revert_changes = true;
+    for (auto operand : inst->operands()) {
+      if (has_changes(operand)) {
+        revert_changes = false;
+        break;
+      }
+    }
+    if (revert_changes) {
+      changes_to_bf16_.erase(inst);
+    }
+  }
 }
 
-void BFloat16Propagation::DetermineAndMutateWhileComputationsPrecision(
+void BFloat16Propagation::DetermineWhileComputationsPrecision(
     HloInstruction* while_hlo) {
   CHECK_EQ(while_hlo->opcode(), HloOpcode::kWhile);
 
@@ -86,16 +181,14 @@ void BFloat16Propagation::DetermineAndMutateWhileComputationsPrecision(
   auto body_root = body->root_instruction();
   HloComputation* condition = while_hlo->while_condition();
 
-  ShapeUtil::ForEachMutableSubshape(
-      body_root->mutable_shape(),
-      [this, while_hlo, body_root](Shape* subshape, const ShapeIndex& index) {
-        if (subshape->element_type() != F32) {
+  ShapeUtil::ForEachSubshape(
+      body_root->shape(), [this, while_hlo, body_root](
+                              const Shape& subshape, const ShapeIndex& index) {
+        if (subshape.element_type() != F32) {
           return;
         }
-        if (ShapeUtil::GetSubshape(while_hlo->shape(), index).element_type() ==
-            BF16) {
-          subshape->set_element_type(BF16);
-          changed_ = true;
+        if (OutputTypeAfterChange(while_hlo, index) == BF16) {
+          AddToOrRemoveFromBF16ChangeSet(body_root, index, BF16);
           VLOG(2) << "While body root " << body_root->ToString()
                   << " at shape index " << index
                   << " changed to BF16 precision for while "
@@ -106,30 +199,30 @@ void BFloat16Propagation::DetermineAndMutateWhileComputationsPrecision(
   auto body_insts = body->MakeInstructionPostOrder();
   for (auto inst_it = body_insts.rbegin(); inst_it != body_insts.rend();
        ++inst_it) {
-    DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false);
+
DetermineInstructionPrecision(*inst_it, /*skip_parameters=*/false); } - computations_visited_in_mutation_pass_.insert(body); + computations_visited_in_backward_pass_.insert(body); auto condition_insts = condition->MakeInstructionPostOrder(); for (auto inst_it = condition_insts.rbegin(); inst_it != condition_insts.rend(); ++inst_it) { - DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); + DetermineInstructionPrecision(*inst_it, /*skip_parameters=*/false); } - computations_visited_in_mutation_pass_.insert(condition); + computations_visited_in_backward_pass_.insert(condition); } bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, const ShapeIndex& index) const { - auto value_set = dataflow_->GetValueSet(&hlo, index); + auto& value_set = dataflow_->GetValueSet(&hlo, index); for (const HloValue* value : value_set.values()) { if (ContainsKey(values_that_must_be_kept_as_f32_, value)) { return false; } - if (value->shape().element_type() == BF16) { + if (ValueTypeAfterChange(value) == BF16) { continue; } for (const HloUse& use : value->uses()) { - if (!ContainsKey(instructions_visited_in_mutation_pass_, + if (!ContainsKey(instructions_visited_in_backward_pass_, use.instruction)) { // We don't know yet whether use.instruction will consume BF16 since it // hasn't been visited. Although we visit instructions in reverse @@ -145,26 +229,23 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, // precision, or a called computation's parameters have been changed to // BF16 for fusions or whiles. 
if (use.instruction->opcode() == HloOpcode::kFusion) { - const auto* fused_parameter = + auto* fused_parameter = use.instruction->fused_parameter(use.operand_number); - if (ShapeUtil::GetSubshape(fused_parameter->shape(), use.operand_index) - .element_type() != BF16) { + if (OutputTypeAfterChange(fused_parameter, use.operand_index) != BF16) { return false; } continue; } else if (use.instruction->opcode() == HloOpcode::kWhile) { - const auto* cond_parameter = + auto* cond_parameter = use.instruction->while_condition()->parameter_instruction( use.operand_number); - if (ShapeUtil::GetSubshape(cond_parameter->shape(), use.operand_index) - .element_type() != BF16) { + if (OutputTypeAfterChange(cond_parameter, use.operand_index) != BF16) { return false; } - const auto* body_parameter = + auto* body_parameter = use.instruction->while_body()->parameter_instruction( use.operand_number); - if (ShapeUtil::GetSubshape(body_parameter->shape(), use.operand_index) - .element_type() != BF16) { + if (OutputTypeAfterChange(body_parameter, use.operand_index) != BF16) { return false; } continue; @@ -174,19 +255,20 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, continue; } // If the op propagates precision and it outputs a BF16, then it's OK to - // supply BF16 also as the input. In the backward mutation pass, the users - // shapes should have already been processed. + // supply BF16 also as the input. In the backward pass, the users shapes + // should have already been processed. 
PrimitiveType user_output_type = PRIMITIVE_TYPE_INVALID; if (use.instruction->opcode() == HloOpcode::kTuple || (use.instruction->opcode() == HloOpcode::kCrossReplicaSum && ShapeUtil::IsTuple(use.instruction->shape()))) { - user_output_type = ShapeUtil::GetSubshape( - ShapeUtil::GetSubshape(use.instruction->shape(), - {use.operand_number}), - use.operand_index) - .element_type(); + ShapeIndex use_output_index{use.operand_number}; + for (int64 i : use.operand_index) { + use_output_index.push_back(i); + } + user_output_type = + OutputTypeAfterChange(use.instruction, use_output_index); } else { - user_output_type = use.instruction->shape().element_type(); + user_output_type = OutputTypeAfterChange(use.instruction, {}); } if (bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision( *use.instruction, use.operand_number) && @@ -199,8 +281,8 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, return true; } -void BFloat16Propagation::DetermineAndMutateInstructionPrecision( - HloInstruction* hlo, bool skip_parameters) { +void BFloat16Propagation::DetermineInstructionPrecision(HloInstruction* hlo, + bool skip_parameters) { // We handle any fusion computation or while body/condition after the // instruction is handled, because we need to know the output shape of a // fusion or while before propagating inside its computations. 
@@ -209,12 +291,12 @@ void BFloat16Propagation::DetermineAndMutateInstructionPrecision( [this, hlo, &postpone_processing_called_computations] { if (!postpone_processing_called_computations) { if (hlo->opcode() == HloOpcode::kFusion) { - DetermineAndMutateFusionComputationPrecision(hlo); + DetermineFusionComputationPrecision(hlo); } else if (hlo->opcode() == HloOpcode::kWhile) { - DetermineAndMutateWhileComputationsPrecision(hlo); + DetermineWhileComputationsPrecision(hlo); } } - instructions_visited_in_mutation_pass_.insert(hlo); + instructions_visited_in_backward_pass_.insert(hlo); }); if (hlo->opcode() == HloOpcode::kWhile && @@ -245,9 +327,9 @@ void BFloat16Propagation::DetermineAndMutateInstructionPrecision( CHECK(hlo->parent() != nullptr); if (hlo == hlo->parent()->root_instruction()) { if (!hlo->parent()->IsFusionComputation()) { - ShapeUtil::ForEachSubshape(hlo->shape(), [&](const Shape& subshape, + ShapeUtil::ForEachSubshape(hlo->shape(), [&](const Shape& /* subshape */, const ShapeIndex& index) { - if (subshape.element_type() != F32) { + if (OutputTypeAfterChange(hlo, index) != F32) { return; } for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { @@ -269,13 +351,12 @@ void BFloat16Propagation::DetermineAndMutateInstructionPrecision( return; } - ShapeUtil::ForEachMutableSubshape( - hlo->mutable_shape(), - [hlo, this](Shape* subshape, const ShapeIndex& index) { - if (subshape->element_type() == F32 && + ShapeUtil::ForEachSubshape( + hlo->shape(), + [hlo, this](const Shape& /* subshape */, const ShapeIndex& index) { + if (OutputTypeAfterChange(hlo, index) == F32 && AllUsersConsumeBF16(*hlo, index)) { - subshape->set_element_type(BF16); - changed_ = true; + AddToOrRemoveFromBF16ChangeSet(hlo, index, BF16); VLOG(2) << "HloInstruction output at shape index " << index << " changed to BF16 precision: " << hlo->ToString(); } @@ -308,26 +389,24 @@ void BFloat16Propagation::AdjustCalledComputationParameters( CHECK_EQ(operands.size(), 
computation->num_parameters()); for (int64 i = 0; i < operands.size(); ++i) { auto parameter = computation->parameter_instruction(i); - ShapeUtil::ForEachMutableSubshape( - parameter->mutable_shape(), - [this, i, hlo, &operands, parameter](Shape* subshape, + ShapeUtil::ForEachSubshape( + parameter->shape(), + [this, i, hlo, &operands, parameter](const Shape& /* subshape */, const ShapeIndex& index) { if (!ShapeUtil::IsLeafIndex(parameter->shape(), index)) { return; } PrimitiveType operand_type = - ShapeUtil::GetSubshape(operands[i]->shape(), index) - .element_type(); - if (subshape->element_type() == operand_type) { + OutputTypeAfterChange(operands[i], index); + if (OutputTypeAfterChange(parameter, index) == operand_type) { return; } - CHECK(operand_type == F32 || operand_type == BF16); - subshape->set_element_type(operand_type); - changed_ = true; + AddToOrRemoveFromBF16ChangeSet(parameter, index, operand_type); VLOG(2) << "Called computation parameter " << parameter->ToString() << " at shape index " << index - << " adjusted to match operand in HLO " - << hlo->ToString(); + << " adjusted to " + << (operand_type == BF16 ? "BF16" : "F32") + << " to match operand in HLO " << hlo->ToString(); }); } }; @@ -348,51 +427,48 @@ void BFloat16Propagation::AdjustCalledComputationParameters( void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) { auto adjust_computation = [this, hlo](HloComputation* computation, - const Shape& output_shape) { + HloInstruction* output) { // Adjust root. 
HloInstruction* root = computation->root_instruction(); - ShapeUtil::ForEachMutableSubshape( - root->mutable_shape(), [this, hlo, root, &output_shape]( - Shape* subshape, const ShapeIndex& index) { - if (!ShapeUtil::IsLeafIndex(hlo->shape(), index)) { - return; - } - const PrimitiveType output_type = - ShapeUtil::GetSubshape(output_shape, index).element_type(); - if (subshape->element_type() == output_type) { - return; - } - CHECK(output_type == F32 || output_type == BF16); - subshape->set_element_type(output_type); - // It's possible that output_type is F32, but the root instruction's - // type is BF16; e.g., a fusion node's output was changed to BF16 - // initially but then adjusted back to F32, and the fusion computation - // is now being adjusted after the fusion node. - if (output_type == F32) { - for (const auto* value : - dataflow_->GetValueSet(root, index).values()) { - // We rely on the fact that this adjustment works in reverse - // topological order so that called computation will be - // processed later. Adding the value to - // values_that_must_be_kept_as_f32_ will ensure the - // correctness of the adjustment for HLOs that will be - // processed later. 
- values_that_must_be_kept_as_f32_.insert(value); - } - } - changed_ = true; - VLOG(2) << "Called computation root " << root->ToString() - << " at shape index " << index - << " adjusted to match output shape of " << hlo->ToString(); - }); + ShapeUtil::ForEachSubshape(root->shape(), [this, hlo, root, output]( + const Shape& /* subshape */, + const ShapeIndex& index) { + if (!ShapeUtil::IsLeafIndex(hlo->shape(), index)) { + return; + } + const PrimitiveType output_type = OutputTypeAfterChange(output, index); + if (OutputTypeAfterChange(root, index) == output_type) { + return; + } + AddToOrRemoveFromBF16ChangeSet(root, index, output_type); + // It's possible that output_type is F32, but the root instruction's + // type is BF16; e.g., a fusion node's output was changed to BF16 + // initially but then adjusted back to F32, and the fusion computation + // is now being adjusted after the fusion node. + if (output_type == F32) { + for (const auto* value : dataflow_->GetValueSet(root, index).values()) { + // We rely on the fact that this adjustment works in reverse + // topological order so that called computation will be + // processed later. Adding the value to + // values_that_must_be_kept_as_f32_ will ensure the + // correctness of the adjustment for HLOs that will be + // processed later. + values_that_must_be_kept_as_f32_.insert(value); + } + } + VLOG(2) << "Called computation root " << root->ToString() + << " at shape index " << index << " adjusted to " + << (output_type == BF16 ? 
"BF16" : "F32") + << " to match output shape of " << hlo->ToString(); + }); }; switch (hlo->opcode()) { case HloOpcode::kFusion: - adjust_computation(hlo->fused_instructions_computation(), hlo->shape()); + adjust_computation(hlo->fused_instructions_computation(), hlo); break; case HloOpcode::kWhile: - adjust_computation(hlo->while_body(), hlo->shape()); + adjust_computation(hlo->while_body(), hlo); break; default: break; @@ -409,16 +485,19 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { auto hlo = *inst_it; auto adjust_hlo_output = [this, hlo, ¶meter_changed]( - Shape* subshape, const ShapeIndex& index) { - if (subshape->element_type() != F32 && subshape->element_type() != BF16) { + const Shape& /* subshape */, + const ShapeIndex& index) { + auto output_type = OutputTypeAfterChange(hlo, index); + if (output_type != F32 && output_type != BF16) { return; } PrimitiveType type = BF16; for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { - if (value->shape().element_type() == BF16) { + auto value_type = ValueTypeAfterChange(value); + if (value_type == BF16) { continue; } - CHECK_EQ(value->shape().element_type(), F32); + CHECK_EQ(value_type, F32); type = F32; break; } @@ -437,16 +516,17 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( values_that_must_be_kept_as_f32_.insert(value); } } - if (type != subshape->element_type()) { - subshape->set_element_type(type); + if (type != output_type) { + AddToOrRemoveFromBF16ChangeSet(hlo, index, type); VLOG(2) << "HloInstruction output at shape index " << index - << " adjusted to " << *subshape << ": " << hlo->ToString(); + << " adjusted to " << (type == BF16 ? 
"BF16" : "F32") << ": " + << hlo->ToString(); if (hlo->opcode() == HloOpcode::kParameter) { parameter_changed = true; } } }; - ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), adjust_hlo_output); + ShapeUtil::ForEachSubshape(hlo->shape(), adjust_hlo_output); AdjustCalledComputationRoot(hlo); if (hlo->opcode() == HloOpcode::kWhile) { // We need to run on the while body and condition repeatedly until a fixed @@ -463,8 +543,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(), &visited_in_while)) { visited_in_while.clear(); - ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), - adjust_hlo_output); + ShapeUtil::ForEachSubshape(hlo->shape(), adjust_hlo_output); AdjustCalledComputationRoot(hlo); } visited_computations->insert(visited_in_while.begin(), @@ -478,7 +557,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( return parameter_changed; } -Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( +void BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( HloModule* module) { std::list computations_topological_order = module->MakeComputationPostOrder(); @@ -490,7 +569,9 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( } ResolveInconsistencyOfAliasingBuffersHelper(*comp_it, &resolved); } +} +Status BFloat16Propagation::ResolveInconsistentFusions(HloModule* module) { // We could have changed a fusion computation's root shape to have a different // precision than the fusion node's output, if the fusion root does not // define a buffer (e.g., a tuple). Now we add conversions after such fusion @@ -517,7 +598,7 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( // (2) after adding conversion // (3) after tuple simplifier and DCE. 
bool needs_tuple_simplifier = false; - for (auto computation : computations_topological_order) { + for (auto computation : module->MakeComputationPostOrder()) { auto insts = computation->MakeInstructionPostOrder(); for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { auto hlo = *inst_it; @@ -587,7 +668,14 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( needs_tuple_simplifier |= ShapeUtil::IsTuple(hlo->shape()); } } + if (needs_tuple_simplifier) { + TupleSimplifier tuple_simplifier; + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + } + return Status::OK(); +} +Status BFloat16Propagation::ResolveConvertedConstants(HloModule* module) { // We may have converted some constants from F32 to BF16, so adjust the // constant literals in such cases. We do this here instead of when the // constant node's is changed because 1) the HloInstruction interface does not @@ -598,8 +686,7 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( // can avoid repeated conversions. // // TODO(b/73833576): Consider resetting literal in HloInstruction. 
- bool needs_dce = needs_tuple_simplifier; - for (auto computation : computations_topological_order) { + for (auto computation : module->MakeComputationPostOrder()) { for (auto hlo : computation->MakeInstructionPostOrder()) { if (hlo->opcode() != HloOpcode::kConstant) { continue; @@ -612,23 +699,13 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( auto new_constant = computation->AddInstruction( HloInstruction::CreateConstant(std::move(converted_literal))); TF_RETURN_IF_ERROR(hlo->ReplaceAllUsesWith(new_constant)); - needs_dce = true; } } } - - if (needs_tuple_simplifier) { - TupleSimplifier tuple_simplifier; - TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); - } - if (needs_dce) { - HloDCE dce; - TF_RETURN_IF_ERROR(dce.Run(module).status()); - } return Status::OK(); } -Status BFloat16Propagation::RemoveNoopConversions(HloModule* module) { +Status BFloat16Propagation::SkipNoopConversions(HloModule* module) { for (auto computation : module->computations()) { for (auto hlo : computation->MakeInstructionPostOrder()) { if (hlo->opcode() != HloOpcode::kConvert) { @@ -643,7 +720,6 @@ Status BFloat16Propagation::RemoveNoopConversions(HloModule* module) { if (is_root) { computation->set_root_instruction(source); } - TF_RETURN_IF_ERROR(computation->RemoveInstructionAndUnusedOperands(hlo)); } } return Status::OK(); @@ -652,8 +728,18 @@ Status BFloat16Propagation::RemoveNoopConversions(HloModule* module) { // The algorithm first does a forward pass (parameters to root) to determine a // set of instructions to consider using bfloat16, then does a backward pass to // determine the precisions of those instructions according to the need of -// their users. +// their users. During the backward pass, the potential changes are stored in +// changes_to_bf16_ which are subject to further adjustments then applied to the +// HLOs. 
StatusOr BFloat16Propagation::Run(HloModule* module) { + consider_using_bfloat16_.clear(); + instructions_visited_in_backward_pass_.clear(); + computations_visited_in_backward_pass_.clear(); + values_that_must_be_kept_as_f32_.clear(); + caller_counts_.clear(); + changes_to_bf16_.clear(); + changed_ = false; + TF_ASSIGN_OR_RETURN(dataflow_, HloDataflowAnalysis::Run(*module)); std::list computations_topological_order = @@ -686,8 +772,24 @@ StatusOr BFloat16Propagation::Run(HloModule* module) { } auto insts = (*comp_it)->MakeInstructionPostOrder(); for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - DetermineAndMutateInstructionPrecision(*inst_it, - /*skip_parameters=*/true); + DetermineInstructionPrecision(*inst_it, + /*skip_parameters=*/true); + } + } + + // It's possible that an instruction does not define a buffer, but the + // defining instruction's shape has changed. So we need to adjust the output + // shapes of instructions according to the HLO values they refer to. + ResolveInconsistencyOfAliasingBuffers(module); + + // Apply the changes in changes_to_bf16_. + for (auto& change : changes_to_bf16_) { + auto shape = change.first->mutable_shape(); + for (const auto& index : change.second) { + auto subshape = ShapeUtil::GetMutableSubshape(shape, index); + CHECK_EQ(subshape->element_type(), F32); + subshape->set_element_type(BF16); + changed_ = true; } } @@ -695,15 +797,56 @@ StatusOr BFloat16Propagation::Run(HloModule* module) { return false; } - // It's possible that an instruction does not define a buffer, but the - // defining instruction's shape has changed. So we need to adjust the output - // shapes of instructions according to the HLO values they refer to. 
- TF_RETURN_IF_ERROR(ResolveInconsistencyOfAliasingBuffers(module)); + TF_RETURN_IF_ERROR(ResolveInconsistentFusions(module)); + TF_RETURN_IF_ERROR(ResolveConvertedConstants(module)); // This pass could have turned an F32 -> BF16 conversion to a no-op (BF16 -> - // BF16), so we remove them now. - TF_RETURN_IF_ERROR(RemoveNoopConversions(module)); + // BF16), so we skip them now. + TF_RETURN_IF_ERROR(SkipNoopConversions(module)); + + { + // We may have dead HLOs after ResolveInconsistentFusions, + // ResolveConvertedConstants and SkipNoopConversions. + HloDCE dce; + TF_RETURN_IF_ERROR(dce.Run(module).status()); + } return true; } +PrimitiveType BFloat16Propagation::OutputTypeAfterChange( + HloInstruction* hlo, const ShapeIndex& index) const { + PrimitiveType type_on_hlo = + ShapeUtil::GetSubshape(hlo->shape(), index).element_type(); + if (type_on_hlo != F32) { + return type_on_hlo; + } + auto it = changes_to_bf16_.find(hlo); + if (it == changes_to_bf16_.end()) { + return type_on_hlo; + } + return ContainsKey(it->second, index) ? 
BF16 : F32; +} + +PrimitiveType BFloat16Propagation::ValueTypeAfterChange( + const HloValue* value) const { + auto hlo = value->defining_instruction(); + const auto& position = value->defining_position(); + return OutputTypeAfterChange(hlo, position.index); +} + +void BFloat16Propagation::AddToOrRemoveFromBF16ChangeSet( + HloInstruction* hlo, const ShapeIndex& index, PrimitiveType target_type) { + if (target_type == BF16) { + auto& entry = changes_to_bf16_[hlo]; + entry.insert(index); + } else { + CHECK_EQ(target_type, F32); + auto it = changes_to_bf16_.find(hlo); + if (it == changes_to_bf16_.end()) { + return; + } + it->second.erase(index); + } +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index 1744e9db90aeff269daa91eb68a1d61bb0fc3035..de0355ddfca127753f90d1899b424a8e77c9b291 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/core/lib/hash/hash.h" namespace xla { @@ -85,30 +86,39 @@ class BFloat16Propagation : public HloPassInterface { tensorflow::gtl::FlatSet consider_using_bfloat16_; // *************************** - // Functions called and state produced by the backward mutation pass (from - // root to parameters). + // Functions called and state produced by the backward pass (from root to + // parameters) that finds opportunities to use BF16. - // Determines the precision for the given instruction in the mutation pass. - void DetermineAndMutateInstructionPrecision(HloInstruction* hlo, - bool skip_parameters); + // Determines the precision for the given instruction in the + // opportunity-finding pass. 
+ void DetermineInstructionPrecision(HloInstruction* hlo, bool skip_parameters); - // Special handling in the mutation pass for fusion computations. + // Special handling in the opportunity-finding pass for fusion computations. // // Precondition: hlo->opcode() == kFusion - void DetermineAndMutateFusionComputationPrecision(HloInstruction* fusion); + void DetermineFusionComputationPrecision(HloInstruction* fusion); - // Special handling in the mutation pass for while computations. + // Reverts changes to BF16 that will not propagate outside a fusion + // computation. This avoids BF16 casts overhead inside a fusion which won't + // save memory bandwidth. + // + // Precondition: hlo->opcode() == kFusion + void RevertIfFusionInternalBF16Changes(HloInstruction* fusion); + + // Special handling in the opportunity-finding pass for while computations. // // Precondition: hlo->opcode() == kWhile - void DetermineAndMutateWhileComputationsPrecision(HloInstruction* while_hlo); + void DetermineWhileComputationsPrecision(HloInstruction* while_hlo); - // The set of HloInstructions that have been visited in the mutation pass. + // The set of HloInstructions that have been visited in the + // opportunity-finding pass. tensorflow::gtl::FlatSet - instructions_visited_in_mutation_pass_; + instructions_visited_in_backward_pass_; - // The set of HloComputations that have been visited in the mutation pass. + // The set of HloComputations that have been visited in the + // opportunity-finding pass. tensorflow::gtl::FlatSet - computations_visited_in_mutation_pass_; + computations_visited_in_backward_pass_; // *************************** // Functions called by the final inconsistency resolving pass. @@ -116,7 +126,7 @@ class BFloat16Propagation : public HloPassInterface { // Adjusts the output shapes of HloInstructions such that if two // HloInstructions have aliasing buffers in their outputs, they must have the // same precision. 
- Status ResolveInconsistencyOfAliasingBuffers(HloModule* module); + void ResolveInconsistencyOfAliasingBuffers(HloModule* module); // Resolves inconsistency of aliasing buffers for the given computation, and // recursively runs on a while instruction's condition and body until a fixed @@ -134,9 +144,19 @@ class BFloat16Propagation : public HloPassInterface { void AdjustCalledComputationRoot(HloInstruction* hlo); // *************************** - // Removes no-op conversions (same source and target shapes) that can be - // produced this pass. - Status RemoveNoopConversions(HloModule* module); + // Functions called after changes in changes_to_bf16_ are applied. + + // Resolves inconsistencies introduced by this pass for fusions with + // tuple-type output. + Status ResolveInconsistentFusions(HloModule* module); + + // Converts the literals in kConstant HLOs which have their types changed to + // BF16 by this pass. + Status ResolveConvertedConstants(HloModule* module); + + // Skips no-op conversions (same source and target shapes) that can be + // produced by this pass, i.e., replaces them in their uses with their operands. + Status SkipNoopConversions(HloModule* module); // *************************** // Functions called and state used by two or more passes. @@ -146,6 +166,23 @@ class BFloat16Propagation : public HloPassInterface { bool AllUsersConsumeBF16(const HloInstruction& hlo, const ShapeIndex& index) const; + // The output element type of the HLO at the given shape index after changes + // in changes_to_bf16_ are applied. + PrimitiveType OutputTypeAfterChange(HloInstruction* hlo, + const ShapeIndex& index) const; + + // The element type of the HLO value after changes in changes_to_bf16_ are + // applied. 
+ PrimitiveType ValueTypeAfterChange(const HloValue* value) const; + + // If target_type == BF16, adds the HLO at the given index to + // changes_to_bf16_; otherwise, target_type must be F32 and this function + // removes the HLO at the given index from changes_to_bf16_ if it was earlier + // added. + void AddToOrRemoveFromBF16ChangeSet(HloInstruction* hlo, + const ShapeIndex& index, + PrimitiveType target_type); + // The set of F32 HLO values that must be kept in F32. tensorflow::gtl::FlatSet values_that_must_be_kept_as_f32_; @@ -153,10 +190,28 @@ class BFloat16Propagation : public HloPassInterface { // module. Populated at the beginning of this pass. tensorflow::gtl::FlatMap caller_counts_; + // We first store the potential F32-to-BF16 changes in changes_to_bf16_, which + // are subject to further adjustment and only then applied to the HLOs. This + // avoids setting changed_ to true when all changes are reverted during + // adjustment. + struct IndexHasher { + int64 operator()(const ShapeIndex& index) const { + int64 hash = 0; + for (int64 i : index) { + hash = tensorflow::Hash64Combine(hash, std::hash()(i)); + } + return hash; + } + }; + tensorflow::gtl::FlatMap> + changes_to_bf16_; + + // Whether the last processed HLO module has been changed by this pass. + bool changed_ = false; + const BFloat16Support* bfloat16_support_; std::unique_ptr dataflow_; - - bool changed_ = false; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 183db1652e498edb0b94e9c9a272e2b8a7fc53ba..313910a861f7f4c0d1d60b738caef40e76cc4260 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -323,6 +323,37 @@ TEST_F(BFloat16PropagationTest, PropagateThroughFusion) { EXPECT_TRUE(OutputsBF16(b_f1)); } +// Tests that changes to BF16 that cannot be propagated outside a fusion are +// discarded. 
+TEST_F(BFloat16PropagationTest, DiscardFusionInternalBF16Changes) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {2, 4}); + + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param")); + HloInstruction* add = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param, param)); + + auto builder_f = HloComputation::Builder("fusion"); + HloInstruction* a_f = + builder_f.AddInstruction(HloInstruction::CreateParameter(0, shape, "a")); + HloInstruction* b_f = + builder_f.AddInstruction(HloInstruction::CreateParameter(1, shape, "b")); + HloInstruction* add_f = builder_f.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, a_f, b_f)); + HloInstruction* dot_f = builder_f.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(F32, {4, 4}), HloOpcode::kDot, add_f, add_f)); + auto comp_f = module->AddEmbeddedComputation(builder_f.Build()); + auto fusion = builder.AddInstruction(HloInstruction::CreateFusion( + dot_f->shape(), HloInstruction::FusionKind::kCustom, {add, add}, comp_f)); + + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_FALSE(PropagatePrecision(module.get())); + EXPECT_EQ(computation->root_instruction(), fusion); +} + // Tests that if 1) the root instruction of a fusion is a tuple, 2) the fusion // outputs are only used by a dot, and 3) one element of the tuple is used by // an add in the fusion computation, then the propagation pass should create a diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index dbe45e932cdeed00e959355d5b3199d2e858148f..94ccfedf6289b4af1accebd358671c3e2bc10ba7 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -292,112 +292,6 @@ BufferAllocationProto BufferAllocation::ToProto() 
const { return proto; } -std::pair> -BufferAllocation::ComputePeakMemoryLogicalBuffers() const { - if (HeapTraces().empty()) { - // Just return the largest LogicalBuffer in the allocation. - const LogicalBuffer* largest_buffer = nullptr; - int64 largest_size = 0; - for (const auto& pair : assigned_buffers()) { - const LogicalBuffer* buffer = pair.first; - int64 size = pair.second.size; - if (largest_buffer == nullptr) { - largest_buffer = buffer; - largest_size = size; - continue; - } - // Tie-break with LogicalBuffer::Id so the return value is stable relative - // to changing addresses. - if (size > largest_size || - ((size == largest_size) && (largest_buffer->id() > buffer->id()))) { - largest_buffer = buffer; - largest_size = size; - } - } - CHECK(largest_buffer != nullptr) - << "No logical buffers in allocation: " << ToString(); - return {largest_size, {largest_buffer}}; - } - - // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical - // buffers in this allocation. - tensorflow::gtl::FlatMap - id_to_buffer; - tensorflow::gtl::FlatMap buffer_sizes; - for (const auto& pair : assigned_buffers()) { - const LogicalBuffer* buffer = pair.first; - const OffsetSize& offset_size = pair.second; - id_to_buffer[buffer->id()] = buffer; - buffer_sizes[buffer] = offset_size.size; - } - - // Returns how much the given event increases the total size of live - // buffers. Can be negative. - auto memory_delta = [this, &id_to_buffer, &buffer_sizes]( - const HeapSimulatorTrace::Event& event) -> int64 { - const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); - const int64 buffer_size = buffer_sizes.at(buffer); - if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { - return buffer_size; - } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { - // Sharing a buffer does not change the live set size for the purposes of - // the heap simulator. Even though the shared-with buffer may be smaller, - // the entire allocation remains live. 
- return 0; - } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { - return -1 * buffer_size; - } - LOG(FATAL) << "Unknown event kind: " << event.kind(); - }; - - int64 total_max_live_size = 0; - std::vector live_buffers_vector; - for (const HeapSimulatorTrace& heap_trace : HeapTraces()) { - // First compute the size of the maximal live set. - int64 max_live_size = 0; - int64 live_size = 0; - for (const auto& event : heap_trace.events()) { - live_size += memory_delta(event); - if (max_live_size < live_size) { - max_live_size = live_size; - } - } - - // Next gather the set of logical buffers live at the earliest point of - // maximal live set size. - tensorflow::gtl::FlatSet live_buffers; - live_size = 0; - for (const auto& event : heap_trace.events()) { - const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); - if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { - InsertOrDie(&live_buffers, buffer); - } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { - // Nothing to do. - } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { - CHECK(ContainsKey(live_buffers, buffer)); - live_buffers.erase(buffer); - } - - live_size += memory_delta(event); - if (live_size == max_live_size) { - break; - } - } - CHECK_EQ(live_size, max_live_size); - total_max_live_size += max_live_size; - - live_buffers_vector.insert(live_buffers_vector.end(), live_buffers.begin(), - live_buffers.end()); - } - - // Stabily sort the live buffers. 
- std::sort(live_buffers_vector.begin(), live_buffers_vector.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() < b->id(); - }); - return {total_max_live_size, live_buffers_vector}; -} - string BufferAllocation::ToString() const { string output; Appendf(&output, "allocation %lld: %p, size %lld", index_, this, size()); @@ -610,6 +504,7 @@ BufferAllocation* BufferAssignment::NewAllocation(const LogicalBuffer& buffer, BufferAllocation* allocation = NewEmptyAllocation(size, is_thread_local, is_reusable, buffer.color()); AddAssignment(allocation, buffer, /*offset=*/0, size); + allocation->peak_buffers_.push_back(&buffer); return allocation; } @@ -680,6 +575,10 @@ void BufferAssignment::CombineTempAllocations() { CHECK_EQ(temp_allocation.HeapTraces().size(), 1); combined_allocation->AddHeapTrace(temp_allocation.HeapTraces().front()); } + combined_allocation->peak_buffers_.insert( + combined_allocation->peak_buffers_.end(), + temp_allocation.peak_buffers_.begin(), + temp_allocation.peak_buffers_.end()); } // Replace all existing temporary allocations with the new combined // allocations. @@ -1228,6 +1127,89 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering( return Status::OK(); } +namespace { + +// Computes and returns the set of logical buffers live at the point of maximal +// liveness in the given heap trace. LogicalBuffers are (stably) sorted by id. +std::vector ComputePeakMemoryLogicalBuffers( + const BufferAllocation& allocation, const HeapSimulatorTrace& heap_trace) { + // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical + // buffers in this allocation. 
+ tensorflow::gtl::FlatMap + id_to_buffer; + tensorflow::gtl::FlatMap buffer_sizes; + for (const auto& pair : allocation.assigned_buffers()) { + const LogicalBuffer* buffer = pair.first; + const BufferAllocation::OffsetSize& offset_size = pair.second; + id_to_buffer[buffer->id()] = buffer; + buffer_sizes[buffer] = offset_size.size; + } + + // Returns how much the given event increases the total size of live + // buffers. Can be negative. + auto memory_delta = [&id_to_buffer, &buffer_sizes]( + const HeapSimulatorTrace::Event& event) -> int64 { + const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); + const int64 buffer_size = buffer_sizes.at(buffer); + if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { + return buffer_size; + } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { + // Sharing a buffer does not change the live set size for the purposes of + // the heap simulator. Even though the shared-with buffer may be smaller, + // the entire allocation remains live. + return 0; + } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { + return -1 * buffer_size; + } + LOG(FATAL) << "Unknown event kind: " << event.kind(); + }; + + // First compute the size of the maximal live set. + int64 max_live_size = 0; + int64 live_size = 0; + for (const auto& event : heap_trace.events()) { + live_size += memory_delta(event); + if (max_live_size < live_size) { + max_live_size = live_size; + } + } + + // Next gather the set of logical buffers live at the earliest point of + // maximal live set size. + tensorflow::gtl::FlatSet live_buffers; + live_size = 0; + for (const auto& event : heap_trace.events()) { + const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); + if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { + InsertOrDie(&live_buffers, buffer); + } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { + // Nothing to do. 
+ } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { + CHECK(ContainsKey(live_buffers, buffer)); + live_buffers.erase(buffer); + } + + live_size += memory_delta(event); + if (live_size == max_live_size) { + break; + } + } + CHECK_EQ(live_size, max_live_size); + + std::vector live_buffers_vector; + live_buffers_vector.insert(live_buffers_vector.end(), live_buffers.begin(), + live_buffers.end()); + + // Sort the live buffers by id for a stable ordering. + std::sort(live_buffers_vector.begin(), live_buffers_vector.end(), + [](const LogicalBuffer* a, const LogicalBuffer* b) { + return a->id() < b->id(); + }); + return live_buffers_vector; +} + +} // namespace + void BufferAssigner::AssignBuffersFromHeapSimulator( const HeapSimulator::Result& result, BufferAssignment* assignment, LogicalBuffer::Color color) { @@ -1246,6 +1228,8 @@ void BufferAssigner::AssignBuffersFromHeapSimulator( const HeapSimulator::Chunk& chunk = buffer_chunk.second; assignment->AddAssignment(allocation, buffer, chunk.offset, chunk.size); } + allocation->peak_buffers_ = + ComputePeakMemoryLogicalBuffers(*allocation, result.debug_trace); VLOG(1) << "Ran heap simulation for allocation: " << allocation->ToString(); allocation->AddHeapTrace(result.debug_trace); diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h index 3086d0e2ca0026547134285b8ceb357390fc7ece..15fd905e8d593994c1cd5ec77cef6db7c2dbefdb 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -206,17 +206,15 @@ class BufferAllocation { return heap_traces_; } - // Compute and return the LogicalBuffers which are live at the point of peak - // memory usage for the given allocation. The point of peak memory usage is - // the point at which the total size of all live logical buffers is - // maximal. 
If peak memory is reached at multiple points, the set of logical - // buffers live at the earliest maximal point is returned. The vector is - // stabily asserted by LogicalBuffer::Index. - // - // The return value is a pair of total size of the logical buffers at peak, - // and the buffers themselves. - std::pair> - ComputePeakMemoryLogicalBuffers() const; + // Returns the LogicalBuffers which are live at the point of peak memory usage + // for this allocation. The point of peak memory usage is the point at which + // the total size of all live logical buffers is maximal. If peak memory is + // reached at multiple points, the set of logical buffers live at the earliest + // maximal point is returned. The vector is stably sorted by + // LogicalBuffer::Index. + const std::vector& PeakMemoryLogicalBuffers() const { + return peak_buffers_; + } // Get the number of bytes lost to fragmentation. This is equal to the // difference between the size of the allocation and the size of the maximal @@ -291,6 +289,9 @@ class BufferAllocation { int64 fragmentation_bytes_ = 0; std::vector heap_traces_; + + // Set of buffers live at the point of peak memory usage for this allocation. + std::vector peak_buffers_; }; // Add stream operators for nicer output of CHECK/RET_CHECK failures. diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index 3ec9795a655041548f96487f9f6401dab9e5c58d..a4fb0eefaca094898ed9acad8062484d1a36afe7 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/computation_tracker.h" #include "tensorflow/compiler/xla/service/copy_insertion.h" @@ -1519,12 +1520,8 @@ TEST_F(BufferAssignmentTest, TrivialPeakBuffers) { // single logical buffer should be exactly the logical buffer in that // allocation. const BufferAllocation& mul_buffer = GetTopLevelAllocation(*buffers, mul); - int64 peak_size; - std::vector peak_buffers; - - std::tie(peak_size, peak_buffers) = - mul_buffer.ComputePeakMemoryLogicalBuffers(); - EXPECT_EQ(peak_size, ShapeUtil::ByteSizeOf(f32vec100_)); + const std::vector& peak_buffers = + mul_buffer.PeakMemoryLogicalBuffers(); ASSERT_EQ(peak_buffers.size(), 1); EXPECT_EQ(peak_buffers[0]->instruction(), mul); } @@ -1555,6 +1552,7 @@ TEST_F(BufferAssignmentTest, PeakBuffers) { HloInstruction::CreateConcatenate(concat_shape, {rev, neg}, 0)); // Make the root tiny so no interior nodes can share its buffer. auto root = builder.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {1}), concat, {0}, {1}, {1})); auto module = CreateNewModule(); @@ -1569,12 +1567,10 @@ TEST_F(BufferAssignmentTest, PeakBuffers) { EXPECT_TRUE(buffer.IsPreallocatedTempBuffer()); ASSERT_EQ(buffer.assigned_buffers().size(), 4); - int64 peak_size; - std::vector peak_buffers; - std::tie(peak_size, peak_buffers) = buffer.ComputePeakMemoryLogicalBuffers(); + const std::vector& peak_buffers = + buffer.PeakMemoryLogicalBuffers(); // The peak live set should be concat and its inputs. 
- EXPECT_EQ(peak_size, ShapeUtil::ByteSizeOf(ShapeUtil::MakeShape(F32, {400}))); ASSERT_EQ(peak_buffers.size(), 3); std::vector peak_instructions; for (const LogicalBuffer* logical_buffer : peak_buffers) { @@ -1583,6 +1579,69 @@ TEST_F(BufferAssignmentTest, PeakBuffers) { EXPECT_THAT(peak_instructions, UnorderedElementsAre(rev, neg, concat)); } +TEST_F(BufferAssignmentTest, PeakBuffersWhile) { + auto module = CreateNewModule(); + const Shape shape = ShapeUtil::MakeShape(F32, {123, 123}); + HloComputation* condition; + { + auto b = HloComputation::Builder(TestName() + ".cond"); + b.AddInstruction(HloInstruction::CreateParameter(0, shape, "x")); + b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(true))); + condition = module->AddEmbeddedComputation(b.Build()); + } + HloComputation* body; + { + auto b = HloComputation::Builder(TestName() + ".body"); + auto param = + b.AddInstruction(HloInstruction::CreateParameter(0, shape, "x")); + b.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, param)); + body = module->AddEmbeddedComputation(b.Build()); + } + auto builder = HloComputation::Builder(TestName()); + auto param = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "p0")); + auto copy = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kCopy, param)); + auto while_op = builder.AddInstruction( + HloInstruction::CreateWhile(shape, condition, body, copy)); + // This broadcast should get a temporary allocation which is merged with the + // allocation for the while. Peak buffers should include the while and the + // broadcast. 
+ auto bcast = builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {123, 123, 123}), while_op, {0, 1})); + builder.AddInstruction(HloInstruction::CreateReverse( + ShapeUtil::MakeShape(F32, {123, 123, 123}), bcast, {0})); + module->AddEntryComputation(builder.Build()); + + auto buffers = RunBufferAssignment(module.get()); + const BufferAllocation& buffer = GetTopLevelAllocation(*buffers, bcast); + const std::vector& peak_buffers = + buffer.PeakMemoryLogicalBuffers(); + ASSERT_EQ(peak_buffers.size(), 2); + + // The peak buffers should include the broadcast and one of the colocated + // buffers of the while (body param, condition param, body root, or the while + // itself). + const LogicalBuffer* bcast_buffer; + const LogicalBuffer* nonbcast_buffer; + if (peak_buffers[0]->instruction() == bcast) { + bcast_buffer = peak_buffers[0]; + nonbcast_buffer = peak_buffers[1]; + } else { + bcast_buffer = peak_buffers[1]; + nonbcast_buffer = peak_buffers[0]; + } + EXPECT_EQ(bcast_buffer->instruction(), bcast); + EXPECT_TRUE( + nonbcast_buffer->instruction() == copy || + nonbcast_buffer->instruction() == while_op || + nonbcast_buffer->instruction() == body->parameter_instruction(0) || + nonbcast_buffer->instruction() == body->root_instruction() || + nonbcast_buffer->instruction() == condition->parameter_instruction(0)); +} + class WhileBufferAssignmentTest : public HloTestBase { protected: std::unique_ptr BuildWhileConditionComputation( @@ -1626,7 +1685,7 @@ class WhileBufferAssignmentTest : public HloTestBase { .ConsumeValueOrDie(); } - static int64 ByteSizeOf(const LogicalBuffer& buffer) { + static int64 ByteSizeOf(const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), sizeof(void*)); } diff --git a/tensorflow/compiler/xla/service/buffer_value.cc b/tensorflow/compiler/xla/service/buffer_value.cc new file mode 100644 index 0000000000000000000000000000000000000000..2bc556a9e270136f5f3eaf2433f8c96eeeaea0a2 --- /dev/null +++ 
b/tensorflow/compiler/xla/service/buffer_value.cc @@ -0,0 +1,68 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/buffer_value.h" + +#include + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +BufferValue::BufferValue(HloInstruction* instruction, const ShapeIndex& index, + Id id) + : id_(id) { + const Shape& shape = ShapeUtil::GetSubshape(instruction->shape(), index); + is_array_ = ShapeUtil::IsArray(shape); + is_tuple_ = ShapeUtil::IsTuple(shape); +} + +BufferValue::~BufferValue() {} + +std::ostream& operator<<(std::ostream& out, const BufferValue& buffer) { + out << buffer.ToString(); + return out; +} + +/*static*/ LogicalBufferProto::Location BufferValue::ToLocationProto( + const HloInstruction& instruction, const ShapeIndex& index) { + LogicalBufferProto::Location proto; + proto.set_computation_name(instruction.parent()->name()); + proto.set_instruction_name(instruction.name()); + for (const int64 index_entry : index) { + proto.add_shape_index(index_entry); + } + return proto; +} + +LogicalBufferProto 
BufferValue::ToProto(const SizeFunction& size_fn) const { + LogicalBufferProto proto; + proto.set_id(id()); + proto.set_size(size_fn(*this)); + LogicalBufferProto::Location proto_location = + ToLocationProto(*instruction(), index()); + proto.mutable_defined_at()->Swap(&proto_location); + if (has_color()) { + proto.set_color(color().value()); + } + return proto; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h new file mode 100644 index 0000000000000000000000000000000000000000..f4be16e0843f64f41ef27539bf263ae98ce0ebf9 --- /dev/null +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -0,0 +1,177 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_H_ + +#include +#include + +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/gtl/int_type.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// Abstract class describing a value used by one of the dataflow analyses - +// TuplePointsToAnalysis or HloDataflowAnalysis. +// TODO(b/78906445) Delete this class when TuplePointsToAnalysis is unused. +// +// XLA arrays are trivially a single BufferValue. Tuples are made up of more +// than one BufferValue: a BufferValue for the pointer vector, and a +// BufferValue for each child element. +// +// Every BufferValue is defined by a particular instruction and most +// instructions define only a single BufferValue. Instructions which define a +// single BufferValue include array-shaped instructions such as Add but also +// include Tuple-shaped instructions such as Tuple. The Tuple instruction +// defines a single BufferValue which is a vector of pointers to the values +// containing the Tuple instruction's operands. Though the result of the Tuple +// instruction includes multiple values only the top-level BufferValue (the +// vector of pointers) is defined by the Tuple instruction. The values +// containing the tuple elements are defined by earlier instructions, usually +// the operands of the Tuple instruction. +// +// Instructions which construct both the tuple *and* the tuple elements define +// more than one BufferValue.
This includes (at least) tuple-shaped Constant, +// Parameter, Infeed and While instructions. These tuple-shaped instructions do +// not assemble a tuple from existing BufferValues like the Tuple instruction +// does, but rather define all the BufferValues in the tuple. +// +// Some instructions, such as Bitcast, define no buffers. These instructions +// simply forward buffers from their operands. +// +// The BufferValue object describes which HLO instruction defines a buffer and +// where within that instruction's output shape the buffer is defined. The +// location within the output shape is indicated by BufferValue::index() which +// is defined identically to the index used in ShapeUtil::GetSubshape(). +// Examples: +// +// %add = Add(%foo, %bar) +// %tuple_constant = Constant({1, {42, 43}}) +// +// %add defines a single array-shaped buffer BufferValue(%add, {}) which holds +// the array result of the add operation. The nested-tuple-shaped +// %tuple_constant defines 5 buffers described by the following BufferValue +// objects: +// +// BufferValue(%tuple_constant, {}) // "Top-level" buffer: vector of +// // pointers to BufferValues at +// // indices {0} and {1} +// BufferValue(%tuple_constant, {0}) // Holds value "1" +// BufferValue(%tuple_constant, {1}) // Holds nested tuple: vector of +// // pointers to BufferValues at +// // indices {1, 0} and {1, 1} +// BufferValue(%tuple_constant, {1, 0}) // Holds value "42" +// BufferValue(%tuple_constant, {1, 1}) // Holds value "43" + +class BufferValue { + public: + TF_LIB_GTL_DEFINE_INT_TYPE(Color, int64); + + // Id is a unique identifier for the BufferValue to facilitate efficient + // collections of BufferValues with stable iteration order. + using Id = int64; + + // Functions which return the size and alignment of a logical buffer in bytes. 
+ using SizeFunction = std::function; + using AlignmentFunction = std::function; + + virtual ~BufferValue(); + + Id id() const { return id_; } + + // Return the instruction that defines the buffer. + virtual HloInstruction* instruction() const = 0; + + // Return the index within the output of the instruction where the buffer is + // defined. The index is defined as in ShapeUtil::GetSubshape(). + virtual const ShapeIndex& index() const = 0; + + // Return the color of the BufferValue. Differently colored buffers cannot be + // parts of the same allocation. + Color color() const { + CHECK_NE(color_, kInvalidColor) + << "Should not query the color of a buffer that was never colored"; + return color_; + } + + void set_color(Color color) { + CHECK_NE(color, kInvalidColor) + << "Should not set the color of a buffer to the invalid color"; + color_ = color; + } + + bool has_color() const { return color_ != kInvalidColor; } + + // Return the shape of the buffer. This reference points into the shape field + // of the instruction defining the buffer. Therefore, the returned shape will + // contain the layout of instruction, if any. + virtual const Shape& shape() const = 0; + + // Returns true if this buffer is the top-level output buffer of the defining + // HLO instruction. This is equivalent to index == {}. + bool IsTopLevel() const { return index().empty(); } + + // Whether this buffer contains a tuple. + bool IsTuple() const { return is_tuple_; } + + // Whether this buffer contains an array. + bool IsArray() const { return is_array_; } + + // operator< is required for std::set. + bool operator<(const BufferValue& other) const { return id_ < other.id_; } + + virtual string ToString() const = 0; + + // TODO(lauj) rename LogicalBufferProto to BufferValueProto. + LogicalBufferProto ToProto(const SizeFunction& size_fn) const; + + // Returns the LogicalBufferProto::Location that serializes the given + // instruction and index.
+ static LogicalBufferProto::Location ToLocationProto( + const HloInstruction& instruction, const ShapeIndex& index); + + const Color kInvalidColor = Color(-1); + + protected: + BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id); + + private: + // The defining instruction and index are not stored here; they can be found + // in the LogicalBuffer and HloValue subclasses. This class exists only to + // support migrations from TuplePointsToAnalysis to HloDataflowAnalysis, by + // allowing abstract use of LogicalBuffer or HloValue. After those migrations + // are complete, this class should be deleted (b/78906445). Because we plan to + // delete LogicalBuffer and this class, we don't refactor all the shared + // features from LogicalBuffer and HloValue into this class. + Id id_ : 62; + bool is_array_ : 1; + bool is_tuple_ : 1; + Color color_ = kInvalidColor; +}; + +std::ostream& operator<<(std::ostream& out, const BufferValue& buffer); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_H_ diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index c9f78a0f9f1c0e889cd2c761e3129ec329a7b647..d39fd7307ae1b5bd0c431f98c413011ca081050b 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -70,6 +70,22 @@ CompileOnlyService::CompileAheadOfTime( TF_RET_CHECK(instance.computation.has_program_shape()); const DebugOptions& debug_options = options.debug_options(); + + // Dump computation proto if flag is set.
+ const string& directory_path = debug_options.xla_dump_computations_to(); + if (!directory_path.empty()) { + HloSnapshot hlo_snapshot; + *hlo_snapshot.mutable_hlo()->mutable_hlo_module() = instance.computation; + string filename = tensorflow::strings::StrCat( + "computation_", instance.computation.id(), "__", + instance.computation.entry_computation_name()); + const string& per_host_path = tensorflow::io::JoinPath( + directory_path, tensorflow::port::Hostname()); + + TF_RETURN_IF_ERROR( + Executable::DumpToDirectory(per_host_path, filename, hlo_snapshot)); + } + const auto& program_shape = instance.computation.program_shape(); ExecutionOptions execution_options; *execution_options.mutable_debug_options() = debug_options; diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 5c14591d93cc995a0b75efb14da8ec98d5859ff5..a4b59d1ba9b24e3f886a7feb51181ae8f990951f 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -25,6 +25,7 @@ limitations under the License. #include #include +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/executable.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" @@ -181,9 +182,9 @@ class Compiler { // Returns a function that computes the size in bytes of a given // logical buffer. 
- std::function BufferSizeBytesFunction() { + std::function BufferSizeBytesFunction() { HloCostAnalysis::ShapeSizeFunction shape_size = ShapeSizeBytesFunction(); - return [shape_size](const LogicalBuffer& buffer) { + return [shape_size](const BufferValue& buffer) { return shape_size(buffer.shape()); }; } diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index e560abc87f84566905333181c159edd3ca297563..e9ec796121fff223474c3e81a5e973cc37f8caec 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -35,7 +35,7 @@ namespace xla { // Tries to replace a conditional with a call operation of the corresponding // computation. If the given conditional has a constant predicate, tries to -// replace it with a call to its true/false computation as appropirate and then +// replace it with a call to its true/false computation as appropriate and then // inline that computation. // // Returns true if it made a change to the graph. diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index 40519ecc799c8f0343294ad88009820dbd8535e9..cbe2ba2e50ab213133196987cf486152edc9d785 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -65,7 +65,7 @@ struct SpecialCaseCopyPolicy { // output tuple. bool copy_root_replicated_buffers = false; // If true, insert a copy if a buffer coming from a constant or a parameter - // is found wihtin the output tuple. + // is found within the output tuple. 
bool copy_parameters_and_constants = false; }; diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 2fc6c6bd55157521328a1f5a056a399282e79db5..7e6d58c7fa5ccaf3e0a6f21d43a54906a3fbe408 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -131,6 +131,7 @@ cc_library( "//tensorflow/compiler/xla/service:reshape_mover", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", + "//tensorflow/compiler/xla/service:while_loop_constant_sinking", "//tensorflow/compiler/xla/service:while_loop_invariant_code_motion", "//tensorflow/compiler/xla/service:while_loop_simplifier", "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination", @@ -364,10 +365,10 @@ tf_cc_binary( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client", "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/client:computation", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index ec2bb6c762d0bbb1d28f4db00def04afa895d13d..3d2e24ca14eacd1a26e118a636dcaca5f2768f15 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -87,6 +87,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/reshape_mover.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" +#include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h" #include "tensorflow/compiler/xla/service/while_loop_simplifier.h" #include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h" @@ -270,6 +271,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { pass.AddPass(); pass.AddPass(); + pass.AddPass(); pass.AddPass(); pass.AddPass(); pass.AddPass(); @@ -294,7 +296,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { ReducePrecisionInsertion::PassTiming::AFTER_FUSION); pipeline.AddPass( - module->mutable_entry_computation_layout()); + module->device_entry_computation_layout()); // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. pipeline.AddPass>( @@ -533,7 +535,8 @@ StatusOr> CpuCompiler::RunBackend( // and reduced memory usage (as compared to using DependencyHloOrdering). TF_ASSIGN_OR_RETURN( SequentialHloOrdering::HloModuleSequence module_sequence, - CreateMemoryMinimizingSequence(*module, BufferSizeBytesFunction())); + CreateMemoryMinimizingSequence(*module, BufferSizeBytesFunction(), + DFSMemoryScheduler)); // Run buffer analysis on the HLO graph. This analysis figures out which // temporary buffers are required to run the computation. 
@@ -787,6 +790,8 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, TF_RETURN_IF_ERROR(verify_status); } + XLA_VLOG_LINES(2, "LLVM IR:\n" + llvm_ir::DumpModuleToString(llvm_module)); + Disassembler disassembler(*target_machine); CompilerFunctor compiler_functor( target_machine.get(), &disassembler, opt_level, diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index aabf4d5161e3af9d49876c6133f8ec5ddfbbf6d6..32613b869078305edda97c11ac250f67de32b805 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -249,8 +249,9 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( std::vector* buffers_in_result) { se::Stream* stream = run_options->stream(); ScopedShapedBuffer result_buffer( - /*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), - run_options->allocator(), stream->parent()->device_ordinal()); + /*on_host_shape=*/host_result_shape(), + /*on_device_shape=*/host_result_shape(), run_options->allocator(), + stream->parent()->device_ordinal()); // Copy DeviceMemoryBase values which contain the array(s) of the result into // the respective location in ShapedBuffer which is returned to the caller. 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc index a98e85a151ffb77e6682b82164603481265283c4..46fe060817b0264d90574b45a94cf1f6e5964593 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc @@ -158,37 +158,95 @@ TEST_F(InstructionFusionTest, DotOperationFusion_ElementReuse) { EXPECT_EQ(dot, computation->root_instruction()); } -TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion) { - HloComputation::Builder builder(TestName()); - HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {1, 256}), "arg0")); - HloInstruction* arg1 = builder.AddInstruction(HloInstruction::CreateParameter( - 1, ShapeUtil::MakeShape(F32, {1024, 256}), "arg1")); +TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion_RHS) { + string hlo_string = R"( +HloModule DotOperationFusion_TransposeFusion - HloInstruction* exp1 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {1024, 256}), HloOpcode::kExp, arg1)); - HloInstruction* transpose1 = - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(S32, {256, 1024}), exp1, {1, 0})); - builder.AddInstruction( - MakeDot(ShapeUtil::MakeShape(F32, {1, 1024}), arg0, transpose1)); +ENTRY DotOperationFusion_TransposeFusion { + arg0 = f32[1,256] parameter(0) + arg1 = f32[1024,256] parameter(1) + exponential = s32[1024,256] exponential(arg1) + transpose = s32[256,1024] transpose(exponential), dimensions={1,0} + ROOT dot = f32[1,1024] dot(arg0, transpose), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + HloComputation* computation = module->entry_computation(); - auto module = CreateNewModule(); - auto computation = 
module->AddEntryComputation(builder.Build()); TransposeFolding transpose_folding( [](const HloInstruction& dot, const TransposeFolding::OperandIndices& candidate_operands) { return candidate_operands; }, TransposeFolding::NeverFoldTranspose); - EXPECT_TRUE(transpose_folding.Run(module.get()).ValueOrDie()); - EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kFusion); - EXPECT_EQ(computation->root_instruction()->fusion_kind(), - HloInstruction::FusionKind::kTransposeDot); - EXPECT_FALSE(CpuInstructionFusion().Run(module.get()).ValueOrDie()); - EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kFusion); - EXPECT_EQ(computation->root_instruction()->fusion_kind(), - HloInstruction::FusionKind::kTransposeDot); + TF_ASSERT_OK_AND_ASSIGN(bool changed, transpose_folding.Run(module.get())); + ASSERT_TRUE(changed); + ASSERT_THAT(computation->root_instruction(), + op::Dot(op::Parameter(0), op::Exp(op::Parameter(1)), + /*lhs_contracting_dim=*/1, /*rhs_contracting_dim=*/1)); +} + +TEST_F(InstructionFusionTest, DotOperationFusion_TransposeFusion_LHS) { + string hlo_string = R"( +HloModule DotOperationFusion_TransposeFusion + +ENTRY DotOperationFusion_TransposeFusion { + arg0 = f32[256,1] parameter(0) + arg1 = f32[256,1024] parameter(1) + transpose = s32[1,256] transpose(arg0), dimensions={1,0} + exponential = s32[256,1024] exponential(arg1) + ROOT dot = f32[1,1024] dot(transpose, exponential), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + HloComputation* computation = module->entry_computation(); + + TransposeFolding transpose_folding( + [](const HloInstruction& dot, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }, + TransposeFolding::NeverFoldTranspose); + TF_ASSERT_OK_AND_ASSIGN(bool changed, transpose_folding.Run(module.get())); + ASSERT_TRUE(changed); + ASSERT_THAT(computation->root_instruction(), + 
op::Dot(op::Parameter(0), op::Exp(op::Parameter(1)), + /*lhs_contracting_dim=*/0, /*rhs_contracting_dim=*/0)); +} + +TEST_F(InstructionFusionTest, + DotOperationFusion_TransposeFusion_LHS_NonDefault) { + string hlo_string = R"( +HloModule DotOperationFusion_TransposeFusion + +ENTRY DotOperationFusion_TransposeFusion { + arg0 = f32[1,256] parameter(0) + arg1 = f32[256,1024] parameter(1) + transpose = s32[256,1] transpose(arg0), dimensions={1,0} + exponential = s32[256,1024] exponential(arg1) + ROOT dot = f32[1,1024] dot(transpose, exponential), lhs_contracting_dims={0}, rhs_contracting_dims={0} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + HloComputation* computation = module->entry_computation(); + + TransposeFolding transpose_folding( + [](const HloInstruction& dot, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }, + TransposeFolding::NeverFoldTranspose); + TF_ASSERT_OK_AND_ASSIGN(bool changed, transpose_folding.Run(module.get())); + ASSERT_TRUE(changed); + ASSERT_THAT(computation->root_instruction(), + op::Dot(op::Parameter(0), op::Exp(op::Parameter(1)), + /*lhs_contracting_dim=*/1, /*rhs_contracting_dim=*/0)); } class OpcodeFusionTest : public InstructionFusionTest { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc index e8117377e61a4e21b8c45b929c518a18878fcb60..6c642080c34e72b1f28b13b340fd2e919a453201 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc @@ -139,13 +139,9 @@ Status CpuLayoutAssignment::AddBackendConstraints( Shape lhs_shape(RowMajorShape(lhs_instruction->shape())); TF_RETURN_IF_ERROR(constraints->SetOperandLayout(lhs_shape, dot, 0)); - // dot is a kDot or a kTransposeDot fusion node. In the latter case, if - // it represents X @ X, it may have just one operand. 
- if (dot->operand_count() > 1) { - const HloInstruction* rhs_instruction = dot->operand(1); - Shape rhs_shape(RowMajorShape(rhs_instruction->shape())); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, dot, 1)); - } + const HloInstruction* rhs_instruction = dot->operand(1); + Shape rhs_shape(RowMajorShape(rhs_instruction->shape())); + TF_RETURN_IF_ERROR(constraints->SetOperandLayout(rhs_shape, dot, 1)); // Set layouts of the instructions' shapes. TF_RETURN_IF_ERROR(constraints->SetInstructionLayout(output_shape, dot)); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h index c8edbb9e15a5b6f9c574f5fe9d130d149499ebd2..09adb5cb02abba5844a1740bdb50a578e1bdf8b5 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h @@ -27,7 +27,8 @@ namespace cpu { // layout constraints for operands and results of library calls. class CpuLayoutAssignment : public LayoutAssignment { public: - explicit CpuLayoutAssignment(ComputationLayout* entry_computation_layout) + explicit CpuLayoutAssignment( + const ComputationLayout& entry_computation_layout) : LayoutAssignment(entry_computation_layout) {} ~CpuLayoutAssignment() override {} diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc index 6ba030fff3bbc5f413bfb133114ceb5309b77672..ba4c5a23d3e043fd6680c2f9abc2275696737ee7 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc @@ -49,7 +49,7 @@ class CpuLayoutAssignmentTest : public HloTestBase { protected: void AssignLayouts(HloModule* module, ComputationLayout* entry_computation_layout) { - cpu::CpuLayoutAssignment layout_assignment(entry_computation_layout); + cpu::CpuLayoutAssignment 
layout_assignment(*entry_computation_layout); EXPECT_IS_OK(layout_assignment.Run(module).status()); } }; @@ -311,7 +311,7 @@ static StatusOr RunDotOutputFusion( result.addend_fusion_param = fusion_instruction->operand( fused_add->operand(1 - dot_operand_idx_in_add)->parameter_number()); - cpu::CpuLayoutAssignment layout_assignment(&computation_layout); + cpu::CpuLayoutAssignment layout_assignment(computation_layout); TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something, layout_assignment.Run(module)); diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 495fecc4aa8b3cf8fcb3ab63d82d8146546854da..8db4a0650d2867cd7326206787d79aaa7c0acf9f 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -522,16 +522,16 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( } // namespace -DotOpEmitter::DotOpEmitter( - const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, - const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array, - const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array, - llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder, - const HloModuleConfig& hlo_module_config, - const TargetMachineFeatures& target_machine_features) +DotOpEmitter::DotOpEmitter(const HloInstruction& dot, + const llvm_ir::IrArray& target_array, + const llvm_ir::IrArray& lhs_array, + const llvm_ir::IrArray& rhs_array, + const llvm_ir::IrArray* addend_array, + llvm::Value* executable_run_options_value, + llvm::IRBuilder<>* ir_builder, + const HloModuleConfig& hlo_module_config, + const TargetMachineFeatures& target_machine_features) : dot_(dot), - transpose_lhs_(transpose_lhs), - transpose_rhs_(transpose_rhs), target_array_(target_array), lhs_array_(lhs_array), rhs_array_(rhs_array), @@ -542,23 +542,21 @@ DotOpEmitter::DotOpEmitter( 
target_machine_features_(target_machine_features) {} /* static */ tensorflow::Status DotOpEmitter::EmitDotOperation( - const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, - const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array, - const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array, + const HloInstruction& dot, const llvm_ir::IrArray& target_array, + const llvm_ir::IrArray& lhs_array, const llvm_ir::IrArray& rhs_array, + const llvm_ir::IrArray* addend_array, llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder, const HloModuleConfig& hlo_module_config, const TargetMachineFeatures& target_machine_features) { PrimitiveType type = target_array.GetShape().element_type(); TF_RET_CHECK(F16 == type || F32 == type || F64 == type || C64 == type); - DotOpEmitter dot_emitter(dot, transpose_lhs, transpose_rhs, target_array, - lhs_array, rhs_array, addend_array, - executable_run_options_value, ir_builder, - hlo_module_config, target_machine_features); + DotOpEmitter dot_emitter(dot, target_array, lhs_array, rhs_array, + addend_array, executable_run_options_value, + ir_builder, hlo_module_config, + target_machine_features); return dot_emitter.Emit(); } -bool DotOpEmitter::ShapesAreLegalForRuntimeDot() const { return true; } - bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { if (dot_.shape().dimensions_size() != 2) { return false; @@ -580,7 +578,7 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { if (mat_mult_dims.m == 1) { bool rhs_effectively_row_major = - transpose_rhs_ ^ !mat_mult_dims.rhs_column_major; + mat_mult_dims.rhs_non_canonical ^ !mat_mult_dims.rhs_column_major; if (rhs_effectively_row_major) { k = mat_mult_dims.k; m = mat_mult_dims.n; @@ -596,7 +594,7 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { if (mat_mult_dims.n == 1) { bool lhs_effectively_column_major = - transpose_lhs_ ^ mat_mult_dims.lhs_column_major; + mat_mult_dims.lhs_non_canonical ^ mat_mult_dims.lhs_column_major; if 
(lhs_effectively_column_major) { m = mat_mult_dims.m; k = mat_mult_dims.k; @@ -743,16 +741,10 @@ tensorflow::Status DotOpEmitter::Emit() { // Reduce along dimension 0 of the LHS and 1 of the RHS. Vectors are a special // case where the reduction dimension is 0 for both LHS and RHS. This results // in a vector dot product producing a scalar. - int64 lhs_reduction_dimension = 0; - if (ShapeUtil::Rank(lhs_shape) >= 2) { - lhs_reduction_dimension = - ShapeUtil::GetDimensionNumber(lhs_shape, transpose_lhs_ ? -2 : -1); - } - int64 rhs_reduction_dimension = 0; - if (ShapeUtil::Rank(rhs_shape) >= 2) { - rhs_reduction_dimension = - ShapeUtil::GetDimensionNumber(rhs_shape, transpose_rhs_ ? -1 : -2); - } + int64 lhs_reduction_dimension = + dot_.dot_dimension_numbers().lhs_contracting_dimensions(0); + int64 rhs_reduction_dimension = + dot_.dot_dimension_numbers().rhs_contracting_dimensions(0); // Verify the reduction dimension in the two operands are the same size. TF_RET_CHECK(lhs_shape.dimensions(lhs_reduction_dimension) == @@ -908,8 +900,6 @@ tensorflow::Status DotOpEmitter::EmitScalarDot() { } tensorflow::Status DotOpEmitter::EmitCallToRuntime() { - DCHECK(ShapesAreLegalForRuntimeDot()); - // The signature of the Eigen runtime matmul function is: // // (void)(void* run_options, float* out, float* lhs, float* rhs, @@ -990,8 +980,8 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { const llvm_ir::IrArray* lhs = &lhs_array_; const llvm_ir::IrArray* rhs = &rhs_array_; - bool transpose_lhs = transpose_lhs_; - bool transpose_rhs = transpose_rhs_; + bool transpose_lhs = mat_mult_dims.lhs_non_canonical; + bool transpose_rhs = mat_mult_dims.rhs_non_canonical; if (!mat_mult_dims.lhs_column_major) { std::swap(mat_mult_dims.m, mat_mult_dims.n); @@ -1019,12 +1009,16 @@ DotOpEmitter::MatMultDims DotOpEmitter::GetMatMultDims() const { const Shape& lhs_shape = lhs_array_.GetShape(); const Shape& rhs_shape = rhs_array_.GetShape(); - - return {lhs_shape.dimensions(transpose_lhs_ ? 
1 : 0), - lhs_shape.dimensions(transpose_lhs_ ? 0 : 1), - rhs_shape.dimensions(transpose_rhs_ ? 0 : 1), - LayoutUtil::Minor(lhs_shape.layout(), 0) == 0, - LayoutUtil::Minor(rhs_shape.layout(), 0) == 0}; + const DotDimensionNumbers& dim_nums = dot_.dot_dimension_numbers(); + + return { + /*m=*/lhs_shape.dimensions(1 - dim_nums.lhs_contracting_dimensions(0)), + /*k=*/lhs_shape.dimensions(dim_nums.lhs_contracting_dimensions(0)), + /*n=*/rhs_shape.dimensions(1 - dim_nums.rhs_contracting_dimensions(0)), + /*lhs_column_major=*/LayoutUtil::Minor(lhs_shape.layout(), 0) == 0, + /*lhs_non_canonical=*/dim_nums.lhs_contracting_dimensions(0) == 0, + /*rhs_column_major=*/LayoutUtil::Minor(rhs_shape.layout(), 0) == 0, + /*rhs_non_canonical=*/dim_nums.rhs_contracting_dimensions(0) == 1}; } llvm_ir::IrArray::Index DotOpEmitter::EmitOperandArrayLoopNest( @@ -1094,27 +1088,16 @@ bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo) { // If gemm can accept the operand shapes, use it rather than a custom // kernel. if (AreValidGemmShapes(lhs_shape, rhs_shape, hlo.shape())) { + const DotDimensionNumbers& dim_numbers = hlo.dot_dimension_numbers(); // The size of the reduction dimension should match. The shape inference // guarantees this invariant, so the check here is for programming // errors. 
- CHECK_EQ(lhs_shape.dimensions(1), rhs_shape.dimensions(0)); + CHECK_EQ(lhs_shape.dimensions(dim_numbers.lhs_contracting_dimensions(0)), + rhs_shape.dimensions(dim_numbers.rhs_contracting_dimensions(0))); return true; } } - if (hlo.opcode() == HloOpcode::kFusion && - hlo.fusion_kind() == HloInstruction::FusionKind::kTransposeDot && - hlo.fused_expression_root()->opcode() == HloOpcode::kDot) { - auto* dot = hlo.fused_expression_root(); - const Shape& lhs_shape = dot->operand(0)->shape(); - const Shape& rhs_shape = dot->operand(1)->shape(); - if (ShapeUtil::HasZeroElements(lhs_shape) || - ShapeUtil::HasZeroElements(rhs_shape)) { - return false; - } - return true; - } - return false; } diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 9d748eb81f7850f3ccdb10f076eecfdc8326c05f..a20bf2f9db3ad3b85ec29038b48d5d0ab095197f 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -56,16 +56,15 @@ class DotOpEmitter { // dot(`lhs_array`, `rhs_array`). A non-null `addend_array` is only supported // for Matrix-vector products. 
static tensorflow::Status EmitDotOperation( - const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs, - const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array, - const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array, + const HloInstruction& dot, const llvm_ir::IrArray& target_array, + const llvm_ir::IrArray& lhs_array, const llvm_ir::IrArray& rhs_array, + const llvm_ir::IrArray* addend_array, llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder, const HloModuleConfig& hlo_module_config, const TargetMachineFeatures& target_machine_features); private: - DotOpEmitter(const HloInstruction& dot, bool transpose_lhs, - bool transpose_rhs, const llvm_ir::IrArray& target_array, + DotOpEmitter(const HloInstruction& dot, const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array, const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array, @@ -99,10 +98,6 @@ class DotOpEmitter { llvm_ir::ForLoopNest* loop_nest, const llvm_ir::IrArray& operand_array, int64 reduction_dimension, tensorflow::StringPiece name_suffix); - // Our runtime operation requires that all arrays have the same layout, - // no padding, and a rank of two. - bool ShapesAreLegalForRuntimeDot() const; - // Represents the dimensions of a matrix-matrix multiply operation. struct MatMultDims { // The number of rows in the LHS. @@ -118,8 +113,14 @@ class DotOpEmitter { // True if the LHS matrix column major. bool lhs_column_major; + // True if the LHS contraction dimension is not 1. + bool lhs_non_canonical; + // True if the RHS matrix column major. bool rhs_column_major; + + // True if the RHS contraction dimension is not 0. 
+ bool rhs_non_canonical; }; // Get the MatMultDims instance for the dot product this DotOpEmitter @@ -136,8 +137,6 @@ class DotOpEmitter { } const HloInstruction& dot_; - const bool transpose_lhs_; - const bool transpose_rhs_; const llvm_ir::IrArray& target_array_; const llvm_ir::IrArray& lhs_array_; const llvm_ir::IrArray& rhs_array_; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index d582b5aaae93799b0fc0e57873c85ec5af9e8d08..55e5aa5063d0ed0e71c6fed062e549dddc3e1e8d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -160,10 +160,8 @@ Status IrEmitter::HandleBitcast(HloInstruction* bitcast) { return Status::OK(); } -Status IrEmitter::HandleConstant(HloInstruction* constant) { - VLOG(2) << "HandleConstant: " << constant->ToString(); - const Literal& literal = constant->literal(); - llvm::GlobalVariable* global_for_const; +llvm::GlobalVariable* IrEmitter::EmitGlobalForLiteral(const Literal& literal) { + llvm::GlobalVariable* result; // We avoid creating large constants in the LLVM IR since LLVM is not // efficient for large constant arrays. 
We still emit "small enough" constant @@ -174,27 +172,42 @@ Status IrEmitter::HandleConstant(HloInstruction* constant) { ByteSizeOf(literal.shape()) >= kMaxInternalConstantSizeInBytes) { string global_name = tensorflow::strings::StrCat( "constant_global_", external_global_constant_counter_++); - global_for_const = new llvm::GlobalVariable( + result = new llvm::GlobalVariable( /*Module=*/*module_, /*Type=*/IrShapeType(literal.shape()), /*isConstant=*/true, /*Linkage=*/llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, /*Name=*/AsStringRef(global_name)); - global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); + result->setAlignment(MinimumAlignmentForShape(literal.shape())); external_constant_pool_->Insert(global_name, literal, MinimumAlignmentForShape(literal.shape())); } else { llvm::Constant* initializer = llvm_ir::ConvertLiteralToIrConstant(literal, module_); - global_for_const = new llvm::GlobalVariable( + result = new llvm::GlobalVariable( /*Module=*/*module_, /*Type=*/initializer->getType(), /*isConstant=*/true, /*Linkage=*/llvm::GlobalValue::PrivateLinkage, /*Initializer=*/initializer, /*Name=*/""); - global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); + result->setAlignment(MinimumAlignmentForShape(literal.shape())); + } + return result; +} + +Status IrEmitter::HandleConstant(HloInstruction* constant) { + VLOG(2) << "HandleConstant: " << constant->ToString(); + const Literal& literal = constant->literal(); + llvm::GlobalVariable* global_for_const; + + auto it = emitted_literals_.find(&literal); + if (it != emitted_literals_.end()) { + global_for_const = it->second; + } else { + global_for_const = EmitGlobalForLiteral(literal); + emitted_literals_[&literal] = global_for_const; } emitted_value_[constant] = global_for_const; VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const); @@ -814,13 +827,6 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { "Dot with multiple contracting 
dimensions not implemented."); } - if (dnums.lhs_contracting_dimensions(0) != - std::min(lhs->shape().dimensions_size() - 1, 1) || - dnums.rhs_contracting_dimensions(0) != 0) { - return Unimplemented( - "Dot with non-standard contracting dimensions not implemented."); - } - llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); @@ -837,8 +843,7 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { // Dot operation is complicated so we delegate to a helper class. return DotOpEmitter::EmitDotOperation( - *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array, - lhs_array, rhs_array, /*addend_array=*/nullptr, + *dot, target_array, lhs_array, rhs_array, /*addend_array=*/nullptr, GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_, target_machine_features_); } @@ -2073,44 +2078,7 @@ static const HloInstruction* StripTranspose(const HloInstruction& hlo) { Status IrEmitter::HandleFusion(HloInstruction* fusion) { auto* root = fusion->fused_expression_root(); - if (fusion->fusion_kind() == HloInstruction::FusionKind::kTransposeDot) { - DCHECK(root->opcode() == HloOpcode::kDot); - const HloInstruction* lhs_parameter = StripTranspose(*root->operand(0)); - const HloInstruction* rhs_parameter = StripTranspose(*root->operand(1)); - DCHECK(lhs_parameter->opcode() == HloOpcode::kParameter && - rhs_parameter->opcode() == HloOpcode::kParameter); - const HloInstruction* lhs = - fusion->operand(lhs_parameter->parameter_number()); - const HloInstruction* rhs = - fusion->operand(rhs_parameter->parameter_number()); - - TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( - /*instruction=*/*root, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F16, F32, F64})); - - llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); - llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); - - Shape target_shape = fusion->shape(); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); - llvm_ir::IrArray target_array = GetIrArrayFor(fusion); - VLOG(2) 
<< "HandleFusion kTransposeDot: "; - VLOG(2) << " lhs operand: " - << llvm_ir::DumpToString(*lhs_array.GetBasePointer()); - VLOG(2) << " rhs operand: " - << llvm_ir::DumpToString(*rhs_array.GetBasePointer()); - VLOG(2) << " target: " - << llvm_ir::DumpToString(*target_array.GetBasePointer()); - - // Dot operation is complicated so we delegate to a helper class. - TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation( - *root, root->operand(0)->IsRank2Transpose(), - root->operand(1)->IsRank2Transpose(), target_array, lhs_array, - rhs_array, /*addend_array=*/nullptr, GetExecutableRunOptionsArgument(), - &ir_builder_, hlo_module_config_, target_machine_features_)); - return Status::OK(); - } else if (llvm_ir::CanEmitFusedDynamicUpdateSliceInPlace(fusion, - assignment_)) { + if (llvm_ir::CanEmitFusedDynamicUpdateSliceInPlace(fusion, assignment_)) { VLOG(3) << "HandleFusion FusedDynamicUpdateSliceInPlace"; CpuElementalIrEmitter elemental_emitter(hlo_module_config_, this, module_); TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); @@ -2153,9 +2121,9 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { GetIrArrayFor(fusion->operand(addend_param_number))); TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation( - *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array, - lhs_array, rhs_array, &addend_array, GetExecutableRunOptionsArgument(), - &ir_builder_, hlo_module_config_, target_machine_features_)); + *dot, target_array, lhs_array, rhs_array, &addend_array, + GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_, + target_machine_features_)); return Status::OK(); } else { return Unimplemented("Fusion kind not implemented on CPU"); @@ -2550,8 +2518,12 @@ Status IrEmitter::FinishVisit(HloInstruction* root) { // nothing to do since the result was already written directly into the output // buffer. 
VLOG(2) << "FinishVisit root: " << root->ToString(); - llvm::Value* root_value = GetEmittedValueFor(root); - VLOG(2) << " value: " << llvm_ir::DumpToString(*root_value); + if (root->opcode() == HloOpcode::kOutfeed) { + VLOG(2) << " outfeed with value: " + << llvm_ir::DumpToString(*GetEmittedValueFor(root->operand(0))); + } else { + VLOG(2) << " value: " << llvm_ir::DumpToString(*GetEmittedValueFor(root)); + } auto record_complete_computation = [&](llvm::Value* prof_counter) { if (prof_counter) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 0f2f3d1817d6e891211bed843cd05c414771f151..5a040760804fa5609e1d68511d4b2abe8e2ec8f9 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -530,6 +530,8 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status EmitXfeedTransfer(XfeedKind kind, const Shape& shape, llvm::Value* program_buffer_address); + llvm::GlobalVariable* EmitGlobalForLiteral(const Literal& literal); + const HloModuleConfig& hlo_module_config_; bool is_top_level_computation_; @@ -539,6 +541,20 @@ class IrEmitter : public DfsHloVisitorWithDefault { int64 external_global_constant_counter_ = 0; ExternalConstantPool* external_constant_pool_; + struct LiteralPtrHashFunctor { + size_t operator()(const Literal* literal) const { return literal->Hash(); } + }; + + struct LiteralPtrEqualityFunctor { + bool operator()(const Literal* lhs, const Literal* rhs) const { + return *lhs == *rhs; + } + }; + + tensorflow::gtl::FlatMap + emitted_literals_; + TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter); }; diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h index 557aa4a6bfc2ef70cafca4b226f8d8f15ea01e2b..2e55181eed867aca762f2b9b8310624ea12c7487 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.h +++ b/tensorflow/compiler/xla/service/cpu/ir_function.h @@ -33,8 +33,8 @@ 
namespace cpu { // emitters for function and function argument access. // The llvm::Function is created with the standard function signature // used in the XLA CPU backend (see ir_function.cc for argument details). -// In addtion IrFunction saves the callers IR insert point during contruction, -// and restores it after desctruction. +// In addition IrFunction saves the callers IR insert point during construction, +// and restores it after destruction. // // Example usage: // diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index fb28280fade307ac1f193e7dca481bd2afa855fc..47e8405ff2ea2c8aa59c66cffb2705d4ab4a6752 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -127,7 +127,7 @@ int64 ParallelTaskAssignment::GetTargetParallelTaskCount( // Currently, we do not assign parallel tasks to instructions with at least // one of the following properties: // *) Internal threading (library calls to kConv, kDot, kFft, kCustomCall). - // *) Emit custom loops (kSelectAndScatter, FusionKind::kTransposeDot). + // *) Emit custom loops (kSelectAndScatter). // *) Operations that are not thread safe (like infeed and rng). // *) Tuple-shaped. // TODO(b/27458679) Parallelize instructions which are skipped here. diff --git a/tensorflow/compiler/xla/service/cpu/sample_harness.cc b/tensorflow/compiler/xla/service/cpu/sample_harness.cc index b3f4609d465efb4df8921abb684bafd263fe040f..167aa4adda995a259190a932a76a34ca5883444c 100644 --- a/tensorflow/compiler/xla/service/cpu/sample_harness.cc +++ b/tensorflow/compiler/xla/service/cpu/sample_harness.cc @@ -19,10 +19,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/array4d.h" #include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -48,13 +48,13 @@ int main(int argc, char** argv) { client->TransferToServer(*param1_literal).ConsumeValueOrDie(); // Build computation. - xla::ComputationBuilder builder(client, ""); + xla::XlaBuilder builder(""); auto p0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto p1 = builder.Parameter(1, param1_literal->shape(), "param1"); auto add = builder.Add(p1, p0, {0}); - xla::StatusOr computation_status = builder.Build(); - xla::Computation computation = computation_status.ConsumeValueOrDie(); + xla::StatusOr computation_status = builder.Build(); + xla::XlaComputation computation = computation_status.ConsumeValueOrDie(); // Execute and transfer result of computation. 
xla::ExecutionProfile profile; diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.h b/tensorflow/compiler/xla/service/cpu/shape_partition.h index 33d02b70e61e3311c9af934e80874939fbe3adae..db2cda2936c834ad79a529bef6596d2f33822a3d 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.h +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.h @@ -38,7 +38,7 @@ namespace cpu { // // [0, 1), [1, 2), [2, 3), [3, 4), [4, 5) [5, 8) // -// Note that the last partition has residule because the dimension size is +// Note that the last partition has residual because the dimension size is // not a multiple of the partition count. // // diff --git a/tensorflow/compiler/xla/service/cpu/tests/BUILD b/tensorflow/compiler/xla/service/cpu/tests/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..18a915e5339623c73fee0e339fe75ee405898a36 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/BUILD @@ -0,0 +1,177 @@ +# Description: +# Tests for LLVM-based CPU backend for XLA. + +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = [":friends"], +) + +package_group( + name = "friends", + includes = [ + "//tensorflow/compiler/xla:friends", + ], +) + +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +# Filegroup used to collect source files for dependency checking. 
+filegroup( + name = "c_srcs", + data = glob([ + "**/*.cc", + "**/*.h", + ]), +) + +cc_library( + name = "cpu_codegen_test", + testonly = True, + hdrs = ["cpu_codegen_test.h"], + deps = [ + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_fusion_test", + srcs = ["cpu_fusion_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_instruction_fusion", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_bytesizeof_test", + srcs = ["cpu_bytesizeof_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_external_constants_test", + srcs = ["cpu_external_constants_test.cc"], + deps = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/core:test", + ], +) + +tf_cc_test( + name = "cpu_noalias_test", + srcs = ["cpu_noalias_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:buffer_assignment", + "//tensorflow/compiler/xla/service:hlo", + 
"//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "@llvm//:core", + ], +) + +tf_cc_test( + name = "cpu_intrinsic_test", + srcs = ["cpu_intrinsic_test.cc"], + deps = [ + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_eigen_dot_operation_test", + srcs = ["cpu_eigen_dot_operation_test.cc"], + deps = [ + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_infeed_test", + srcs = ["cpu_infeed_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_literal_caching_test", + srcs = ["cpu_literal_caching_test.cc"], + deps = [ + 
"//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_outfeed_test", + srcs = ["cpu_outfeed_test.cc"], + deps = [ + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_bytesizeof_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_bytesizeof_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d5bbe7677ace67c0500750d1911bf98ff791aa60 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_bytesizeof_test.cc @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/platform/test.h" + +class CpuByteSizeOfTest : public ::testing::Test {}; + +TEST_F(CpuByteSizeOfTest, ARM32) { + llvm::DataLayout data_layout( + "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"); + auto tuple_shape = + xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); + EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), + data_layout.getPointerSize(0 /* default address space */)); +} + +TEST_F(CpuByteSizeOfTest, ARM64) { + llvm::DataLayout data_layout("e-m:e-i64:64-i128:128-n32:64-S128"); + auto tuple_shape = + xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); + EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), + data_layout.getPointerSize(0 /* default address space */)); +} diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h b/tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h new file mode 100644 index 0000000000000000000000000000000000000000..7c8d07a10baf55dba8cbd347ebe1459b78e268e0 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h @@ -0,0 +1,30 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TESTS_CPU_CODEGEN_TEST_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TESTS_CPU_CODEGEN_TEST_H_ + +#include "tensorflow/compiler/xla/tests/llvm_irgen_test_base.h" + +namespace xla { +namespace cpu { + +// Tests that verify IR emitted by the CPU backend is as expected. +class CpuCodegenTest : public LLVMIRGenTestBase {}; + +} // namespace cpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TESTS_CPU_CODEGEN_TEST_H_ diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_eigen_dot_operation_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_eigen_dot_operation_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6fcce42eaa4599eb8a6dacc1bd39eefd39aa5e50 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_eigen_dot_operation_test.cc @@ -0,0 +1,113 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests that we call into Eigen for dot operations as needed. 
+ +#include +#include +#include + +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { + +struct DotTestSpec { + PrimitiveType primitive_type; + string filecheck_lines; +}; + +string DotTestSpecToString(const ::testing::TestParamInfo& info) { + return PrimitiveType_Name(info.param.primitive_type); +} + +class CpuEigenDotOperationTest + : public CpuCodegenTest, + public ::testing::WithParamInterface { + protected: + void CompileAndCheck(std::unique_ptr entry_computation, + const string& filecheck_lines) { + CpuAotCompilationOptions options{ + /*triple=*/"x86_64", /*cpu_name=*/"", /*features=*/"", + /*entry_point_name=*/"entry", + /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; + + auto hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(std::move(entry_computation)); + + CompileAheadOfTimeAndVerifyIr(std::move(hlo_module), options, + filecheck_lines, + /*match_optimized_ir=*/true); + } +}; + +TEST_P(CpuEigenDotOperationTest, SimpleDotOp) { + HloComputation::Builder builder(TestName()); + DotTestSpec spec = GetParam(); + + auto param_shape = ShapeUtil::MakeShape(spec.primitive_type, {128, 128}); + + HloInstruction* lhs = builder.AddInstruction( + HloInstruction::CreateParameter(0, param_shape, "input")); + HloInstruction* rhs = builder.AddInstruction( + HloInstruction::CreateParameter(1, param_shape, "input")); + + builder.AddInstruction( + HloInstruction::CreateCanonicalDot(param_shape, lhs, rhs)); + CompileAndCheck(builder.Build(), spec.filecheck_lines); +} + +TEST_P(CpuEigenDotOperationTest, DotTransposeOp) { + HloComputation::Builder builder(TestName()); + DotTestSpec spec = GetParam(); + + auto param_shape = 
ShapeUtil::MakeShape(spec.primitive_type, {128, 128}); + + HloInstruction* lhs = builder.AddInstruction( + HloInstruction::CreateParameter(0, param_shape, "input")); + HloInstruction* rhs = builder.AddInstruction( + HloInstruction::CreateParameter(1, param_shape, "input")); + HloInstruction* lhs_transposed = builder.AddInstruction( + HloInstruction::CreateTranspose(param_shape, lhs, {1, 0})); + + builder.AddInstruction( + HloInstruction::CreateCanonicalDot(param_shape, lhs_transposed, rhs)); + CompileAndCheck(builder.Build(), spec.filecheck_lines); +} + +std::vector GetDotTestCases() { + std::vector result; + result.push_back( + {F16, R"(CHECK: call void @__xla_cpu_runtime_EigenMatMulF16)"}); + result.push_back( + {F32, R"(CHECK: call void @__xla_cpu_runtime_EigenMatMulF32)"}); + result.push_back( + {F64, R"(CHECK: call void @__xla_cpu_runtime_EigenMatMulF64)"}); + return result; +} + +INSTANTIATE_TEST_CASE_P(CpuEigenDotOperationTestInstantiation, + CpuEigenDotOperationTest, + ::testing::ValuesIn(GetDotTestCases()), + DotTestSpecToString); + +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_external_constants_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_external_constants_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed8f375bd6186e4805fe9ded5be9ae7c9f4d5c84 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_external_constants_test.cc @@ -0,0 +1,73 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { +class CpuExternalConstantsTest : public CpuCodegenTest { + public: + void TestWithArray(int64 rows, int64 cols, const char* filecheck_pattern) { + HloComputation::Builder builder(TestName()); + + Array2D backing_array(rows, cols); + backing_array.FillUnique(); + + auto shape = ShapeUtil::MakeShape(F32, {rows, cols}); + + HloInstruction* constant = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR2FromArray2D(backing_array))); + HloInstruction* param = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "x")); + builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param, constant)); + + std::unique_ptr module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + CompileAndVerifyIr(std::move(module), filecheck_pattern, + /*match_optimized_ir=*/false); + } +}; + +TEST_F(CpuExternalConstantsTest, Basic) { + TestWithArray(/*rows=*/1024, /*cols=*/1024, R"( +CHECK: @constant_global_0 = external constant [1024 x [1024 x float]], align 16 +)"); +} + +TEST_F(CpuExternalConstantsTest, BasicNegative) { + // The constant array in this test case is small enough that there is no need + // to externalize it. 
+ TestWithArray(/*rows=*/4, /*cols=*/4, R"( +CHECK-NOT: @constant_global_0 = external constant [4 x [4 x float]], align 8 +CHECK: @0 = private constant [4 x [4 x float]] {{.*}}, align 8 +)"); +} +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..23e7a3de4d8188a3add259582e11030539e154c1 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc @@ -0,0 +1,330 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include
+#include
+#include
+
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/ptr_util.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace cpu {
+namespace {
+
+// Fixture for tests that run CpuInstructionFusion over hand-built HLO and
+// inspect the resulting fusion instructions. error_spec_ is the tolerance
+// handed to the "Near" literal comparisons below.
+class CpuFusionTest : public HloTestBase {
+ protected:
+  CpuFusionTest() {}
+
+  ErrorSpec error_spec_{0.0001, 1e-5};
+};
+
+// negate(add(c1, c2)) should collapse into a single fusion rooted at negate.
+TEST_F(CpuFusionTest, FuseTwoElementwiseOps) {
+  auto builder = HloComputation::Builder(TestName());
+  auto input_literal1 = Literal::CreateR1({1.0, 2.0, 3.0});
+  auto input_literal2 = Literal::CreateR1({-2.0, -42.0, 2.0});
+  Shape vshape = input_literal1->shape();
+
+  auto input1 = builder.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal1)));
+  auto input2 = builder.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal2)));
+
+  auto add1 = builder.AddInstruction(
+      HloInstruction::CreateBinary(vshape, HloOpcode::kAdd, input1, input2));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, add1));
+
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  CpuInstructionFusion fusion;
+  EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie());
+
+  // The computation root instruction was fused. Verify the fusion instruction
+  // is now the root.
+  auto computation = module->entry_computation();
+  auto fusion_instruction = computation->root_instruction();
+  EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode());
+  EXPECT_EQ(HloOpcode::kNegate,
+            fusion_instruction->fused_expression_root()->opcode());
+  // There should be four fused instructions: 2 parameters, the add, and the
+  // negate.
+  EXPECT_EQ(4, fusion_instruction->fused_instruction_count());
+
+  // Compile and execute the computation.
+  auto result = ExecuteAndTransfer(std::move(module), {});
+
+  // Check the output correctness.
+  LiteralTestUtil::ExpectR1Near({1.0, 40.0, -5.0}, *result, error_spec_);
+}
+
+// A straight chain negate -> ceil -> exp -> floor -> multiply-by-constant
+// should end up in one fusion rooted at the multiply.
+TEST_F(CpuFusionTest, FuseElementwiseOpChain) {
+  auto builder = HloComputation::Builder(TestName());
+  auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0});
+  Shape vshape = input_literal->shape();
+
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal)));
+  auto negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input));
+  auto ceil = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate));
+  auto exp = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kExp, ceil));
+  auto floor = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kFloor, exp));
+  auto two = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(2.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, two, floor));
+
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  CpuInstructionFusion fusion;
+  EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie());
+
+  // The computation root instruction was fused. Verify the fusion instruction
+  // is now the root.
+  auto computation = module->entry_computation();
+  auto fusion_instruction = computation->root_instruction();
+  EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode());
+  EXPECT_EQ(HloOpcode::kMultiply,
+            fusion_instruction->fused_expression_root()->opcode());
+  // There should be 7 fused instructions: 2 parameters and the fused
+  // operations.
+  EXPECT_EQ(7, fusion_instruction->fused_instruction_count());
+
+  // Compile and execute the computation.
+  auto result = ExecuteAndTransfer(std::move(module), {});
+
+  // Check the output correctness.
+  LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0}, *result,
+                                error_spec_);
+}
+
+TEST_F(CpuFusionTest, ElementwiseOpChainWithNonfusableInstruction) {
+  // Test a chain of fusable ops with a non-fusable op (a reduce) thrown in the
+  // middle.
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0});
+  Shape vshape = input_literal->shape();
+
+  auto input = builder.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal)));
+  auto negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input));
+  auto ceil = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate));
+
+  auto cshape = ShapeUtil::MakeShape(F32, {6});
+  auto concatenate = builder.AddInstruction(
+      HloInstruction::CreateConcatenate(cshape, {ceil, ceil}, /*dimension=*/0));
+
+  // Build an x+y computation to use in a reduce.
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  auto embedded_builder = HloComputation::Builder("f32+f32");
+  embedded_builder.AddInstruction(HloInstruction::CreateBinary(
+      r0f32, HloOpcode::kAdd,
+      embedded_builder.AddInstruction(
+          HloInstruction::CreateParameter(0, r0f32, "x")),
+      embedded_builder.AddInstruction(
+          HloInstruction::CreateParameter(1, r0f32, "y"))));
+  auto add_f32 = module->AddEmbeddedComputation(embedded_builder.Build());
+
+  // This is a nop reduction.
+  auto reduce = builder.AddInstruction(HloInstruction::CreateReduce(
+      cshape,
+      builder.AddInstruction(HloInstruction::CreateReshape(
+          ShapeUtil::MakeShape(F32, {6, 1}), concatenate)),
+      /*init_value=*/
+      builder.AddInstruction(
+          HloInstruction::CreateConstant(Literal::CreateR0(0))),
+      /*dimensions_to_reduce=*/{1}, add_f32));
+
+  auto exp = builder.AddInstruction(
+      HloInstruction::CreateUnary(cshape, HloOpcode::kExp, reduce));
+  auto floor = builder.AddInstruction(
+      HloInstruction::CreateUnary(cshape, HloOpcode::kFloor, exp));
+  auto two = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(2.0)));
+  builder.AddInstruction(
+      HloInstruction::CreateBinary(cshape, HloOpcode::kMultiply, two, floor));
+
+  module->AddEntryComputation(builder.Build());
+
+  CpuInstructionFusion fusion;
+  EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie());
+
+  // The computation root instruction was fused. Verify the fusion instruction
+  // is now the root.
+  auto computation = module->entry_computation();
+
+  auto fusion_instruction1 = computation->root_instruction();
+  EXPECT_EQ(HloOpcode::kFusion, fusion_instruction1->opcode());
+  EXPECT_EQ(HloOpcode::kMultiply,
+            fusion_instruction1->fused_expression_root()->opcode());
+  // There should be 5 fused instructions in the root fusion instruction: 2
+  // parameters, multiply, floor, and exp.
+  EXPECT_EQ(5, fusion_instruction1->fused_instruction_count())
+      << fusion_instruction1->fused_instructions_computation()->ToString();
+
+  // The reduce splits the chain, so its data operand must be a second,
+  // separate fusion instruction; check that one, not the root again.
+  auto fusion_instruction2 = reduce->operand(0);
+  EXPECT_EQ(HloOpcode::kFusion, fusion_instruction2->opcode());
+  EXPECT_EQ(HloOpcode::kReshape,
+            fusion_instruction2->fused_expression_root()->opcode());
+  // There should be 5 fused instructions in the second fusion instruction: 1
+  // parameter, negate, ceil, concat, and reshape.
+  EXPECT_EQ(5, fusion_instruction2->fused_instruction_count())
+      << fusion_instruction2->fused_instructions_computation()->ToString();
+
+  // Compile and execute the computation.
+  auto result = ExecuteAndTransfer(std::move(module), {});
+
+  // Check the output correctness.
+  LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0, 14.0, 40.0, 40.0},
+                                *result, error_spec_);
+}
+
+TEST_F(CpuFusionTest, TestOperandOrderToAvoidDuplication) {
+  // Test that the operands of an instruction to be fused are considered in the
+  // proper order to avoid duplication. Test input:
+  //
+  //   constant = {...}
+  //   negate = neg(constant)
+  //   ceil = ceil(negate)
+  //   mul1 = multiply(negate, ceil)
+  //   mul2 = multiply(ceil, negate)
+  //
+  // In this example, the operands of both mul1 and mul2 should be fused in the
+  // order {ceil, negate} even though they have different orders in their
+  // operand vectors. Test for this problem by counting the number of nodes in
+  // each fusion instruction to ensure that negate is not duplicated.
+  auto builder = HloComputation::Builder(TestName());
+  auto input_literal = Literal::CreateR1({1.0, 2.0, 3.0});
+  Shape vshape = input_literal->shape();
+
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal)));
+  auto negate = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, constant));
+  auto ceil = builder.AddInstruction(
+      HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate));
+
+  auto mul1 = builder.AddInstruction(
+      HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, negate, ceil));
+  auto mul2 = builder.AddInstruction(
+      HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, ceil, negate));
+
+  // Tie together the two multiplies with a tuple to create a single root.
+  auto result =
+      builder.AddInstruction(HloInstruction::CreateTuple({mul1, mul2}));
+
+  // Create computation and module.
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  // Run fusion.
+  CpuInstructionFusion fusion;
+  EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie());
+
+  auto fusion1 = result->operand(0);
+  auto fusion2 = result->operand(1);
+  EXPECT_EQ(HloOpcode::kFusion, fusion1->opcode());
+  EXPECT_EQ(HloOpcode::kFusion, fusion2->opcode());
+
+  // Each fusion instruction should have 4 fused instructions inside: multiply,
+  // ceil, negate, and the fused parameter.
+  EXPECT_EQ(4, fusion1->fused_instruction_count());
+  EXPECT_EQ(4, fusion2->fused_instruction_count());
+
+  // Each fusion instruction should have one parameter and the parameter should
+  // be the constant.
+  EXPECT_EQ(1, fusion1->operand_count());
+  EXPECT_EQ(constant, fusion1->operand(0));
+  EXPECT_EQ(1, fusion2->operand_count());
+  EXPECT_EQ(constant, fusion2->operand(0));
+}
+
+TEST_F(CpuFusionTest, DoNotDuplicateExpensiveOps) {
+  // Verify that expensive operations will not be fused if the fusion results in
+  // duplication. Test code:
+  //
+  //   constant = 42.0
+  //   exp1 = exp(constant)
+  //   negate1 = negate(exp1)
+  //   exp2 = exp(constant)
+  //   negate2 = negate(exp2)
+  //   tuple = tuple(negate1, negate2, exp2)
+  //
+  // exp1 should be fused down into negate1, but exp2 will not be fused into
+  // negate2 because this will result in duplication of the expensive exp
+  // computation. The duplication is caused by the other use of exp2 in the
+  // tuple.
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(42.0)));
+  Shape shape = constant->shape();
+
+  auto exp1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant));
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp1));
+
+  auto exp2 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant));
+  auto negate2 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp2));
+
+  auto tuple = builder.AddInstruction(
+      HloInstruction::CreateTuple({negate1, negate2, exp2}));
+
+  auto module = CreateNewModule();
+  module->AddEntryComputation(builder.Build());
+
+  CpuInstructionFusion fusion;
+  EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie());
+
+  // The only fusion instruction should be operand 0 of the tuple (formerly
+  // negate1).
+  EXPECT_EQ(HloOpcode::kFusion, tuple->operand(0)->opcode());
+  EXPECT_EQ(HloOpcode::kNegate, tuple->operand(1)->opcode());
+  EXPECT_EQ(HloOpcode::kExp, tuple->operand(2)->opcode());
+
+  auto fusion_inst = tuple->operand(0);
+  // There should be three fused instructions: negate1, exp1, and the fused
+  // parameter.
+  EXPECT_EQ(3, fusion_inst->fused_instruction_count());
+  EXPECT_EQ(1, fusion_inst->operand_count());
+  EXPECT_EQ(constant, fusion_inst->operand(0));
+}
+
+}  // namespace
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_infeed_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_infeed_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dd63b998e9b6d04981ec6f7300c883c9b23b154f
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_infeed_test.cc
@@ -0,0 +1,294 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include
+#include
+
+#include "tensorflow/compiler/xla/client/global_data.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h"
+#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h"
+#include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/math/math_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace {
+
+class InfeedTest : public ClientLibraryTestBase {
+ protected:
+  // Transfers the given literal to the infeed interface of the device, and
+  // checks that the data returned by the Infeed HLO is the same as the
+  // literal.
+  void TestInfeedRoundTrip(const Literal& literal) {
+    // TODO(b/31037751) Explicitly reset the Infeed state so that the
+    // test is not affected by the state from the previous tests by
+    // adding ClearInfeed if necessary when it is implemented. For now
+    // don't use ResetDevice since it is not implemented on CPU.
+    ASSERT_IS_OK(client_->TransferToInfeed(literal));
+    XlaBuilder builder(TestName());
+    builder.Infeed(literal.shape());
+    if (ShapeUtil::IsTuple(literal.shape())) {
+      // TODO(b/30609564): Use ComputeAndCompareLiteral instead.
+      ComputeAndCompareTuple(&builder, literal, {});
+    } else {
+      ComputeAndCompareLiteral(&builder, literal, {});
+    }
+  }
+};
+
+TEST_F(InfeedTest, SingleInfeedR0Bool) {
+  TestInfeedRoundTrip(*Literal::CreateR0(true));
+}
+
+TEST_F(InfeedTest, SingleInfeedR1U32) {
+  TestInfeedRoundTrip(*Literal::CreateR1({1, 2, 3}));
+}
+
+TEST_F(InfeedTest, SingleInfeedR2F32) {
+  TestInfeedRoundTrip(*Literal::CreateR2F32Linspace(0.0, 1.0, 128, 64));
+}
+
+TEST_F(InfeedTest, SingleInfeedR3F32) {
+  TestInfeedRoundTrip(
+      *Literal::CreateR3({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}},
+                          {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}}));
+}
+
+// Round-trips the same rank-3 data under two different minor-to-major layouts.
+TEST_F(InfeedTest, SingleInfeedR3F32DifferentLayout) {
+  const Layout r3_dim0minor = LayoutUtil::MakeLayout({0, 1, 2});
+  const Layout r3_dim0major = LayoutUtil::MakeLayout({2, 1, 0});
+
+  TestInfeedRoundTrip(
+      *Literal::CreateR3WithLayout({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}},
+                                    {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}},
+                                   r3_dim0minor));
+
+  TestInfeedRoundTrip(
+      *Literal::CreateR3WithLayout({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}},
+                                    {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}},
+                                   r3_dim0major));
+}
+
+TEST_F(InfeedTest, SingleInfeedR4S32) {
+  TestInfeedRoundTrip(*Literal::CreateR4(
+      {{{{1, -2}, {-4, 5}, {6, 7}}, {{8, 9}, {10, 11}, {12, 13}}},
+       {{{10, 3}, {7, -2}, {3, 6}}, {{2, 5}, {-11, 5}, {-2, -5}}}}));
+}
+
+TEST_F(InfeedTest, SingleInfeedTuple) {
+  TestInfeedRoundTrip(
+      *Literal::MakeTuple({Literal::CreateR1({1, 2, 3}).get(),
+                           Literal::CreateR0(false).get()}));
+}
+
+TEST_F(InfeedTest, SingleInfeedEmptyTuple) {
+  TestInfeedRoundTrip(*Literal::MakeTuple({}));
+}
+
+// Tests Infeed operation used in a while loop, as in the code below. The
+// computation is launched asynchronously, and then infeed data is transferred.
+//
+// float acc = 0.0f;
+// while (acc < 40.0f) {
+//   acc += reduce_add(Infeed());
+// }
+// return acc;
+// TODO(b/30671675) enable this test once asynchronous execution is
+// implemented for CPU.
+TEST_F(InfeedTest, DISABLED_SingleInfeedInWhile) {
+  XlaBuilder builder(TestName());
+  const auto infeed_shape = ShapeUtil::MakeShape(F32, {3});
+  const auto result_shape = ShapeUtil::MakeShape(F32, {});
+
+  // Create a computation for the condition: loop while (prev < 40.0f) holds.
+  XlaComputation condition;
+  {
+    XlaBuilder builder("condition");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    builder.Gt(builder.ConstantR0(40.0f), prev);
+    condition = builder.Build().ConsumeValueOrDie();
+  }
+  // Create a computation for the body: add the reduced value of the Infeed
+  // data to the result variable.
+  XlaComputation body;
+  {
+    XlaBuilder builder("body");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    auto infeed = builder.Infeed(infeed_shape);
+    auto addend =
+        builder.Reduce(infeed, builder.ConstantR0(0.0f),
+                       CreateScalarAddComputation(F32, &builder), {0});
+    builder.Add(prev, addend);
+    body = builder.Build().ConsumeValueOrDie();
+  }
+  // Create a While node with computations for the condition and the body.
+  auto init = builder.ConstantR0(0.0f);
+  builder.While(condition, body, init);
+
+  // Build and asynchronously launch the computation.
+  auto computation = builder.Build().ConsumeValueOrDie();
+  std::unique_ptr result;
+  tensorflow::Thread* computation_thread =
+      tensorflow::Env::Default()->StartThread(
+          tensorflow::ThreadOptions{}, "computation_thread", [&] {
+            result = client_->Execute(computation, {}, &execution_options_)
+                         .ValueOrDie();
+          });
+
+  // Send 5 Infeed data of shape F32[3].
+  ASSERT_IS_OK(client_->TransferToInfeed(*Literal::CreateR1({1, 2, 3})));
+  ASSERT_IS_OK(client_->TransferToInfeed(*Literal::CreateR1({4, 5, 6})));
+  ASSERT_IS_OK(client_->TransferToInfeed(*Literal::CreateR1({7, 8, 9})));
+  ASSERT_IS_OK(
+      client_->TransferToInfeed(*Literal::CreateR1({10, 11, 12})));
+  ASSERT_IS_OK(
+      client_->TransferToInfeed(*Literal::CreateR1({13, 14, 15})));
+
+  delete computation_thread;  // Joins the thread.
+  auto result_literal = client_->Transfer(*result).ConsumeValueOrDie();
+
+  // Only the first 3 infeed data should be added.
+  LiteralTestUtil::ExpectR0Near(45.0f, *result_literal, ErrorSpec{1e-7});
+}
+
+// Tests two Infeed operations with a total order. The order is enforced by
+// using the result of the first while loop as the initial value of the second
+// while loop. The shapes of both Infeeds are Tuples, where the first tuple
+// element (R1F32) is for the data to reduce and accumulate, and the second
+// tuple element (PRED) to indicate whether the loop should continue. The
+// computation is launched asynchronously, and then infeed data is transferred.
+//
+// float acc = 0.0f;
+// continue = true;
+// while (continue) {
+//   (data, continue) = Infeed(shape1);
+//   acc += reduce_add(data)
+// }
+// continue = true;
+// while (continue) {
+//   (data, continue) = Infeed(shape2);
+//   acc += reduce_add(data)
+// }
+// return acc;
+// TODO(b/30671675) enable this test once asynchronous execution is
+// implemented for CPU.
+TEST_F(InfeedTest, DISABLED_TwoInfeedsInTotalOrder) {
+  XlaBuilder builder(TestName());
+  const auto infeed1_shape = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeShape(PRED, {})});
+  const auto infeed2_shape = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(PRED, {})});
+  const auto result_shape = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(PRED, {})});
+
+  // Create a computation for the condition: repeat until the second tuple
+  // element is false.
+  XlaComputation condition;
+  {
+    XlaBuilder builder("condition");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    builder.GetTupleElement(prev, 1);
+    condition = builder.Build().ConsumeValueOrDie();
+  }
+
+  // A lambda that builds the body computation of a while loop with the given
+  // infeed shape, and returns the computation with the ownership.
+  //
+  // The body adds the reduced value of the Infeed data (first tuple element)
+  // to the previous accumulator, and returns the accumulator and the continue
+  // flag (second tuple element) as a tuple.
+  const auto build_body = [this, &result_shape](const Shape& infeed_shape) {
+    XlaBuilder builder("body");
+    auto prev = builder.Parameter(0, result_shape, "prev");
+    auto infeed = builder.Infeed(infeed_shape);
+    auto addend = builder.Reduce(
+        builder.GetTupleElement(infeed, 0), builder.ConstantR0(0.0f),
+        CreateScalarAddComputation(F32, &builder), {0});
+    auto result = builder.Add(builder.GetTupleElement(prev, 0), addend);
+    builder.Tuple({result, builder.GetTupleElement(infeed, 1)});
+    return builder.Build().ConsumeValueOrDie();
+  };
+
+  // Create the first while loop with infeed1_shape.
+  auto init = builder.Tuple(
+      {builder.ConstantR0(0.0f), builder.ConstantR0(true)});
+  auto while1 = builder.While(condition, build_body(infeed1_shape), init);
+  auto result1 = builder.Tuple(
+      {builder.GetTupleElement(while1, 0), builder.ConstantR0(true)});
+
+  // Create the second while loop with infeed2_shape. Note that the result from
+  // the first while loop is used as the initial value.
+  auto while2 = builder.While(condition, build_body(infeed2_shape), result1);
+  builder.GetTupleElement(while2, 0);
+
+  // Build the computation.
+  auto computation = builder.Build().ConsumeValueOrDie();
+
+  // Send the first 4 Infeed data of shape Tuple(F32[2], PRED).
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({1, 2}).get(),
+                           Literal::CreateR0(true).get()})));
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({3, 4}).get(),
+                           Literal::CreateR0(true).get()})));
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({5, 6}).get(),
+                           Literal::CreateR0(true).get()})));
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({7, 8}).get(),
+                           Literal::CreateR0(false).get()})));
+
+  // Asynchronously launch the execution on the device.
+  std::unique_ptr result;
+  tensorflow::Thread* computation_thread =
+      tensorflow::Env::Default()->StartThread(
+          tensorflow::ThreadOptions{}, "computation_thread", [&] {
+            result = client_->Execute(computation, {}, &execution_options_)
+                         .ValueOrDie();
+          });
+
+  // Wait for a second so that the execution is blocked waiting on Infeed
+  // data, then send the remaining data of shape Tuple(F32[3], PRED).
+  sleep(1);
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({1, 2, 3}).get(),
+                           Literal::CreateR0(true).get()})));
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({7, 8, 9}).get(),
+                           Literal::CreateR0(false).get()})));
+  ASSERT_IS_OK(client_->TransferToInfeed(
+      *Literal::MakeTuple({Literal::CreateR1({4, 5, 6}).get(),
+                           Literal::CreateR0(true).get()})));
+
+  // Wait for the execution to be done, and transfer the result.
+  delete computation_thread;  // Joins the thread.
+  auto result_literal = client_->Transfer(*result).ConsumeValueOrDie();
+
+  // Only the first 6 infeed data should be added.
+  LiteralTestUtil::ExpectR0Near(66.0f, *result_literal, ErrorSpec{1e-7});
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_intrinsic_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_intrinsic_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..973aac8766f5aabca15e5173b43480c113c100dd
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_intrinsic_test.cc
@@ -0,0 +1,151 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include
+#include
+#include
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h"
+#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace cpu {
+namespace {
+
+const char* const kTriple_x86_64 = "x86_64-pc-linux";
+const char* const kTriple_android_arm = "armv7-none-android";
+
+struct IntrinsicTestSpec {
+  HloOpcode opcode;
+  tensorflow::StringPiece triple;
+  tensorflow::StringPiece features;
+  tensorflow::StringPiece check_lines;
+};
+
+// Tests that unary functions get lowered using intrinsic calls.
+class CpuUnaryIntrinsicTest
+    : public CpuCodegenTest,
+      public ::testing::WithParamInterface {
+ public:
+  // Builds the per-case test name passed to INSTANTIATE_TEST_CASE_P, of the
+  // form "<Opcode>_On_<triple>" with "_With<features>" appended when the spec
+  // carries a feature string. Characters in the feature string that are
+  // neither alphanumeric nor '_' are replaced by '_'.
+  static string Name(const ::testing::TestParamInfo& info) {
+    const auto& spec = info.param;
+
+    string opcode = HloOpcodeString(spec.opcode);
+    // The <cctype> functions are only defined for values representable as
+    // unsigned char (or EOF), so convert before calling them.
+    opcode[0] = toupper(static_cast<unsigned char>(opcode[0]));
+
+    string triple{spec.triple.data(), spec.triple.size()};
+    if (triple == kTriple_x86_64) {
+      triple = "x86_64";
+    } else if (triple == kTriple_android_arm) {
+      triple = "android_arm";
+    } else {
+      triple = "Unknown";
+    }
+
+    string features{spec.features.data(), spec.features.size()};
+    if (!features.empty()) {
+      std::replace_if(features.begin(), features.end(),
+                      [](char c) {
+                        return c != '_' &&
+                               !isalnum(static_cast<unsigned char>(c));
+                      },
+                      '_');
+    }
+
+    return tensorflow::strings::StrCat(opcode.c_str(), "_On_", triple.c_str(),
+                                       features.empty() ? "" : "_With",
+                                       features.c_str());
+  }
+};
+
+// Creates a module with a call to the unary op, and tests if the
+// compiler replaced it with a call to the intrinsic.
+TEST_P(CpuUnaryIntrinsicTest, DoIt) {
+  HloComputation::Builder builder(TestName());
+  IntrinsicTestSpec spec = GetParam();
+
+  auto param_shape = ShapeUtil::MakeShape(F32, {1024});
+  HloInstruction* param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, param_shape, "input"));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(param_shape, spec.opcode, param));
+  std::unique_ptr computation = builder.Build();
+
+  string triple{spec.triple.data(), spec.triple.size()};
+  string features{spec.features.data(), spec.features.size()};
+
+  CpuAotCompilationOptions options{
+      /*triple=*/triple, /*cpu_name=*/"", /*features=*/features,
+      /*entry_point_name=*/"entry",
+      /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static};
+
+  auto hlo_module = CreateNewModule();
+  hlo_module->AddEntryComputation(std::move(computation));
+
+  string check_lines{spec.check_lines.data(), spec.check_lines.size()};
+
+  CompileAheadOfTimeAndVerifyIr(std::move(hlo_module), options, check_lines,
+                                /*match_optimized_ir=*/true);
+}
+
+IntrinsicTestSpec CpuUnaryIntrinsicTestCases[] = {
+    // The intrinsics are always inlined, so we match a line from it instead of
+    // a function call.
+
+    IntrinsicTestSpec{
+        HloOpcode::kExp, kTriple_x86_64, "",
+        R"(CHECK: fmul fast <4 x float> )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kExp, kTriple_x86_64, "+avx",
+        R"(CHECK: fmul fast <8 x float> )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kExp, kTriple_android_arm, "+neon",
+        R"(CHECK: fmul fast <4 x float> )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kTanh, kTriple_x86_64, "",
+        R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kTanh, kTriple_x86_64, "+avx",
+        R"(CHECK: fcmp fast uge <8 x float> %wide.load, )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kTanh, kTriple_android_arm, "",
+        R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kLog, kTriple_x86_64, "",
+        R"(CHECK: fadd fast <4 x float> )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kLog, kTriple_x86_64, "+avx",
+        R"(CHECK: fadd fast <8 x float> )"},
+
+    IntrinsicTestSpec{
+        HloOpcode::kLog, kTriple_android_arm, "",
+        R"(CHECK: fadd fast <4 x float> )"}};
+
+INSTANTIATE_TEST_CASE_P(CpuUnaryIntrinsicTestInstantiation,
+                        CpuUnaryIntrinsicTest,
+                        ::testing::ValuesIn(CpuUnaryIntrinsicTestCases),
+                        CpuUnaryIntrinsicTest::Name);
+
+}  // namespace
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d6e0425c5542be89835571f0103b1829f63cc2c2
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc
@@ -0,0 +1,121 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h"
+#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h"
+#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h"
+
+namespace xla {
+namespace cpu {
+namespace {
+// Fixture for tests checking that identical constant literals appearing in
+// different HLO computations are emitted only once in the generated IR.
+class CpuDuplicateConstantsTest : public CpuCodegenTest {};
+
+// The same f32[2,3,2] array constant appears in the entry computation and in
+// the while body; the FileCheck pattern requires exactly one matching
+// "private constant" definition.
+TEST_F(CpuDuplicateConstantsTest, RepeatedArrayConstants) {
+  // We use a while loop here to force the two constant HloInstructions to be in
+  // different computations. Otherwise the HLO optimizer itself CSEs them.
+  const string hlo_text = R"(
+HloModule RepeatedConstants
+
+while_body {
+  arg_body = f32[2,3,2] parameter(0)
+  ROOT const = f32[2,3,2] constant(
+    f32[2,3,2]
+      {{{1, 2}, {1001, 1002}, {2001, 2002}},
+       {{2, 1}, {2001, 3002}, {2001, 2002}}})
+}
+
+while_cond {
+  arg_cond = f32[2,3,2] parameter(0)
+  ROOT unknown = pred[] infeed()
+}
+
+ENTRY main {
+  param = f32[2,3,2] parameter(0)
+  const_a = f32[2,3,2] constant(
+    f32[2,3,2]
+      {{{1, 2}, {1001, 1002}, {2001, 2002}},
+       {{2, 1}, {2001, 3002}, {2001, 2002}}})
+  const_b = f32[2,3,2] while(f32[2,3,2] const_a), condition=while_cond, body=while_body
+
+  out0 = () outfeed(f32[2,3,2] const_a)
+  ROOT out1 = () outfeed(f32[2,3,2] const_b)
+}
+)";
+
+  // CHECK must match once; the CHECK-NOT that follows forbids a second
+  // definition of the same type after that match.
+  string filecheck_pattern = R"(
+CHECK: private constant [2 x [3 x [2 x float]]]
+CHECK-NOT: private constant [2 x [3 x [2 x float]]]
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module,
+                          tools::Parse(hlo_text));
+
+  CpuAotCompilationOptions options{
+      /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"",
+      /*entry_point_name=*/"entry",
+      /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static};
+
+  // The pattern is matched against the IR with match_optimized_ir=false.
+  CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern,
+                                /*match_optimized_ir=*/false);
+}
+
+// Same idea as above, but the repeated constant is a tuple of two arrays; each
+// element type must be defined exactly once.
+TEST_F(CpuDuplicateConstantsTest, RepeatedTupleConstants) {
+  // We use a while loop here to force the two constant HloInstructions to be in
+  // different computations. Otherwise the HLO optimizer itself CSEs them.
+  const string hlo_text = R"(
+HloModule RepeatedConstants
+
+while_body {
+  arg_body = (f32[2,1]{1,0}, f32[2]{0}) parameter(0)
+  ROOT const = (f32[2,1]{1,0}, f32[2]{0}) constant((f32[2,1], f32[2]) ( f32[2,1] { { 1 }, { 2 } }, {2, 42} ))
+}
+
+while_cond {
+  arg_cond = (f32[2,1]{1,0}, f32[2]{0}) parameter(0)
+  ROOT unknown = pred[] infeed()
+}
+
+ENTRY main {
+  param = f32[2,3,2] parameter(0)
+  const_a = (f32[2,1]{1,0}, f32[2]{0}) constant((f32[2,1], f32[2]) ( f32[2,1] { { 1 }, { 2 } }, {2, 42} ))
+  const_b = (f32[2,1]{1,0}, f32[2]{0}) while((f32[2,1]{1,0}, f32[2]{0}) const_a), condition=while_cond, body=while_body
+
+  out0 = () outfeed((f32[2,1]{1,0}, f32[2]{0}) const_a)
+  ROOT out1 = () outfeed((f32[2,1]{1,0}, f32[2]{0}) const_b)
+}
+)";
+
+  // One definition per tuple element type, in this order, and no further
+  // definitions of either type afterwards.
+  string filecheck_pattern = R"(
+CHECK: private constant [2 x float]
+CHECK: private constant [2 x [1 x float]]
+CHECK-NOT: private constant [2 x float]
+CHECK-NOT: private constant [2 x [1 x float]]
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module,
+                          tools::Parse(hlo_text));
+
+  CpuAotCompilationOptions options{
+      /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"",
+      /*entry_point_name=*/"entry",
+      /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static};
+
+  CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern,
+                                /*match_optimized_ir=*/false);
+}
+
+}  // namespace
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3b6b0ed74065615fb9e47a0ec3c6c4ab078e45c4
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
@@ -0,0 +1,136 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "llvm/IR/Module.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/buffer_assignment.h" +#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { + +class CpuNoAliasTest : public CpuCodegenTest {}; + +// Creates a simple HLO ir_module (runs concat(concat(x, y), x)), and then +// inspects the aliasing information for loads to its buffers. 
+TEST_F(CpuNoAliasTest, Concat) {
+  HloComputation::Builder builder(TestName());
+
+  std::unique_ptr<Literal> literal =
+      Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
+  auto param_shape = ShapeUtil::MakeShape(F32, {2, 2});
+  HloInstruction* param_x = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, param_shape, "x"));
+  HloInstruction* param_y = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, param_shape, "y"));
+  HloInstruction* concat1 =
+      builder.AddInstruction(HloInstruction::CreateConcatenate(
+          ShapeUtil::MakeShape(F32, {2, 4}), {param_x, param_y}, 1));
+  HloInstruction* concat2 =
+      builder.AddInstruction(HloInstruction::CreateConcatenate(
+          ShapeUtil::MakeShape(F32, {2, 6}), {concat1, param_x}, 1));
+
+  std::unique_ptr<HloComputation> computation = builder.Build();
+
+  auto hlo_module = CreateNewModule();
+  hlo_module->AddEntryComputation(std::move(computation));
+
+  // Now that we have an HLO module, build an llvm_ir::AliasAnalysis for it.
+  auto status_or_buffer_assn = BufferAssigner::Run(
+      hlo_module.get(), MakeUnique<DependencyHloOrdering>(hlo_module.get()),
+      backend().compiler()->BufferSizeBytesFunction(),
+      [](LogicalBuffer::Color) { return /*alignment=*/1; });
+  ASSERT_EQ(status_or_buffer_assn.status(), Status::OK());
+
+  llvm::LLVMContext context;
+  llvm_ir::AliasAnalysis aa(*hlo_module, *status_or_buffer_assn.ValueOrDie(),
+                            &context);
+
+  // Construct an LLVM module containing loads that we annotate as being from
+  // the buffers in the HLO module. We'll inspect these loads to ensure that
+  // they have the expected alias information.
+  llvm::Module ir_module("test", context);
+  llvm::Function* func = llvm::cast<llvm::Function>(
+      ir_module.getOrInsertFunction("test_fn", llvm::Type::getVoidTy(context)));
+  llvm::BasicBlock* bb = llvm::BasicBlock::Create(context, "body", func);
+  llvm::IRBuilder<> ir_builder(bb);
+  auto* zero = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 0);
+  llvm_ir::IrArray::Index zero2D({zero, zero});
+
+  llvm::ArrayType* array2d_type = llvm::ArrayType::get(
+      llvm::ArrayType::get(llvm::Type::getFloatTy(context), 100), 100);
+
+  {
+    llvm::Value* param_x_val =
+        ir_module.getOrInsertGlobal("param_x", array2d_type);
+    llvm_ir::IrArray param_x_array(param_x_val, param_shape);
+    aa.AddAliasingInformationToIrArray(*param_x, &param_x_array);
+    param_x_array.EmitReadArrayElement(zero2D, &ir_builder)
+        ->setName("read_param_x_array");
+  }
+
+  {
+    llvm::Value* concat1_val =
+        ir_module.getOrInsertGlobal("concat1", array2d_type);
+    auto shape = ShapeUtil::MakeShape(F32, {2, 4});
+    llvm_ir::IrArray concat1_array(concat1_val, shape);
+    aa.AddAliasingInformationToIrArray(*concat1, &concat1_array);
+    concat1_array.EmitReadArrayElement(zero2D, &ir_builder)
+        ->setName("read_concat1_array");
+  }
+
+  {
+    llvm::Value* concat2_val =
+        ir_module.getOrInsertGlobal("concat2", array2d_type);
+    auto shape = ShapeUtil::MakeShape(F32, {2, 6});
+    llvm_ir::IrArray concat2_array(concat2_val, shape);
+    aa.AddAliasingInformationToIrArray(*concat2, &concat2_array);
+    concat2_array.EmitReadArrayElement(zero2D, &ir_builder)
+        ->setName("read_concat2_array");
+  }
+
+  // Check the AA info in the loads: concat1 and concat2 must not alias.
+  const char* filecheck_pattern = R"(
+    CHECK: %read_param_x_array = load {{.*}} !noalias [[param_x_noalias:![0-9]+]]
+    CHECK: %read_concat1_array = load {{.*}} !alias.scope [[concat1_scope:![0-9]+]], !noalias [[concat1_noalias:![0-9]+]]
+    CHECK: %read_concat2_array = load {{.*}} !alias.scope [[concat1_noalias]], !noalias [[concat1_scope]]
+    CHECK-DAG: [[buf_size32:![0-9]+]] = !{!"buffer:{{.*}} size:32
+    CHECK-DAG: [[buf_size48:![0-9]+]] = !{!"buffer:{{.*}} size:48
+    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size32]], [[buf_size48]]}
+    CHECK-DAG: [[concat1_scope]] = !{[[buf_size32]]}
+    CHECK-DAG: [[concat1_noalias]] = !{[[buf_size48]]}
+  )";
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      bool filecheck_match,
+      RunFileCheck(llvm_ir::DumpModuleToString(ir_module), filecheck_pattern));
+  EXPECT_TRUE(filecheck_match);
+}
+
+}  // namespace cpu
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..879372eb13884cdb7edd8cfb3e8b4bac4e314951
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc
@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" + +namespace xla { +namespace cpu { +namespace { +class CpuOutfeedTest : public CpuCodegenTest {}; + +TEST_F(CpuOutfeedTest, OutfeedRoot) { + const string hlo_text = R"( +HloModule Outfeed + +ENTRY main { + const_a = f32[2,3,2] constant( + f32[2,3,2] + {{{1, 2}, {1001, 1002}, {2001, 2002}}, + {{2, 1}, {2001, 3002}, {2001, 2002}}}) + + ROOT out = () outfeed(f32[2,3,2] const_a) +} +)"; + + string filecheck_pattern = R"( +CHECK: private constant [2 x [3 x [2 x float]]] +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_text)); + + CpuAotCompilationOptions options{ + /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"", + /*entry_point_name=*/"entry", + /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; + + CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern, + /*match_optimized_ir=*/false); +} + +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h index af48f4ab6e506d295251239fe92db68cfec6dcfa..cc1695b7f863805e0b483478639c17cb9061310a 100644 --- a/tensorflow/compiler/xla/service/despecializer.h +++ b/tensorflow/compiler/xla/service/despecializer.h @@ -25,7 +25,7 @@ namespace xla { // Creates an HloPassPipeline containing multiple HloPasses that can // despecialize an optimized HloModule. This is useful to run an HloModule -// optimized for one specfic platform on a different platform (undoing platform +// optimized for one specific platform on a different platform (undoing platform // specific passes) with matching numerics for comparison. 
// // Current despecialization passes are Defuser, ImplicitBroadcastRemover, diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index 021f09d310b718b51932d0492d1b8f5eb562605c..8119478ce934da06969024905e5e054e0b509b03 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -143,6 +143,19 @@ Status Executable::DumpSessionModule() { *session_module_); } +Status Executable::DumpHloSnapshot() { + TF_RET_CHECK(dumping_snapshot()); + TF_RET_CHECK(hlo_snapshot_->has_hlo() && + hlo_snapshot_->hlo().has_hlo_module()); + const string& directory_path = + module_config().debug_options().xla_dump_executions_to(); + const auto& module = hlo_snapshot_->hlo().hlo_module(); + string filename = tensorflow::strings::Printf( + "computation_%lld__%s__execution_%lld", module.id(), + module.entry_computation_name().c_str(), ++execution_count_); + return Executable::DumpToDirectory(directory_path, filename, *hlo_snapshot_); +} + /* static */ Status Executable::DumpToDirectory( const string& directory_path, string filename, const SessionModule& session_module) { diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index f7af1ca57492972c58d3ce5ddc083088a32a968f..4f0466c544738fa1ec4602ee5104daee8d969c83 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -140,11 +140,11 @@ class Executable { // The shape (including layout) that results from this execution. This is the // shape of the DeviceMemoryBase result value in ExecuteOnStream above. - const Shape& result_shape() const { - return hlo_module_->config().entry_computation_layout().result_shape(); + const Shape& host_result_shape() const { + return hlo_module_->config().host_entry_computation_layout().result_shape(); } - // Dumping helpers. + // TODO(b/74197823): Delete the session module dumping helpers. 
void set_session_module(std::unique_ptr session_module) { session_module_ = std::move(session_module); } @@ -152,6 +152,14 @@ class Executable { SessionModule* session_module() const { return session_module_.get(); } Status DumpSessionModule(); + // Dumping helpers. + void set_hlo_snapshot(std::unique_ptr hlo_snapshot) { + hlo_snapshot_ = std::move(hlo_snapshot); + } + bool dumping_snapshot() const { return hlo_snapshot_ != nullptr; } + HloSnapshot* hlo_snapshot() const { return hlo_snapshot_.get(); } + Status DumpHloSnapshot(); + // Dump session_module to directory_path/filename. static Status DumpToDirectory(const string& directory_path, string filename, const SessionModule& session_module); @@ -174,6 +182,9 @@ class Executable { // SessionModule this was compiled from. Null if not dumping executions. std::unique_ptr session_module_; + // HloSnapshot this was compiled from. Null if not dumping executions. + std::unique_ptr hlo_snapshot_; + // Execution count, used to generate a unique filename for each dumped // execution. 
int64 execution_count_ = 0; diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index f1707442fe3354d5183d905468810f3871146ff5..7cb7f550730eeb562a6163cf5499ffaaf02d3327 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -620,6 +620,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:buffer_value", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_ordering", "//tensorflow/compiler/xla/service:hlo_reachability", diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index c4c56c56928810d043085f284cda351391195c3b..41ee45f55fafcbb96265b97f31e26b75ab96675c 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -197,22 +197,42 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( // We don't put any data in these buffers, because (in theory, anyway) the // speed of a conv isn't affected by the data being convolved. 
ScratchAllocator input_output_allocator(device_ordinal, allocator); - se::port::StatusOr input_buf = + StatusOr maybe_input_buf = input_output_allocator.AllocateBytes(&stream, ShapeUtil::ByteSizeOf(input_shape)); - se::port::StatusOr filter_buf = + StatusOr maybe_filter_buf = input_output_allocator.AllocateBytes(&stream, ShapeUtil::ByteSizeOf(filter_shape)); - se::port::StatusOr output_buf = + StatusOr maybe_output_buf = input_output_allocator.AllocateBytes(&stream, ShapeUtil::ByteSizeOf(output_shape)); - if (!input_buf.ok() || !filter_buf.ok() || !output_buf.ok()) { + if (!maybe_input_buf.ok() || !maybe_filter_buf.ok() || + !maybe_output_buf.ok()) { LOG(WARNING) << "Couldn't allocate space for input/filter/output of convolution " << instr->ToString() << ". Falling back to default algorithm."; return nullopt; } + DeviceMemoryBase input_buf = maybe_input_buf.ValueOrDie(); + DeviceMemoryBase filter_buf = maybe_filter_buf.ValueOrDie(); + DeviceMemoryBase output_buf = maybe_output_buf.ValueOrDie(); + + // Although we don't have evidence this matters, zero out the buffers before + // autotuning. It's conceivable that using uninitialized memory as the inputs + // might affect performance if e.g. the inputs contain denormals, and this is + // easy enough. + if (!stream.ThenMemZero(&input_buf, input_buf.size()) + .ThenMemZero(&filter_buf, filter_buf.size()) + .ThenMemZero(&output_buf, output_buf.size()) + .BlockHostUntilDone() + .ok()) { + LOG(WARNING) + << "Couldn't zero out input/filter/output buffer for convolution " + << instr->ToString() << ". 
Falling back to default algorithm."; + return nullopt; + } + const bool use_winograd_nonfused = ShouldIncludeWinogradNonfusedAlgo( input_shape, output_shape, dnums, stream_exec_); se::dnn::ProfileResult best_result; @@ -225,12 +245,12 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for " << instr->ToString(); - bool launch_ok = RunCudnnConvolution( - kind, input_shape, filter_shape, output_shape, - input_buf.ValueOrDie(), filter_buf.ValueOrDie(), - output_buf.ValueOrDie(), &scratch_allocator, window, - dnums, AlgorithmConfig(alg), &stream, &profile_result) - .ok(); + bool launch_ok = + RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, + input_buf, filter_buf, output_buf, + &scratch_allocator, window, dnums, + AlgorithmConfig(alg), &stream, &profile_result) + .ok(); if (launch_ok && profile_result.is_valid()) { int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes(); diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index 0ec12f52d8b398218ec370fc74bfdf6f97f43893..f996fe486d1fe691899bd69dcedf3e29a963ff42 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -221,8 +221,7 @@ GemmThunk::GemmThunk(const BufferAllocation::Slice& lhs_buffer, const BufferAllocation::Slice& rhs_buffer, const BufferAllocation::Slice& output_buffer, const Shape& lhs_shape, const Shape& rhs_shape, - const Shape& output_shape, bool transpose_lhs, - bool transpose_rhs, double alpha, + const Shape& output_shape, double alpha, const HloInstruction* hlo_instruction) : Thunk(Kind::kGemm, hlo_instruction), lhs_buffer_(lhs_buffer), @@ -231,8 +230,6 @@ GemmThunk::GemmThunk(const BufferAllocation::Slice& lhs_buffer, lhs_shape_(lhs_shape), rhs_shape_(rhs_shape), output_shape_(output_shape), - transpose_lhs_(transpose_lhs), - transpose_rhs_(transpose_rhs), alpha_(alpha) {} 
tensorflow::Status GemmThunk::ExecuteOnStream( @@ -284,10 +281,13 @@ tensorflow::Status GemmThunk::ExecuteOnStream( shape.dimensions(!is_row_major)); }; - const MatrixDescriptor lhs_descriptor = - make_descriptor(lhs_data, lhs_shape_, transpose_lhs_); - const MatrixDescriptor rhs_descriptor = - make_descriptor(rhs_data, rhs_shape_, transpose_rhs_); + const DotDimensionNumbers& dim_nums = + hlo_instruction()->dot_dimension_numbers(); + + const MatrixDescriptor lhs_descriptor = make_descriptor( + lhs_data, lhs_shape_, dim_nums.lhs_contracting_dimensions(0) == 0); + const MatrixDescriptor rhs_descriptor = make_descriptor( + rhs_data, rhs_shape_, dim_nums.rhs_contracting_dimensions(0) == 1); // Dispatches to a regular cublas gemm, a gemm-with-algorithm, or attempts to // autotune this gemm to figure out the best algorithm. diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h index a18f425bc38fd3fbbb345901514c4ac16dbe97ec..f42cbf9e9483b59f1f103b128b36263ccaf64ec5 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h @@ -35,15 +35,13 @@ namespace gpu { class GemmThunk : public Thunk { public: // Constructs a thunk that computes "output = (lhs rhs) * alpha" using - // BLAS gemm. transpose_lhs and transpose_rhs indicate whether gemm should - // transpose the lhs and rhs operand. hlo_instruction is as in Thunk. alpha is - // a constant. + // BLAS gemm. hlo_instruction is as in Thunk. alpha is a constant. 
GemmThunk(const BufferAllocation::Slice& lhs_buffer, const BufferAllocation::Slice& rhs_buffer, const BufferAllocation::Slice& output_buffer, const Shape& lhs_shape, const Shape& rhs_shape, - const Shape& output_shape, bool transpose_lhs, bool transpose_rhs, - double alpha, const HloInstruction* hlo_instruction); + const Shape& output_shape, double alpha, + const HloInstruction* hlo_instruction); GemmThunk(const GemmThunk&) = delete; GemmThunk& operator=(const GemmThunk&) = delete; @@ -69,8 +67,6 @@ class GemmThunk : public Thunk { const Shape rhs_shape_; const Shape output_shape_; - const bool transpose_lhs_; - const bool transpose_rhs_; const double alpha_; // Maps device names (StreamExecutor::DeviceDescription::name()) to autotune diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 796c3070f22edd0cd088ccaf08a7f31fcde70f2d..4fdc4c89618bc0f179b2332373cb2fd3cf637390 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -248,7 +248,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, { HloPassPipeline pipeline("layout_assignment"); pipeline.AddPass( - hlo_module->mutable_entry_computation_layout()); + hlo_module->device_entry_computation_layout()); // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h index 86a3a7111fd79494e469beecf3234f6cec9adb9c..51aae79c3d8d0000007f9d2926d245de838d3aca 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h @@ -27,7 +27,8 @@ namespace gpu { // layout constraints for operands and results of library calls. 
class GpuLayoutAssignment : public LayoutAssignment { public: - explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout) + explicit GpuLayoutAssignment( + const ComputationLayout& entry_computation_layout) : LayoutAssignment(entry_computation_layout) {} ~GpuLayoutAssignment() override {} diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc index 4c45d2e94aebce5496da94841f6a1ae9015615c1..7c801955943021def4ddc0accd9f318b7916ce93 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc @@ -69,7 +69,7 @@ TEST_F(LayoutAssignmentTest, Elementwise) { *computation_layout.mutable_result_layout() = ShapeLayout(result_shape_with_layout); - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment(computation_layout); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); for (const HloInstruction* operand : add->operands()) { @@ -156,7 +156,7 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) { *computation_layout.mutable_result_layout() = ShapeLayout(result_shape); } - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment(computation_layout); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -225,7 +225,7 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) { {result_shape, offset_scale_shape, offset_scale_shape})); } - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment(computation_layout); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -305,7 +305,7 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) { {result_shape, scale_shape, scale_shape})); } - 
GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment(computation_layout); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first and fourth operands to the batchnorm call should have the diff --git a/tensorflow/compiler/xla/service/gpu/hlo_schedule.cc b/tensorflow/compiler/xla/service/gpu/hlo_schedule.cc index 42c1539e86c2ab162fa473852b80b28b57d0e370..f766f968826d960a8e86308f2395301aaa09f1ae 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_schedule.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_schedule.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/hlo_reachability.h" #include "tensorflow/compiler/xla/service/hlo_scheduling.h" #include "tensorflow/compiler/xla/types.h" @@ -199,7 +200,7 @@ StatusOr> HloSchedule::Build( TF_ASSIGN_OR_RETURN( schedule->thunk_launch_order_, CreateMemoryMinimizingSequence( - *entry_computation, [pointer_size](const LogicalBuffer& buffer) { + *entry_computation, [pointer_size](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), pointer_size); })); } else { diff --git a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc index ece9fa04dce3fd12713fb7e58097dc16ebba83df..6436abc06cb9b0d69bc977334e68d91c03af2c98 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc @@ -65,9 +65,9 @@ TEST_F(HloScheduleTest, SequentialMatMul) { HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/2, f32_2x2_, /*name=*/"z")); HloInstruction* dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, x, y)); + 
HloInstruction::CreateCanonicalDot(f32_2x2_, x, y)); HloInstruction* dot2 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, dot1, z)); + HloInstruction::CreateCanonicalDot(f32_2x2_, dot1, z)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build(dot2)); @@ -193,11 +193,11 @@ TEST_F(HloScheduleTest, ConcurrentMatMul) { HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/1, f32_2x2_, /*name=*/"y")); HloInstruction* dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, x, y)); + HloInstruction::CreateCanonicalDot(f32_2x2_, x, y)); HloInstruction* dot2 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, y, x)); + HloInstruction::CreateCanonicalDot(f32_2x2_, y, x)); HloInstruction* add = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kAdd, dot1, dot2)); + HloInstruction::CreateCanonicalDot(f32_2x2_, dot1, dot2)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build(add)); @@ -259,24 +259,24 @@ TEST_F(HloScheduleTest, LatticeMatMul) { params.push_back(builder.AddInstruction(HloInstruction::CreateParameter( i, f32_2x2_, /*name=*/tensorflow::strings::Printf("param%d", i)))); } - HloInstruction* d00 = builder.AddInstruction(HloInstruction::CreateBinary( - f32_2x2_, HloOpcode::kDot, params[2], params[3])); + HloInstruction* d00 = builder.AddInstruction( + HloInstruction::CreateCanonicalDot(f32_2x2_, params[2], params[3])); HloInstruction* d10 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, params[1], d00)); + HloInstruction::CreateCanonicalDot(f32_2x2_, params[1], d00)); HloInstruction* d11 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d00, params[4])); + HloInstruction::CreateCanonicalDot(f32_2x2_, d00, params[4])); HloInstruction* d20 = builder.AddInstruction( - 
HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, params[0], d10)); + HloInstruction::CreateCanonicalDot(f32_2x2_, params[0], d10)); HloInstruction* d21 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d10, d11)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d10, d11)); HloInstruction* d22 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d11, params[5])); + HloInstruction::CreateCanonicalDot(f32_2x2_, d11, params[5])); HloInstruction* d30 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d20, d21)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d20, d21)); HloInstruction* d31 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d21, d22)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d21, d22)); HloInstruction* d40 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d30, d31)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d30, d31)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build(d40)); diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 85ecbe8fdb34700ca738b99ddd9ea615afc35da3..c5eb7211859c8fcb728d28ba432b7e65979a194a 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -48,6 +48,19 @@ bool IsFusile(const HloInstruction& hlo) { } // namespace +/*static*/ bool GpuInstructionFusion::IsExpensive( + const HloInstruction& instruction) { + switch (instruction.opcode()) { + // We say that floating-point division is cheap on the GPU. 
+ case HloOpcode::kDivide: + return !ShapeUtil::ElementIsFloating(instruction.shape()) && + InstructionFusion::IsExpensive(instruction); + + default: + return InstructionFusion::IsExpensive(instruction); + } +} + bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h index bb2990e6dfc9de0a11566bb3a2fb3a1b62498ffa..9fb06b0a244186484b1c17edf13bd28a4305a1a6 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h @@ -27,6 +27,8 @@ class GpuInstructionFusion : public InstructionFusion { explicit GpuInstructionFusion(bool may_duplicate) : InstructionFusion(GpuInstructionFusion::IsExpensive, may_duplicate) {} + static bool IsExpensive(const HloInstruction& instruction); + bool ShouldFuse(HloInstruction* consumer, int64 operand_index) override; HloInstruction::FusionKind ChooseKind( diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 4b231c449f8f101127b4d30bfff20c69d8cef5c1..6c9a805ad637ceef71f6bb021154f358e6e02825 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -253,5 +253,61 @@ TEST_F(InstructionFusionTest, DotOutputFusion) { op::Dot(op::Parameter(), op::Transpose(op::Parameter())))); } +// Compute sum(1/p0), where p0 has type f32, twice. Check that the division is +// duplicated and fused into both reduces. 
+TEST_F(InstructionFusionTest, FloatingPointDivIsCheap) { + auto module = tools::Parse(R"( + HloModule test_module + Add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + ENTRY TestComputation { + zero = f32[] constant(0) + one = f32[] constant(1) + p0 = f32[100] parameter(0) + recip = f32[100] divide(one, p0) + sum1 = f32[] reduce(recip, zero), dimensions={0}, to_apply=Add + sum2 = f32[] reduce(recip, zero), dimensions={0}, to_apply=Add + ROOT root = (f32[], f32[]) tuple(sum1, sum2) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Tuple(op::Fusion(), op::Fusion())); +} + +// Compute sum(100/p0), where p0 has type s32, twice. Check that the division +// is *not* duplicated and fused into both reduces, because we say that integer +// division is not cheap. +TEST_F(InstructionFusionTest, IntegerDivIsNotCheap) { + auto module = tools::Parse(R"( + HloModule test_module + Add { + lhs = s32[] parameter(0) + rhs = s32[] parameter(1) + ROOT add = s32[] add(lhs, rhs) + } + ENTRY TestComputation { + zero = s32[] constant(0) + one_hundred = s32[] constant(100) + p0 = s32[100] parameter(0) + recip = s32[100] divide(one_hundred, p0) + sum1 = s32[] reduce(recip, zero), dimensions={0}, to_apply=Add + sum2 = s32[] reduce(recip, zero), dimensions={0}, to_apply=Add + ROOT mul = (s32[], s32[]) tuple(sum1, sum2) + })") + .ValueOrDie(); + + EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 532d436ee82b985a4efe300f90223e1298e85765..96199035b9e6d39332861079e16b5a4d20eee1a8 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ 
b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -78,17 +78,13 @@ bool ImplementedAsGemm(const HloInstruction& hlo) { // The size of the reduction dimension should match. The shape inference // guarantees this invariant, so the check here is for programming // errors. - CHECK_EQ(lhs_shape.dimensions(1), rhs_shape.dimensions(0)); + const DotDimensionNumbers& dim_numbers = hlo.dot_dimension_numbers(); + CHECK_EQ(lhs_shape.dimensions(dim_numbers.lhs_contracting_dimensions(0)), + rhs_shape.dimensions(dim_numbers.rhs_contracting_dimensions(0))); return true; } } - if (hlo.opcode() == HloOpcode::kFusion && - hlo.fusion_kind() == HloInstruction::FusionKind::kTransposeDot && - hlo.fused_expression_root()->opcode() == HloOpcode::kDot) { - return true; - } - if (hlo.opcode() == HloOpcode::kFusion && hlo.fusion_kind() == HloInstruction::FusionKind::kOutput && hlo.fused_expression_root()->opcode() == HloOpcode::kMultiply) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 26e497762f2a6f23767c5b98f339eefdef0b7468..83d90296df8ec75c29c537a90c6292e4f4f0e0ae 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -257,8 +257,36 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype( return kernel; } +namespace { +// Computes the maximum valid unroll factor for a given instruction. +int ComputeMaxUnrollFactor(const HloInstruction* hlo) { + int max_unroll_factor = hlo->GetModule() + ->config() + .debug_options() + .xla_gpu_max_kernel_unroll_factor(); + + // Find the largest possible power of two to unroll by. + // TODO(kramerb): Make this smarter. + int64 num_elements = ShapeUtil::ElementsIn(hlo->shape()); + for (int i = max_unroll_factor; i > 1; i /= 2) { + if (num_elements % i == 0) { + return i; + } + } + + // Cannot unroll. 
+ return 1; +} +} // namespace + Status IrEmitterUnnested::DefaultAction(HloInstruction* hlo) { - thunk_sequence_->emplace_back(BuildKernelThunk(hlo)); + int unroll_factor = 1; + // Unfused elementwise operations are usually memory bound, unroll them. + if (hlo->IsElementwise()) { + unroll_factor = ComputeMaxUnrollFactor(hlo); + } + + thunk_sequence_->emplace_back(BuildKernelThunk(hlo, unroll_factor)); return IrEmitter::DefaultAction(hlo); } @@ -537,23 +565,11 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { return Status::OK(); } - int max_unroll_factor = fusion->GetModule() - ->config() - .debug_options() - .xla_gpu_max_kernel_unroll_factor(); - - // Find the largest possible power of two to unroll by. - // TODO(kramerb): Make this smarter. int unroll_factor = 1; + // TODO(kramerb): Unrolling multi-output loop fusions too. if (!fusion->IsMultiOutputFusion()) { CHECK(fusion->fusion_kind() == HloInstruction::FusionKind::kLoop); - int64 num_elements = ShapeUtil::ElementsIn(fusion->shape()); - for (int i = max_unroll_factor; i > 1; i /= 2) { - if (num_elements % i == 0) { - unroll_factor = i; - break; - } - } + unroll_factor = ComputeMaxUnrollFactor(fusion); } thunk_sequence_->emplace_back(BuildKernelThunk(fusion, unroll_factor)); @@ -2190,65 +2206,37 @@ std::unique_ptr IrEmitterUnnested::BuildGemmThunk( lhs->shape(), // The shape of LHS. rhs->shape(), // The shape of RHS. inst->shape(), // The shape of the output. - false, // Do not transpose LHS. - false, // Do not transpose RHS. 1.0, // alpha. 
inst); } if (inst->opcode() == HloOpcode::kFusion) { - if (inst->fusion_kind() == HloInstruction::FusionKind::kOutput) { - const HloInstruction* mul = inst->fused_expression_root(); - const HloInstruction* dot = mul->operand(0); - const HloInstruction* alpha = mul->operand(1); - if (dot->opcode() != HloOpcode::kDot) { - std::swap(dot, alpha); - } - DCHECK(dot->opcode() == HloOpcode::kDot); - const HloInstruction* lhs_parameter = StripTranspose(*dot->operand(0)); - const HloInstruction* rhs_parameter = StripTranspose(*dot->operand(1)); - DCHECK(lhs_parameter->opcode() == HloOpcode::kParameter && - rhs_parameter->opcode() == HloOpcode::kParameter); - const HloInstruction* lhs = - inst->operand(lhs_parameter->parameter_number()); - const HloInstruction* rhs = - inst->operand(rhs_parameter->parameter_number()); - - return MakeUnique( - GetAllocationSlice(*lhs), // The buffer assigned to LHS. - GetAllocationSlice(*rhs), // The buffer assigned to RHS. - GetAllocationSlice(*mul), // The output buffer. - lhs->shape(), // The shape of LHS. - rhs->shape(), // The shape of RHS. - inst->shape(), // The shape of the output. - dot->operand(0)->IsRank2Transpose(), // Transpose LHS. - dot->operand(1)->IsRank2Transpose(), // Transpose RHS. - alpha->literal().Get({0}), // alpha. - inst); - } else { - const HloInstruction* dot = inst->fused_expression_root(); - DCHECK(dot->opcode() == HloOpcode::kDot); - const HloInstruction* lhs_parameter = StripTranspose(*dot->operand(0)); - const HloInstruction* rhs_parameter = StripTranspose(*dot->operand(1)); - DCHECK(lhs_parameter->opcode() == HloOpcode::kParameter && - rhs_parameter->opcode() == HloOpcode::kParameter); - const HloInstruction* lhs = - inst->operand(lhs_parameter->parameter_number()); - const HloInstruction* rhs = - inst->operand(rhs_parameter->parameter_number()); - - return MakeUnique( - GetAllocationSlice(*lhs), // The buffer assigned to LHS. - GetAllocationSlice(*rhs), // The buffer assigned to RHS. 
- GetAllocationSlice(*inst), // The output buffer. - lhs->shape(), // The shape of LHS. - rhs->shape(), // The shape of RHS. - inst->shape(), // The shape of the output. - dot->operand(0)->IsRank2Transpose(), // Transpose LHS. - dot->operand(1)->IsRank2Transpose(), // Transpose RHS. - 1.0, // Alpha. - inst); + CHECK_EQ(inst->fusion_kind(), HloInstruction::FusionKind::kOutput); + const HloInstruction* mul = inst->fused_expression_root(); + const HloInstruction* dot = mul->operand(0); + const HloInstruction* alpha = mul->operand(1); + if (dot->opcode() != HloOpcode::kDot) { + std::swap(dot, alpha); } + DCHECK(dot->opcode() == HloOpcode::kDot); + const HloInstruction* lhs_parameter = StripTranspose(*dot->operand(0)); + const HloInstruction* rhs_parameter = StripTranspose(*dot->operand(1)); + DCHECK(lhs_parameter->opcode() == HloOpcode::kParameter && + rhs_parameter->opcode() == HloOpcode::kParameter); + const HloInstruction* lhs = + inst->operand(lhs_parameter->parameter_number()); + const HloInstruction* rhs = + inst->operand(rhs_parameter->parameter_number()); + + return MakeUnique( + GetAllocationSlice(*lhs), // The buffer assigned to LHS. + GetAllocationSlice(*rhs), // The buffer assigned to RHS. + GetAllocationSlice(*mul), // The output buffer. + lhs->shape(), // The shape of LHS. + rhs->shape(), // The shape of RHS. + inst->shape(), // The shape of the output. + alpha->literal().Get({0}), // alpha. + inst); } LOG(FATAL) << "Cannot build a GemmThunk for " << inst->ToString(); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index b842f480c6257c1a8bee8cdac55e29c5db6801a0..b41ab2162ab81f66e123a7055ca3ffc815c3ef88 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -38,7 +38,7 @@ namespace gpu { // // Examples of things that are not unnested computations: // -// - The reducer of a kReduce HLO. 
This is emited using IrEmitterNested. +// - The reducer of a kReduce HLO. This is emitted using IrEmitterNested. // - The body of a fusion node. IrEmitterUnenested emits the relevant code // within a kernel function using FusedIrEmitter. (FusedIrEmitter is not // really an IrEmitter, but is more an "IR generator generator".) diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc index 8c98956f1a9b2a0bb1d304a27eb8c8cfcf610784..b42767dfd500bd87ad5bd88c3f39072058b18673 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc @@ -41,9 +41,9 @@ TEST_F(StreamAssignmentTest, SequentialMatMul) { HloInstruction* z = builder.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/2, f32_2x2_, /*name=*/"z")); HloInstruction* dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, x, y)); + HloInstruction::CreateCanonicalDot(f32_2x2_, x, y)); HloInstruction* dot2 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, dot1, z)); + HloInstruction::CreateCanonicalDot(f32_2x2_, dot1, z)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build(dot2)); @@ -60,9 +60,9 @@ TEST_F(StreamAssignmentTest, ConcurrentMatMul) { HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/1, f32_2x2_, /*name=*/"y")); HloInstruction* dot1 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, x, y)); + HloInstruction::CreateCanonicalDot(f32_2x2_, x, y)); HloInstruction* dot2 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, y, x)); + HloInstruction::CreateCanonicalDot(f32_2x2_, y, x)); HloInstruction* add = builder.AddInstruction( HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kAdd, dot1, dot2)); @@ -91,24 +91,24 @@ 
TEST_F(StreamAssignmentTest, LatticeMatMul) { params.push_back(builder.AddInstruction(HloInstruction::CreateParameter( i, f32_2x2_, /*name=*/tensorflow::strings::Printf("param%d", i)))); } - HloInstruction* d00 = builder.AddInstruction(HloInstruction::CreateBinary( - f32_2x2_, HloOpcode::kDot, params[2], params[3])); + HloInstruction* d00 = builder.AddInstruction( + HloInstruction::CreateCanonicalDot(f32_2x2_, params[2], params[3])); HloInstruction* d10 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, params[1], d00)); + HloInstruction::CreateCanonicalDot(f32_2x2_, params[1], d00)); HloInstruction* d11 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d00, params[4])); + HloInstruction::CreateCanonicalDot(f32_2x2_, d00, params[4])); HloInstruction* d20 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, params[0], d10)); + HloInstruction::CreateCanonicalDot(f32_2x2_, params[0], d10)); HloInstruction* d21 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d10, d11)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d10, d11)); HloInstruction* d22 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d11, params[5])); + HloInstruction::CreateCanonicalDot(f32_2x2_, d11, params[5])); HloInstruction* d30 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d20, d21)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d20, d21)); HloInstruction* d31 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d21, d22)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d21, d22)); HloInstruction* d40 = builder.AddInstruction( - HloInstruction::CreateBinary(f32_2x2_, HloOpcode::kDot, d30, d31)); + HloInstruction::CreateCanonicalDot(f32_2x2_, d30, d31)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build(d40)); diff --git 
a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index e983fd11d4eefc2c4b3ba910a7d9ed50d637c2a3..fd56a603bb6f849b1c1f1578fe7395d9b372e2d5 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -85,7 +86,7 @@ class HeapSimulatorTracker { // size of the buffers doesn't matter, so we always return 0. We rely on // the secondary sorting criteria of DecreasingSizeRunsHeap to sort calls by // buffer id, for determinism in the tests. - auto zero_size = [](const LogicalBuffer& buffer) { return 0; }; + auto zero_size = [](const BufferValue& buffer) { return 0; }; auto algorithm = MakeUnique( MakeUnique(&actual_calls_)); result_ = HeapSimulator::Run( @@ -119,7 +120,7 @@ class HeapSimulatorTracker { // the sequence. This lets us ensure the Alloc calls are in the sequence // order. The Free calls are sorted by LogicalBuffer.id, which is at least // deterministic. 
- auto size_fn = [&reverse_position](const LogicalBuffer& buffer) { + auto size_fn = [&reverse_position](const BufferValue& buffer) { return reverse_position[buffer.instruction()]; }; auto algorithm = MakeUnique( diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index aa6860880b7a1308d3ecabb52318daa7d2852af2..1f7c1cffd324ad2f4e4cdf11046c8459b8ceb6d5 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -147,6 +147,9 @@ message HloInstructionProto { repeated int64 called_computation_ids = 38; xla.OpSharding sharding = 40; + + // Backend configuration for the instruction. Has backend-specific meaning. + string backend_config = 43; } // Serialization of HloComputation. diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 594413e88fb26e86b198d08b2e4db77fad671348..17e43c3cb826aaba584ca5652bcdcb8cb829cb36 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -347,6 +347,11 @@ std::list HloComputation::MakeEmbeddedComputationsList() // To avoid special handling of this computation, cast away const of // 'this'. 'this' is immediately removed from the post order after // construction. + // + // TODO(b/78350259): This violates const-correctness, since while the original + // computation is not returned, we still retrieve non-const computations from + // a const one. Consider also avoiding const for HloComputation, or review XLA + // for const-correctness of non-HloInstruction* types like this. 
ComputeComputationPostOrder(const_cast(this), &visited, &post_order); @@ -723,18 +728,25 @@ Status HloComputation::Accept( return this->Accept(&visitor); } -std::unique_ptr HloComputation::Clone(const string& suffix, - HloModule* module) { +std::unique_ptr HloComputation::Clone( + const string& suffix, HloModule* module, + HloInstruction::CloneMap* clone_map) { return CloneWithReplacements( /*replacements=*/std::unordered_map>(), - module, suffix); + module, clone_map, suffix); } std::unique_ptr HloComputation::CloneWithReplacements( std::unordered_map> replacements, - HloModule* module, const string& suffix) { + HloModule* module, HloInstruction::CloneMap* clone_map, + const string& suffix) { + HloInstruction::CloneMap local_clone_map; + if (clone_map == nullptr) { + clone_map = &local_clone_map; + } + // Look up instr in the replacements map, and return either the replacement, // or instr, if the replacement isn't present. // @@ -756,24 +768,19 @@ std::unique_ptr HloComputation::CloneWithReplacements( } } - std::unordered_map clone_map; std::vector> instructions; std::unique_ptr new_instr = nullptr; for (auto instr : postorder) { std::vector new_operands; for (auto operand : instr->operands()) { auto replaced_operand = replace(operand); - // If replaced_operand is null, that means 'replacements' asked us not to - // include operand in the new computation. But we can't do that, because - // operand is used by instr. 
CHECK_NE(replaced_operand, nullptr) - << "replacements map tried to eliminate a used instruction " - << operand->ToString() << ", used by " << instr->ToString(); - new_operands.push_back(FindOrDie(clone_map, replaced_operand)); + << "Replacements map specifies to leave out " << operand->ToString() + << ", but it is used by " << instr->ToString() << "."; + new_operands.push_back(FindOrDie(*clone_map, replaced_operand)); } - new_instr = - instr->CloneWithNewOperands(instr->shape(), new_operands, module); - InsertOrDie(&clone_map, instr, new_instr.get()); + new_instr = instr->CloneWithNewOperands(instr->shape(), new_operands, + module, clone_map); instructions.push_back(std::move(new_instr)); } Builder builder(name() + "." + suffix); @@ -781,27 +788,24 @@ std::unique_ptr HloComputation::CloneWithReplacements( builder.AddInstruction(std::move(instr)); } auto result = builder.Build( - /*root_instruction=*/FindOrDie(clone_map, replace(root_instruction()))); + /*root_instruction=*/FindOrDie(*clone_map, replace(root_instruction()))); // Clone control dependencies. for (auto instr : postorder) { - HloInstruction* new_instr = FindOrDie(clone_map, instr); + HloInstruction* new_instr = FindOrDie(*clone_map, instr); for (auto successor : instr->control_successors()) { auto replaced_successor = replace(successor); - - // successor may not be in clone_map, because it might have been - // removed by the replacements map. - if (replaced_successor == nullptr) { - continue; - } + CHECK_NE(replaced_successor, nullptr) + << "Replacements map specifies to leave out " << successor->ToString() + << ", but it is control-depended-on by " << instr->ToString() << "."; TF_CHECK_OK(new_instr->AddControlDependencyTo( - FindOrDie(clone_map, replaced_successor))); + FindOrDie(*clone_map, replaced_successor))); } } // We cloned the elements of 'replacements', so they're all going to be - // destroyed. HloInstructions need to be detached from their operands before + // destroyed. 
HloInstructions need to be detached from their operands before // they're destroyed, otherwise they stick around in the operands' users lists // and cause use-after-frees. for (auto& kv : replacements) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 9d3f6e9a2c2efd97681a22b6b0f6d929afc553de..98983556256cec01759f924c7d02993cbe18c891 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -291,11 +291,17 @@ class HloComputation { const std::function& visitor_func) const; // Returns a deep copy of this computation including all instructions. - // If the module pointer is not nullptr, it will be the module where - // the cloned computations will be added to (in order to support deep - // cloning). - std::unique_ptr Clone(const string& suffix = "clone", - HloModule* module = nullptr); + // + // If the module pointer is not nullptr, then the cloned computations will be + // added to this module in order to support deep cloning. Otherwise the module + // of the computation is used. + // + // If clone_map is not nullptr, then each original instruction that is cloned + // will be inserted and map to its clone. clone_map should not already contain + // any of the instructions to clone. + std::unique_ptr Clone( + const string& suffix = "clone", HloModule* module = nullptr, + HloInstruction::CloneMap* clone_map = nullptr); // Like Clone(), but if an instruction is present in replacement_map, we use // the map's value to replace that instruction in the cloned computation. @@ -305,7 +311,9 @@ class HloComputation { std::unique_ptr CloneWithReplacements( std::unordered_map> replacements, - HloModule* module = nullptr, const string& suffix = "clone"); + HloModule* module = nullptr, + HloInstruction::CloneMap* clone_map = nullptr, + const string& suffix = "clone"); // Returns true if the given instruction can be removed from the computation. 
// Parameter instructions cannot be removed without violating invariants of diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index 81cc7c4bdc1e0092dd671c741cdc0b8adec72156..16fdda8a8b9ade09ea31cda1f4cf5e8ff2c0a081 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -20,16 +20,13 @@ limitations under the License. #include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/padding.h" -#include "tensorflow/compiler/xla/service/computation_tracker.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/local_service.h" #include "tensorflow/compiler/xla/service/service.h" -#include "tensorflow/compiler/xla/service/user_computation.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/core/platform/logging.h" @@ -58,11 +55,10 @@ class HloCostAnalysisTest : public ::testing::Test { // whitebox accesses to the user computation built from the client, // as shown in the BuildHloGraph functions below. 
service_(static_cast(ClientLibrary::GetXlaService( - static_cast(client_)->platform()))), - computation_tracker_(service_->computation_tracker()) { + static_cast(client_)->platform()))) { // Create a computation for a unary user function: x => exp(x + 0.5) { - ComputationBuilder builder(client_, "add_and_exp"); + XlaBuilder builder("add_and_exp"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto half = builder.ConstantR0(0.5); builder.Exp(builder.Add(x, half)); @@ -73,7 +69,7 @@ class HloCostAnalysisTest : public ::testing::Test { // Create a computation for a binary user function: (x, y) => x + y { - ComputationBuilder builder(client_, "add"); + XlaBuilder builder("add"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Add(x, y); @@ -84,7 +80,7 @@ class HloCostAnalysisTest : public ::testing::Test { // Create a computation for a sigmoid function: x => 1 / (1 + exp(-x)) { - ComputationBuilder builder(client_, "sigmoid"); + XlaBuilder builder("sigmoid"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto one = builder.ConstantR0(1.0); builder.Div(one, builder.Add(one, builder.Exp(builder.Neg(x)))); @@ -95,7 +91,7 @@ class HloCostAnalysisTest : public ::testing::Test { // Create a computation for a binary max function: (x, y) => max (x, y) { - ComputationBuilder builder(client_, "max"); + XlaBuilder builder("max"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Max(x, y); @@ -106,7 +102,7 @@ class HloCostAnalysisTest : public ::testing::Test { // Create a computation for a binary GT function: (x, y) => x > y { - ComputationBuilder builder(client_, "gt"); + XlaBuilder builder("gt"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder.Gt(x, y); @@ 
-117,35 +113,30 @@ class HloCostAnalysisTest : public ::testing::Test { } // Build HLO graph from the given builder and return the HLO module. - std::unique_ptr BuildHloGraph(ComputationBuilder* builder) { + std::unique_ptr BuildHloGraph(XlaBuilder* builder) { auto computation_status = builder->Build(); TF_CHECK_OK(computation_status.status()); auto computation = computation_status.ConsumeValueOrDie(); - auto user_computation_status = - computation_tracker_.Resolve(computation.handle()); - TF_CHECK_OK(user_computation_status.status()); - auto user_computation = user_computation_status.ConsumeValueOrDie(); - VersionedComputationHandle versioned_handle = - user_computation->GetVersionedHandle(); - return std::move( - computation_tracker_.BuildHloModule(versioned_handle, HloModuleConfig()) - .ValueOrDie()); + auto config = HloModule::CreateModuleConfigFromProto(computation.proto(), + DebugOptions()) + .ConsumeValueOrDie(); + return HloModule::CreateFromProto(computation.proto(), config) + .ConsumeValueOrDie(); } Client* client_; Service* service_; - const ComputationTracker& computation_tracker_; // User computations used for higher order operations (e.g., Map, Reduce). 
- Computation add_; - Computation add_and_exp_; - Computation sigmoid_; - Computation max_; - Computation gt_; + XlaComputation add_; + XlaComputation add_and_exp_; + XlaComputation sigmoid_; + XlaComputation max_; + XlaComputation gt_; }; TEST_F(HloCostAnalysisTest, MatrixMultiply) { - ComputationBuilder builder(client_, "matrix_multiply"); + XlaBuilder builder("matrix_multiply"); auto lhs = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10, 5}), "lhs"); auto rhs = builder.Parameter(1, ShapeUtil::MakeShape(F32, {5, 30}), "rhs"); auto result = builder.Dot(lhs, rhs); @@ -167,7 +158,7 @@ TEST_F(HloCostAnalysisTest, MatrixMultiply) { } TEST_F(HloCostAnalysisTest, Map) { - ComputationBuilder builder(client_, "map"); + XlaBuilder builder("map"); auto input = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10}), "in"); auto result = builder.Map({input}, add_and_exp_, {0}); @@ -184,7 +175,7 @@ TEST_F(HloCostAnalysisTest, Map) { } TEST_F(HloCostAnalysisTest, Convolution) { - ComputationBuilder builder(client_, "convolution"); + XlaBuilder builder("convolution"); auto input = builder.Parameter( 0, ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/10, @@ -213,7 +204,7 @@ TEST_F(HloCostAnalysisTest, Convolution) { } TEST_F(HloCostAnalysisTest, Reduce) { - ComputationBuilder builder(client_, "reduce"); + XlaBuilder builder("reduce"); auto input = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10, 20}), "input"); auto result = @@ -231,7 +222,7 @@ TEST_F(HloCostAnalysisTest, Reduce) { } TEST_F(HloCostAnalysisTest, ReduceWindow) { - ComputationBuilder builder(client_, "reduce_window"); + XlaBuilder builder("reduce_window"); auto input = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10, 20}), "input"); auto result = builder.ReduceWindow(input, builder.ConstantR0(0), add_, @@ -248,7 +239,7 @@ TEST_F(HloCostAnalysisTest, ReduceWindow) { } TEST_F(HloCostAnalysisTest, SelectAndScatter) { - ComputationBuilder builder(client_, "select_and_scatter"); + XlaBuilder 
builder("select_and_scatter"); auto operand = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10, 20}), "input"); auto source = @@ -269,7 +260,7 @@ TEST_F(HloCostAnalysisTest, SelectAndScatter) { } TEST_F(HloCostAnalysisTest, Broadcast) { - ComputationBuilder b(client_, "broadcast"); + XlaBuilder b("broadcast"); b.Broadcast(b.ConstantR0(42), {10, 7}); auto hlo_module = BuildHloGraph(&b); HloCostAnalysis analysis(ShapeSize); @@ -280,7 +271,7 @@ TEST_F(HloCostAnalysisTest, Broadcast) { // Calculates the computation cost of a graph with more than one HLO node. TEST_F(HloCostAnalysisTest, FullyConnectedForward) { - ComputationBuilder builder(client_, "fully_connected_forward"); + XlaBuilder builder("fully_connected_forward"); auto input = builder.Parameter(0, ShapeUtil::MakeShape(F32, {10, 5}), "input"); auto weight = @@ -305,7 +296,7 @@ TEST_F(HloCostAnalysisTest, FullyConnectedForward) { TEST_F(HloCostAnalysisTest, MatmulAndConvolutionCanBeTheSameComputation) { HloCostAnalysis conv_analysis(ShapeSize); { - ComputationBuilder builder(client_, "conv_looking_matmul"); + XlaBuilder builder("conv_looking_matmul"); auto lhs = builder.Parameter(0, ShapeUtil::MakeShape(F32, {64, 64, 1, 1}), "input"); auto rhs = builder.Parameter(1, ShapeUtil::MakeShape(F32, {64, 64, 1, 1}), @@ -318,7 +309,7 @@ TEST_F(HloCostAnalysisTest, MatmulAndConvolutionCanBeTheSameComputation) { HloCostAnalysis matmul_analysis(ShapeSize); { - ComputationBuilder builder(client_, "matmul"); + XlaBuilder builder("matmul"); auto lhs = builder.Parameter(0, ShapeUtil::MakeShape(F32, {64, 64}), "input"); auto rhs = @@ -427,7 +418,7 @@ TEST_F(FusionCostAnalysis, NoLayout) { TEST_F(HloCostAnalysisTest, TupleCost) { HloCostAnalysis analysis(ShapeSize); { - ComputationBuilder builder(client_, "matmul"); + XlaBuilder builder("matmul"); auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {123}), "x"); auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {42}), "y"); auto tuple = builder.Tuple({x, y}); @@ 
-443,7 +434,7 @@ TEST_F(HloCostAnalysisTest, TupleCost) { } TEST_F(HloCostAnalysisTest, BaseDilatedConvolution) { - ComputationBuilder builder(client_, "BaseDilatedConvolution"); + XlaBuilder builder("BaseDilatedConvolution"); auto input = builder.Parameter( 0, ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/10, @@ -458,7 +449,7 @@ TEST_F(HloCostAnalysisTest, BaseDilatedConvolution) { auto result = builder.ConvGeneralDilated( input, kernel, /*window_strides=*/{1, 1}, /*padding=*/{{1, 1}, {1, 1}}, /*lhs_dilation=*/{3, 5}, /*rhs_dilation=*/{7, 11}, - ComputationBuilder::CreateDefaultConvDimensionNumbers(2)); + XlaBuilder::CreateDefaultConvDimensionNumbers(2)); // Run HLO cost analysis. auto hlo_module = BuildHloGraph(&builder); diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 9a89888480b8c79dfb1f79a50e9686bf45aa49b3..ed3b654851ab9311ef1fa8278b7acfa987bb294c 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -269,7 +269,7 @@ StatusOr BroadcastZeros( StatusOr> CreateComputationWithSignature( ArraySlice domain, const Shape& range, tensorflow::StringPiece name) { - HloComputation::Builder b(name.ToString()); + HloComputation::Builder b{std::string(name)}; int64 param_idx = 0; for (const Shape* param_shape : domain) { b.AddInstruction(HloInstruction::CreateParameter( diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 8cf94123b714031455088047b4620001804c6b43..e7425c8ba790d8b18ba35bc3c3b9227b7a750e7e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_query.h" @@ -42,7 +43,6 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" @@ -53,19 +53,6 @@ namespace { using tensorflow::gtl::ArraySlice; using tensorflow::gtl::FlatSet; -using tensorflow::gtl::optional; - -template -struct is_complex_t : public std::false_type {}; - -template <> -struct is_complex_t : public std::true_type {}; - -template -struct is_complex64_t : public std::false_type {}; - -template <> -struct is_complex64_t : public std::true_type {}; template StatusOr> Compare(const Shape& shape, HloOpcode opcode, @@ -147,2092 +134,48 @@ StatusOr> Compare( return std::move(result); } -template -StatusOr> ElementWiseUnaryOpImpl( - HloInstruction* instruction, - const std::function& unary_op, - const Literal& operand_literal) { - const auto shape = instruction->shape(); - const auto* operand = instruction->operand(0); - - // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast is - // removed. 
- if (!ShapeUtil::SameDimensions(shape, operand->shape())) { - return Unimplemented( - "Implicit broadcasting is currently unsupported in HLO evaluator " - "Shape Mismatch: %s vs %s", - ShapeUtil::HumanString(shape).c_str(), - ShapeUtil::HumanString(operand->shape()).c_str()); - } - - auto result = Literal::CreateFromShape(shape); - - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - return unary_op(operand_literal.Get(multi_index)); - })); - return std::move(result); -} - -// For one particular placement of a window in a base shape (the placement is -// represented as `window_count_index`), iterates inside the window. Translates -// the window index into base index. If the base index is within bound, call `f` -// with the base index. -void IterateThroughWindow( - const Shape& window_shape, const Window& window, const Shape& base_shape, - const ArraySlice& window_count_index, - const std::function&)>& f) { - const int64 rank = ShapeUtil::Rank(base_shape); - DimensionVector window_index(rank); - std::fill(window_index.begin(), window_index.end(), 0); - do { - std::vector base_index(rank); - bool out_of_bound = false; - for (int64 i = 0; i < rank; ++i) { - base_index[i] = window_count_index[i] * window.dimensions(i).stride() + - window_index[i] - window.dimensions(i).padding_low(); - if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) { - out_of_bound = true; - break; - } - } - if (!out_of_bound) { - f(base_index); - } - } while (IndexUtil::BumpIndices(window_shape, &window_index)); -} - -// Creates a vector of multipliers which can be used to create a linear index -// into shape. -// -// Given the multidimensional index {i1, ..., iN} and -// M = MakeDimMultipliers(shape), the corresponding linear index LI is simply -// -// LI = i1 * M[1] + i2 * M[2] + ... + iN * M[N]. -// -// This lets you calculate LI given the multidimensional indices in any order. 
-DimensionVector MakeDimMultipliers(const Shape& shape) { - DimensionVector v(ShapeUtil::Rank(shape)); - int64 scale = 1; - for (auto dim : LayoutUtil::MinorToMajor(shape)) { - v[dim] = scale; - scale *= shape.dimensions(dim); - } - return v; -} - } // namespace -template -class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { - public: - explicit TypedVisitor(HloEvaluator* p) : parent_(p) {} - - // The following higher-order functions convert a function with ElementwiseT - // to a function with ReturnT. - std::function ConvertUnaryFunction( - const std::function& unary_op) { - return [&unary_op](ReturnT arg) { - return static_cast(unary_op(static_cast(arg))); - }; - } - std::function ConvertBinaryFunction( - const std::function& - binary_op) { - return [&binary_op](ReturnT arg1, ReturnT arg2) { - return static_cast(binary_op(static_cast(arg1), - static_cast(arg2))); - }; - } - std::function ConvertTernaryFunction( - const std::function& ternary_op) { - return [&ternary_op](ReturnT arg1, ReturnT arg2, ReturnT arg3) { - return static_cast(ternary_op(static_cast(arg1), - static_cast(arg2), - static_cast(arg3))); - }; - } - - Status DefaultAction(HloInstruction* hlo_instruction) override { - return Unimplemented("unhandled HLO ops for HloEvaluator: %s.", - HloOpcodeString(hlo_instruction->opcode()).c_str()); - } - - // TODO(b/35950897): many of the stl functions used in the handlers are not - // overloaded for every XLA primitive types. 
- - template ::value>::type* = - nullptr> - Status HandleAbs(HloInstruction* abs) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](NativeT elem_operand) { - return elem_operand; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleAbs(HloInstruction* abs) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](NativeT elem_operand) { - return std::abs(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleAbs(HloInstruction* abs) { - const Literal& operand_literal = - parent_->GetEvaluatedLiteralFor(abs->operand(0)); - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[abs], - (ElementWiseUnaryOpImpl( - abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, - operand_literal))); - - return Status::OK(); - } - - Status HandleAbs(HloInstruction* abs) override { - // If the operand is of C64 type, the return type of abs will be F32. - // However, ElementwiseT would still be the return type, F32, and thus - // specifying the ElementwiseT explicitly as C64 is needed below. 
- if (abs->operand(0)->shape().element_type() == C64) { - return HandleAbs(abs); - } - return HandleAbs(abs); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleRound(HloInstruction* round) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[round], - ElementWiseUnaryOp(round, [](ElementwiseT elem_operand) { - return std::round(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleRound(HloInstruction* round) { - return InvalidArgument("Unsupported type for Round"); - } - - Status HandleRound(HloInstruction* round) override { - return HandleRound(round); - } - - Status HandleBroadcast(HloInstruction* broadcast) override { - parent_->evaluated_[broadcast] = - Literal::CreateFromShape(broadcast->shape()); - auto output = parent_->evaluated_[broadcast].get(); - const Literal& operand_to_broadcast = - parent_->GetEvaluatedLiteralFor(broadcast->operand(0)); - std::vector broadcast_indices( - ShapeUtil::Rank(broadcast->operand(0)->shape()), 0); - - TF_RET_CHECK(broadcast->dimensions().size() == - ShapeUtil::Rank(operand_to_broadcast.shape())) - << "broadcast dimensions is of size: " << broadcast->dimensions().size() - << " and rank of operand_to_broadcast is: " - << ShapeUtil::Rank(operand_to_broadcast.shape()); - // Checks that operand's dimensions are the same as the broadcast's - // dimensions along the dimensions to be broadcasted. 
- for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) == - operand_to_broadcast.shape().dimensions(i)); - } - - return output->Populate([&](ArraySlice multi_index) { - for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; - } - return operand_to_broadcast.Get(broadcast_indices); - }); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleCeil(HloInstruction* ceil) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil], - ElementWiseUnaryOp(ceil, [](ElementwiseT elem_operand) { - return std::ceil(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleCeil(HloInstruction* ceil) { - return InvalidArgument("Unsupported type for Ceil"); - } - - Status HandleCeil(HloInstruction* ceil) override { - return HandleCeil(ceil); - } - - Status HandleConvert(HloInstruction* convert) override { - const HloInstruction* operand = convert->operand(0); - TF_RET_CHECK(ShapeUtil::SameDimensions(operand->shape(), convert->shape())); - TF_ASSIGN_OR_RETURN(std::unique_ptr result, - parent_->GetEvaluatedLiteralFor(operand).Convert( - convert->shape().element_type())); - - if (LayoutUtil::LayoutsInShapesEqual(result->shape(), convert->shape())) { - parent_->evaluated_[convert] = std::move(result); - } else { - parent_->evaluated_[convert] = - result->Relayout(convert->shape().layout()); - } - return Status::OK(); - } - - Status HandleBitcastConvert(HloInstruction* convert) override { - const HloInstruction* operand = convert->operand(0); - TF_RET_CHECK(ShapeUtil::SameDimensions(operand->shape(), convert->shape())); - TF_ASSIGN_OR_RETURN(std::unique_ptr result, - parent_->GetEvaluatedLiteralFor(operand).BitcastConvert( - convert->shape().element_type())); - - if 
(LayoutUtil::LayoutsInShapesEqual(result->shape(), convert->shape())) { - parent_->evaluated_[convert] = std::move(result); - } else { - parent_->evaluated_[convert] = - result->Relayout(convert->shape().layout()); - } - return Status::OK(); - } - - Status HandleExp(HloInstruction* exp) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[exp], - ElementWiseUnaryOp(exp, [](ElementwiseT elem_operand) { - return std::exp(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleFloor(HloInstruction* floor) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[floor], - ElementWiseUnaryOp(floor, [](ElementwiseT elem_operand) { - return std::floor(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleFloor(HloInstruction* floor) { - return InvalidArgument("Unsupported type for Floor"); - } - - Status HandleFloor(HloInstruction* floor) override { - return HandleFloor(floor); - } - - Status HandleLog(HloInstruction* log) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[log], - ElementWiseUnaryOp(log, [](ElementwiseT elem_operand) { - return std::log(elem_operand); - })); - return Status::OK(); - } - - template ::value && - !std::is_same::value>::type* = nullptr> - Status HandleNot(HloInstruction* not_) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], - ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) { - return ~elem_operand; - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleNot(HloInstruction* not_) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], - ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) { - return !elem_operand; - })); - return Status::OK(); - } - - template ::value>::type* = - nullptr> - Status HandleNot(HloInstruction* not_) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], - ElementWiseUnaryOp(not_, [](ElementwiseT 
elem_operand) { - return !elem_operand; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleNot(HloInstruction* not_) { - return InvalidArgument("Unsupported type for Not"); - } - - Status HandleNot(HloInstruction* not_) override { - return HandleNot(not_); - } - - template ::value && - !std::is_floating_point::value>::type* = nullptr> - Status HandleNegate(HloInstruction* negate) { - using type = typename std::make_unsigned::type; - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[negate], - ElementWiseUnaryOp(negate, [](ElementwiseT elem_operand) { - return NativeT(-type(elem_operand)); - })); - return Status::OK(); - } - - template ::value || - std::is_floating_point::value>::type* = nullptr> - Status HandleNegate(HloInstruction* negate) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[negate], - ElementWiseUnaryOp( - negate, [](ElementwiseT elem_operand) { return -elem_operand; })); - return Status::OK(); - } - - Status HandleNegate(HloInstruction* negate) override { - return HandleNegate(negate); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleSign(HloInstruction* sign) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign], - ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) { - return (ElementwiseT(0) < elem_operand) - - (elem_operand < ElementwiseT(0)); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleSign(HloInstruction* sign) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign], - ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) { - auto abs_val = std::abs(elem_operand); - return 0 == abs_val ? 
ElementwiseT(0) - : elem_operand / abs_val; - })); - return Status::OK(); - } - - Status HandleSign(HloInstruction* sign) override { - return HandleSign(sign); - } - - template ::value>::type* = nullptr> - Status HandleAtan2(HloInstruction* atan2) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[atan2], - ElementWiseBinaryOp(atan2, [](ElementwiseT lhs_elem, - ElementwiseT rhs_elem) { - return std::atan2(lhs_elem, rhs_elem); - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleAtan2(HloInstruction* atan2) { - return InvalidArgument("Unsupported type for Atan2"); - } - - Status HandleAtan2(HloInstruction* atan2) override { - return HandleAtan2(atan2); - } - - Status HandleTanh(HloInstruction* tanh) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[tanh], - ElementWiseUnaryOp(tanh, [](ElementwiseT elem_operand) { - return std::tanh(elem_operand); - })); - return Status::OK(); - } - - template ::value && - !std::is_floating_point::value>::type* = nullptr> - Status HandleMultiply(HloInstruction* multiply) { - using type = typename std::make_unsigned::type; - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[multiply], - ElementWiseBinaryOp(multiply, - [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { - return NativeT(type(lhs_elem) * type(rhs_elem)); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value || - std::is_floating_point::value || - is_complex_t::value>::type* = nullptr> - Status HandleMultiply(HloInstruction* multiply) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[multiply], - ElementWiseBinaryOp(multiply, - [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { - return lhs_elem * rhs_elem; - })); - return Status::OK(); - } - - Status HandleMultiply(HloInstruction* multiply) override { - return HandleMultiply(multiply); - } - - Status HandleSubtract(HloInstruction* subtract) override { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[subtract], - ElementWiseBinaryOp(subtract, - 
[](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { - return lhs_elem - rhs_elem; - })); - return Status::OK(); - } - - Status HandleAdd(HloInstruction* add) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[add], - ElementWiseBinaryOp(add, [](ElementwiseT lhs_elem, - ElementwiseT rhs_elem) { - return lhs_elem + rhs_elem; - })); - return Status::OK(); - } - - Status HandleDivide(HloInstruction* divide) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[divide], - ElementWiseBinaryOp(divide, [](ElementwiseT lhs_elem, - ElementwiseT rhs_elem) { - return lhs_elem / rhs_elem; - })); - return Status::OK(); - } - - template ::value>::type* = - nullptr> - Status HandleMaximum(HloInstruction* maximum) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[maximum], - ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { - return std::max(lhs, rhs); - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleMaximum(HloInstruction* maximum) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[maximum], - ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { - return ((lhs >= rhs) || std::isnan(lhs)) ? 
lhs : rhs; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleMaximum(HloInstruction* maximum) { - return InvalidArgument("Unsupported type for Maximum"); - } - - Status HandleMaximum(HloInstruction* maximum) override { - return HandleMaximum(maximum); - } - - template ::value>::type* = - nullptr> - Status HandleMinimum(HloInstruction* minimum) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum], - ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, - ElementwiseT rhs_el) { - return std::min(lhs_el, rhs_el); - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleMinimum(HloInstruction* minimum) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[minimum], - ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, - ElementwiseT rhs_el) { - return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleMinimum(HloInstruction* minimum) { - return InvalidArgument("Unsupported type for Minimum"); - } - - Status HandleMinimum(HloInstruction* minimum) override { - return HandleMinimum(minimum); - } - - Status HandlePower(HloInstruction* power) override { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[power], - ElementWiseBinaryOp(power, [](ElementwiseT lhs_el, - ElementwiseT rhs_el) { - return std::pow(lhs_el, rhs_el); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleRemainder(HloInstruction* remainder) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[remainder], - ElementWiseBinaryOp(remainder, [](ElementwiseT lhs_el, - ElementwiseT rhs_el) { - return std::fmod(lhs_el, rhs_el); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleRemainder(HloInstruction* 
remainder) { - return InvalidArgument("Unsupported type for Remainder"); - } - - Status HandleRemainder(HloInstruction* remainder) override { - return HandleRemainder(remainder); - } - - template ::value>::type* = - nullptr> - Status HandleAnd(HloInstruction* and_) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[and_], - ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return lhs_el & rhs_el; - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleAnd(HloInstruction* and_) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[and_], - ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return lhs_el && rhs_el; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleAnd(HloInstruction* and_) { - return InvalidArgument("Unsupported type for And"); - } - - Status HandleAnd(HloInstruction* and_) override { - return HandleAnd(and_); - } - - template ::value>::type* = - nullptr> - Status HandleOr(HloInstruction* or_) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[or_], - ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return lhs_el | rhs_el; - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleOr(HloInstruction* or_) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[or_], - ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return lhs_el || rhs_el; - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleOr(HloInstruction* or_) { - return InvalidArgument("Unsupported type for Or"); - } - - Status HandleOr(HloInstruction* or_) override { - return HandleOr(or_); - } - - template ::value && - !std::is_same::value>::type* = nullptr> - Status HandleShiftLeft(HloInstruction* shl) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[shl], - ElementWiseBinaryOp(shl, 
[](NativeT lhs_elem, NativeT rhs_elem) { - return IsShiftOutOfBounds(rhs_elem) ? 0 - : (lhs_elem << rhs_elem); - })); - return Status::OK(); - } - - template ::value || - std::is_same::value>::type* = - nullptr> - Status HandleShiftLeft(HloInstruction*) { - return InvalidArgument("Unsupported type for ShiftLeft"); - } - - Status HandleShiftLeft(HloInstruction* shl) override { - return HandleShiftLeft(shl); - } - template ::value && - !std::is_same::value>::type* = nullptr> - Status HandleShiftRightArithmetic(HloInstruction* shr) { - typedef typename std::make_signed::type SignedT; - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[shr], - ElementWiseBinaryOp(shr, [](NativeT lhs_elem, NativeT rhs_elem) { - SignedT lhs_signed = static_cast(lhs_elem); - if (IsShiftOutOfBounds(rhs_elem)) { - return lhs_signed < 0 ? static_cast(-1) : 0; - } else { - return lhs_signed >> rhs_elem; - } - })); - return Status::OK(); - } - - template ::value || - std::is_same::value>::type* = - nullptr> - Status HandleShiftRightArithmetic(HloInstruction*) { - return InvalidArgument("Unsupported type for ShiftRightArithmetic"); - } - - Status HandleShiftRightArithmetic(HloInstruction* shra) override { - return HandleShiftRightArithmetic(shra); - } - - template ::value && - !std::is_same::value>::type* = nullptr> - Status HandleShiftRightLogical(HloInstruction* shr) { - typedef typename std::make_unsigned::type UnsignedT; - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[shr], - ElementWiseBinaryOp(shr, [](NativeT lhs_elem, NativeT rhs_elem) { - // If shift amount is greater than the number of bits, then return 0. 
- if (IsShiftOutOfBounds(rhs_elem)) { - return static_cast(0); - } - return static_cast(static_cast(lhs_elem) >> - rhs_elem); - })); - return Status::OK(); - } - - template ::value || - std::is_same::value>::type* = - nullptr> - Status HandleShiftRightLogical(HloInstruction*) { - return InvalidArgument("Unsupported type for ShiftRightLogical"); - } - - Status HandleShiftRightLogical(HloInstruction* shrl) override { - return HandleShiftRightLogical(shrl); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleClamp(HloInstruction* clamp) { - std::function - clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { - return std::fmin(high, std::fmax(value, low)); - }; - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[clamp], - ElementwiseTernaryOp(clamp, - std::move(ConvertTernaryFunction(clamp_op)))); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> - Status HandleClamp(HloInstruction*) { - return InvalidArgument("Unsupported type for Clamp"); - } - - Status HandleClamp(HloInstruction* clamp) override { - return HandleClamp(clamp); - } - - Status HandleSelect(HloInstruction* select) override { - CHECK(!ShapeUtil::IsScalar(select->operand(0)->shape())); - CHECK(!ShapeUtil::IsTuple(select->shape())); - std::function select_op = - [](bool pred, ReturnT on_true, ReturnT on_false) { - if (pred) { - return on_true; - } - return on_false; - }; - TF_ASSIGN_OR_RETURN(parent_->evaluated_[select], - ElementwiseTernaryOp(select, std::move(select_op))); - return Status::OK(); - } - - Status HandleReverse(HloInstruction* reverse) override { - const auto result_shape = reverse->shape(); - const auto reverse_dimensions = reverse->dimensions(); - - auto operand = reverse->operand(0); - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferReverseShape(operand->shape(), - reverse_dimensions)); - - 
TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) - << "return shape set to: " << ShapeUtil::HumanString(result_shape) - << " but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - auto result = Literal::CreateFromShape(result_shape); - - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice out_index) { - std::vector from_index(out_index.begin(), out_index.end()); - for (const int64 dim : reverse_dimensions) { - from_index[dim] = result_shape.dimensions(dim) - 1 - out_index[dim]; - } - return operand_literal.Get(from_index); - })); - - parent_->evaluated_[reverse] = std::move(result); - return Status::OK(); - } - - Status HandleConvolution(HloInstruction* conv) override { - auto lhs = conv->operand(0); - auto rhs = conv->operand(1); - const auto& window = conv->window(); - const Shape& result_shape = conv->shape(); - const Shape& lhs_shape = lhs->shape(); - const Shape& rhs_shape = rhs->shape(); - - TF_CHECK_OK(ShapeUtil::ValidateShape(lhs_shape)); - TF_CHECK_OK(ShapeUtil::ValidateShape(rhs_shape)); - CHECK(ShapeUtil::IsArray(lhs_shape)); - CHECK(ShapeUtil::IsArray(rhs_shape)); - CHECK(ShapeUtil::SameElementType(lhs_shape, rhs_shape)); - CHECK(ShapeUtil::SameElementType(lhs_shape, result_shape)); - - const auto& dnums = conv->convolution_dimension_numbers(); - const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); - CHECK_EQ(num_spatial_dims, dnums.input_spatial_dimensions_size()); - CHECK_EQ(num_spatial_dims, dnums.kernel_spatial_dimensions_size()); - CHECK_GE(num_spatial_dims, 0); - CHECK_EQ(window.dimensions_size(), num_spatial_dims); - - const auto lhs_rank = ShapeUtil::Rank(lhs_shape); - const auto rhs_rank = ShapeUtil::Rank(rhs_shape); - - CHECK_EQ(num_spatial_dims + 2, lhs_rank); - CHECK_EQ(num_spatial_dims + 2, rhs_rank); - - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - 
ShapeInference::InferConvolveShape(lhs_shape, rhs_shape, - window, dnums)); - CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) - << "return shape set to: " << ShapeUtil::HumanString(result_shape) - << " but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - - std::vector window_dimension_sizes; - for (auto i : dnums.kernel_spatial_dimensions()) { - window_dimension_sizes.push_back(ShapeUtil::GetDimension(rhs_shape, i)); - } - - const Shape& window_shape = - ShapeUtil::MakeShape(rhs_shape.element_type(), window_dimension_sizes); - - DimensionVector lhs_dim_multipliers = MakeDimMultipliers(lhs_shape); - DimensionVector rhs_dim_multipliers = MakeDimMultipliers(rhs_shape); - - auto lhs_literal_data = lhs_literal.data(); - auto rhs_literal_data = rhs_literal.data(); - - auto func = [&window_shape, &dnums, &lhs_shape, &rhs_shape, &window, - &lhs_dim_multipliers, &rhs_dim_multipliers, lhs_literal_data, - rhs_literal_data](ArraySlice out_index) { - // Dimension number applicable for input (lhs). - const int64 input_batch_dim = dnums.input_batch_dimension(); - const int64 input_z_dim = dnums.input_feature_dimension(); - // Dimension number applicable for kernel (rhs). - const int64 kernel_input_z_dim = dnums.kernel_input_feature_dimension(); - const int64 kernel_output_z_dim = dnums.kernel_output_feature_dimension(); - // Dimension number applicable for output. - const int64 output_batch_dim = dnums.output_batch_dimension(); - const int64 output_z_dim = dnums.output_feature_dimension(); - - const int64 z_size = ShapeUtil::GetDimension(lhs_shape, input_z_dim); - - ElementwiseT result_val = static_cast(0); - DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size(), - 0); - - // Convolve input feature with kernel. 
- do { - for (int64 iz = 0; iz < z_size; ++iz) { - int64 lhs_linear_index = 0; - lhs_linear_index += out_index[output_batch_dim] * - lhs_dim_multipliers[input_batch_dim]; - lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; - - int64 rhs_linear_index = 0; - rhs_linear_index += out_index[output_z_dim] * - rhs_dim_multipliers[kernel_output_z_dim]; - rhs_linear_index += iz * rhs_dim_multipliers[kernel_input_z_dim]; - - // Find corresponding spatial dimension index for input (lhs). - for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { - // Spatial dimension number for input (lhs) and output. - const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); - const int64 output_spatial_dim = - dnums.output_spatial_dimensions(ki); - - // Calculate lhs (input) index without taking base dilation into - // account. - const auto& window_dim = window.dimensions(ki); - const int64 undilated_index = - out_index[output_spatial_dim] * window_dim.stride() - - window_dim.padding_low() + - rhs_spatial_index[ki] * window_dim.window_dilation(); - // Skip if the lhs (input) index is to be dilated. As an - // optimization, skip this mod if there's no dilation. - if (window_dim.base_dilation() > 1 && - undilated_index % window_dim.base_dilation() != 0) { - goto cnt; - } - - // Calculate the actual lhs (input) index after dilation. As an - // optimization, skip this integer divide if there's no dilation. - int64 lhs_spatial_index; - if (window_dim.base_dilation() > 1) { - lhs_spatial_index = undilated_index / window_dim.base_dilation(); - } else { - lhs_spatial_index = undilated_index; - } - lhs_linear_index += - lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; - - // Skip if input index is not in bounds. - if (!(lhs_spatial_index >= 0 && - lhs_spatial_index < - lhs_shape.dimensions(input_spatial_dim))) { - goto cnt; - } - - rhs_linear_index += - (window_dim.window_reversal() - ? 
((window_dim.size() - 1) - rhs_spatial_index[ki]) - : rhs_spatial_index[ki]) * - rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; - } - - result_val += - static_cast(lhs_literal_data[lhs_linear_index]) * - static_cast(rhs_literal_data[rhs_linear_index]); - } - cnt : {} - } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index)); - - return static_cast(result_val); - }; - - auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->PopulateParallel(func)); - - parent_->evaluated_[conv] = std::move(result); - return Status::OK(); - } - - Status HandleDot(HloInstruction* dot) override { - auto lhs = dot->operand(0); - auto rhs = dot->operand(1); - CHECK(ShapeUtil::IsArray(dot->shape())); - CHECK(ShapeUtil::IsArray(lhs->shape())); - CHECK(ShapeUtil::IsArray(rhs->shape())); - - const auto& dnums = dot->dot_dimension_numbers(); - - const auto lhs_rank = ShapeUtil::Rank(lhs->shape()); - const auto rhs_rank = ShapeUtil::Rank(rhs->shape()); - - CHECK(ShapeUtil::SameElementType(lhs->shape(), rhs->shape())); - CHECK(ShapeUtil::SameElementType(lhs->shape(), dot->shape())); - - // There must be 1 and only 1 Contracting dimension for lhs and rhs. - CHECK_EQ(dnums.lhs_contracting_dimensions_size(), 1); - CHECK_EQ(dnums.rhs_contracting_dimensions_size(), 1); - const int64 lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); - const int64 rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); - // Contracted dimension sizes must be the same. 
- CHECK_EQ(lhs->shape().dimensions(lhs_contracting_dimension), - rhs->shape().dimensions(rhs_contracting_dimension)) - << "lhs contracted dimension: " - << lhs->shape().dimensions(lhs_contracting_dimension) - << " rhs contracted dimension: " - << rhs->shape().dimensions(rhs_contracting_dimension); - const int64 contracted_dimension_size = - lhs->shape().dimensions(lhs_contracting_dimension); - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - - auto result = Literal::CreateFromShape(dot->shape()); - - CHECK_EQ(dnums.lhs_batch_dimensions_size(), - dnums.rhs_batch_dimensions_size()); - - std::vector lhs_non_contracting_dims; - for (int64 i = 0; i < lhs_rank; i++) { - if (i != lhs_contracting_dimension) { - lhs_non_contracting_dims.push_back(i); - } - } - - std::vector rhs_non_batch_non_contracting_dims; - FlatSet batch_dims_set(dnums.rhs_batch_dimensions().begin(), - dnums.rhs_batch_dimensions().end()); - for (int64 i = 0; i < rhs_rank; i++) { - if (i != rhs_contracting_dimension && batch_dims_set.count(i) == 0) { - rhs_non_batch_non_contracting_dims.push_back(i); - } - } - - const int64 batch_dim_size = dnums.lhs_batch_dimensions_size(); - const int64 lhs_non_contracting_size = lhs_non_contracting_dims.size(); - - DimensionVector lhs_index(lhs_rank); - DimensionVector rhs_index(rhs_rank); - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice result_index) { - ElementwiseT result_val = static_cast(0); - - // Find the corresponding non-contracting indices for lhs and rhs. - // - // For `result_index`, its batch dimension, if exists, will be at the - // same dimension as the batch dimension of lhs and rhs. More - // specifically: - // - For lhs, the non-contracting dimensions, including the batch - // dimension have the same index as the `result_index`. 
- // - For rhs, the batch dimension is set seperately from other - // non-contracting dimensions, since these other non-contracting - // dimensions in rhs follow the non-contracting dimensions of lhs in - // the resulting index. - // - // As an example, for a resulting index: - // result_index [result_batch, result_x, result_y] - // the effecting lhs and rhs indices are: - // lhs [result_batch, lhs_non_contracting_dim, contracting_dim - // rhs [result_batch, contracting_dim, rhs_non_contracting_dim] - // `result_x` is only affected by the lhs_non_contracting_dim and - // likewise `result_y` only depends on rhs_non_contracting_dim. - // - // so we can look up the lhs and rhs indices by: - // - // lhs: - // batch index is the same as `result_batch`. - // non-contracting dimension is the same as - // result_index[lhs_non_contracting_dim] - // rhs: - // batch index: the same as `result_batch`. - // non-contracting dimension index: *not* the same as - // result_index[rhs_non_contractng_dim], since the - // non-contracting dimensions of lhs are included in the - // result_index first. Instead, the non_contracting_dim of rhs must - // be calculated as following: - // lhs_non_contracting_dimensions_size + - // (rhs_non_batch_non_contracting_dim - batch_dim_size) - 1 - // - // Note that (rhs_non_batch_contracting_dim - batch_dim_size) is - // the index offset to the result_index that only depends on - // the non_batch and non-contracting dimensions of rhs. -1 at the - // end translates size to index. - for (auto i : lhs_non_contracting_dims) { - lhs_index[i] = result_index[i]; - } - for (auto i : dnums.rhs_batch_dimensions()) { - rhs_index[i] = result_index[i]; - } - for (auto i : rhs_non_batch_non_contracting_dims) { - const int64 rhs_non_batch_non_contracting_dim = - lhs_non_contracting_size + (i - batch_dim_size) - 1; - rhs_index[i] = result_index[rhs_non_batch_non_contracting_dim]; - } - - // Accumulates resulting product along the contracted dimension. 
- for (int64 i = 0; i < contracted_dimension_size; ++i) { - lhs_index[lhs_contracting_dimension] = i; - rhs_index[rhs_contracting_dimension] = i; - - result_val += - static_cast(lhs_literal.Get(lhs_index)) * - static_cast(rhs_literal.Get(rhs_index)); - } - - return static_cast(result_val); - })); - - parent_->evaluated_[dot] = std::move(result); - return Status::OK(); - } - - Status HandlePad(HloInstruction* pad) override { - CHECK(!ShapeUtil::IsTuple(pad->operand(0)->shape())); - // Padding value must be scalar. - CHECK(ShapeUtil::IsScalar(pad->operand(1)->shape())); - CHECK_EQ(ShapeUtil::Rank(pad->operand(0)->shape()), - pad->padding_config().dimensions_size()); - - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferPadShape( - /*operand_shape=*/pad->operand(0)->shape(), - /*padding_value_shape=*/pad->operand(1)->shape(), - /*padding_config=*/pad->padding_config())); - CHECK(ShapeUtil::Compatible(pad->shape(), inferred_return_shape)) - << "return shape is set to: " << ShapeUtil::HumanString(pad->shape()) - << "but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - // Create new HLO of padded shape with padding value. - ReturnT scalar = - parent_->GetEvaluatedLiteralFor(pad->operand(1)).Get({}); - auto result = Literal::CreateFromShape(pad->shape()); - TF_RETURN_IF_ERROR(result->Populate( - [&scalar](ArraySlice multi_index) { return scalar; })); - - const Literal& evaluated_operand = - parent_->GetEvaluatedLiteralFor(pad->operand(0)); - - std::vector input_index(ShapeUtil::Rank(evaluated_operand.shape()), - 0); - std::vector target_index(ShapeUtil::Rank(result->shape()), 0); - - // Loop through each element of the operand, assign them to the - // corresponding index of the resulting padded literal. 
- const PaddingConfig& pad_config = pad->padding_config(); - - auto func = [&](ArraySlice input_index) { - for (auto i = 0; i < input_index.size(); ++i) { - // Interior padding occurs logically before edge padding, so in the case - // of negative edge padding elements are removed from the - // interior-padded operand. - target_index[i] = - pad_config.dimensions(i).edge_padding_low() + - input_index[i] * (pad_config.dimensions(i).interior_padding() + 1); - - // Account for negative low and high padding: skip assignment if the - // any target index is out of range. - if (!(target_index[i] >= 0 && - target_index[i] < pad->shape().dimensions(i))) { - return true; - } - } - result->Set(target_index, - evaluated_operand.Get(input_index)); - return true; - }; - - std::vector zero_base(evaluated_operand.shape().dimensions_size(), - 0); - std::vector step(evaluated_operand.shape().dimensions_size(), 1); - - ShapeUtil::ForEachIndex( - evaluated_operand.shape(), zero_base, - AsInt64Slice(evaluated_operand.shape().dimensions()), step, func); - - parent_->evaluated_[pad] = std::move(result); - return Status::OK(); - } - - Status HandleDynamicSlice(HloInstruction* dynamic_slice) override { - auto operand = dynamic_slice->operand(0); - auto start_indices = dynamic_slice->operand(1); - auto result_shape = dynamic_slice->shape(); - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferDynamicSliceShape( - operand->shape(), start_indices->shape(), - dynamic_slice->dynamic_slice_sizes())); - TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) - << "return shape is set to: " << ShapeUtil::HumanString(result_shape) - << "but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - TF_RET_CHECK( - primitive_util::IsIntegralType(start_indices->shape().element_type())); - - const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - const Literal& start_indices_literal = - 
parent_->GetEvaluatedLiteralFor(start_indices); - - switch (start_indices->shape().element_type()) { - case S32: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); - } break; - case S64: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); - } break; - case U32: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); - } break; - case U64: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); - } break; - default: - LOG(FATAL) << "HandleDynamicSlice: unhandled primitive type for " - "start_indices: " - << PrimitiveType_Name(start_indices->shape().element_type()); - } - - return Status::OK(); - } - - Status HandleDynamicUpdateSlice( - HloInstruction* dynamic_update_slice) override { - auto operand = dynamic_update_slice->operand(0); - auto update = dynamic_update_slice->operand(1); - auto start_indices = dynamic_update_slice->operand(2); - auto result_shape = dynamic_update_slice->shape(); - TF_ASSIGN_OR_RETURN( - auto inferred_return_shape, - ShapeInference::InferDynamicUpdateSliceShape( - operand->shape(), update->shape(), start_indices->shape())); - TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) - << "return shape is set to: " << ShapeUtil::HumanString(result_shape) - << "but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - TF_RET_CHECK( - primitive_util::IsIntegralType(start_indices->shape().element_type())); - TF_RET_CHECK(ShapeUtil::Compatible(result_shape, operand->shape())); - - const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - const Literal& update_literal = parent_->GetEvaluatedLiteralFor(update); - const Literal& start_indices_literal = - 
parent_->GetEvaluatedLiteralFor(start_indices); - - switch (start_indices->shape().element_type()) { - case S32: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); - } break; - case S64: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); - } break; - case U32: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); - } break; - case U64: { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); - } break; - default: - LOG(FATAL) << "HandleDynamicUpdateSlice: unhandled primitive type for " - "start_indices: " - << PrimitiveType_Name(start_indices->shape().element_type()); - } - - return Status::OK(); - } - - template - StatusOr> MapImpl(HloInstruction* map) { - auto operands = map->operands(); - HloComputation* computation = map->to_apply(); - - auto result = Literal::CreateFromShape(map->shape()); - - HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - std::vector> arg_literals; - arg_literals.reserve(operands.size()); - - // Construct scalar literal parameters to be passed to the map - // computation. - for (auto operand : operands) { - const Literal& arg_literal = - parent_->GetEvaluatedLiteralFor(operand); - - auto curr_val = arg_literal.Get(multi_index); - auto curr_val_literal = Literal::CreateR0(curr_val); - - arg_literals.push_back(std::move(curr_val_literal)); - } - - std::unique_ptr computed_result = - embedded_evaluator - .Evaluate>(*computation, - arg_literals) - .ConsumeValueOrDie(); - // Clear visit states so that the we can use the evaluate again on - // the same computation. 
- embedded_evaluator.ResetVisitStates(); - - return computed_result->Get({}); - })); - return std::move(result); - } - - Status HandleMap(HloInstruction* map) override { - switch (map->operand(0)->shape().element_type()) { - case PRED: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case U8: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case U32: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case U64: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case S8: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case S32: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case S64: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case F16: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], - MapImpl(map)); - break; - } - case F32: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case F64: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - case C64: { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); - break; - } - default: - LOG(FATAL) << "HandleMap: unhandled primitive type for " - "input operand: " - << PrimitiveType_Name( - map->operand(0)->shape().element_type()); - } - - return Status::OK(); - } - - Status HandleReduce(HloInstruction* reduce) override { - auto arg = reduce->operand(0); - auto init_value = reduce->operand(1); - ArraySlice dimensions(reduce->dimensions()); - HloComputation* function = reduce->to_apply(); - TF_RET_CHECK(ShapeUtil::Rank(reduce->shape()) == - ShapeUtil::Rank(arg->shape()) - dimensions.size()); - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferReduceShape( - /*arg=*/arg->shape(), - /*init_value=*/init_value->shape(), - /*dimensions_to_reduce=*/dimensions, - /*to_apply=*/function->ComputeProgramShape())); - 
TF_RET_CHECK(ShapeUtil::Compatible(reduce->shape(), inferred_return_shape)) - << "return shape is set to: " << ShapeUtil::HumanString(reduce->shape()) - << "but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - const Literal& arg_literal = parent_->GetEvaluatedLiteralFor(arg); - VLOG(3) << "HandleReduce arg_literal: " << arg_literal.ToString(); - const Literal& init_literal = parent_->GetEvaluatedLiteralFor(init_value); - VLOG(3) << "HandleReduce init_literal: " << init_literal.ToString(); - TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); - auto init_scalar = init_literal.Get({}); - - auto result = Literal::CreateFromShape(reduce->shape()); - - const auto arg_dimensions = AsInt64Slice(arg_literal.shape().dimensions()); - std::vector arg_dim_steps(arg_dimensions.size()); - std::vector arg_dim_counts(arg_dimensions.size()); - for (const int64 dim : dimensions) { - arg_dim_steps[dim] = 1; - arg_dim_counts[dim] = arg_dimensions[dim]; - } - - // Map each dimension in the result to a dimension in arg that isn't - // being reduced. - std::vector result_to_arg_index; - for (int64 i = 0; i < arg_dimensions.size(); ++i) { - if (arg_dim_steps[i] == 0) { - result_to_arg_index.push_back(i); - } - } - - HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); - // For each resulting dimension, calculate and assign computed value. - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - ReturnT result_val = init_scalar; - - std::vector base(arg_dimensions.size()); - for (int64 i = 0; i < multi_index.size(); ++i) { - base[result_to_arg_index[i]] = multi_index[i]; - } - - // When the reduction is addition of floats, accumulate in a double - // for better precision. Also, avoid creating Literals for the - // intermediate results; it's much faster. 
- if (ShapeUtil::ElementIsFloating(init_literal.shape()) && - IsScalarAdd(function)) { - double computed_result = 0; - auto func = [&](ArraySlice input_index) { - computed_result += arg_literal.Get(input_index); - return true; - }; - ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, - arg_dim_steps, func); - return static_cast(computed_result); - } - auto func = [&](ArraySlice input_index) { - auto curr_val = arg_literal.Get(input_index); - - // Evaluate computation with specified literal operands. - auto curr_val_literal = Literal::CreateR0(curr_val); - auto result_val_literal = Literal::CreateR0(result_val); - std::vector args = {result_val_literal.get(), - curr_val_literal.get()}; - - std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*function, args) - .ConsumeValueOrDie(); - // Clear visit states so that we can use the evaluator again on - // the same computation. - embedded_evaluator.ResetVisitStates(); - // Assign computed result to result_val. - result_val = computed_result->Get({}); - return true; - }; - // Computes one element of the result, reducing all dimensions that - // contribute to that element. 
- ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, - arg_dim_steps, func); - return result_val; - })); - - parent_->evaluated_[reduce] = std::move(result); - return Status::OK(); - } - - bool IsScalarAdd(HloComputation* computation) { - HloInstruction* instruction = computation->root_instruction(); - if (instruction->opcode() == HloOpcode::kAdd && - computation->num_parameters() == 2) { - const HloInstruction* lhs = instruction->operand(0); - const HloInstruction* rhs = instruction->operand(1); - return lhs->opcode() == HloOpcode::kParameter && - ShapeUtil::IsScalar(lhs->shape()) && - rhs->opcode() == HloOpcode::kParameter && - ShapeUtil::IsScalar(rhs->shape()) && lhs != rhs; - } - return false; - } - - Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override { - auto operand = select_and_scatter->operand(0); - auto source = select_and_scatter->operand(1); - const Window& window = select_and_scatter->window(); - - const Literal& init_literal = - parent_->GetEvaluatedLiteralFor(select_and_scatter->operand(2)); - TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); - auto init_scalar = init_literal.Get({}); - - auto result = Literal::CreateFromShape(select_and_scatter->shape()); - - // Initialize result array with the init value. 
- TF_RETURN_IF_ERROR(result->Populate( - [&](ArraySlice output_index) { return init_scalar; })); - - std::vector window_dimension_sizes; - for (const auto& window_dimension : window.dimensions()) { - window_dimension_sizes.push_back(window_dimension.size()); - } - const Shape window_shape = ShapeUtil::MakeShape( - operand->shape().element_type(), window_dimension_sizes); - - HloComputation* select = select_and_scatter->select(); - HloComputation* scatter = select_and_scatter->scatter(); - - const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - const Literal& source_literal = parent_->GetEvaluatedLiteralFor(source); - - int64 rank = ShapeUtil::Rank(operand_literal.shape()); - - HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); - DimensionVector source_index(rank); - - std::fill(source_index.begin(), source_index.end(), 0); - do { - // For each element in `source`, we place a window in `operand`. For each - // window placement, we iterate inside the window twice: - // - // 1. Find the selected index by applying `select` function to all - // elements. E.g., If the `select` function is GreaterEqual, the first - // iteration through the window finds the biggest value and returns its - // index. - // - // 2. Using the selected index, scatter value from `source` to result. We - // do this by iterating through the window, and compare each index with - // the selected index. 
- optional selected_val; - optional> selected_index; - - IterateThroughWindow( - window_shape, window, operand_literal.shape(), source_index, - [&](const std::vector& operand_index) { - auto curr_val = operand_literal.Get(operand_index); - if (!selected_val) { - selected_val = curr_val; - selected_index = operand_index; - } - const auto curr_val_literal = Literal::CreateR0(curr_val); - const auto selected_val_literal = - Literal::CreateR0(*selected_val); - - const std::vector args = { - selected_val_literal.get(), curr_val_literal.get()}; - std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*select, args) - .ConsumeValueOrDie(); - bool selected = !computed_result->Get({}); - if (selected) { - selected_val = curr_val; - selected_index = operand_index; - } - embedded_evaluator.ResetVisitStates(); - }); - - IterateThroughWindow( - window_shape, window, operand_literal.shape(), source_index, - [&](const std::vector& operand_index) { - if (std::equal(operand_index.begin(), operand_index.end(), - selected_index->begin())) { - auto source = source_literal.Get(source_index); - auto scattered = result->Get(operand_index); - const auto source_literal = Literal::CreateR0(source); - const auto scattered_literal = - Literal::CreateR0(scattered); - - const std::vector args = { - source_literal.get(), scattered_literal.get()}; - std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*scatter, args) - .ConsumeValueOrDie(); - result->Set(operand_index, computed_result->Get({})); - // Clear visit states so that the we can use the evaluator again - // on the same computation. 
- embedded_evaluator.ResetVisitStates(); - } - }); - } while (IndexUtil::BumpIndices(source->shape(), &source_index)); - - parent_->evaluated_[select_and_scatter] = std::move(result); - return Status::OK(); - } - - Status HandleReduceWindow(HloInstruction* reduce_window) override { - auto operand = reduce_window->operand(0); - const Window& window = reduce_window->window(); - HloComputation* function = reduce_window->to_apply(); - TF_ASSIGN_OR_RETURN( - auto inferred_return_shape, - ShapeInference::InferReduceWindowShape( - /*operand_shape=*/reduce_window->operand(0)->shape(), - /*init_value=*/reduce_window->operand(1)->shape(), window, - /*to_apply_shape=*/function->ComputeProgramShape())); - TF_RET_CHECK( - ShapeUtil::Compatible(reduce_window->shape(), inferred_return_shape)) - << "return shape is set to: " - << ShapeUtil::HumanStringWithLayout(reduce_window->shape()) - << "but is inferred to be: " - << ShapeUtil::HumanStringWithLayout(inferred_return_shape); - - const Literal& operand_literal = - parent_->GetEvaluatedLiteralFor(reduce_window->operand(0)); - VLOG(3) << "HandleReduceWindow arg_literal: " << operand_literal.ToString(); - const Literal& init_literal = - parent_->GetEvaluatedLiteralFor(reduce_window->operand(1)); - VLOG(3) << "HandleReduceWindow init_literal: " << init_literal.ToString(); - TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); - auto init_scalar = init_literal.Get({}); - - auto result = Literal::CreateFromShape(reduce_window->shape()); - - // Creates a Shape object from window, for iteration below. 
- std::vector window_dimension_sizes; - for (const auto& window_dimension : window.dimensions()) { - window_dimension_sizes.push_back(window_dimension.size()); - } - const Shape window_shape = ShapeUtil::MakeShape( - operand->shape().element_type(), window_dimension_sizes); - - DimensionVector window_index(window.dimensions_size()); - DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape())); - - HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); - // For each resulting dimension, calculate and assign computed value. - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice output_index) { - ReturnT result_val = init_scalar; - - std::fill(window_index.begin(), window_index.end(), 0); - std::fill(operand_index.begin(), operand_index.end(), 0); - - IterateThroughWindow( - window_shape, window, operand_literal.shape(), output_index, - [&](const std::vector& operand_index) { - auto curr_val = operand_literal.Get(operand_index); - - // Evaluate computation with specified literal operands. - const auto curr_val_literal = - Literal::CreateR0(curr_val); - const auto result_val_literal = - Literal::CreateR0(result_val); - const std::vector args = { - result_val_literal.get(), curr_val_literal.get()}; - std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*function, args) - .ConsumeValueOrDie(); - - // Clear visit states so that the we can use the evaluate again - // on the same computation. 
- embedded_evaluator.ResetVisitStates(); - - result_val = computed_result->Get({}); - }); - - return result_val; - })); - - parent_->evaluated_[reduce_window] = std::move(result); - return Status::OK(); - } - - Status HandleSlice(HloInstruction* slice) override { - auto operand = slice->operand(0); - const Shape& shape = slice->shape(); - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferSliceShape( - operand->shape(), slice->slice_starts(), - slice->slice_limits(), slice->slice_strides())); - TF_RET_CHECK(ShapeUtil::Compatible(shape, inferred_return_shape)) - << "return shape set to: " << ShapeUtil::HumanString(shape) - << " but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - const int64 rank = ShapeUtil::Rank(operand->shape()); - const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - auto func = [&](ArraySlice out_index) { - DimensionVector operand_index(rank); - for (int64 i = 0; i < rank; ++i) { - operand_index[i] = - slice->slice_starts(i) + out_index[i] * slice->slice_strides(i); - } - return operand_literal.Get(operand_index); - }; - - auto result = Literal::CreateFromDimensions( - shape.element_type(), AsInt64Slice(shape.dimensions())); - TF_RETURN_IF_ERROR(result->Populate(func)); - parent_->evaluated_[slice] = std::move(result); - return Status::OK(); - } - - // Enable CLZ only for int32 and uint32. 
- template < - typename NativeT, - typename std::enable_if< - (std::is_floating_point::value || - std::is_integral::value || is_complex_t::value) && - !(std::is_same::value || - std::is_same::value)>::type* = nullptr> - Status HandleClz(HloInstruction* clz) { - return InvalidArgument("Unsupported type for Clz"); - } - - template ::value || - std::is_same::value>::type* = nullptr> - Status HandleClz(HloInstruction* clz) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], - ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { - return 31 - tensorflow::Log2Floor(elem_operand); - })); - return Status::OK(); - } - - Status HandleClz(HloInstruction* clz) override { - return HandleClz(clz); - } - - template ::value>::type* = nullptr> - Status HandleSin(HloInstruction* sin) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[sin], - ElementWiseUnaryOp(sin, [](ElementwiseT elem_operand) { - return std::sin(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> - Status HandleSin(HloInstruction* sin) { - return InvalidArgument("Unsupported type for Sin"); - } - - Status HandleSin(HloInstruction* sin) override { - return HandleSin(sin); - } - - template ::value>::type* = nullptr> - Status HandleCos(HloInstruction* cos) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[cos], - ElementWiseUnaryOp(cos, [](ElementwiseT elem_operand) { - return std::cos(elem_operand); - })); - return Status::OK(); - } - - template < - typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> - Status HandleCos(HloInstruction* cos) { - return InvalidArgument("Unsupported type for Cos"); - } - - Status HandleCos(HloInstruction* cos) override { - return HandleCos(cos); - } - - template ::value>::type* = nullptr> - Status HandleReducePrecision(HloInstruction* reduce_precision) { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[reduce_precision], - 
ElementWiseUnaryOp(reduce_precision, [reduce_precision]( - ElementwiseT elem) { - uint32_t value_as_int = tensorflow::bit_cast(elem); - const uint32_t mantissa_bits = reduce_precision->mantissa_bits(); - const uint32_t exponent_bits = reduce_precision->exponent_bits(); - - // Code is based on the CPU/GPU implementation in LLVM-emitting code. - // - // Bits in float type: - // mantissa : bits [0:22] - // exponent : bits [23:30] - // sign : bits [31] - if (mantissa_bits < 23) { - const uint32_t last_mantissa_bit_mask = 1u << (23 - mantissa_bits); - - // Compute rounding bias for round-to-nearest with ties to even. - // This is equal to a base value of 0111... plus one bit if the last - // remaining mantissa bit is 1. - const uint32_t base_rounding_bias = - (last_mantissa_bit_mask >> 1) - 1; - const uint32_t x_last_mantissa_bit = - (value_as_int & last_mantissa_bit_mask) >> (23 - mantissa_bits); - const uint32_t x_rounding_bias = - x_last_mantissa_bit + base_rounding_bias; - - // Add rounding bias, and mask out truncated bits. Note that the - // case where adding the rounding bias overflows into the exponent - // bits is correct; the non-masked mantissa bits will all be zero, - // and the exponent will be incremented by one. - const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1); - value_as_int = value_as_int + x_rounding_bias; - value_as_int = value_as_int & truncation_mask; - } - if (exponent_bits < 8) { - // Masks for f32 values. - const uint32_t f32_sign_bit_mask = 1u << 31; - const uint32_t f32_exp_bits_mask = 0xffu << 23; - - // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the - // most- significant bit -- is equal to 1.0f for all exponent sizes. - // Adding 2^(n-1)-1 to this gives us the highest non-infinite - // exponent for a bit- size of n, and subtracting 2^(n-1)-1 from - // this gives us the lowest' exponent (corresponding to 0.0f). 
- // - // Thus, the f32 exponent corresponding to the highest non-infinite - // exponent for a bit size of n is (2^7-1) + 2^(n-1)-1, and the f32 - // exponent corresponding to the lowest exponent for a bit size of n - // is (2^7-1) - 2^(n-1)-1. - // - // Note that we have already checked that exponents_bits >= 1. - const uint32_t f32_exponent_bias = (1 << 7) - 1; - const uint32_t reduced_exponent_bias = - (1 << (exponent_bits - 1)) - 1; - const uint32_t reduced_max_exponent = - f32_exponent_bias + reduced_exponent_bias; - const uint32_t reduced_min_exponent = - f32_exponent_bias - reduced_exponent_bias; - - // Do we overflow or underflow? - const uint32_t x_exponent = value_as_int & f32_exp_bits_mask; - const bool x_overflows = x_exponent > (reduced_max_exponent << 23); - const bool x_underflows = - x_exponent <= (reduced_min_exponent << 23); - - // Compute appropriately-signed values of zero and infinity. - const uint32_t x_signed_zero = value_as_int & f32_sign_bit_mask; - const uint32_t x_signed_inf = x_signed_zero | f32_exp_bits_mask; - - // Force to zero or infinity if overflow or underflow. (Note that - // this truncates all denormal values to zero, rather than rounding - // them.) - value_as_int = x_overflows ? x_signed_inf : value_as_int; - value_as_int = x_underflows ? x_signed_zero : value_as_int; - } - - float reduced_result = tensorflow::bit_cast(value_as_int); - if (std::isnan(elem)) { - reduced_result = mantissa_bits > 0 - ? 
elem - : std::numeric_limits::infinity(); - } - return reduced_result; - })); - return Status::OK(); - } - - template ::value>::type* = nullptr> - Status HandleReducePrecision(HloInstruction* reduce_precision) { - return InvalidArgument("Double not supported for reduce precision"); - } - - template < - typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> - Status HandleReducePrecision(HloInstruction* reduce_precision) { - return InvalidArgument("Unsupported type for reduce precision"); - } - - Status HandleReducePrecision(HloInstruction* reduce_precision) override { - return HandleReducePrecision(reduce_precision); - } - - private: - template - StatusOr> DynamicSlice( - const Literal& operand_literal, const Literal& start_indices_literal, - const Shape& result_shape) { - auto start_indices_typed = start_indices_literal.data(); - std::vector start(start_indices_typed.begin(), - start_indices_typed.end()); - - std::vector operand_indices(start.size()); - - auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - for (int64 i = 0; i < operand_indices.size(); ++i) { - CHECK_GE(multi_index[i] + start[i], 0); - // Mod is only used here to be consistent with the existing - // backends' behavior. 
- operand_indices[i] = (multi_index[i] + start[i]) % - operand_literal.shape().dimensions(i); - } - - auto result = operand_literal.Get(operand_indices); - return result; - })); - - return std::move(result); - } - - template - StatusOr> DynamicUpdateSlice( - const Literal& operand_literal, const Literal& update_literal, - const Literal& start_indices_literal) { - auto result = operand_literal.CloneToUnique(); - auto start_indices_typed = start_indices_literal.data(); - const auto rank = ShapeUtil::Rank(result->shape()); - std::vector start(rank, 0); - for (int64 i = 0; i < rank; ++i) { - // All other implementations currently wrap-around the index, so this - // should do so as well. - start[i] = (start_indices_typed[i] % result->shape().dimensions(i)); - start[i] += (start[i] < 0) * result->shape().dimensions(i); - } - std::vector result_index(rank, 0); - - auto func = [&](ArraySlice update_index) { - std::transform(update_index.begin(), update_index.end(), start.begin(), - result_index.begin(), std::plus()); - // Same as above, wrap-around only to match other implementations' - // semantics. 
- std::transform(result_index.begin(), result_index.end(), - result->shape().dimensions().begin(), result_index.begin(), - std::modulus()); - result->Set(result_index, - update_literal.Get(update_index)); - return true; - }; - - std::vector base(update_literal.shape().dimensions_size(), 0); - std::vector step(update_literal.shape().dimensions_size(), 1); - ShapeUtil::ForEachIndex(update_literal.shape(), base, - AsInt64Slice(update_literal.shape().dimensions()), - step, func); - - return std::move(result); - } - - StatusOr> ElementWiseUnaryOp( - HloInstruction* instruction, - const std::function& unary_op) { - const Literal& operand_literal = - parent_->GetEvaluatedLiteralFor(instruction->operand(0)); - TF_ASSIGN_OR_RETURN( - auto result_literal, - (ElementWiseUnaryOpImpl( - instruction, ConvertUnaryFunction(unary_op), operand_literal))); - - return std::move(result_literal); - } - - StatusOr> ElementWiseBinaryOp( - HloInstruction* instruction, - const std::function& - binary_op) { - const auto shape = instruction->shape(); - const auto* lhs = instruction->operand(0); - const auto* rhs = instruction->operand(1); - - // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast - // is removed. 
- if (!(ShapeUtil::SameDimensions(shape, rhs->shape()) && - ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()))) { - return Unimplemented( - "Implicit broadcasting is currently unsupported in HLO evaluator " - "Shape Mismatch: %s vs %s vs %s: ", - ShapeUtil::HumanString(shape).c_str(), - ShapeUtil::HumanString(lhs->shape()).c_str(), - ShapeUtil::HumanString(rhs->shape()).c_str()); - } - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - - auto result = Literal::CreateFromShape(shape); - - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - return ConvertBinaryFunction(binary_op)( - lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); - return std::move(result); - } - - template - StatusOr> ElementwiseTernaryOp( - HloInstruction* instruction, - const std::function& ternary_op) { - const auto shape = instruction->shape(); - const auto* lhs = instruction->operand(0); - const auto* rhs = instruction->operand(1); - const auto* ehs = instruction->operand(2); - - // TODO(b/35950897, b/27796129): add DCHECK back once implicit - // broadcast is removed. 
- if (!(ShapeUtil::SameDimensions(shape, lhs->shape()) && - ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()) && - ShapeUtil::SameDimensions(rhs->shape(), ehs->shape()))) { - return Unimplemented( - "Implicit broadcasting is currently unsupported in HLO evaluator " - "Shape Mismatch: %s vs %s vs %s vs %s: ", - ShapeUtil::HumanString(shape).c_str(), - ShapeUtil::HumanString(lhs->shape()).c_str(), - ShapeUtil::HumanString(rhs->shape()).c_str(), - ShapeUtil::HumanString(ehs->shape()).c_str()); - } - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - const Literal& ehs_literal = parent_->GetEvaluatedLiteralFor(ehs); - - auto result = Literal::CreateFromShape(shape); - - TF_RETURN_IF_ERROR( - result->Populate([&](ArraySlice multi_index) { - return ternary_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index), - ehs_literal.Get(multi_index)); - })); - - return std::move(result); - } - - template - static bool IsShiftOutOfBounds(NativeT rhs) { - typedef typename std::make_unsigned::type UnsignedT; - UnsignedT lhs_size_unsigned = sizeof(NativeT) * CHAR_BIT; - UnsignedT rhs_unsigned = static_cast(rhs); - return rhs_unsigned >= lhs_size_unsigned; - } - - HloEvaluator* parent_; -}; // class HloEvaluator::TypedVisitor HloEvaluator::HloEvaluator(int64 max_loop_iterations) : max_loop_iterations_(max_loop_iterations) { - typed_visitors_[PRED] = MakeUnique>(this); - typed_visitors_[U8] = MakeUnique>(this); + typed_visitors_[PRED] = MakeUnique>(this); + typed_visitors_[U8] = MakeUnique>(this); typed_visitors_[U16] = MakeUnique([](HloInstruction*) { return Unimplemented( - "HloEvaluator::TypedVisitor: unhandled primitive type: U16."); + "HloEvaluator::HloEvaluatorTypedVisitor: unhandled primitive type: " + "U16."); }); - typed_visitors_[U32] = MakeUnique>(this); - typed_visitors_[U64] = MakeUnique>(this); - typed_visitors_[S8] = MakeUnique>(this); + typed_visitors_[U32] = 
MakeUnique>(this); + typed_visitors_[U64] = MakeUnique>(this); + typed_visitors_[S8] = MakeUnique>(this); typed_visitors_[S16] = MakeUnique([](HloInstruction*) { return Unimplemented( - "HloEvaluator::TypedVisitor: unhandled primitive type: S16."); + "HloEvaluator::HloEvaluatorTypedVisitor: unhandled primitive type: " + "S16."); }); - typed_visitors_[S32] = MakeUnique>(this); - typed_visitors_[S64] = MakeUnique>(this); - typed_visitors_[F16] = MakeUnique>(this); - typed_visitors_[F32] = MakeUnique>(this); - typed_visitors_[F64] = MakeUnique>(this); - typed_visitors_[C64] = MakeUnique>(this); + typed_visitors_[S32] = MakeUnique>(this); + typed_visitors_[S64] = MakeUnique>(this); + typed_visitors_[F16] = + MakeUnique>(this); + typed_visitors_[F32] = MakeUnique>(this); + typed_visitors_[F64] = MakeUnique>(this); + typed_visitors_[C64] = MakeUnique>(this); // Most of the evaluator computations we use don't support BF16 (e.g., // std::ceil, std::tanh). To make evaluator work with BF16, we set all // elementwise computations to be done in F32 and do BF16<->F32 conversion // around the input and the output of the computations. - typed_visitors_[BF16] = MakeUnique>(this); + typed_visitors_[BF16] = + MakeUnique>(this); typed_visitors_[TUPLE] = MakeUnique([](HloInstruction*) { return Unimplemented( - "HloEvaluator::TypedVistor: unhandled primitive type: TUPLE."); + "HloEvaluatorTypedVisitor: unhandled primitive type: TUPLE."); }); typed_visitors_[OPAQUE] = MakeUnique([](HloInstruction*) { return Unimplemented( - "HloEvaluator::TypedVisitor: unhandled primitive type: OPAQUE."); + "HloEvaluatorTypedVisitor: unhandled primitive type: OPAQUE."); }); } @@ -3034,7 +977,7 @@ Status HloEvaluator::HandleSelect(HloInstruction* select) { // If predicate is of scalar type, no element-wise selection would be needed. // This would also handle output array of tuple types as the DefaultAction - // would go through the TypedVisitor which doesn't handle tuples. 
+ // would go through the HloEvaluatorTypedVisitor which doesn't handle tuples. if (ShapeUtil::IsScalar(pred.shape())) { if (pred.Get({})) { evaluated_[select] = on_true.CloneToUnique(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index c0dcee0c3e382f74de72a2b89f39e06f042e2b80..cc5676ea7b05be6e0b7066bf703d8e48da0133ab 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -109,19 +109,16 @@ class HloEvaluator : public DfsHloVisitorWithDefault { substitutions); protected: - // Templated DfsHloVisitor. Typically ReturnT here indicates the resulting - // literal type of each evaluated Handle* method of a TypedVisitor. - // There are however a few notable exceptions to this rule, notably: - // - HandleCompare and HandleIsFinite: where the resulting literal type is - // always boolean. - // These operations are handled outside of the parent HloEvaluator handlers - // instead of from within TypedVisitor. + // Make HloEvaluatorTypedVisitor a friend because it is logically part of this + // class. // - // Type params: - // - ReturnT: The type of input and output of each operation. - // - ElementwiseT: The type in which internal computation are done. - template - class TypedVisitor; + // A straightforward implementation would be to make it a nested class + // declared and defined in hlo_evaluator.cc. Instead HloEvaluatorTypedVisitor + // lives as a separate class with its own header because its template gets + // instantiated many times and we want to use extern templates to shard out + // the compilation of those instantiations across multiple cc files. + template + friend class HloEvaluatorTypedVisitor; // Wraps around instruction handling to infer types before dispatching to // the corresponding typed Visitor. 
@@ -169,6 +166,33 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleSelect(HloInstruction* select) override; private: + template + static StatusOr> ElementWiseUnaryOpImpl( + HloInstruction* instruction, + const std::function& unary_op, + const Literal& operand_literal) { + const auto shape = instruction->shape(); + const auto* operand = instruction->operand(0); + + // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast is + // removed. + if (!ShapeUtil::SameDimensions(shape, operand->shape())) { + return Unimplemented( + "Implicit broadcasting is currently unsupported in HLO evaluator " + "Shape Mismatch: %s vs %s", + ShapeUtil::HumanString(shape).c_str(), + ShapeUtil::HumanString(operand->shape()).c_str()); + } + + auto result = Literal::CreateFromShape(shape); + + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + return unary_op(operand_literal.Get(multi_index)); + })); + return std::move(result); + } + // Returns the already-evaluated literal result for the instruction. // A Constant instruction is considered evaluated and its literal will be // returned directly without looking up the cache. diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 230147abfec10d84d704ba0d62a17d2fb031aa35..cc16446778cbeac5ec4bed110adc9be8620084fe 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include #include -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -827,7 +827,7 @@ TEST_P(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) { *window.add_dimensions() = dim; ConvolutionDimensionNumbers dnums = - ComputationBuilder::CreateDefaultConvDimensionNumbers(2); + XlaBuilder::CreateDefaultConvDimensionNumbers(2); const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); b.AddInstruction(HloInstruction::CreateConvolve( @@ -1046,7 +1046,7 @@ TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) { *window.add_dimensions() = dim; ConvolutionDimensionNumbers dnums = - ComputationBuilder::CreateDefaultConvDimensionNumbers(2); + XlaBuilder::CreateDefaultConvDimensionNumbers(2); const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 7, 7}); b.AddInstruction(HloInstruction::CreateConvolve( @@ -1109,7 +1109,7 @@ TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) { *window.add_dimensions() = dim; ConvolutionDimensionNumbers dnums = - ComputationBuilder::CreateDefaultConvDimensionNumbers(2); + XlaBuilder::CreateDefaultConvDimensionNumbers(2); const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 8, 8}); b.AddInstruction(HloInstruction::CreateConvolve( @@ -1180,7 +1180,7 @@ TEST_P(HloEvaluatorTest, *window.add_dimensions() = dim; ConvolutionDimensionNumbers dnums = - ComputationBuilder::CreateDefaultConvDimensionNumbers(2); + XlaBuilder::CreateDefaultConvDimensionNumbers(2); const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 9, 3}); b.AddInstruction(HloInstruction::CreateConvolve( diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h new file mode 100644 index 
0000000000000000000000000000000000000000..f1cb36347850a5af8d9f0cb7b28d05bc7b382030 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -0,0 +1,2102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_ + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/gtl/optional.h" + +namespace xla { + +// TODO(b/79274244): We'd like these type traits to live inside of +// HloEvaluatorTypedVisitor so they don't pollute namespace xla, but that +// crashes clang in the frontend. +// +// Anyway this is relatively safe as-is because hlo_evaluator_typed_visitor.h is +// a "private" header that's not exposed outside of hlo_evaluator.cc. +template +using is_complex_t = std::is_same; +template +using is_complex64_t = std::is_same; + +// Templated DfsHloVisitor for use by HloEvaluator. +// +// Typically ReturnT here indicates the resulting literal type of each evaluated +// Handle* method of a TypedVisitor. 
There are however a few notable exceptions +// to this rule, notably: +// - HandleCompare and HandleIsFinite: where the resulting literal type is +// always boolean. +// These operations are handled outside of the parent HloEvaluator handlers +// instead of from within TypedVisitor. +// +// Type params: +// - ReturnT: The type of input and output of each operation. +// - ElementwiseT: The type in which internal computation are done. +// +// This a logically a private part of HloEvaluator. It lives in this header +// file rather than in hlo_evaluator.cc because we use extern templates and a +// bunch of independent cc files to speed up compiling the many instantiations +// of this class. +template +class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { + public: + explicit HloEvaluatorTypedVisitor(HloEvaluator* p) : parent_(p) {} + + // The following higher-order functions convert a function with ElementwiseT + // to a function with ReturnT. + std::function ConvertUnaryFunction( + const std::function& unary_op) { + return [&unary_op](ReturnT arg) { + return static_cast(unary_op(static_cast(arg))); + }; + } + std::function ConvertBinaryFunction( + const std::function& + binary_op) { + return [&binary_op](ReturnT arg1, ReturnT arg2) { + return static_cast(binary_op(static_cast(arg1), + static_cast(arg2))); + }; + } + std::function ConvertTernaryFunction( + const std::function& ternary_op) { + return [&ternary_op](ReturnT arg1, ReturnT arg2, ReturnT arg3) { + return static_cast(ternary_op(static_cast(arg1), + static_cast(arg2), + static_cast(arg3))); + }; + } + + Status DefaultAction(HloInstruction* hlo_instruction) override { + return Unimplemented("unhandled HLO ops for HloEvaluator: %s.", + HloOpcodeString(hlo_instruction->opcode()).c_str()); + } + + // TODO(b/35950897): many of the stl functions used in the handlers are not + // overloaded for every XLA primitive type. 
+ + template ::value>::type* = + nullptr> + Status HandleAbs(HloInstruction* abs) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { + return elem_operand; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { + return std::abs(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(abs->operand(0)); + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[abs], + (HloEvaluator::ElementWiseUnaryOpImpl( + abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, + operand_literal))); + + return Status::OK(); + } + + Status HandleAbs(HloInstruction* abs) override { + // If the operand is of C64 type, the return type of abs will be F32. + // However, ElementwiseT would still be the return type, F32, and thus + // specifying the ElementwiseT explicitly as C64 is needed below. 
+ if (abs->operand(0)->shape().element_type() == C64) { + return HandleAbs(abs); + } + return HandleAbs(abs); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleRound(HloInstruction* round) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[round], + ElementWiseUnaryOp(round, [](ElementwiseT elem_operand) { + return std::round(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleRound(HloInstruction* round) { + return InvalidArgument("Unsupported type for Round"); + } + + Status HandleRound(HloInstruction* round) override { + return HandleRound(round); + } + + Status HandleBroadcast(HloInstruction* broadcast) override { + parent_->evaluated_[broadcast] = + Literal::CreateFromShape(broadcast->shape()); + auto output = parent_->evaluated_[broadcast].get(); + const Literal& operand_to_broadcast = + parent_->GetEvaluatedLiteralFor(broadcast->operand(0)); + std::vector broadcast_indices( + ShapeUtil::Rank(broadcast->operand(0)->shape()), 0); + + TF_RET_CHECK(broadcast->dimensions().size() == + ShapeUtil::Rank(operand_to_broadcast.shape())) + << "broadcast dimensions is of size: " << broadcast->dimensions().size() + << " and rank of operand_to_broadcast is: " + << ShapeUtil::Rank(operand_to_broadcast.shape()); + // Checks that operand's dimensions are the same as the broadcast's + // dimensions along the dimensions to be broadcasted. 
+ for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) == + operand_to_broadcast.shape().dimensions(i)); + } + + return output->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; + } + return operand_to_broadcast.Get(broadcast_indices); + }); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleCeil(HloInstruction* ceil) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil], + ElementWiseUnaryOp(ceil, [](ElementwiseT elem_operand) { + return std::ceil(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleCeil(HloInstruction* ceil) { + return InvalidArgument("Unsupported type for Ceil"); + } + + Status HandleCeil(HloInstruction* ceil) override { + return HandleCeil(ceil); + } + + Status HandleConvert(HloInstruction* convert) override { + const HloInstruction* operand = convert->operand(0); + TF_RET_CHECK(ShapeUtil::SameDimensions(operand->shape(), convert->shape())); + TF_ASSIGN_OR_RETURN(std::unique_ptr result, + parent_->GetEvaluatedLiteralFor(operand).Convert( + convert->shape().element_type())); + + if (LayoutUtil::LayoutsInShapesEqual(result->shape(), convert->shape())) { + parent_->evaluated_[convert] = std::move(result); + } else { + parent_->evaluated_[convert] = + result->Relayout(convert->shape().layout()); + } + return Status::OK(); + } + + Status HandleBitcastConvert(HloInstruction* convert) override { + const HloInstruction* operand = convert->operand(0); + TF_RET_CHECK(ShapeUtil::SameDimensions(operand->shape(), convert->shape())); + TF_ASSIGN_OR_RETURN(std::unique_ptr result, + parent_->GetEvaluatedLiteralFor(operand).BitcastConvert( + convert->shape().element_type())); + + if 
(LayoutUtil::LayoutsInShapesEqual(result->shape(), convert->shape())) { + parent_->evaluated_[convert] = std::move(result); + } else { + parent_->evaluated_[convert] = + result->Relayout(convert->shape().layout()); + } + return Status::OK(); + } + + Status HandleExp(HloInstruction* exp) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[exp], + ElementWiseUnaryOp(exp, [](ElementwiseT elem_operand) { + return std::exp(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleFloor(HloInstruction* floor) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[floor], + ElementWiseUnaryOp(floor, [](ElementwiseT elem_operand) { + return std::floor(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleFloor(HloInstruction* floor) { + return InvalidArgument("Unsupported type for Floor"); + } + + Status HandleFloor(HloInstruction* floor) override { + return HandleFloor(floor); + } + + Status HandleLog(HloInstruction* log) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[log], + ElementWiseUnaryOp(log, [](ElementwiseT elem_operand) { + return std::log(elem_operand); + })); + return Status::OK(); + } + + template ::value && + !std::is_same::value>::type* = nullptr> + Status HandleNot(HloInstruction* not_) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], + ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) { + return ~elem_operand; + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleNot(HloInstruction* not_) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], + ElementWiseUnaryOp(not_, [](ElementwiseT elem_operand) { + return !elem_operand; + })); + return Status::OK(); + } + + template ::value>::type* = + nullptr> + Status HandleNot(HloInstruction* not_) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], + ElementWiseUnaryOp(not_, [](ElementwiseT 
elem_operand) { + return !elem_operand; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleNot(HloInstruction* not_) { + return InvalidArgument("Unsupported type for Not"); + } + + Status HandleNot(HloInstruction* not_) override { + return HandleNot(not_); + } + + template ::value && + !std::is_floating_point::value>::type* = nullptr> + Status HandleNegate(HloInstruction* negate) { + using type = typename std::make_unsigned::type; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[negate], + ElementWiseUnaryOp(negate, [](ElementwiseT elem_operand) { + return NativeT(-type(elem_operand)); + })); + return Status::OK(); + } + + template ::value || + std::is_floating_point::value>::type* = nullptr> + Status HandleNegate(HloInstruction* negate) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[negate], + ElementWiseUnaryOp( + negate, [](ElementwiseT elem_operand) { return -elem_operand; })); + return Status::OK(); + } + + Status HandleNegate(HloInstruction* negate) override { + return HandleNegate(negate); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleSign(HloInstruction* sign) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign], + ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) { + return (ElementwiseT(0) < elem_operand) - + (elem_operand < ElementwiseT(0)); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleSign(HloInstruction* sign) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[sign], + ElementWiseUnaryOp(sign, [](ElementwiseT elem_operand) { + auto abs_val = std::abs(elem_operand); + return 0 == abs_val ? 
ElementwiseT(0) + : elem_operand / abs_val; + })); + return Status::OK(); + } + + Status HandleSign(HloInstruction* sign) override { + return HandleSign(sign); + } + + template ::value>::type* = nullptr> + Status HandleAtan2(HloInstruction* atan2) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[atan2], + ElementWiseBinaryOp(atan2, [](ElementwiseT lhs_elem, + ElementwiseT rhs_elem) { + return std::atan2(lhs_elem, rhs_elem); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleAtan2(HloInstruction* atan2) { + return InvalidArgument("Unsupported type for Atan2"); + } + + Status HandleAtan2(HloInstruction* atan2) override { + return HandleAtan2(atan2); + } + + Status HandleTanh(HloInstruction* tanh) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[tanh], + ElementWiseUnaryOp(tanh, [](ElementwiseT elem_operand) { + return std::tanh(elem_operand); + })); + return Status::OK(); + } + + template ::value && + !std::is_floating_point::value>::type* = nullptr> + Status HandleMultiply(HloInstruction* multiply) { + using type = typename std::make_unsigned::type; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[multiply], + ElementWiseBinaryOp(multiply, + [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { + return NativeT(type(lhs_elem) * type(rhs_elem)); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value || + std::is_floating_point::value || + is_complex_t::value>::type* = nullptr> + Status HandleMultiply(HloInstruction* multiply) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[multiply], + ElementWiseBinaryOp(multiply, + [](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { + return lhs_elem * rhs_elem; + })); + return Status::OK(); + } + + Status HandleMultiply(HloInstruction* multiply) override { + return HandleMultiply(multiply); + } + + Status HandleSubtract(HloInstruction* subtract) override { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[subtract], + ElementWiseBinaryOp(subtract, + 
[](ElementwiseT lhs_elem, ElementwiseT rhs_elem) { + return lhs_elem - rhs_elem; + })); + return Status::OK(); + } + + Status HandleAdd(HloInstruction* add) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[add], + ElementWiseBinaryOp(add, [](ElementwiseT lhs_elem, + ElementwiseT rhs_elem) { + return lhs_elem + rhs_elem; + })); + return Status::OK(); + } + + Status HandleDivide(HloInstruction* divide) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[divide], + ElementWiseBinaryOp(divide, [](ElementwiseT lhs_elem, + ElementwiseT rhs_elem) { + return lhs_elem / rhs_elem; + })); + return Status::OK(); + } + + template ::value>::type* = + nullptr> + Status HandleMaximum(HloInstruction* maximum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[maximum], + ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { + return std::max(lhs, rhs); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleMaximum(HloInstruction* maximum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[maximum], + ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { + return ((lhs >= rhs) || std::isnan(lhs)) ? 
lhs : rhs; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleMaximum(HloInstruction* maximum) { + return InvalidArgument("Unsupported type for Maximum"); + } + + Status HandleMaximum(HloInstruction* maximum) override { + return HandleMaximum(maximum); + } + + template ::value>::type* = + nullptr> + Status HandleMinimum(HloInstruction* minimum) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum], + ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return std::min(lhs_el, rhs_el); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleMinimum(HloInstruction* minimum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[minimum], + ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleMinimum(HloInstruction* minimum) { + return InvalidArgument("Unsupported type for Minimum"); + } + + Status HandleMinimum(HloInstruction* minimum) override { + return HandleMinimum(minimum); + } + + Status HandlePower(HloInstruction* power) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[power], + ElementWiseBinaryOp(power, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return std::pow(lhs_el, rhs_el); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleRemainder(HloInstruction* remainder) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[remainder], + ElementWiseBinaryOp(remainder, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return std::fmod(lhs_el, rhs_el); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleRemainder(HloInstruction* 
remainder) { + return InvalidArgument("Unsupported type for Remainder"); + } + + Status HandleRemainder(HloInstruction* remainder) override { + return HandleRemainder(remainder); + } + + template ::value>::type* = + nullptr> + Status HandleAnd(HloInstruction* and_) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[and_], + ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { + return lhs_el & rhs_el; + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleAnd(HloInstruction* and_) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[and_], + ElementWiseBinaryOp(and_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { + return lhs_el && rhs_el; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAnd(HloInstruction* and_) { + return InvalidArgument("Unsupported type for And"); + } + + Status HandleAnd(HloInstruction* and_) override { + return HandleAnd(and_); + } + + template ::value>::type* = + nullptr> + Status HandleOr(HloInstruction* or_) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[or_], + ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { + return lhs_el | rhs_el; + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleOr(HloInstruction* or_) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[or_], + ElementWiseBinaryOp(or_, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { + return lhs_el || rhs_el; + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleOr(HloInstruction* or_) { + return InvalidArgument("Unsupported type for Or"); + } + + Status HandleOr(HloInstruction* or_) override { + return HandleOr(or_); + } + + template ::value && + !std::is_same::value>::type* = nullptr> + Status HandleShiftLeft(HloInstruction* shl) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[shl], + ElementWiseBinaryOp(shl, 
[](NativeT lhs_elem, NativeT rhs_elem) { + return IsShiftOutOfBounds(rhs_elem) ? 0 + : (lhs_elem << rhs_elem); + })); + return Status::OK(); + } + + template ::value || + std::is_same::value>::type* = + nullptr> + Status HandleShiftLeft(HloInstruction*) { + return InvalidArgument("Unsupported type for ShiftLeft"); + } + + Status HandleShiftLeft(HloInstruction* shl) override { + return HandleShiftLeft(shl); + } + template ::value && + !std::is_same::value>::type* = nullptr> + Status HandleShiftRightArithmetic(HloInstruction* shr) { + typedef typename std::make_signed::type SignedT; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[shr], + ElementWiseBinaryOp(shr, [](NativeT lhs_elem, NativeT rhs_elem) { + SignedT lhs_signed = static_cast(lhs_elem); + if (IsShiftOutOfBounds(rhs_elem)) { + return lhs_signed < 0 ? static_cast(-1) : 0; + } else { + return lhs_signed >> rhs_elem; + } + })); + return Status::OK(); + } + + template ::value || + std::is_same::value>::type* = + nullptr> + Status HandleShiftRightArithmetic(HloInstruction*) { + return InvalidArgument("Unsupported type for ShiftRightArithmetic"); + } + + Status HandleShiftRightArithmetic(HloInstruction* shra) override { + return HandleShiftRightArithmetic(shra); + } + + template ::value && + !std::is_same::value>::type* = nullptr> + Status HandleShiftRightLogical(HloInstruction* shr) { + typedef typename std::make_unsigned::type UnsignedT; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[shr], + ElementWiseBinaryOp(shr, [](NativeT lhs_elem, NativeT rhs_elem) { + // If shift amount is greater than the number of bits, then return 0. 
+ if (IsShiftOutOfBounds(rhs_elem)) { + return static_cast(0); + } + return static_cast(static_cast(lhs_elem) >> + rhs_elem); + })); + return Status::OK(); + } + + template ::value || + std::is_same::value>::type* = + nullptr> + Status HandleShiftRightLogical(HloInstruction*) { + return InvalidArgument("Unsupported type for ShiftRightLogical"); + } + + Status HandleShiftRightLogical(HloInstruction* shrl) override { + return HandleShiftRightLogical(shrl); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleClamp(HloInstruction* clamp) { + std::function + clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { + return std::fmin(high, std::fmax(value, low)); + }; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[clamp], + ElementwiseTernaryOp(clamp, + std::move(ConvertTernaryFunction(clamp_op)))); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleClamp(HloInstruction*) { + return InvalidArgument("Unsupported type for Clamp"); + } + + Status HandleClamp(HloInstruction* clamp) override { + return HandleClamp(clamp); + } + + Status HandleSelect(HloInstruction* select) override { + CHECK(!ShapeUtil::IsScalar(select->operand(0)->shape())); + CHECK(!ShapeUtil::IsTuple(select->shape())); + std::function select_op = + [](bool pred, ReturnT on_true, ReturnT on_false) { + if (pred) { + return on_true; + } + return on_false; + }; + TF_ASSIGN_OR_RETURN(parent_->evaluated_[select], + ElementwiseTernaryOp(select, std::move(select_op))); + return Status::OK(); + } + + Status HandleReverse(HloInstruction* reverse) override { + const auto result_shape = reverse->shape(); + const auto reverse_dimensions = reverse->dimensions(); + + auto operand = reverse->operand(0); + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferReverseShape(operand->shape(), + reverse_dimensions)); + + 
TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) + << "return shape set to: " << ShapeUtil::HumanString(result_shape) + << " but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); + auto result = Literal::CreateFromShape(result_shape); + + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice out_index) { + std::vector from_index(out_index.begin(), out_index.end()); + for (const int64 dim : reverse_dimensions) { + from_index[dim] = result_shape.dimensions(dim) - 1 - out_index[dim]; + } + return operand_literal.Get(from_index); + })); + + parent_->evaluated_[reverse] = std::move(result); + return Status::OK(); + } + + Status HandleConvolution(HloInstruction* conv) override { + auto lhs = conv->operand(0); + auto rhs = conv->operand(1); + const auto& window = conv->window(); + const Shape& result_shape = conv->shape(); + const Shape& lhs_shape = lhs->shape(); + const Shape& rhs_shape = rhs->shape(); + + TF_CHECK_OK(ShapeUtil::ValidateShape(lhs_shape)); + TF_CHECK_OK(ShapeUtil::ValidateShape(rhs_shape)); + CHECK(ShapeUtil::IsArray(lhs_shape)); + CHECK(ShapeUtil::IsArray(rhs_shape)); + CHECK(ShapeUtil::SameElementType(lhs_shape, rhs_shape)); + CHECK(ShapeUtil::SameElementType(lhs_shape, result_shape)); + + const auto& dnums = conv->convolution_dimension_numbers(); + const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); + CHECK_EQ(num_spatial_dims, dnums.input_spatial_dimensions_size()); + CHECK_EQ(num_spatial_dims, dnums.kernel_spatial_dimensions_size()); + CHECK_GE(num_spatial_dims, 0); + CHECK_EQ(window.dimensions_size(), num_spatial_dims); + + const auto lhs_rank = ShapeUtil::Rank(lhs_shape); + const auto rhs_rank = ShapeUtil::Rank(rhs_shape); + + CHECK_EQ(num_spatial_dims + 2, lhs_rank); + CHECK_EQ(num_spatial_dims + 2, rhs_rank); + + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + 
ShapeInference::InferConvolveShape(lhs_shape, rhs_shape, + window, dnums)); + CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) + << "return shape set to: " << ShapeUtil::HumanString(result_shape) + << " but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + + std::vector window_dimension_sizes; + for (auto i : dnums.kernel_spatial_dimensions()) { + window_dimension_sizes.push_back(ShapeUtil::GetDimension(rhs_shape, i)); + } + + const Shape& window_shape = + ShapeUtil::MakeShape(rhs_shape.element_type(), window_dimension_sizes); + + DimensionVector lhs_dim_multipliers = MakeDimMultipliers(lhs_shape); + DimensionVector rhs_dim_multipliers = MakeDimMultipliers(rhs_shape); + + auto lhs_literal_data = lhs_literal.data(); + auto rhs_literal_data = rhs_literal.data(); + + auto func = [&window_shape, &dnums, &lhs_shape, &rhs_shape, &window, + &lhs_dim_multipliers, &rhs_dim_multipliers, lhs_literal_data, + rhs_literal_data]( + tensorflow::gtl::ArraySlice out_index) { + // Dimension number applicable for input (lhs). + const int64 input_batch_dim = dnums.input_batch_dimension(); + const int64 input_z_dim = dnums.input_feature_dimension(); + // Dimension number applicable for kernel (rhs). + const int64 kernel_input_z_dim = dnums.kernel_input_feature_dimension(); + const int64 kernel_output_z_dim = dnums.kernel_output_feature_dimension(); + // Dimension number applicable for output. + const int64 output_batch_dim = dnums.output_batch_dimension(); + const int64 output_z_dim = dnums.output_feature_dimension(); + + const int64 z_size = ShapeUtil::GetDimension(lhs_shape, input_z_dim); + + ElementwiseT result_val = static_cast(0); + DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size(), + 0); + + // Convolve input feature with kernel. 
+ do { + for (int64 iz = 0; iz < z_size; ++iz) { + int64 lhs_linear_index = 0; + lhs_linear_index += out_index[output_batch_dim] * + lhs_dim_multipliers[input_batch_dim]; + lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; + + int64 rhs_linear_index = 0; + rhs_linear_index += out_index[output_z_dim] * + rhs_dim_multipliers[kernel_output_z_dim]; + rhs_linear_index += iz * rhs_dim_multipliers[kernel_input_z_dim]; + + // Find corresponding spatial dimension index for input (lhs). + for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { + // Spatial dimension number for input (lhs) and output. + const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); + const int64 output_spatial_dim = + dnums.output_spatial_dimensions(ki); + + // Calculate lhs (input) index without taking base dilation into + // account. + const auto& window_dim = window.dimensions(ki); + const int64 undilated_index = + out_index[output_spatial_dim] * window_dim.stride() - + window_dim.padding_low() + + rhs_spatial_index[ki] * window_dim.window_dilation(); + // Skip if the lhs (input) index is to be dilated. As an + // optimization, skip this mod if there's no dilation. + if (window_dim.base_dilation() > 1 && + undilated_index % window_dim.base_dilation() != 0) { + goto cnt; + } + + // Calculate the actual lhs (input) index after dilation. As an + // optimization, skip this integer divide if there's no dilation. + int64 lhs_spatial_index; + if (window_dim.base_dilation() > 1) { + lhs_spatial_index = undilated_index / window_dim.base_dilation(); + } else { + lhs_spatial_index = undilated_index; + } + lhs_linear_index += + lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; + + // Skip if input index is not in bounds. + if (!(lhs_spatial_index >= 0 && + lhs_spatial_index < + lhs_shape.dimensions(input_spatial_dim))) { + goto cnt; + } + + rhs_linear_index += + (window_dim.window_reversal() + ? 
((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]) * + rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; + } + + result_val += + static_cast(lhs_literal_data[lhs_linear_index]) * + static_cast(rhs_literal_data[rhs_linear_index]); + } + cnt : {} + } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index)); + + return static_cast(result_val); + }; + + auto result = Literal::CreateFromShape(result_shape); + TF_RETURN_IF_ERROR(result->PopulateParallel(func)); + + parent_->evaluated_[conv] = std::move(result); + return Status::OK(); + } + + Status HandleDot(HloInstruction* dot) override { + auto lhs = dot->operand(0); + auto rhs = dot->operand(1); + CHECK(ShapeUtil::IsArray(dot->shape())); + CHECK(ShapeUtil::IsArray(lhs->shape())); + CHECK(ShapeUtil::IsArray(rhs->shape())); + + const auto& dnums = dot->dot_dimension_numbers(); + + const auto lhs_rank = ShapeUtil::Rank(lhs->shape()); + const auto rhs_rank = ShapeUtil::Rank(rhs->shape()); + + CHECK(ShapeUtil::SameElementType(lhs->shape(), rhs->shape())); + CHECK(ShapeUtil::SameElementType(lhs->shape(), dot->shape())); + + // There must be 1 and only 1 Contracting dimension for lhs and rhs. + CHECK_EQ(dnums.lhs_contracting_dimensions_size(), 1); + CHECK_EQ(dnums.rhs_contracting_dimensions_size(), 1); + const int64 lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); + const int64 rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); + // Contracted dimension sizes must be the same. 
+ CHECK_EQ(lhs->shape().dimensions(lhs_contracting_dimension), + rhs->shape().dimensions(rhs_contracting_dimension)) + << "lhs contracted dimension: " + << lhs->shape().dimensions(lhs_contracting_dimension) + << " rhs contracted dimension: " + << rhs->shape().dimensions(rhs_contracting_dimension); + const int64 contracted_dimension_size = + lhs->shape().dimensions(lhs_contracting_dimension); + + const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + + auto result = Literal::CreateFromShape(dot->shape()); + + CHECK_EQ(dnums.lhs_batch_dimensions_size(), + dnums.rhs_batch_dimensions_size()); + + std::vector lhs_non_contracting_dims; + for (int64 i = 0; i < lhs_rank; i++) { + if (i != lhs_contracting_dimension) { + lhs_non_contracting_dims.push_back(i); + } + } + + std::vector rhs_non_batch_non_contracting_dims; + tensorflow::gtl::FlatSet batch_dims_set( + dnums.rhs_batch_dimensions().begin(), + dnums.rhs_batch_dimensions().end()); + for (int64 i = 0; i < rhs_rank; i++) { + if (i != rhs_contracting_dimension && batch_dims_set.count(i) == 0) { + rhs_non_batch_non_contracting_dims.push_back(i); + } + } + + const int64 batch_dim_size = dnums.lhs_batch_dimensions_size(); + const int64 lhs_non_contracting_size = lhs_non_contracting_dims.size(); + + DimensionVector lhs_index(lhs_rank); + DimensionVector rhs_index(rhs_rank); + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice result_index) { + ElementwiseT result_val = static_cast(0); + + // Find the corresponding non-contracting indices for lhs and rhs. + // + // For `result_index`, its batch dimension, if exists, will be at the + // same dimension as the batch dimension of lhs and rhs. More + // specifically: + // - For lhs, the non-contracting dimensions, including the batch + // dimension have the same index as the `result_index`. 
+ // - For rhs, the batch dimension is set seperately from other + // non-contracting dimensions, since these other non-contracting + // dimensions in rhs follow the non-contracting dimensions of lhs in + // the resulting index. + // + // As an example, for a resulting index: + // result_index [result_batch, result_x, result_y] + // the effecting lhs and rhs indices are: + // lhs [result_batch, lhs_non_contracting_dim, contracting_dim + // rhs [result_batch, contracting_dim, rhs_non_contracting_dim] + // `result_x` is only affected by the lhs_non_contracting_dim and + // likewise `result_y` only depends on rhs_non_contracting_dim. + // + // so we can look up the lhs and rhs indices by: + // + // lhs: + // batch index is the same as `result_batch`. + // non-contracting dimension is the same as + // result_index[lhs_non_contracting_dim] + // rhs: + // batch index: the same as `result_batch`. + // non-contracting dimension index: *not* the same as + // result_index[rhs_non_contractng_dim], since the + // non-contracting dimensions of lhs are included in the + // result_index first. Instead, the non_contracting_dim of rhs must + // be calculated as following: + // lhs_non_contracting_dimensions_size + + // (rhs_non_batch_non_contracting_dim - batch_dim_size) - 1 + // + // Note that (rhs_non_batch_contracting_dim - batch_dim_size) is + // the index offset to the result_index that only depends on + // the non_batch and non-contracting dimensions of rhs. -1 at the + // end translates size to index. + for (auto i : lhs_non_contracting_dims) { + lhs_index[i] = result_index[i]; + } + for (auto i : dnums.rhs_batch_dimensions()) { + rhs_index[i] = result_index[i]; + } + for (auto i : rhs_non_batch_non_contracting_dims) { + const int64 rhs_non_batch_non_contracting_dim = + lhs_non_contracting_size + (i - batch_dim_size) - 1; + rhs_index[i] = result_index[rhs_non_batch_non_contracting_dim]; + } + + // Accumulates resulting product along the contracted dimension. 
+ for (int64 i = 0; i < contracted_dimension_size; ++i) { + lhs_index[lhs_contracting_dimension] = i; + rhs_index[rhs_contracting_dimension] = i; + + result_val += + static_cast(lhs_literal.Get(lhs_index)) * + static_cast(rhs_literal.Get(rhs_index)); + } + + return static_cast(result_val); + })); + + parent_->evaluated_[dot] = std::move(result); + return Status::OK(); + } + + Status HandlePad(HloInstruction* pad) override { + CHECK(!ShapeUtil::IsTuple(pad->operand(0)->shape())); + // Padding value must be scalar. + CHECK(ShapeUtil::IsScalar(pad->operand(1)->shape())); + CHECK_EQ(ShapeUtil::Rank(pad->operand(0)->shape()), + pad->padding_config().dimensions_size()); + + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferPadShape( + /*operand_shape=*/pad->operand(0)->shape(), + /*padding_value_shape=*/pad->operand(1)->shape(), + /*padding_config=*/pad->padding_config())); + CHECK(ShapeUtil::Compatible(pad->shape(), inferred_return_shape)) + << "return shape is set to: " << ShapeUtil::HumanString(pad->shape()) + << "but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + // Create new HLO of padded shape with padding value. + ReturnT scalar = + parent_->GetEvaluatedLiteralFor(pad->operand(1)).Get({}); + auto result = Literal::CreateFromShape(pad->shape()); + TF_RETURN_IF_ERROR(result->Populate( + [&scalar](tensorflow::gtl::ArraySlice multi_index) { + return scalar; + })); + + const Literal& evaluated_operand = + parent_->GetEvaluatedLiteralFor(pad->operand(0)); + + std::vector input_index(ShapeUtil::Rank(evaluated_operand.shape()), + 0); + std::vector target_index(ShapeUtil::Rank(result->shape()), 0); + + // Loop through each element of the operand, assign them to the + // corresponding index of the resulting padded literal. 
+ const PaddingConfig& pad_config = pad->padding_config(); + + auto func = [&](tensorflow::gtl::ArraySlice input_index) { + for (auto i = 0; i < input_index.size(); ++i) { + // Interior padding occurs logically before edge padding, so in the case + // of negative edge padding elements are removed from the + // interior-padded operand. + target_index[i] = + pad_config.dimensions(i).edge_padding_low() + + input_index[i] * (pad_config.dimensions(i).interior_padding() + 1); + + // Account for negative low and high padding: skip assignment if the + // any target index is out of range. + if (!(target_index[i] >= 0 && + target_index[i] < pad->shape().dimensions(i))) { + return true; + } + } + result->Set(target_index, + evaluated_operand.Get(input_index)); + return true; + }; + + std::vector zero_base(evaluated_operand.shape().dimensions_size(), + 0); + std::vector step(evaluated_operand.shape().dimensions_size(), 1); + + ShapeUtil::ForEachIndex( + evaluated_operand.shape(), zero_base, + AsInt64Slice(evaluated_operand.shape().dimensions()), step, func); + + parent_->evaluated_[pad] = std::move(result); + return Status::OK(); + } + + Status HandleDynamicSlice(HloInstruction* dynamic_slice) override { + auto operand = dynamic_slice->operand(0); + auto start_indices = dynamic_slice->operand(1); + auto result_shape = dynamic_slice->shape(); + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferDynamicSliceShape( + operand->shape(), start_indices->shape(), + dynamic_slice->dynamic_slice_sizes())); + TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) + << "return shape is set to: " << ShapeUtil::HumanString(result_shape) + << "but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + TF_RET_CHECK( + primitive_util::IsIntegralType(start_indices->shape().element_type())); + + const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); + const Literal& start_indices_literal = + 
parent_->GetEvaluatedLiteralFor(start_indices); + + switch (start_indices->shape().element_type()) { + case S32: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_slice], + DynamicSlice(operand_literal, start_indices_literal, + result_shape)); + } break; + case S64: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_slice], + DynamicSlice(operand_literal, start_indices_literal, + result_shape)); + } break; + case U32: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_slice], + DynamicSlice(operand_literal, start_indices_literal, + result_shape)); + } break; + case U64: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_slice], + DynamicSlice(operand_literal, start_indices_literal, + result_shape)); + } break; + default: + LOG(FATAL) << "HandleDynamicSlice: unhandled primitive type for " + "start_indices: " + << PrimitiveType_Name(start_indices->shape().element_type()); + } + + return Status::OK(); + } + + Status HandleDynamicUpdateSlice( + HloInstruction* dynamic_update_slice) override { + auto operand = dynamic_update_slice->operand(0); + auto update = dynamic_update_slice->operand(1); + auto start_indices = dynamic_update_slice->operand(2); + auto result_shape = dynamic_update_slice->shape(); + TF_ASSIGN_OR_RETURN( + auto inferred_return_shape, + ShapeInference::InferDynamicUpdateSliceShape( + operand->shape(), update->shape(), start_indices->shape())); + TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) + << "return shape is set to: " << ShapeUtil::HumanString(result_shape) + << "but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + TF_RET_CHECK( + primitive_util::IsIntegralType(start_indices->shape().element_type())); + TF_RET_CHECK(ShapeUtil::Compatible(result_shape, operand->shape())); + + const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); + const Literal& update_literal = parent_->GetEvaluatedLiteralFor(update); + const Literal& start_indices_literal = + 
parent_->GetEvaluatedLiteralFor(start_indices); + + switch (start_indices->shape().element_type()) { + case S32: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_update_slice], + DynamicUpdateSlice(operand_literal, update_literal, + start_indices_literal)); + } break; + case S64: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_update_slice], + DynamicUpdateSlice(operand_literal, update_literal, + start_indices_literal)); + } break; + case U32: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_update_slice], + DynamicUpdateSlice(operand_literal, update_literal, + start_indices_literal)); + } break; + case U64: { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[dynamic_update_slice], + DynamicUpdateSlice(operand_literal, update_literal, + start_indices_literal)); + } break; + default: + LOG(FATAL) << "HandleDynamicUpdateSlice: unhandled primitive type for " + "start_indices: " + << PrimitiveType_Name(start_indices->shape().element_type()); + } + + return Status::OK(); + } + + template + StatusOr> MapImpl(HloInstruction* map) { + auto operands = map->operands(); + HloComputation* computation = map->to_apply(); + + auto result = Literal::CreateFromShape(map->shape()); + + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + std::vector> arg_literals; + arg_literals.reserve(operands.size()); + + // Construct scalar literal parameters to be passed to the map + // computation. 
+ for (auto operand : operands) { + const Literal& arg_literal = + parent_->GetEvaluatedLiteralFor(operand); + + auto curr_val = arg_literal.Get(multi_index); + auto curr_val_literal = Literal::CreateR0(curr_val); + + arg_literals.push_back(std::move(curr_val_literal)); + } + + std::unique_ptr computed_result = + embedded_evaluator + .Evaluate>(*computation, + arg_literals) + .ConsumeValueOrDie(); + // Clear visit states so that the we can use the evaluate again on + // the same computation. + embedded_evaluator.ResetVisitStates(); + + return computed_result->Get({}); + })); + return std::move(result); + } + + Status HandleMap(HloInstruction* map) override { + switch (map->operand(0)->shape().element_type()) { + case PRED: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case U8: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case U32: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case U64: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case S8: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case S32: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case S64: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case F16: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], + MapImpl(map)); + break; + } + case F32: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case F64: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + case C64: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); + break; + } + default: + LOG(FATAL) << "HandleMap: unhandled primitive type for " + "input operand: " + << PrimitiveType_Name( + map->operand(0)->shape().element_type()); + } + + return Status::OK(); + } + + Status HandleReduce(HloInstruction* reduce) override { + auto arg = reduce->operand(0); + 
auto init_value = reduce->operand(1); + tensorflow::gtl::ArraySlice dimensions(reduce->dimensions()); + HloComputation* function = reduce->to_apply(); + TF_RET_CHECK(ShapeUtil::Rank(reduce->shape()) == + ShapeUtil::Rank(arg->shape()) - dimensions.size()); + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferReduceShape( + /*arg=*/arg->shape(), + /*init_value=*/init_value->shape(), + /*dimensions_to_reduce=*/dimensions, + /*to_apply=*/function->ComputeProgramShape())); + TF_RET_CHECK(ShapeUtil::Compatible(reduce->shape(), inferred_return_shape)) + << "return shape is set to: " << ShapeUtil::HumanString(reduce->shape()) + << "but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const Literal& arg_literal = parent_->GetEvaluatedLiteralFor(arg); + VLOG(3) << "HandleReduce arg_literal: " << arg_literal.ToString(); + const Literal& init_literal = parent_->GetEvaluatedLiteralFor(init_value); + VLOG(3) << "HandleReduce init_literal: " << init_literal.ToString(); + TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); + auto init_scalar = init_literal.Get({}); + + auto result = Literal::CreateFromShape(reduce->shape()); + + const auto arg_dimensions = AsInt64Slice(arg_literal.shape().dimensions()); + std::vector arg_dim_steps(arg_dimensions.size()); + std::vector arg_dim_counts(arg_dimensions.size()); + for (const int64 dim : dimensions) { + arg_dim_steps[dim] = 1; + arg_dim_counts[dim] = arg_dimensions[dim]; + } + + // Map each dimension in the result to a dimension in arg that isn't + // being reduced. + std::vector result_to_arg_index; + for (int64 i = 0; i < arg_dimensions.size(); ++i) { + if (arg_dim_steps[i] == 0) { + result_to_arg_index.push_back(i); + } + } + + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); + // For each resulting dimension, calculate and assign computed value. 
+ TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + ReturnT result_val = init_scalar; + + std::vector base(arg_dimensions.size()); + for (int64 i = 0; i < multi_index.size(); ++i) { + base[result_to_arg_index[i]] = multi_index[i]; + } + + // When the reduction is addition of floats, accumulate in a double + // for better precision. Also, avoid creating Literals for the + // intermediate results; it's much faster. + if (ShapeUtil::ElementIsFloating(init_literal.shape()) && + IsScalarAdd(function)) { + double computed_result = 0; + auto func = [&](tensorflow::gtl::ArraySlice input_index) { + computed_result += arg_literal.Get(input_index); + return true; + }; + ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, + arg_dim_steps, func); + return static_cast(computed_result); + } + auto func = [&](tensorflow::gtl::ArraySlice input_index) { + auto curr_val = arg_literal.Get(input_index); + + // Evaluate computation with specified literal operands. + auto curr_val_literal = Literal::CreateR0(curr_val); + auto result_val_literal = Literal::CreateR0(result_val); + std::vector args = {result_val_literal.get(), + curr_val_literal.get()}; + + std::unique_ptr computed_result = + embedded_evaluator.Evaluate(*function, args) + .ConsumeValueOrDie(); + // Clear visit states so that we can use the evaluator again on + // the same computation. + embedded_evaluator.ResetVisitStates(); + // Assign computed result to result_val. + result_val = computed_result->Get({}); + return true; + }; + // Computes one element of the result, reducing all dimensions that + // contribute to that element. 
+ ShapeUtil::ForEachIndex(arg_literal.shape(), base, arg_dim_counts, + arg_dim_steps, func); + return result_val; + })); + + parent_->evaluated_[reduce] = std::move(result); + return Status::OK(); + } + + bool IsScalarAdd(HloComputation* computation) { + HloInstruction* instruction = computation->root_instruction(); + if (instruction->opcode() == HloOpcode::kAdd && + computation->num_parameters() == 2) { + const HloInstruction* lhs = instruction->operand(0); + const HloInstruction* rhs = instruction->operand(1); + return lhs->opcode() == HloOpcode::kParameter && + ShapeUtil::IsScalar(lhs->shape()) && + rhs->opcode() == HloOpcode::kParameter && + ShapeUtil::IsScalar(rhs->shape()) && lhs != rhs; + } + return false; + } + + Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override { + auto operand = select_and_scatter->operand(0); + auto source = select_and_scatter->operand(1); + const Window& window = select_and_scatter->window(); + + const Literal& init_literal = + parent_->GetEvaluatedLiteralFor(select_and_scatter->operand(2)); + TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); + auto init_scalar = init_literal.Get({}); + + auto result = Literal::CreateFromShape(select_and_scatter->shape()); + + // Initialize result array with the init value. 
+ TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice output_index) { + return init_scalar; + })); + + std::vector window_dimension_sizes; + for (const auto& window_dimension : window.dimensions()) { + window_dimension_sizes.push_back(window_dimension.size()); + } + const Shape window_shape = ShapeUtil::MakeShape( + operand->shape().element_type(), window_dimension_sizes); + + HloComputation* select = select_and_scatter->select(); + HloComputation* scatter = select_and_scatter->scatter(); + + const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); + const Literal& source_literal = parent_->GetEvaluatedLiteralFor(source); + + int64 rank = ShapeUtil::Rank(operand_literal.shape()); + + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); + DimensionVector source_index(rank); + + std::fill(source_index.begin(), source_index.end(), 0); + do { + // For each element in `source`, we place a window in `operand`. For each + // window placement, we iterate inside the window twice: + // + // 1. Find the selected index by applying `select` function to all + // elements. E.g., If the `select` function is GreaterEqual, the first + // iteration through the window finds the biggest value and returns its + // index. + // + // 2. Using the selected index, scatter value from `source` to result. We + // do this by iterating through the window, and compare each index with + // the selected index. 
+ tensorflow::gtl::optional selected_val; + tensorflow::gtl::optional> selected_index; + + IterateThroughWindow( + window_shape, window, operand_literal.shape(), source_index, + [&](const std::vector& operand_index) { + auto curr_val = operand_literal.Get(operand_index); + if (!selected_val) { + selected_val = curr_val; + selected_index = operand_index; + } + const auto curr_val_literal = Literal::CreateR0(curr_val); + const auto selected_val_literal = + Literal::CreateR0(*selected_val); + + const std::vector args = { + selected_val_literal.get(), curr_val_literal.get()}; + std::unique_ptr computed_result = + embedded_evaluator.Evaluate(*select, args) + .ConsumeValueOrDie(); + bool selected = !computed_result->Get({}); + if (selected) { + selected_val = curr_val; + selected_index = operand_index; + } + embedded_evaluator.ResetVisitStates(); + }); + + IterateThroughWindow( + window_shape, window, operand_literal.shape(), source_index, + [&](const std::vector& operand_index) { + if (std::equal(operand_index.begin(), operand_index.end(), + selected_index->begin())) { + auto source = source_literal.Get(source_index); + auto scattered = result->Get(operand_index); + const auto source_literal = Literal::CreateR0(source); + const auto scattered_literal = + Literal::CreateR0(scattered); + + const std::vector args = { + source_literal.get(), scattered_literal.get()}; + std::unique_ptr computed_result = + embedded_evaluator.Evaluate(*scatter, args) + .ConsumeValueOrDie(); + result->Set(operand_index, computed_result->Get({})); + // Clear visit states so that the we can use the evaluator again + // on the same computation. 
+ embedded_evaluator.ResetVisitStates(); + } + }); + } while (IndexUtil::BumpIndices(source->shape(), &source_index)); + + parent_->evaluated_[select_and_scatter] = std::move(result); + return Status::OK(); + } + + Status HandleReduceWindow(HloInstruction* reduce_window) override { + auto operand = reduce_window->operand(0); + const Window& window = reduce_window->window(); + HloComputation* function = reduce_window->to_apply(); + TF_ASSIGN_OR_RETURN( + auto inferred_return_shape, + ShapeInference::InferReduceWindowShape( + /*operand_shape=*/reduce_window->operand(0)->shape(), + /*init_value=*/reduce_window->operand(1)->shape(), window, + /*to_apply_shape=*/function->ComputeProgramShape())); + TF_RET_CHECK( + ShapeUtil::Compatible(reduce_window->shape(), inferred_return_shape)) + << "return shape is set to: " + << ShapeUtil::HumanStringWithLayout(reduce_window->shape()) + << "but is inferred to be: " + << ShapeUtil::HumanStringWithLayout(inferred_return_shape); + + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(reduce_window->operand(0)); + VLOG(3) << "HandleReduceWindow arg_literal: " << operand_literal.ToString(); + const Literal& init_literal = + parent_->GetEvaluatedLiteralFor(reduce_window->operand(1)); + VLOG(3) << "HandleReduceWindow init_literal: " << init_literal.ToString(); + TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); + auto init_scalar = init_literal.Get({}); + + auto result = Literal::CreateFromShape(reduce_window->shape()); + + // Creates a Shape object from window, for iteration below. 
+ std::vector window_dimension_sizes; + for (const auto& window_dimension : window.dimensions()) { + window_dimension_sizes.push_back(window_dimension.size()); + } + const Shape window_shape = ShapeUtil::MakeShape( + operand->shape().element_type(), window_dimension_sizes); + + DimensionVector window_index(window.dimensions_size()); + DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape())); + + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); + // For each resulting dimension, calculate and assign computed value. + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice output_index) { + ReturnT result_val = init_scalar; + + std::fill(window_index.begin(), window_index.end(), 0); + std::fill(operand_index.begin(), operand_index.end(), 0); + + IterateThroughWindow( + window_shape, window, operand_literal.shape(), output_index, + [&](const std::vector& operand_index) { + auto curr_val = operand_literal.Get(operand_index); + + // Evaluate computation with specified literal operands. + const auto curr_val_literal = + Literal::CreateR0(curr_val); + const auto result_val_literal = + Literal::CreateR0(result_val); + const std::vector args = { + result_val_literal.get(), curr_val_literal.get()}; + std::unique_ptr computed_result = + embedded_evaluator.Evaluate(*function, args) + .ConsumeValueOrDie(); + + // Clear visit states so that the we can use the evaluate again + // on the same computation. 
+ embedded_evaluator.ResetVisitStates(); + + result_val = computed_result->Get({}); + }); + + return result_val; + })); + + parent_->evaluated_[reduce_window] = std::move(result); + return Status::OK(); + } + + Status HandleSlice(HloInstruction* slice) override { + auto operand = slice->operand(0); + const Shape& shape = slice->shape(); + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferSliceShape( + operand->shape(), slice->slice_starts(), + slice->slice_limits(), slice->slice_strides())); + TF_RET_CHECK(ShapeUtil::Compatible(shape, inferred_return_shape)) + << "return shape set to: " << ShapeUtil::HumanString(shape) + << " but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const int64 rank = ShapeUtil::Rank(operand->shape()); + const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); + auto func = [&](tensorflow::gtl::ArraySlice out_index) { + DimensionVector operand_index(rank); + for (int64 i = 0; i < rank; ++i) { + operand_index[i] = + slice->slice_starts(i) + out_index[i] * slice->slice_strides(i); + } + return operand_literal.Get(operand_index); + }; + + auto result = Literal::CreateFromDimensions( + shape.element_type(), AsInt64Slice(shape.dimensions())); + TF_RETURN_IF_ERROR(result->Populate(func)); + parent_->evaluated_[slice] = std::move(result); + return Status::OK(); + } + + // Enable CLZ only for int32 and uint32. 
+ template < + typename NativeT, + typename std::enable_if< + (std::is_floating_point::value || + std::is_integral::value || is_complex_t::value) && + !(std::is_same::value || + std::is_same::value)>::type* = nullptr> + Status HandleClz(HloInstruction* clz) { + return InvalidArgument("Unsupported type for Clz"); + } + + template ::value || + std::is_same::value>::type* = nullptr> + Status HandleClz(HloInstruction* clz) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], + ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { + return 31 - tensorflow::Log2Floor(elem_operand); + })); + return Status::OK(); + } + + Status HandleClz(HloInstruction* clz) override { + return HandleClz(clz); + } + + template ::value>::type* = nullptr> + Status HandleSin(HloInstruction* sin) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[sin], + ElementWiseUnaryOp(sin, [](ElementwiseT elem_operand) { + return std::sin(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value || + is_complex_t::value>::type* = nullptr> + Status HandleSin(HloInstruction* sin) { + return InvalidArgument("Unsupported type for Sin"); + } + + Status HandleSin(HloInstruction* sin) override { + return HandleSin(sin); + } + + template ::value>::type* = nullptr> + Status HandleCos(HloInstruction* cos) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[cos], + ElementWiseUnaryOp(cos, [](ElementwiseT elem_operand) { + return std::cos(elem_operand); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value || + is_complex_t::value>::type* = nullptr> + Status HandleCos(HloInstruction* cos) { + return InvalidArgument("Unsupported type for Cos"); + } + + Status HandleCos(HloInstruction* cos) override { + return HandleCos(cos); + } + + template ::value>::type* = nullptr> + Status HandleReducePrecision(HloInstruction* reduce_precision) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[reduce_precision], + 
+ ElementWiseUnaryOp(reduce_precision, [reduce_precision]( + ElementwiseT elem) { + uint32_t value_as_int = tensorflow::bit_cast(elem); + const uint32_t mantissa_bits = reduce_precision->mantissa_bits(); + const uint32_t exponent_bits = reduce_precision->exponent_bits(); + + // Code is based on the CPU/GPU implementation in LLVM-emitting code. + // + // Bits in float type: + // mantissa : bits [0:22] + // exponent : bits [23:30] + // sign : bits [31] + if (mantissa_bits < 23) { + const uint32_t last_mantissa_bit_mask = 1u << (23 - mantissa_bits); + + // Compute rounding bias for round-to-nearest with ties to even. + // This is equal to a base value of 0111... plus one bit if the last + // remaining mantissa bit is 1. + const uint32_t base_rounding_bias = + (last_mantissa_bit_mask >> 1) - 1; + const uint32_t x_last_mantissa_bit = + (value_as_int & last_mantissa_bit_mask) >> (23 - mantissa_bits); + const uint32_t x_rounding_bias = + x_last_mantissa_bit + base_rounding_bias; + + // Add rounding bias, and mask out truncated bits. Note that the + // case where adding the rounding bias overflows into the exponent + // bits is correct; the non-masked mantissa bits will all be zero, + // and the exponent will be incremented by one. + const uint32_t truncation_mask = ~(last_mantissa_bit_mask - 1); + value_as_int = value_as_int + x_rounding_bias; + value_as_int = value_as_int & truncation_mask; + } + if (exponent_bits < 8) { + // Masks for f32 values. + const uint32_t f32_sign_bit_mask = 1u << 31; + const uint32_t f32_exp_bits_mask = 0xffu << 23; + + // An exponent of 2^(n-1)-1 -- that is, 0111... with the zero in the + // most-significant bit -- is equal to 1.0f for all exponent sizes. + // Adding 2^(n-1)-1 to this gives us the highest non-infinite + // exponent for a bit-size of n, and subtracting 2^(n-1)-1 from + // this gives us the lowest exponent (corresponding to 0.0f). 
+ // + // Thus, the f32 exponent corresponding to the highest non-infinite + // exponent for a bit size of n is (2^7-1) + (2^(n-1)-1), and the f32 + // exponent corresponding to the lowest exponent for a bit size of n + // is (2^7-1) - (2^(n-1)-1). + // + // Note that we have already checked that exponent_bits >= 1. + const uint32_t f32_exponent_bias = (1 << 7) - 1; + const uint32_t reduced_exponent_bias = + (1 << (exponent_bits - 1)) - 1; + const uint32_t reduced_max_exponent = + f32_exponent_bias + reduced_exponent_bias; + const uint32_t reduced_min_exponent = + f32_exponent_bias - reduced_exponent_bias; + + // Do we overflow or underflow? + const uint32_t x_exponent = value_as_int & f32_exp_bits_mask; + const bool x_overflows = x_exponent > (reduced_max_exponent << 23); + const bool x_underflows = + x_exponent <= (reduced_min_exponent << 23); + + // Compute appropriately-signed values of zero and infinity. + const uint32_t x_signed_zero = value_as_int & f32_sign_bit_mask; + const uint32_t x_signed_inf = x_signed_zero | f32_exp_bits_mask; + + // Force to zero or infinity if overflow or underflow. (Note that + // this truncates all denormal values to zero, rather than rounding + // them.) + value_as_int = x_overflows ? x_signed_inf : value_as_int; + value_as_int = x_underflows ? x_signed_zero : value_as_int; + } + + float reduced_result = tensorflow::bit_cast(value_as_int); + if (std::isnan(elem)) { + reduced_result = mantissa_bits > 0 + ? 
elem + : std::numeric_limits::infinity(); + } + return reduced_result; + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> + Status HandleReducePrecision(HloInstruction* reduce_precision) { + return InvalidArgument("Double not supported for reduce precision"); + } + + template < + typename NativeT, + typename std::enable_if::value || + is_complex_t::value>::type* = nullptr> + Status HandleReducePrecision(HloInstruction* reduce_precision) { + return InvalidArgument("Unsupported type for reduce precision"); + } + + Status HandleReducePrecision(HloInstruction* reduce_precision) override { + return HandleReducePrecision(reduce_precision); + } + + private: + // Creates a vector of multipliers which can be used to create a linear index + // into shape. + // + // Given the multidimensional index {i1, ..., iN} and + // M = MakeDimMultipliers(shape), the corresponding linear index LI is simply + // + // LI = i1 * M[1] + i2 * M[2] + ... + iN * M[N]. + // + // This lets you calculate LI given the multidimensional indices in any order. + static DimensionVector MakeDimMultipliers(const Shape& shape) { + DimensionVector v(ShapeUtil::Rank(shape)); + int64 scale = 1; + for (auto dim : LayoutUtil::MinorToMajor(shape)) { + v[dim] = scale; + scale *= shape.dimensions(dim); + } + return v; + } + + // For one particular placement of a window in a base shape (the placement is + // represented as `window_count_index`), iterates inside the window. + // Translates the window index into base index. If the base index is within + // bound, call `f` with the base index. 
+ static void IterateThroughWindow( + const Shape& window_shape, const Window& window, const Shape& base_shape, + const tensorflow::gtl::ArraySlice& window_count_index, + const std::function&)>& f) { + const int64 rank = ShapeUtil::Rank(base_shape); + DimensionVector window_index(rank); + std::fill(window_index.begin(), window_index.end(), 0); + do { + std::vector base_index(rank); + bool out_of_bound = false; + for (int64 i = 0; i < rank; ++i) { + base_index[i] = window_count_index[i] * window.dimensions(i).stride() + + window_index[i] - window.dimensions(i).padding_low(); + if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) { + out_of_bound = true; + break; + } + } + if (!out_of_bound) { + f(base_index); + } + } while (IndexUtil::BumpIndices(window_shape, &window_index)); + } + + template + StatusOr> DynamicSlice( + const Literal& operand_literal, const Literal& start_indices_literal, + const Shape& result_shape) { + auto start_indices_typed = start_indices_literal.data(); + std::vector start(start_indices_typed.begin(), + start_indices_typed.end()); + + std::vector operand_indices(start.size()); + + auto result = Literal::CreateFromShape(result_shape); + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + for (int64 i = 0; i < operand_indices.size(); ++i) { + CHECK_GE(multi_index[i] + start[i], 0); + // Mod is only used here to be consistent with the existing + // backends' behavior. 
+ operand_indices[i] = (multi_index[i] + start[i]) % + operand_literal.shape().dimensions(i); + } + + auto result = operand_literal.Get(operand_indices); + return result; + })); + + return std::move(result); + } + + template + StatusOr> DynamicUpdateSlice( + const Literal& operand_literal, const Literal& update_literal, + const Literal& start_indices_literal) { + auto result = operand_literal.CloneToUnique(); + auto start_indices_typed = start_indices_literal.data(); + const auto rank = ShapeUtil::Rank(result->shape()); + std::vector start(rank, 0); + for (int64 i = 0; i < rank; ++i) { + // All other implementations currently wrap-around the index, so this + // should do so as well. + start[i] = (start_indices_typed[i] % result->shape().dimensions(i)); + start[i] += (start[i] < 0) * result->shape().dimensions(i); + } + std::vector result_index(rank, 0); + + auto func = [&](tensorflow::gtl::ArraySlice update_index) { + std::transform(update_index.begin(), update_index.end(), start.begin(), + result_index.begin(), std::plus()); + // Same as above, wrap-around only to match other implementations' + // semantics. 
+ std::transform(result_index.begin(), result_index.end(), + result->shape().dimensions().begin(), result_index.begin(), + std::modulus()); + result->Set(result_index, + update_literal.Get(update_index)); + return true; + }; + + std::vector base(update_literal.shape().dimensions_size(), 0); + std::vector step(update_literal.shape().dimensions_size(), 1); + ShapeUtil::ForEachIndex(update_literal.shape(), base, + AsInt64Slice(update_literal.shape().dimensions()), + step, func); + + return std::move(result); + } + + StatusOr> ElementWiseUnaryOp( + HloInstruction* instruction, + const std::function& unary_op) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(instruction->operand(0)); + TF_ASSIGN_OR_RETURN( + auto result_literal, + (HloEvaluator::ElementWiseUnaryOpImpl( + instruction, ConvertUnaryFunction(unary_op), operand_literal))); + + return std::move(result_literal); + } + + StatusOr> ElementWiseBinaryOp( + HloInstruction* instruction, + const std::function& + binary_op) { + const auto shape = instruction->shape(); + const auto* lhs = instruction->operand(0); + const auto* rhs = instruction->operand(1); + + // TODO(b/35950897, b/27796129): add DCHECK back once implicit broadcast + // is removed. 
+ if (!(ShapeUtil::SameDimensions(shape, rhs->shape()) && + ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()))) { + return Unimplemented( + "Implicit broadcasting is currently unsupported in HLO evaluator " + "Shape Mismatch: %s vs %s vs %s: ", + ShapeUtil::HumanString(shape).c_str(), + ShapeUtil::HumanString(lhs->shape()).c_str(), + ShapeUtil::HumanString(rhs->shape()).c_str()); + } + + const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + + auto result = Literal::CreateFromShape(shape); + + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + return ConvertBinaryFunction(binary_op)( + lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index)); + })); + return std::move(result); + } + + template + StatusOr> ElementwiseTernaryOp( + HloInstruction* instruction, + const std::function& ternary_op) { + const auto shape = instruction->shape(); + const auto* lhs = instruction->operand(0); + const auto* rhs = instruction->operand(1); + const auto* ehs = instruction->operand(2); + + // TODO(b/35950897, b/27796129): add DCHECK back once implicit + // broadcast is removed. 
+ if (!(ShapeUtil::SameDimensions(shape, lhs->shape()) && + ShapeUtil::SameDimensions(lhs->shape(), rhs->shape()) && + ShapeUtil::SameDimensions(rhs->shape(), ehs->shape()))) { + return Unimplemented( + "Implicit broadcasting is currently unsupported in HLO evaluator " + "Shape Mismatch: %s vs %s vs %s vs %s: ", + ShapeUtil::HumanString(shape).c_str(), + ShapeUtil::HumanString(lhs->shape()).c_str(), + ShapeUtil::HumanString(rhs->shape()).c_str(), + ShapeUtil::HumanString(ehs->shape()).c_str()); + } + + const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + const Literal& ehs_literal = parent_->GetEvaluatedLiteralFor(ehs); + + auto result = Literal::CreateFromShape(shape); + + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + return ternary_op(lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index), + ehs_literal.Get(multi_index)); + })); + + return std::move(result); + } + + template + static bool IsShiftOutOfBounds(NativeT rhs) { + typedef typename std::make_unsigned::type UnsignedT; + UnsignedT lhs_size_unsigned = sizeof(NativeT) * CHAR_BIT; + UnsignedT rhs_unsigned = static_cast(rhs); + return rhs_unsigned >= lhs_size_unsigned; + } + + HloEvaluator* parent_; +}; + +// These extern templates prevent users of this class from implicitly +// instantiating it. We explicitly instantiate this class in the various +// hlo_evaluator_typed_visitor*.cc files. 
+extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; +extern template class HloEvaluatorTypedVisitor; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_TYPED_VISITOR_H_ diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bfloat16.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bfloat16.cc new file mode 100644 index 0000000000000000000000000000000000000000..39c352dfb966af4ad9f1874d078b92dd2a321783 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bfloat16.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bool.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bool.cc new file mode 100644 index 0000000000000000000000000000000000000000..289b40fa06d37b8f5b2705e7de2f479c4a30e89d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_bool.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_complex64.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_complex64.cc new file mode 100644 index 0000000000000000000000000000000000000000..9cb4eb921fd3af566de5998a097423c90f0cb860 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_complex64.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_double.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_double.cc new file mode 100644 index 0000000000000000000000000000000000000000..5e6252fbf8c24a7b79c7e656040a6be7be8d777f --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_double.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_float.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_float.cc new file mode 100644 index 0000000000000000000000000000000000000000..ee793ae77b1b432daece31697ad436de1683bc08 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_float.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_half.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_half.cc new file mode 100644 index 0000000000000000000000000000000000000000..038d9d39e4a5881b9f0fb1d98732132aab3aaa2c --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_half.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int32.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int32.cc new file mode 100644 index 0000000000000000000000000000000000000000..b1952ca6193958eec49fd15297f73a6c6ac22b83 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int32.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int64.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int64.cc new file mode 100644 index 0000000000000000000000000000000000000000..0cbaffb40b7128fb6e99308fbc2b48e63a3d6fac --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int64.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int8.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..6f4bf2a392b51abc4d37db4beab6d1ea2b0c4e3a --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_int8.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint32.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint32.cc new file mode 100644 index 0000000000000000000000000000000000000000..10235447e0d266a6071097e38913c3856939509b --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint32.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint64.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint64.cc new file mode 100644 index 0000000000000000000000000000000000000000..8abeaa6ffca4409d2664de6f55850622e95bbc9d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint64.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint8.cc b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint8.cc new file mode 100644 index 0000000000000000000000000000000000000000..6dabd1c176eabcf6656d6de9683bbf0131456d96 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor_uint8.cc @@ -0,0 +1,22 @@ +/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h" + +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" + +namespace xla { +template class HloEvaluatorTypedVisitor; +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 516e14b4642ae6665a2d15c91715dc9b057ab41a..b6b03876725e4d0db818e0bbc3738896f0c0e66e 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -322,11 +322,13 @@ class HloDotDumper { public: HloDotDumper(const HloComputation* computation, tensorflow::StringPiece label, const DebugOptions& debug_options, bool show_metadata, - const HloExecutionProfile* profile, NodeFilter filter) + bool show_backend_config, const HloExecutionProfile* profile, + NodeFilter filter) : computation_(computation), - label_(label.ToString()), + label_(std::string(label)), debug_options_(debug_options), show_metadata_(show_metadata), + show_backend_config_(show_backend_config), profile_(profile), filter_(std::move(filter)) {} @@ -365,6 +367,7 @@ class HloDotDumper { string GetInstructionNodeShape(const HloInstruction* instr); string GetInstructionNodeLabel(const HloInstruction* instr); string GetInstructionNodeMetadata(const HloInstruction* instr); + string 
GetInstructionNodeBackendConfig(const HloInstruction* instr); string GetInstructionNodeExtraInfo(const HloInstruction* instr); string GetInstructionNodeInlinedOperands(const HloInstruction* instr); void AddInstructionIncomingEdges(const HloInstruction* instr); @@ -393,6 +396,7 @@ class HloDotDumper { const string label_; // overall name for the graph const DebugOptions& debug_options_; const bool show_metadata_; + const bool show_backend_config_; const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; @@ -611,6 +615,10 @@ tooltip = " "; if (!extra_info.empty()) { StrAppend(&subcomp_label, "
", extra_info); } + string node_backend_config = GetInstructionNodeBackendConfig(parent_instr); + if (!node_backend_config.empty()) { + StrAppend(&subcomp_label, "
", node_backend_config); + } bool highlight = filter_.Highlight(parent_instr); const char* fillcolor; @@ -765,6 +773,7 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { string node_shape = GetInstructionNodeShape(instr); string node_label = GetInstructionNodeLabel(instr); string node_metadata = GetInstructionNodeMetadata(instr); + string node_backend_config = GetInstructionNodeBackendConfig(instr); string extra_info = GetInstructionNodeExtraInfo(instr); string inlined_constants = GetInstructionNodeInlinedOperands(instr); string trivial_subcomputation = GetInstructionTrivialComputationStr(instr); @@ -782,8 +791,8 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { } // Build the text that will be displayed inside the node. string node_body = node_label; - for (const string& s : - {trivial_subcomputation, node_metadata, extra_info, inlined_constants}) { + for (const string& s : {trivial_subcomputation, node_metadata, + node_backend_config, extra_info, inlined_constants}) { if (!s.empty()) { StrAppend(&node_body, "
", s); } @@ -804,7 +813,7 @@ string HloDotDumper::GetInstructionNodeInlinedOperands( // "{} (f32[42, 0, 10])". The alternative, calling Literal::ToString(), // enumerates all of its empty dimensions (e.g. "{ { {}, {} }, ..."), which // is just noise. - if (ShapeUtil::HasZeroElements(shape)) { + if (!ShapeUtil::IsTuple(shape) && ShapeUtil::HasZeroElements(shape)) { return Printf("{} (%s)", ShapeUtil::HumanString(constant->shape())); } @@ -1078,6 +1087,15 @@ string HloDotDumper::GetInstructionNodeMetadata(const HloInstruction* instr) { return Join(lines, "
"); } +string HloDotDumper::GetInstructionNodeBackendConfig( + const HloInstruction* instr) { + if (!show_backend_config_ || instr->backend_config().empty()) { + return ""; + } + + return StrCat("backend_config=\"", instr->backend_config(), "\""); +} + string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { std::vector lines; @@ -1404,7 +1422,7 @@ string ExportGraph(const string& graph, string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile, - bool show_metadata) { + bool show_metadata, bool show_backend_config) { GraphRendererInterface::GraphKind graph_kind; string graph; if (debug_options.xla_hlo_dump_as_graphdef()) { @@ -1414,9 +1432,10 @@ string DumpGraph(const HloComputation& computation, const string& label, &graph)); graph_kind = GraphRendererInterface::TF_GRAPHDEF; } else { - graph = HloDotDumper(&computation, label, debug_options, show_metadata, - hlo_execution_profile, NodeFilter()) - .Dump(); + graph = + HloDotDumper(&computation, label, debug_options, show_metadata, + show_backend_config, hlo_execution_profile, NodeFilter()) + .Dump(); graph_kind = GraphRendererInterface::DOT_GRAPH; } @@ -1427,15 +1446,15 @@ string DumpGraph(const HloComputation& computation, const string& label, } string DumpNeighborhoodAround(const HloInstruction& node, int radius, - bool show_metadata) { + bool show_metadata, bool show_backend_config) { auto debug_options = node.GetModule()->config().debug_options(); string label = StrCat("Neighborhood of ", radius, " nodes around ", node.name()); NodeFilter filter = MakeNodeFilter(&node, radius); - string graph = - HloDotDumper(node.parent(), label, debug_options, show_metadata, - /*profile=*/nullptr, filter) - .Dump(); + string graph = HloDotDumper(node.parent(), label, debug_options, + show_metadata, show_backend_config, + /*profile=*/nullptr, filter) + .Dump(); return ExportGraph(graph, 
GraphRendererInterface::DOT_GRAPH, debug_options); } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index 2704aae1e3ba7fb131bfcb1287d807d785fd9774..fc8e1468aca9c2edbc22c30a41a1be8b32a1feca 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -56,7 +56,7 @@ string MaybeDumpHloModule(const HloModule& module, const string& label, string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile = nullptr, - bool show_metadata = false); + bool show_metadata = false, bool show_backend_config = false); // Like DumpGraph, but renders only nodes "near" the given node in the graph. // @@ -64,7 +64,8 @@ string DumpGraph(const HloComputation& computation, const string& label, // (roughly) corresponds to the max distance a node may be from the primary node // before it's omitted from the graph. string DumpNeighborhoodAround(const HloInstruction& node, int radius, - bool show_metadata = false); + bool show_metadata = false, + bool show_backend_config = false); // Dumps the HloModule::ToString() as a file into the provided directory path // suffixed with the provided label. diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc index b589cd573d82930adf9c37ebfca3328f7d866e1e..8e52d926d85f1ce6fabeb2dedd2f8e0fe0c2051d 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper_test.cc @@ -20,7 +20,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla.pb.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -48,9 +47,7 @@ class DotRenderer : public hlo_graph_dumper::GraphRendererInterface { XLA_REGISTER_GRAPH_RENDERER(DotRenderer); -class HloGraphDumperTest : public HloTestBase {}; - -TEST_F(HloGraphDumperTest, NestedFusion) { +TEST(HloGraphDumperTest, NestedFusion) { HloComputation::Builder b("b"); // Build param0 + param1 + param2 + param3 + param4. @@ -67,9 +64,10 @@ TEST_F(HloGraphDumperTest, NestedFusion) { sums.push_back(b.AddInstruction(HloInstruction::CreateBinary( shape, HloOpcode::kAdd, sums[i], params[i + 2]))); } - auto m = CreateNewModule(); - m->AddEntryComputation(b.Build()); - HloComputation* root_computation = m->entry_computation(); + HloModuleConfig config; + HloModule m(TestName(), config); + m.AddEntryComputation(b.Build()); + HloComputation* root_computation = m.entry_computation(); // Fuse into fusion(param0 + param1 + param2 + param3 + param4). 
auto* outer_fusion = root_computation->CreateFusionInstruction( @@ -119,18 +117,37 @@ TEST_F(HloGraphDumperTest, NestedFusion) { HasSubstr(inner_sum->name())); } -TEST_F(HloGraphDumperTest, Constant) { +TEST(HloGraphDumperTest, Constant) { HloComputation::Builder b("b"); auto instruction = b.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(-42))); instruction->set_name("i_am_a_constant_root_instruction"); - auto m = CreateNewModule(); - HloComputation* root_computation = m->AddEntryComputation(b.Build()); + HloModuleConfig config; + HloModule m(TestName(), config); + HloComputation* root_computation = m.AddEntryComputation(b.Build()); string graph = hlo_graph_dumper::DumpGraph( *root_computation, /*label=*/"an_empty_graph", DebugOptions()); EXPECT_THAT(graph, HasSubstr("an_empty_graph")); EXPECT_THAT(graph, Not(HasSubstr("i_am_a_constant_root_instruction"))); } +TEST(HloGraphDumperTest, TupleConstant) { + Shape tuple_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {3, 2}), ShapeUtil::MakeShape(S32, {4, 5})}); + HloComputation::Builder b("b"); + auto constant = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateFromShape(tuple_shape))); + auto gte = b.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::MakeShape(F32, {3, 2}), constant, 0)); + + HloModuleConfig config; + HloModule m(TestName(), config); + HloComputation* root_computation = m.AddEntryComputation(b.Build(gte)); + string graph = hlo_graph_dumper::DumpGraph( + *root_computation, /*label=*/"tuple_constant", DebugOptions()); + EXPECT_THAT(graph, HasSubstr("tuple_constant")); + EXPECT_THAT(graph, HasSubstr("constant (f32[3,2], s32[4,5])")); +} + } // anonymous namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a714d0e114245021c28da26beae444dbd3d99bb5..857cd39adb8d320ce1ebe9f718e82596b3757889 100644 --- 
a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -109,6 +109,7 @@ StatusOr> HloInstruction::CreateFromProto( instruction->name_ = proto.name(); instruction->metadata_ = proto.metadata(); + instruction->set_backend_config(proto.backend_config()); if (proto.has_literal()) { TF_ASSIGN_OR_RETURN(instruction->literal_, Literal::CreateFromProto(proto.literal())); @@ -437,7 +438,7 @@ HloInstruction::CreateCrossReplicaSum( << "Outfeed shape " << shape << " must be compatible with operand shape " << operand->shape(); instruction->AppendOperand(operand); - instruction->outfeed_config_ = outfeed_config.ToString(); + instruction->outfeed_config_ = std::string(outfeed_config); instruction->outfeed_shape_ = shape; return instruction; } @@ -792,23 +793,11 @@ HloInstruction::CreateBroadcastSequence( return instruction; } -// We put the fusion kind into the instruction's name for transpose-dot fusions, -// since those fusions are really just describing a type of dot rather than -// generating a novel computation. 
-static string FusionNodeName(HloInstruction::FusionKind fusion_kind) { - switch (fusion_kind) { - case HloInstruction::FusionKind::kTransposeDot: - return "dot_fusion"; - default: - return "fusion"; - } -} - /* static */ std::unique_ptr HloInstruction::CreateFusion( const Shape& shape, FusionKind fusion_kind, HloInstruction* fused_root) { auto instruction = WrapUnique(new HloInstruction(HloOpcode::kFusion, shape)); instruction->fusion_kind_ = fusion_kind; - instruction->name_ = FusionNodeName(fusion_kind); + instruction->name_ = "fusion"; instruction->set_parent(fused_root->parent()); instruction->set_metadata(fused_root->metadata()); instruction->CloneAndFuseInternal(fused_root); @@ -824,7 +813,7 @@ static string FusionNodeName(HloInstruction::FusionKind fusion_kind) { instruction->AppendOperand(operand); } instruction->fusion_kind_ = fusion_kind; - instruction->name_ = FusionNodeName(fusion_kind); + instruction->name_ = "fusion"; instruction->called_computations_.push_back(fusion_computation); fusion_computation->SetFusionInstruction(instruction.get()); return instruction; @@ -1167,7 +1156,7 @@ bool HloInstruction::HasSideEffect() const { for (auto operand : operands) { instruction->AppendOperand(operand); } - instruction->custom_call_target_ = custom_call_target.ToString(); + instruction->custom_call_target_ = std::string(custom_call_target); return instruction; } @@ -1179,7 +1168,7 @@ bool HloInstruction::HasSideEffect() const { for (auto operand : operands) { instruction->AppendOperand(operand); } - instruction->channel_name_ = channel_name.ToString(); + instruction->channel_name_ = std::string(channel_name); instruction->cost_estimate_ns_ = cost_estimate_ns; return instruction; } @@ -1231,12 +1220,15 @@ bool HloInstruction::HasSideEffect() const { std::unique_ptr HloInstruction::CloneWithNewOperands( const Shape& shape, tensorflow::gtl::ArraySlice new_operands, - HloModule* module) const { + HloModule* module, CloneMap* clone_map) const { VLOG(3) << 
"CloneWithNewOperands:\n " << ToString(); VLOG(3) << " new operands:"; for (const HloInstruction* new_operand : new_operands) { VLOG(3) << " %" << new_operand->name(); } + if (module == nullptr) { + module = GetModule(); + } std::unique_ptr clone; @@ -1342,7 +1334,8 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( break; case HloOpcode::kFft: CHECK_EQ(new_operands.size(), 1); - return CreateFft(shape, new_operands[0], fft_type_, fft_length_); + clone = CreateFft(shape, new_operands[0], fft_type_, fft_length_); + break; case HloOpcode::kCrossReplicaSum: clone = CreateCrossReplicaSum(shape, new_operands); break; @@ -1415,9 +1408,15 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kConstant: clone = CreateConstant(literal_->CloneToUnique()); break; - case HloOpcode::kFusion: - clone = CloneFusionWithNewOperands(shape, new_operands, module); + case HloOpcode::kFusion: { + CHECK_NE(module, nullptr); + auto new_fused_computation = module->AddEmbeddedComputation( + fused_instructions_computation()->Clone("clone", module, clone_map)); + clone = CreateFusion(/*shape=*/shape, /*fusion_kind=*/fusion_kind(), + /*operands=*/new_operands, + /*fusion_computation=*/new_fused_computation); break; + } case HloOpcode::kParameter: clone = CreateParameter(parameter_number_, shape, name_); break; @@ -1481,15 +1480,19 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( } SetupDerivedInstruction(clone.get()); clone->set_parent(parent_); + clone->set_backend_config(backend_config()); + if (clone_map != nullptr) { + InsertOrDie(clone_map, this, clone.get()); + } return clone; } HloInstruction::~HloInstruction() {} -std::unique_ptr HloInstruction::Clone(const string& suffix, - HloModule* module) const { +std::unique_ptr HloInstruction::Clone( + const string& suffix, HloModule* module, CloneMap* clone_map) const { std::unique_ptr clone = - CloneWithNewOperands(shape_, operands_, module); + CloneWithNewOperands(shape_, operands_, module, clone_map); if 
(suffix.empty()) { clone->name_ = name(); } else { @@ -1526,71 +1529,6 @@ std::unique_ptr HloInstruction::Clone(const string& suffix, return clone; } -std::unique_ptr HloInstruction::CloneFusionWithNewOperands( - const Shape& shape, tensorflow::gtl::ArraySlice operands, - HloModule* module) const { - CHECK_EQ(opcode_, HloOpcode::kFusion); - CHECK(parent() != nullptr); - - auto new_instruction = - WrapUnique(new HloInstruction(HloOpcode::kFusion, shape)); - // Add the operands to our new fusion instruction. - for (HloInstruction* new_operand : operands) { - new_instruction->AppendOperand(new_operand); - } - // Clone all the fused instructions for the new fusion instruction. - HloInstructionMap old_to_new; - std::list> new_fused_instructions; - // Create the list of fused parameters by mapping through the cloned, - // fused instructions. - for (HloInstruction* old_fused_parameter : - fused_instructions_computation()->parameter_instructions()) { - new_fused_instructions.push_back( - old_fused_parameter->Clone("clone", module)); - HloInstruction* new_fusion_parameter = new_fused_instructions.back().get(); - InsertOrDie(&old_to_new, old_fused_parameter, new_fusion_parameter); - } - for (auto old_fused_instruction : - fused_instructions_computation()->MakeInstructionPostOrder()) { - if (old_fused_instruction->opcode() == HloOpcode::kParameter) { - FindOrDie(old_to_new, old_fused_instruction); - continue; - } - std::vector new_operands; - for (int64 operand_idx = 0; - operand_idx < old_fused_instruction->operand_count(); ++operand_idx) { - HloInstruction* old_operand = - old_fused_instruction->mutable_operand(operand_idx); - new_operands.push_back(FindOrDie(old_to_new, old_operand)); - } - new_fused_instructions.push_back( - old_fused_instruction->CloneWithNewOperands( - old_fused_instruction->shape(), new_operands, module)); - HloInstruction* new_fused_instruction = new_fused_instructions.back().get(); - new_fused_instruction->set_parent(parent_); - 
InsertOrDie(&old_to_new, old_fused_instruction, new_fused_instruction); - } - new_instruction->fusion_kind_ = fusion_kind_; - auto computation_builder = HloComputation::Builder( - fused_instructions_computation()->name() + ".clone", - new_instruction.get()); - // We iterated the fusion instructions in reverse post order which means - // that we must reverse our new list of fusion instructions. - for (auto new_fused_instruction_iter = new_fused_instructions.rbegin(); - new_fused_instruction_iter != new_fused_instructions.rend(); - ++new_fused_instruction_iter) { - computation_builder.AddInstruction(std::move(*new_fused_instruction_iter)); - } - if (module == nullptr) { - module = GetModule(); - } - auto fused_root_ = fused_expression_root(); - new_instruction->called_computations_.push_back( - CHECK_NOTNULL(module)->AddEmbeddedComputation( - computation_builder.Build(FindOrDie(old_to_new, fused_root_)))); - return new_instruction; -} - std::pair HloInstruction::LatestNonGteAncestorAndIndex() const { const HloInstruction* hlo = this; @@ -2172,6 +2110,9 @@ string HloInstruction::ToString(const HloPrintOptions& options) const { !metadata_.source_file().empty())) { StrAppend(&result, ", metadata={", xla::OpMetadataToString(metadata_), "}"); } + if (options.print_backend_config() && !backend_config().empty()) { + StrAppend(&result, ", backend_config=\"", CEscape(backend_config()), "\""); + } return result; } @@ -2357,6 +2298,7 @@ std::vector HloInstruction::ExtraAttributesToString( extra.push_back( StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\"")); } + return extra; } @@ -2386,6 +2328,7 @@ HloInstructionProto HloInstruction::ToProto() const { } *proto.mutable_metadata() = metadata_; + proto.set_backend_config(backend_config()); if (literal_ != nullptr) { *proto.mutable_literal() = literal_->ToProto(); } @@ -2487,8 +2430,6 @@ string HloInstruction::ToCategory() const { return "input fusion"; case FusionKind::kOutput: return "output fusion"; - case 
FusionKind::kTransposeDot: - return "dot"; case FusionKind::kCustom: return "custom fusion"; } @@ -2971,6 +2912,7 @@ Status HloInstruction::AcceptOrdered( continue; } + // TODO(b/78350259): Eliminate const laundering. HloInstruction* instruction = const_cast(const_instruction); @@ -3270,8 +3212,6 @@ string ToString(HloInstruction::FusionKind kind) { return "kInput"; case HloInstruction::FusionKind::kOutput: return "kOutput"; - case HloInstruction::FusionKind::kTransposeDot: - return "kTransposeDot"; case HloInstruction::FusionKind::kCustom: return "kCustom"; } @@ -3288,9 +3228,6 @@ StatusOr StringToFusionKind( if (kind_name == "kOutput") { return HloInstruction::FusionKind::kOutput; } - if (kind_name == "kTransposeDot") { - return HloInstruction::FusionKind::kTransposeDot; - } if (kind_name == "kCustom") { return HloInstruction::FusionKind::kCustom; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index a5e9aecb9e7f5204b53186abca78033215a75828..14be58d069e0d8520666766aedc6390bf3d57094 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -66,6 +66,7 @@ class HloPrintOptions { : print_large_constants_(false), print_subcomputation_references_(true), print_metadata_(true), + print_backend_config_(true), compact_operands_(false), print_operand_shape_(true), print_program_shape_(true), @@ -77,6 +78,7 @@ class HloPrintOptions { .set_print_large_constants(true) .set_print_subcomputation_references(true) .set_print_metadata(false) + .set_print_backend_config(false) .set_print_operand_shape(false) .set_print_program_shape(false) .set_print_percent(false); @@ -99,12 +101,18 @@ class HloPrintOptions { return *this; } - // If true, metatdata will be printed. + // If true, metadata will be printed. HloPrintOptions& set_print_metadata(bool value) { print_metadata_ = value; return *this; } + // If true, backend_config will be printed. 
+ HloPrintOptions& set_print_backend_config(bool value) { + print_backend_config_ = value; + return *this; + } + // If true, operands' shapes will be printed. HloPrintOptions& set_print_operand_shape(bool value) { print_operand_shape_ = value; @@ -141,6 +149,7 @@ class HloPrintOptions { return print_subcomputation_references_; } bool print_metadata() const { return print_metadata_; } + bool print_backend_config() const { return print_backend_config_; } bool compact_operands() const { return compact_operands_; } bool print_operand_shape() const { return print_operand_shape_; } bool print_program_shape() const { return print_program_shape_; } @@ -151,6 +160,7 @@ class HloPrintOptions { bool print_large_constants_; bool print_subcomputation_references_; bool print_metadata_; + bool print_backend_config_; bool compact_operands_; bool print_operand_shape_; bool print_program_shape_; @@ -167,7 +177,6 @@ class HloInstruction { kOutput, // Op's output is fused into the op itself. // REQUIRES: At least one operand buffer must be able // to alias the output buffer. - kTransposeDot, // Fused into a dot with transposed operands. kCustom, // Custom category for backend-specific fusions that // do not match any of the more specific ones. }; @@ -643,6 +652,8 @@ class HloInstruction { // Detaches an instruction from its operands. That is, remove the instruction // from each operand's user set. This should only be called prior to // deallocating the instruction. + // + // TODO(b/78305363): Make this automatic when deleting an instruction. void DetachFromOperands(); // Performs a postorder DFS visit using this node as the root. If @@ -1157,23 +1168,30 @@ class HloInstruction { // Precondition: opcode() == HloOpcode::kRng RandomDistribution random_distribution() const; + // See documentation for Clone(). + using CloneMap = std::unordered_map; + // Clones the HLO instruction. The clone will have the same opcode, shape, and // operands. After creation the clone has no uses.
"this" (the instruction // cloned from) is not changed. Suffix is the string to append to the name of - // the instruction to form the name of the cloned instruction. If the module - // pointer is not nullptr, it will be the module where the cloned computations - // will be added to (in order to support deep cloning). Ignores the control - // predecessors and successors of this HLO instruction. + // the instruction to form the name of the cloned instruction. Ignores the + // control predecessors and successors of this HLO instruction. + // + // If the module pointer is not nullptr, then any cloned computations will be + // added to this module in order to support deep cloning. Otherwise the module + // of the instruction is used. + // + // If clone_map is not nullptr, then each original instruction that is cloned + // will be inserted and map to its clone. clone_map should not already contain + // any of the instructions to clone. std::unique_ptr Clone(const string& suffix = "clone", - HloModule* module = nullptr) const; + HloModule* module = nullptr, + CloneMap* clone_map = nullptr) const; - // Clones the HLO instruction as above but with new shape and operands. If - // the module pointer is not nullptr, it will be the module where the cloned - // computations will be added to (in order to support deep cloning). Ignores - // the control predecessors and successors of this HLO instruction. + // Clones the HLO instruction as above but with new shape and operands. std::unique_ptr CloneWithNewOperands( const Shape& shape, tensorflow::gtl::ArraySlice operands, - HloModule* module = nullptr) const; + HloModule* module = nullptr, CloneMap* clone_map = nullptr) const; // Returns the computations this instruction directly calls (if any). const std::vector& called_computations() const { @@ -1245,7 +1263,7 @@ class HloInstruction { // Gets/sets the string identifier for this instruction. 
const string& name() const { return name_; } - void set_name(tensorflow::StringPiece name) { name_ = name.ToString(); } + void set_name(tensorflow::StringPiece name) { name_ = std::string(name); } // Use the given NameUniquer to select a unique name for the instruction based // on the instruction's existing name. @@ -1262,6 +1280,19 @@ class HloInstruction { // if no id has been assigned yet). int unique_id() const { return unique_id_; } + // Returns the backend-specific configuration for how a backend should compile + // this HLO. The meaning of the field is backend specific. Not for use before + // or during general HLO optimization, since HLO optimizations do not preserve + // this field and they cannot interpret it due to its meaning being backend + // specific. + // + // TODO(b/78194644): Introduce structured configuration format as per + // go/xla-heuristics. + const string& backend_config() const { return backend_config_; } + void set_backend_config(string backend_config) { + backend_config_ = std::move(backend_config); + } + // Sets the debug metadata for this instruction. void set_metadata(const OpMetadata& metadata) { metadata_ = metadata; } const OpMetadata& metadata() const { return metadata_; } @@ -1283,6 +1314,7 @@ class HloInstruction { // Get/Set the number of partitions per outer dimension (in order, starting // with outer-most dimension first). Currently used by the parallel cpu // backend to partition HLOs into parallel tasks. + // // TODO(b/62783254) Replace these methods with a more general way to // annotate HLOs with backend-specific information. const std::vector& outer_dimension_partitions() const { @@ -1510,6 +1542,10 @@ class HloInstruction { // The string representation of the infeed configuration. string infeed_config_; + // The backend-specific configuration for how a backend should compile this + // HLO. See the documentation on backend_config(). + string backend_config_; + // String identifier for instruction. 
string name_; diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 5b65b1152c8298a8954890374626ae5329dccff9..909cdc0b6269edaa09806fbe5c2f08197f7dc730 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1102,7 +1102,7 @@ TEST_F(HloInstructionTest, CloneOfFusionPreservesShape) { auto module = CreateNewModule(); auto* computation = module->AddEntryComputation(builder.Build()); HloInstruction* fusion = computation->CreateFusionInstruction( - {dot, reshape}, HloInstruction::FusionKind::kTransposeDot); + {dot, reshape}, HloInstruction::FusionKind::kLoop); auto fusion2 = fusion->Clone(); const HloInstruction* root = fusion->fused_expression_root(); @@ -1169,7 +1169,7 @@ TEST_F(HloInstructionTest, NestedFusionEquality) { auto computation = module->AddEntryComputation(builder.Build()); auto nested_fusion = computation->CreateFusionInstruction( - {dot, b_t}, HloInstruction::FusionKind::kTransposeDot); + {dot, b_t}, HloInstruction::FusionKind::kLoop); auto fusion = computation->CreateFusionInstruction( {add, nested_fusion}, HloInstruction::FusionKind::kOutput); @@ -1246,13 +1246,6 @@ TEST_F(HloInstructionTest, Stringification) { auto module = CreateNewModule(); auto* computation = module->AddEntryComputation(builder.Build()); - HloInstruction* fusion = computation->CreateFusionInstruction( - {dot, reshape}, HloInstruction::FusionKind::kTransposeDot); - - EXPECT_EQ( - fusion->ToString(options), - "%dot_fusion = f32[5,20]{1,0} fusion(f32[5,10]{1,0} %x, " - "f32[20,10]{1,0} %y), kind=kTransposeDot, calls=%fused_computation"); HloInstruction* loop = builder.AddInstruction( HloInstruction::CreateWhile(sout, computation, computation, x)); diff --git a/tensorflow/compiler/xla/service/hlo_matchers.cc b/tensorflow/compiler/xla/service/hlo_matchers.cc index 
69deac263ee58f9e4d46987a54f09b11d650950a..7e4b8834357d39099f76450b849d6b5624e4e3b4 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers.cc @@ -17,10 +17,13 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace xla { namespace testing { +using ::tensorflow::str_util::Join; + bool HloMatcher::MatchAndExplain( const HloInstruction* instruction, ::testing::MatchResultListener* listener) const { @@ -195,6 +198,41 @@ void HloShardingMatcher::DescribeTo(std::ostream* os) const { } } +bool HloDotWithContractingDimsMatcher::MatchAndExplain( + const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const { + if (!HloMatcher::MatchAndExplain(instruction, listener)) { + return false; + } + + const DotDimensionNumbers& dim_nums = instruction->dot_dimension_numbers(); + if (dim_nums.lhs_contracting_dimensions_size() != 1 || + dim_nums.lhs_contracting_dimensions(0) != lhs_contracting_dim_) { + *listener << instruction->ToString() + << " has wrong lhs_contracting_dimensions (got {" + << Join(dim_nums.lhs_contracting_dimensions(), ",") << "} want {" + << lhs_contracting_dim_ << "})"; + return false; + } + + if (dim_nums.rhs_contracting_dimensions_size() != 1 || + dim_nums.rhs_contracting_dimensions(0) != rhs_contracting_dim_) { + *listener << instruction->ToString() + << " has wrong rhs_contracting_dimensions (got {" + << Join(dim_nums.rhs_contracting_dimensions(), ",") << "} want {" + << rhs_contracting_dim_ << "})"; + return false; + } + + return true; +} + +void HloDotWithContractingDimsMatcher::DescribeTo(std::ostream* os) const { + HloMatcher::DescribeTo(os); + *os << " with lhs_contracting_dims={" << lhs_contracting_dim_ + << "} and rhs_contracting_dims={" << rhs_contracting_dim_ << "}"; +} + } // namespace testing void PrintTo(const HloInstruction* 
inst, ::std::ostream* os) { diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 5175736a2506c85836577a7f2ba2359a3d5a6b18..c33bdadf1c7145bf2aff09b01423c6c21382da0c 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -131,6 +131,27 @@ class HloShardingMatcher tensorflow::gtl::optional sharding_; }; +// Matches a Dot HLO instruction with specific LHS and RHS contracting +// dimensions. +class HloDotWithContractingDimsMatcher : public HloMatcher { + public: + explicit HloDotWithContractingDimsMatcher( + ::testing::Matcher lhs, + ::testing::Matcher rhs, int64 lhs_contracting_dim, + int64 rhs_contracting_dim) + : HloMatcher(HloOpcode::kDot, /*operands=*/{lhs, rhs}), + lhs_contracting_dim_(lhs_contracting_dim), + rhs_contracting_dim_(rhs_contracting_dim) {} + + bool MatchAndExplain(const HloInstruction* instruction, + ::testing::MatchResultListener* listener) const override; + void DescribeTo(std::ostream* os) const override; + + private: + int64 lhs_contracting_dim_; + int64 rhs_contracting_dim_; +}; + // HloInstruction* matchers for opcode and operands. 
Example: // namespace op = xla::opcode_matchers; // EXPECT_THAT(instruction, @@ -158,7 +179,6 @@ HLO_MATCHER(Convolution); HLO_MATCHER(Copy); HLO_MATCHER(CrossReplicaSum); HLO_MATCHER(Divide); -HLO_MATCHER(Dot); HLO_MATCHER(DynamicSlice); HLO_MATCHER(DynamicUpdateSlice); HLO_MATCHER(Eq); @@ -310,6 +330,30 @@ inline ::testing::Matcher NoSharding() { new ::xla::testing::HloShardingMatcher(tensorflow::gtl::nullopt)); } +inline ::testing::Matcher Dot( + ::testing::Matcher lhs_matcher, + ::testing::Matcher rhs_matcher) { + return ::testing::MakeMatcher(new ::xla::testing::HloMatcher( + ::xla::HloOpcode::kDot, {lhs_matcher, rhs_matcher})); +} + +// Matches a Dot HLO instruction if it has exactly one lhs contracting dimension +// equal to `lhs_contracting_dim` and exactly one rhs contracting dimension +// equal to `rhs_contracting_dim`. +// +// Currently the HLO verifier rejects Dot operations with more than one +// contracting dimension (even though we can represent these in the +// DotDimensionNumbers proto) so there is no need to generalize this to support +// multiple contracting dimensions. +inline ::testing::Matcher Dot( + ::testing::Matcher lhs_matcher, + ::testing::Matcher rhs_matcher, + int64 lhs_contracting_dim, int64 rhs_contracting_dim) { + return ::testing::MakeMatcher( + new ::xla::testing::HloDotWithContractingDimsMatcher( + lhs_matcher, rhs_matcher, lhs_contracting_dim, rhs_contracting_dim)); +} + #undef HLO_MATCHER } // namespace opcode_matchers diff --git a/tensorflow/compiler/xla/service/hlo_matchers_test.cc b/tensorflow/compiler/xla/service/hlo_matchers_test.cc index f2463060b7cd653dffb408f8df17f44fe0c1a97c..016cc01e33840aa195dfc0a21e8ac8f3d24a3e06 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers_test.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace op = xla::testing::opcode_matchers; using ::testing::_; @@ -165,5 +166,41 @@ TEST(HloMatchersTest, ShardingMatcher) { "has incorrect sharding (expected: {maximal device=0})"); } +TEST(HloMatchersTest, DotMatcher) { + string hlo_string = R"( +HloModule DotOperationFusion_TransposeFusion + +ENTRY DotOperationFusion_TransposeFusion { + arg0 = f32[1,256] parameter(0) + arg1 = f32[256,1024] parameter(1) + ROOT dot = f32[1,1024] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + HloInstruction* root = module->entry_computation()->root_instruction(); + + EXPECT_THAT(root, op::Dot(op::Parameter(0), op::Parameter(1), + /*lhs_contracting_dim=*/1, + /*rhs_contracting_dim=*/0)); + + EXPECT_THAT( + Explain(root, op::Dot(op::Parameter(0), op::Parameter(1), + /*lhs_contracting_dim=*/0, + /*rhs_contracting_dim=*/0)), + "%dot = f32[1,1024]{1,0} dot(f32[1,256]{1,0} %arg0, f32[256,1024]{1,0} " + "%arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} has wrong " + "lhs_contracting_dimensions (got {1} want {0})"); + + EXPECT_THAT( + Explain(root, op::Dot(op::Parameter(0), op::Parameter(1), + /*lhs_contracting_dim=*/1, + /*rhs_contracting_dim=*/1)), + "%dot = f32[1,1024]{1,0} dot(f32[1,256]{1,0} %arg0, f32[256,1024]{1,0} " + "%arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} has wrong " + "rhs_contracting_dimensions (got {0} want {1})"); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index d4bad16f7976fc6ddc70c1497ae4004407aba94c..5308fb5848341b6faee64bc1ad865f9bb3bcdbe9 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -46,6 +46,18 
@@ HloModule::HloModule(const string& name, const HloModuleConfig& config) config_(config), unique_id_(next_unique_module_id_++) {} +StatusOr HloModule::LaunderConstInstructionFromModule( + const HloInstruction* hlo) { + if (hlo == nullptr) { + return nullptr; + } + + TF_RET_CHECK(hlo->GetModule() == this); + + // TODO(b/78350259): Eliminate const laundering. + return const_cast(hlo); +} + HloComputation* HloModule::AddComputationInternal( std::unique_ptr computation, bool is_entry, bool uniquify_names) { @@ -55,7 +67,7 @@ HloComputation* HloModule::AddComputationInternal( // If the module configuration has no entry layout computation set, create a // default one based on the program shape. - if (!config_.has_entry_computation_layout()) { + if (!config_.has_host_entry_computation_layout()) { config_.SetDefaultComputationLayout( entry_computation_->ComputeProgramShape()); } @@ -229,11 +241,14 @@ StatusOr> HloModule::CreateFromProto( TF_RET_CHECK(proto.has_program_shape()) << "No program shape found in the proto"; const auto& expected_program_shape = proto.program_shape(); - TF_RET_CHECK(expected_program_shape.parameters_size() == - module_config.entry_computation_layout().parameter_count()); + TF_RET_CHECK( + expected_program_shape.parameters_size() == + module_config.device_entry_computation_layout().parameter_count()); for (int i = 0; i < expected_program_shape.parameters_size(); ++i) { const Shape& parameter_shape = - module_config.entry_computation_layout().parameter_layout(i).shape(); + module_config.device_entry_computation_layout() + .parameter_layout(i) + .shape(); TF_RET_CHECK(ShapeUtil::Compatible(expected_program_shape.parameters(i), parameter_shape)) << "HloModuleConfig has different shape for parameter " << i @@ -243,7 +258,7 @@ StatusOr> HloModule::CreateFromProto( << ", actual: " << ShapeUtil::HumanStringWithLayout(parameter_shape); } const Shape& result_shape = - module_config.entry_computation_layout().result_layout().shape(); + 
module_config.device_entry_computation_layout().result_layout().shape(); TF_RET_CHECK( ShapeUtil::Compatible(expected_program_shape.result(), result_shape)) << "HloModuleConfig has different result shape than the HLO module. " @@ -303,7 +318,7 @@ StatusOr HloModule::CreateModuleConfigFromProto( // The module config is constructed with default layouts regardless of what is // passed in via the ProgramShape. Set the layouts to the appropriate values. ComputationLayout* entry_layout = - module_config.mutable_entry_computation_layout(); + module_config.mutable_host_entry_computation_layout(); for (int64 i = 0; i < entry_layout->parameter_count(); ++i) { TF_RETURN_IF_ERROR( entry_layout->mutable_parameter_layout(i)->CopyLayoutFromShape( @@ -311,6 +326,8 @@ StatusOr HloModule::CreateModuleConfigFromProto( } TF_RETURN_IF_ERROR(entry_layout->mutable_result_layout()->CopyLayoutFromShape( program_shape.result())); + *module_config.mutable_device_entry_computation_layout() = + module_config.host_entry_computation_layout(); return module_config; } @@ -535,6 +552,14 @@ uint64 HloModule::RandomNew64() const { return rng_(); } +HloComputation* HloModule::GetComputationWithName( + tensorflow::StringPiece name) { + auto it = c_find_if(computations(), [&](HloComputation* computation) { + return computation->name() == name; + }); + return it == computations().end() ? nullptr : *it; +} + /* static */ std::atomic HloModule::next_unique_module_id_(0); } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index aa843ead51747902ce9536e80c2045e257b0dc2a..1604a7261240e5bfef7c4fb1583488bf0ae4421a 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -32,6 +32,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/name_uniquer.h" #include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/iterator_range.h" #include "tensorflow/core/platform/logging.h" @@ -98,12 +99,20 @@ class HloModule { return entry_computation_; } - ComputationLayout* mutable_entry_computation_layout() { - return config_.mutable_entry_computation_layout(); + ComputationLayout* mutable_host_entry_computation_layout() { + return config_.mutable_host_entry_computation_layout(); } - const ComputationLayout& entry_computation_layout() const { - return config_.entry_computation_layout(); + const ComputationLayout& host_entry_computation_layout() const { + return config_.host_entry_computation_layout(); + } + + ComputationLayout* mutable_device_entry_computation_layout() { + return config_.mutable_device_entry_computation_layout(); + } + + const ComputationLayout& device_entry_computation_layout() const { + return config_.device_entry_computation_layout(); } const VersionedComputationHandle& entry_computation_handle() const { @@ -130,6 +139,10 @@ class HloModule { MakeUnwrappingIterator(computations_.end())}; } + // Returns the computation in this module that has the name `name`. Returns + // null if there is no such computation. + HloComputation* GetComputationWithName(tensorflow::StringPiece name); + // Gets the number of computations in this module. int64 computation_count() const { return computations_.size(); } @@ -204,6 +217,25 @@ class HloModule { // the lifetime of this process. int unique_id() const { return unique_id_; } + // Returns a non-const version of the passed-in const HloInstruction*. This is + // safe on the argument that if you have a non-const module, then you can + // access all instructions in the module as non-const. 
+ // + // Returns an error if the passed-in instruction is not from this module, + // except that it is allowed to pass in a null pointer. + // + // TODO(b/78350259): Eliminate const laundering. The argument above is not + // reliable since at any time someone could add or discover a way for a + // non-const module to transitively contain a const HloInstruction. The + // reliable way to do this would be to create a const laundering map from a + // module, mapping each encountered HloInstruction to its non-const version + // and then look up each instruction in need of laundering in that map, but + // this is much more expensive and complicated. This returns a Status instead + // of doing a CHECK-failure in part to make it strongly apparent that this is + // something that can fail. + StatusOr LaunderConstInstructionFromModule( + const HloInstruction* hlo); + private: HloComputation* AddComputationInternal( std::unique_ptr computation, bool is_entry, diff --git a/tensorflow/compiler/xla/service/hlo_module_config.cc b/tensorflow/compiler/xla/service/hlo_module_config.cc index 4205b0402cb8b2c31141d65be652cd84c22e7262..dae5578a3158fecb8219e518841dec1020b2ca98 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.cc +++ b/tensorflow/compiler/xla/service/hlo_module_config.cc @@ -31,11 +31,13 @@ using tensorflow::strings::StrAppend; HloModuleConfig::HloModuleConfig() {} HloModuleConfig::HloModuleConfig(const ProgramShape& program_shape) - : entry_computation_layout_(program_shape) {} + : host_entry_computation_layout_(program_shape), + device_entry_computation_layout_(program_shape) {} void HloModuleConfig::SetDefaultComputationLayout( const ProgramShape& program_shape) { - entry_computation_layout_ = ComputationLayout(program_shape); + host_entry_computation_layout_ = ComputationLayout(program_shape); + device_entry_computation_layout_ = ComputationLayout(program_shape); } string HloModuleConfig::compilation_cache_key() const { @@ -44,11 +46,18 @@ string 
HloModuleConfig::compilation_cache_key() const { StrAppend(&key, "::("); std::vector params; for (const ShapeLayout& param_layout : - entry_computation_layout_->parameter_layouts()) { + host_entry_computation_layout_->parameter_layouts()) { params.push_back(param_layout.shape().DebugString()); } StrAppend(&key, tensorflow::str_util::Join(params, ", "), ") => ", - entry_computation_layout_->result_shape().SerializeAsString()); + host_entry_computation_layout_->result_shape().SerializeAsString()); + for (const ShapeLayout& param_layout : + device_entry_computation_layout_->parameter_layouts()) { + params.push_back(param_layout.shape().DebugString()); + } + StrAppend( + &key, tensorflow::str_util::Join(params, ", "), ") => ", + device_entry_computation_layout_->result_shape().SerializeAsString()); if (seed() != 0) { // TODO(b/32083678): force recompilation to reset global state. static std::atomic counter{0}; diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index 586a03d412681cacdd780f48e77baf4cd4c51415..cdb0b29a2399b387bc617262032e9083ba079625 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -41,26 +41,44 @@ class HloModuleConfig { explicit HloModuleConfig(const ProgramShape& program_shape); // Checks if this config has an entry computation layout already. - bool has_entry_computation_layout() const { - return entry_computation_layout_.has_value(); + bool has_host_entry_computation_layout() const { + return host_entry_computation_layout_.has_value(); + } + + bool has_device_entry_computation_layout() const { + return device_entry_computation_layout_.has_value(); } // Sets the entry computation layout for this config. If the entry computation // layout already exists, it is silently replaced. 
void SetDefaultComputationLayout(const ProgramShape& program_shape); - // Returns a constant reference to the layout of the entry computation. + // Returns a constant reference to the on-host layout of the entry + // computation. Assumes the layout was set. + const ComputationLayout& host_entry_computation_layout() const { + CHECK(host_entry_computation_layout_.has_value()); + return *host_entry_computation_layout_; + } + + // Returns a mutable pointer to the layout of the on-host entry computation. // Assumes the layout was set. - const ComputationLayout& entry_computation_layout() const { - CHECK(entry_computation_layout_.has_value()); - return *entry_computation_layout_; + ComputationLayout* mutable_host_entry_computation_layout() { + CHECK(host_entry_computation_layout_.has_value()); + return &(*host_entry_computation_layout_); } - // Returns a mutable pointer to the layout of the entry computation. Assumes - // the layout was set. - ComputationLayout* mutable_entry_computation_layout() { - CHECK(entry_computation_layout_.has_value()); - return &(*entry_computation_layout_); + // Returns a constant reference to the on-device layout of the entry + // computation. Assumes the layout was set. + const ComputationLayout& device_entry_computation_layout() const { + CHECK(device_entry_computation_layout_.has_value()); + return *device_entry_computation_layout_; + } + + // Returns a mutable pointer to the layout of the on-device entry computation. + // Assumes the layout was set. + ComputationLayout* mutable_device_entry_computation_layout() { + CHECK(device_entry_computation_layout_.has_value()); + return &(*device_entry_computation_layout_); } // Returns whether to enable HLO-level profiling. @@ -109,7 +127,8 @@ class HloModuleConfig { private: // If you add new members, be sure to update compilation_cache_key. 
- tensorflow::gtl::optional entry_computation_layout_; + tensorflow::gtl::optional host_entry_computation_layout_; + tensorflow::gtl::optional device_entry_computation_layout_; // Whether this is a 'host module'. bool is_host_module_ = false; diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc index 5120775737bfa32bbb656421216f2b3fbef590ea..d8f1ab916b5c5c500c2d8dcd8605be083f95862a 100644 --- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc +++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc @@ -90,7 +90,7 @@ StatusOr HloPassPipeline::Run(HloModule* module) { return Status::OK(); }; - string prefix = name().ToString() + ": pipeline start"; + string prefix = std::string(name()) + ": pipeline start"; bool changed = false; string message; TF_RETURN_IF_ERROR( @@ -98,12 +98,12 @@ StatusOr HloPassPipeline::Run(HloModule* module) { const string xla_dump_per_pass_hlo_proto_to = module->config().debug_options().xla_dump_per_pass_hlo_proto_to(); if (!xla_dump_per_pass_hlo_proto_to.empty()) { - DumpModuleProto(*module, xla_dump_per_pass_hlo_proto_to, name().ToString(), - "pipeline_start"); + DumpModuleProto(*module, xla_dump_per_pass_hlo_proto_to, + std::string(name()), "pipeline_start"); } for (auto& pass : passes_) { - if (disabled_passes.count(pass->name().ToString()) > 0) { + if (disabled_passes.count(std::string(pass->name())) > 0) { VLOG(1) << " Skipping HLO pass " << pass->name() << ", disabled by --xla_disable_hlo_passes"; continue; @@ -121,7 +121,7 @@ StatusOr HloPassPipeline::Run(HloModule* module) { run_invariant_checkers(StrCat("after running pass: ", pass->name()))); if (!xla_dump_per_pass_hlo_proto_to.empty()) { DumpModuleProto(*module, xla_dump_per_pass_hlo_proto_to, - name().ToString(), pass->name().ToString()); + std::string(name()), std::string(pass->name())); } changed |= changed_this_pass; diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc 
b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index b0632448933df4b7681a0704c58d697b5ec68a1f..b171d41a31ed23f0886e7363289ea56c92216572 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" @@ -1216,7 +1217,7 @@ StatusOr HloRematerialization::Run( // Create initial sequence of HLO instructions. TF_ASSIGN_OR_RETURN(*sequence, CreateMemoryMinimizingSequence( *module, - [this](const LogicalBuffer& buffer) { + [this](const BufferValue& buffer) { return size_function_(buffer.shape()); }, scheduler_algorithm_)); diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 1a767628f6e2d33df353366974fb866e89f0df5a..23ace5afeab30d658e53258a7120d4a329cc90db 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -430,6 +430,15 @@ StatusOr> ListMemoryScheduler( return ListScheduler::Run(computation, points_to_analysis, size_function); } +StatusOr> PostOrderMemoryScheduler( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function) { + const auto& post_order = computation.MakeInstructionPostOrder(); + return std::vector{post_order.begin(), + post_order.end()}; +} + StatusOr> DefaultMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, @@ -459,7 +468,22 @@ StatusOr> DefaultMemoryScheduler( size_function)); VLOG(2) << "Min-memory dfs sequence: " << 
HumanReadableNumBytes(dfs_memory); - if (list_memory <= dfs_memory) { + TF_ASSIGN_OR_RETURN( + std::vector post_order_sequence, + PostOrderMemoryScheduler(computation, points_to_analysis, size_function)); + TF_ASSIGN_OR_RETURN( + const int64 post_order_memory, + MinimumMemoryForComputation(computation, post_order_sequence, + points_to_analysis, size_function)); + VLOG(2) << "Min-memory post order sequence: " + << HumanReadableNumBytes(post_order_memory); + + if (post_order_memory < std::min(list_memory, dfs_memory)) { + VLOG(2) << "Chose min-memory post_order sequence: " + << HumanReadableNumBytes(post_order_memory); + return post_order_sequence; + + } else if (list_memory <= dfs_memory) { VLOG(2) << "Chose min-memory list sequence: " << HumanReadableNumBytes(list_memory); return list_sequence; diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.h b/tensorflow/compiler/xla/service/hlo_scheduling.h index 068e68383deb170ded1c9b09a8b7ceb8c4c0ab4b..fcb006f818fd1d55a09475042779dd60de945697 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.h +++ b/tensorflow/compiler/xla/service/hlo_scheduling.h @@ -55,6 +55,12 @@ StatusOr> DFSMemoryScheduler( const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function); +// Naive Post Order scheduler +StatusOr> PostOrderMemoryScheduler( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function); + // The default scheduling algorithm. Runs both the list scheduler // and the DFS scheduler, and chooses whichever returns a lower min-memory, // not accounting for fragmentation. 
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index 74544c4a67a819d341056aba4cf6b321a5a86c0a..92df7c1427f282ccdde2df494c41b3f2a98cf7b3 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -77,7 +77,7 @@ TEST_F(MinimumMemoryForSequenceTest, MultiComputation) { HloComputation* entry_computation = module->AddEntryComputation(builder.Build()); - auto size_fn = [](const LogicalBuffer& buffer) { + auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); }; @@ -124,7 +124,7 @@ TEST_F(HloSchedulingTest, LastUseScheduledFirst) { TF_ASSERT_OK_AND_ASSIGN( SequentialHloOrdering::HloModuleSequence sequence, - CreateMemoryMinimizingSequence(*module, [](const LogicalBuffer& buffer) { + CreateMemoryMinimizingSequence(*module, [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape()); })); // Verify that all instructions are in the sequence. 
@@ -160,7 +160,7 @@ ENTRY root { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, tools::Parse(module_str)); - auto size_fn = [](const LogicalBuffer& buffer) { + auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); }; TF_ASSERT_OK_AND_ASSIGN( diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 994de441237493b5e2254a0a66763d6195c5ea85..7f7e3f7dab03ce0ad64bd0fcfe4ddd020d31bf56 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -367,10 +367,14 @@ HloSharding HloSharding::GetSubSharding(const Shape& shape, const ShapeIndex& index) const { CHECK(IsTuple()); - ShapeTree sub_shape_tree(ShapeUtil::GetSubshape(shape, index), - Replicate()); + Shape sub_shape = ShapeUtil::GetSubshape(shape, index); + ShapeTree sub_shape_tree(sub_shape, Replicate()); sub_shape_tree.CopySubtreeFrom(GetAsShapeTree(shape), index, {}); - return Tuple(sub_shape_tree); + if (ShapeUtil::IsTuple(sub_shape)) { + return Tuple(sub_shape_tree); + } else { + return sub_shape_tree.element({}); + } } std::ostream& operator<<(std::ostream& out, const HloSharding& sharding) { diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc index f8d98f0678596750bb76462e550085753678e860..be156d765dc10d54eaf301e90883babbc5693e28 100644 --- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc +++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder_test.cc @@ -14,7 +14,6 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_tfgraph_builder.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index 05b7dce3d1ecf935b80ba1cb46ef089b7b3b6f33..7b27dbfec376b8ba16d00285f10e2cc291e07a61 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -29,9 +29,11 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" namespace xla { @@ -69,7 +71,7 @@ std::ostream& operator<<(std::ostream& out, const HloUse& use) { HloValue::HloValue(HloValue::Id id, HloInstruction* instruction, const ShapeIndex& index, bool is_phi) - : id_(id), is_phi_(is_phi) { + : BufferValue(instruction, index, id), is_phi_(is_phi) { // The defining position is always the first element in the positions_ vector. positions_.push_back(HloPosition{instruction, index}); } @@ -90,8 +92,8 @@ string HloValue::ToShortString() const { string index_str = ShapeUtil::IsTuple(defining_instruction()->shape()) ? defining_index().ToString() : ""; - return StrCat(id_, " ", is_phi_ ? "PHI " : "", defining_instruction()->name(), - index_str); + return StrCat(id(), " ", is_phi_ ? 
"PHI " : "", + defining_instruction()->name(), index_str); } string HloValue::ToString(int indent) const { diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index 2a711e8b42590c29d0aaab95dcf110063ada3182..a1151f65e07dffdcd52f645f61dcc9b4f26459c0 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -16,16 +16,20 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_VALUE_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_VALUE_H_ -#include +#include #include #include +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" namespace xla { @@ -80,30 +84,9 @@ struct HloUse { std::ostream& operator<<(std::ostream& out, const HloUse& use); -// Class describing a value used by the dataflow analysis. XLA arrays are -// trivially a single HloValue. Tuples are made up of more than one HloValue: an -// HloValue for the pointer vector, and an HloValue for each child element. -// -// Every HloValue is defined by a particular instruction and most instructions -// define only a single HloValue. Instructions which define a single HloValue -// include array-shaped instructions such as Add but also includes Tuple-shaped -// instructions such as Tuple. The Tuple instruction defines a single HloValue -// which is a vector of pointers to the values containing the Tuple -// instruction's operands. 
Though the result of the Tuple instruction includes -// multiple values only the top-level HloValue (the vector of pointers) is -// defined by the Tuple instruction. The values containing the tuple elements -// are defined by earlier instructions, usually the operands of the Tuple -// instruction. -// -// Instructions which construct both the tuple *and* the tuple elements define -// more than one HloValue. This includes (at least) tuple-shaped Constant, -// Parameter, Infeed and While instructions. These tuple-shaped instructions do -// not assemble a tuple from existing HloValues like the Tuple instruction does, -// but rather define all the HloValues in the tuple. -class HloValue { +// HloDataflowAnalysis uses this subclass of BufferValue. +class HloValue : public BufferValue { public: - using Id = int64; - // Predicate comparing HloValues by increasing id, useful for std::sort. static bool IdLessThan(const HloValue* a, const HloValue* b) { return a->id() < b->id(); @@ -120,6 +103,7 @@ class HloValue { // dataflow analysis (HloDataflowAnalysis::ssa_form_ is true). HloValue(Id id, HloInstruction* instruction, const ShapeIndex& index, bool is_phi = false); + ~HloValue() override {} // Sets the positions in the module at which the HloValue appears. Updates // uses. Should be called once and only once. The defining position should not @@ -127,10 +111,6 @@ class HloValue { void SetPositionsAndComputeUses( tensorflow::gtl::ArraySlice positions); - // Return a unique identifier for this HloValue. This value is used for stable - // sorting and iteration - Id id() const { return id_; } - // Returns whether this value is a phi value. bool is_phi() const { return is_phi_; } @@ -142,12 +122,18 @@ class HloValue { return defining_position().instruction; } + HloInstruction* instruction() const override { + return defining_instruction(); + } + // Return the shape index at which this HloValue is defined in the output of // its defining instruction. 
const ShapeIndex& defining_index() const { return defining_position().index; } + const ShapeIndex& index() const override { return defining_index(); } + // Return the shape of this HloValue. - const Shape& shape() const { return defining_position().shape(); } + const Shape& shape() const override { return defining_position().shape(); } // Return all positions of the HloValue in the module. const std::vector& positions() const { return positions_; } @@ -164,12 +150,11 @@ class HloValue { // Return a single-line string representation of the value. string ToShortString() const; - string ToString(int indent = 0) const; + string ToString(int indent) const; - private: - // Unique identifier for this HloValue. Used for stable sorting and iteration. - const Id id_; + string ToString() const override { return ToString(0); } + private: // Whether this instruction is a phi value. const bool is_phi_; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 8a30cbf9cd622ffb64d345ddaf0dc88f34850bfc..096ebb7946e08ba697a2c5eb93a71255586e489d 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -116,7 +116,7 @@ Status ShapeVerifier::HandleOutfeed(HloInstruction* outfeed) { // produces no HLO value in the graph. 
if (!ShapeUtil::Compatible(outfeed->outfeed_shape(), outfeed->operand(0)->shape())) { - return InvalidArgument( + return InternalError( "Expected outfeed to have shape compatible with operand's shape %s, " "actual shape is %s:\n%s", ShapeUtil::HumanString(outfeed->operand(0)->shape()).c_str(), @@ -200,7 +200,7 @@ Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) { transpose->operand(0)->shape(), transpose->dimensions())); } -Status ShapeVerifier::HandleParameter(HloInstruction*) { +Status ShapeVerifier::HandleParameter(HloInstruction* hlo) { return tensorflow::Status::OK(); } @@ -410,7 +410,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) { if (fp_type == PRIMITIVE_TYPE_INVALID) { fp_type = subshape.element_type(); } else if (fp_type != subshape.element_type()) { - return FailedPrecondition( + return InternalError( "Seen floating point types of different precisions in " "%s, but mixed precision is disallowed.", instruction->ToString().c_str()); @@ -490,7 +490,7 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction, } } if (!compatible) { - return InvalidArgument( + return InternalError( "Expected instruction to have shape compatible with %s, actual " "shape is %s:\n%s", ShapeUtil::HumanString(inferred_shape).c_str(), @@ -541,7 +541,7 @@ Status ShapeVerifier::CheckVariadicShape(const HloInstruction* instruction) { Status ShapeVerifier::CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2) { if (instr1->channel_id() != instr2->channel_id()) { - return FailedPrecondition( + return InternalError( "Expected to have the same channel id, actual channel ids are: %s " "(%lld), %s (%lld)", instr1->ToString().c_str(), instr1->channel_id(), @@ -571,22 +571,22 @@ string ComputationsToString( Status VerifyHloStructure(HloModule* module) { for (const HloComputation* computation : module->computations()) { if (computation->parent() == nullptr) { - return FailedPrecondition("Computation %s has a null 
parent pointer", - computation->name().c_str()); + return InternalError("Computation %s has a null parent pointer", + computation->name().c_str()); } if (computation->parent() != module) { - return FailedPrecondition( + return InternalError( "Computation %s parent() does not point to parent module", computation->name().c_str()); } for (const HloInstruction* instruction : computation->instructions()) { if (instruction->parent() == nullptr) { - return FailedPrecondition("Instruction %s has a null parent pointer", - instruction->name().c_str()); + return InternalError("Instruction %s has a null parent pointer", + instruction->name().c_str()); } if (instruction->parent() != computation) { - return FailedPrecondition( + return InternalError( "Instruction %s parent() does not point to parent computation", instruction->name().c_str()); } @@ -602,7 +602,7 @@ Status VerifyHloStructure(HloModule* module) { for (int i = 0; i < instruction->operand_count(); ++i) { const HloInstruction* operand = instruction->operand(i); if (operand->parent() != instruction->parent()) { - return FailedPrecondition( + return InternalError( "Operand %d (%s) of instruction %s is in a different " "computation: %s vs %s", i, operand->name().c_str(), instruction->name().c_str(), @@ -619,7 +619,7 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { // The parent fusion instruction of the fusion computation must be 'fusion'. 
HloComputation* fused_computation = fusion->fused_instructions_computation(); if (fusion != fused_computation->FusionInstruction()) { - return FailedPrecondition( + return InternalError( "Instruction of fused computation does not match expected instruction " "%s.", fusion->ToString().c_str()); @@ -635,37 +635,37 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { for (auto* instruction : fused_computation->instructions()) { if (fused_root == instruction) { if (root_owned) { - return FailedPrecondition("Root appears more than once in %s.", - fusion->ToString().c_str()); + return InternalError("Root appears more than once in %s.", + fusion->ToString().c_str()); } root_owned = true; } for (int i = 0; i < fused_parameters.size(); ++i) { if (fused_parameters[i] == instruction) { if (parameter_owned[i]) { - return FailedPrecondition("Parameter appears more than once in %s.", - fusion->ToString().c_str()); + return InternalError("Parameter appears more than once in %s.", + fusion->ToString().c_str()); } parameter_owned[i] = true; } } } if (!root_owned) { - return FailedPrecondition("Root not found in computation of %s.", - fusion->ToString().c_str()); + return InternalError("Root not found in computation of %s.", + fusion->ToString().c_str()); } // Make sure all the parameter_owned entries are set for (int i = 0; i < parameter_owned.size(); i++) { if (!parameter_owned[i]) { - return FailedPrecondition("Parameter %d not found in computation of %s.", - i, fusion->ToString().c_str()); + return InternalError("Parameter %d not found in computation of %s.", i, + fusion->ToString().c_str()); } } // Fused root must have no users. 
if (fused_root->user_count() != 0) { - return FailedPrecondition("Root of %s may not have users.", - fusion->ToString().c_str()); + return InternalError("Root of %s may not have users.", + fusion->ToString().c_str()); } // All uses of fused instructions must be in the fusion computation, and every @@ -674,13 +674,13 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { fusion->fused_instructions_computation()->instructions()) { if (instruction != fused_root) { if (instruction->user_count() == 0) { - return FailedPrecondition( - "Non-root instruction %s in %s must have users.", - instruction->ToString().c_str(), fusion->ToString().c_str()); + return InternalError("Non-root instruction %s in %s must have users.", + instruction->ToString().c_str(), + fusion->ToString().c_str()); } for (auto& user : instruction->users()) { if (fused_computation != user->parent()) { - return FailedPrecondition( + return InternalError( "Non-root instruction %s in %s may not have external users.", instruction->ToString().c_str(), fusion->ToString().c_str()); } @@ -695,34 +695,33 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { for (auto fused_param : fused_parameters) { int64 param_no = fused_param->parameter_number(); if (param_no < 0) { - return FailedPrecondition( - "Unexpected negative parameter number %lld in %s.", param_no, - fusion->ToString().c_str()); + return InternalError("Unexpected negative parameter number %lld in %s.", + param_no, fusion->ToString().c_str()); } if (param_no >= fused_parameters.size()) { - return FailedPrecondition( + return InternalError( "Unexpected parameter number %lld in %s: higher then number of " "parameters %lu.", param_no, fusion->ToString().c_str(), fused_parameters.size()); } if (parameter_numbers[param_no]) { - return FailedPrecondition( + return InternalError( "Did not expect parameter number %lld more than once in %s.", param_no, fusion->ToString().c_str()); } parameter_numbers[param_no] = 
true; if (!ShapeUtil::Compatible(fused_param->shape(), fusion->operand(param_no)->shape())) { - return FailedPrecondition( + return InternalError( "Shape mismatch between parameter number %lld and its operand in %s.", param_no, fusion->ToString().c_str()); } } - // Make sure all the parameter_numbers entries were seen + // Make sure all the parameter_numbers entries were seen. for (int i = 0; i < parameter_numbers.size(); i++) { if (!parameter_numbers[i]) { - return FailedPrecondition("Did not see parameter number %d in %s.", i, - fusion->ToString().c_str()); + return InternalError("Did not see parameter number %d in %s.", i, + fusion->ToString().c_str()); } } diff --git a/tensorflow/compiler/xla/service/human_readable_profile_builder.h b/tensorflow/compiler/xla/service/human_readable_profile_builder.h index fc24acd2713f4cd8af2816ffdf085e84a4920cbc..fb36d3a0d6532b4157152c49f08f4f247a7c6d89 100644 --- a/tensorflow/compiler/xla/service/human_readable_profile_builder.h +++ b/tensorflow/compiler/xla/service/human_readable_profile_builder.h @@ -32,7 +32,7 @@ class HumanReadableProfileBuilder { explicit HumanReadableProfileBuilder(tensorflow::StringPiece computation_name, int64 total_cycles, double clock_rate_ghz) - : computation_name_(computation_name.ToString()), + : computation_name_(std::string(computation_name)), total_cycles_(total_cycles), clock_rate_ghz_(clock_rate_ghz) { CHECK_GE(clock_rate_ghz, 1e-9); @@ -47,9 +47,10 @@ class HumanReadableProfileBuilder { tensorflow::StringPiece category, int64 cycles, int64 flop_count, int64 transcendental_count, int64 bytes_accessed, float optimal_seconds) { - op_infos_.push_back( - {op_name.ToString(), short_name.ToString(), category.ToString(), cycles, - flop_count, transcendental_count, bytes_accessed, optimal_seconds}); + op_infos_.push_back({std::string(op_name), std::string(short_name), + std::string(category), cycles, flop_count, + transcendental_count, bytes_accessed, + optimal_seconds}); } // Gets the human-readable 
profile. diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index dc1a39e9fa9fd3ef5c55bd86309fe23f5ef51dd5..6bb2ca19fe235d61dfad2c7cde2f31c797628c1d 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -28,6 +28,25 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" namespace xla { +namespace { +// These nodes can always be duplicated into consumers, even if +// InstructionFusion::may_duplicate_ is false. +// +// In general these should be nodes that get *cheaper* the more they're +// duplicated (and fused into consumers). +// +// TODO(jlebar): Duplicating instructions when we have a variable called "may +// duplicate" that's equal to false is not pretty. +bool IsAlwaysDuplicable(const HloInstruction& instruction) { + // We are always willing to duplicate a widening type-conversion instruction + // if it means we can fuse the convert into a consumer. This allows the + // consumer to read less memory, which is almost always a performance win. + return instruction.opcode() == HloOpcode::kConvert && + ShapeUtil::ByteSizeOf(instruction.operand(0)->shape()) < + ShapeUtil::ByteSizeOf(instruction.shape()); +} +} // namespace + /*static*/ bool InstructionFusion::IsExpensive( const HloInstruction& instruction) { switch (instruction.opcode()) { @@ -418,9 +437,11 @@ HloInstruction* InstructionFusion::Fuse(HloInstruction* producer, bool InstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); + // Cost condition: don't duplicate expensive instructions. 
if (FusionWouldDuplicate(*producer, *consumer) && - (is_expensive_(*producer) || !may_duplicate_)) { + (!may_duplicate_ || is_expensive_(*producer)) && + !IsAlwaysDuplicable(*producer)) { return false; } diff --git a/tensorflow/compiler/xla/service/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/instruction_fusion_test.cc index e78b99a80cf41318faa1cb709428b8ba0f531944..6dd8fa1ab08737f0d77c47a1f8ed59a85b4f2bbd 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion_test.cc @@ -21,6 +21,8 @@ limitations under the License. namespace xla { +namespace op = xla::testing::opcode_matchers; + using InstructionFusionTest = HloTestBase; TEST_F(InstructionFusionTest, PotentialBitcastReshapeOfParameterUnfused) { @@ -124,7 +126,7 @@ TEST_F(InstructionFusionTest, FuseCheapNonDuplicatableOps) { EXPECT_EQ(Count(*module, HloOpcode::kFusion), 1) << module->ToString(); // Make sure the add hasn't been duplicated. - EXPECT_EQ(Count(*module, HloOpcode::kFusion), 1) << module->ToString(); + EXPECT_EQ(Count(*module, HloOpcode::kAdd), 1) << module->ToString(); } TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) { @@ -291,4 +293,29 @@ TEST_F(InstructionFusionTest, AllowEffectiveUnaryDuplication) { .ValueOrDie()); } +TEST_F(InstructionFusionTest, + WideningConvertsAreAlwaysDuplicableIntoConsumers) { + auto module = tools::Parse(R"( + HloModule test_module + ENTRY Test { + p0 = f16[100] parameter(0) + c = f32[100] convert(p0) + add = f32[100] add(c, c) + ROOT mul = f32[100] multiply(c, c) + })") + .ValueOrDie(); + + // The convert should be fused into the add and mul, even though may_duplicate + // is false, because it's always beneficial to fuse/duplicate widening + // converts into consumers. 
+ EXPECT_TRUE( + InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/false) + .Run(module.get()) + .ValueOrDie()) + << module->ToString(); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Fusion(op::Parameter())); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/interpreter/README.md b/tensorflow/compiler/xla/service/interpreter/README.md index 4c19a1b916d42149c670f4d3bd1d11cff87cf075..0b21b251c3f663540292d98e5a609b3e27446d38 100644 --- a/tensorflow/compiler/xla/service/interpreter/README.md +++ b/tensorflow/compiler/xla/service/interpreter/README.md @@ -5,7 +5,7 @@ evaluating the result of the HLO graph directly with HloEvaluator, without lowering it further (to LLVM IR for example) before execution as other backends (CPU and GPU for example) do. -Its key componenets are: +Its key components are: * [`InterpreterCompiler`] despite the inherited naming of "compiler", all `InterpreterCompiler` really does is the following: diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 76b3ecad26fe92e910fd3fe0e405c726da7e14b7..eecbbcb93df64b09acb5e009d3db79e51dab0c93 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -45,7 +45,7 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { HloPassPipeline pipeline("Interpreter"); pipeline.AddPass( - hlo_module->mutable_entry_computation_layout()); + hlo_module->device_entry_computation_layout()); return pipeline.Run(hlo_module).status(); } diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 2494569db53f260b900b3d5d3d0d2da5b1fc5f73..cfa7ba5e81ddd003978a2bd763384581c55b5c83 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc 
@@ -909,22 +909,19 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { } LayoutAssignment::LayoutAssignment( - ComputationLayout* entry_computation_layout, + const ComputationLayout& entry_computation_layout, ChannelLayoutConstraints* channel_constraints) : entry_computation_layout_(entry_computation_layout), channel_layout_constraints_(channel_constraints) { VLOG(1) << "entry computation layout given to layout assignment: " - << entry_computation_layout_->ToString(); + << entry_computation_layout_.ToString(); // Layouts of all parameter instructions must be set. for (const ShapeLayout& parameter_layout : - entry_computation_layout_->parameter_layouts()) { + entry_computation_layout_.parameter_layouts()) { CHECK(parameter_layout.LayoutIsSet()); } - // If the result layout is not set, then choose the default. - // TODO(b/29118294): Choose a better layout in this case. - if (!entry_computation_layout_->result_layout().LayoutIsSet()) { - entry_computation_layout_->mutable_result_layout()->SetToDefaultLayout(); - } + // TODO(b/29118294): Choose a better layout if the result layout is not set. 
+ CHECK(entry_computation_layout_.result_layout().LayoutIsSet()); } std::unique_ptr LayoutAssignment::ChooseOperandLayoutFromOutputLayout( @@ -1597,7 +1594,7 @@ StatusOr LayoutAssignment::Run(HloModule* module) { } if (computation == module->entry_computation()) { TF_RETURN_IF_ERROR(RunOnComputation( - *entry_computation_layout_, *points_to_analysis, + entry_computation_layout_, *points_to_analysis, module->entry_computation(), channel_layout_constraints_)); } else { ComputationLayout computation_layout(computation->ComputeProgramShape()); diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index ae4986d6ad9bc3de100eab9cc38b709bb56c7813..9663a793fdd7d4968700707a1003319e89ea19a3 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -281,14 +281,14 @@ class LayoutAssignment : public HloPassInterface { // the case that no particular layout is requested. // // channel_constraints is both an input and output. Any sends or recvs that - // are present in channel_constraints will be layed out as constrained. Any - // unconstrained sends or recvs will be layed out as locally optimal and their + // are present in channel_constraints will be laid out as constrained. Any + // unconstrained sends or recvs will be laid out as locally optimal and their // layout will be added as a constraint to channel_constraints. // // If channel_constraints is nullptr, no kSend or kRecvs must be contained // within any module passed to `Run`. explicit LayoutAssignment( - ComputationLayout* entry_computation_layout, + const ComputationLayout& entry_computation_layout, ChannelLayoutConstraints* channel_constraints = nullptr); ~LayoutAssignment() override {} tensorflow::StringPiece name() const override { return "layout-assignment"; } @@ -402,7 +402,7 @@ class LayoutAssignment : public HloPassInterface { // necessary conditions. 
Status CheckLayouts(HloModule* module); - ComputationLayout* entry_computation_layout_; + const ComputationLayout& entry_computation_layout_; protected: // Sets up the copy instruction according to the characteristic (sharding, diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 4b1c9bad41de8030cf14bc6d1c0db21b9c56c3bf..7e1bb11eaada0e62b82c50903c9848f0a3a8307b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -53,7 +53,7 @@ class LayoutAssignmentTest : public HloTestBase { protected: void AssignLayouts(HloModule* module, ComputationLayout* entry_computation_layout) { - LayoutAssignment layout_assignment(entry_computation_layout); + LayoutAssignment layout_assignment(*entry_computation_layout); EXPECT_IS_OK(layout_assignment.Run(module).status()); } }; @@ -285,7 +285,7 @@ TEST_F(LayoutAssignmentTest, ConflictingLayoutTuple) { TF_CHECK_OK(computation_layout.mutable_result_layout()->CopyLayoutFromShape( result_shape)); - LayoutAssignment layout_assignment(&computation_layout); + LayoutAssignment layout_assignment(computation_layout); AssignLayouts(module.get(), &computation_layout); // Layout assignment should have deep copied the result of the computation to @@ -488,7 +488,7 @@ class OperandsMustBeTheSameLayoutAssignment : public LayoutAssignment { public: explicit OperandsMustBeTheSameLayoutAssignment( ComputationLayout* entry_computation_layout) - : LayoutAssignment(entry_computation_layout) {} + : LayoutAssignment(*entry_computation_layout) {} protected: Status PropagateBufferConstraint( @@ -808,7 +808,7 @@ TEST_F(LayoutAssignmentTest, InternalErrorOnBitcast) { ComputationLayout computation_layout( module->entry_computation()->ComputeProgramShape()); - LayoutAssignment layout_assignment(&computation_layout); + LayoutAssignment layout_assignment(computation_layout); Status error_status = 
layout_assignment.Run(module.get()).status(); EXPECT_FALSE(error_status.ok()); EXPECT_THAT( diff --git a/tensorflow/compiler/xla/service/liveness_util.cc b/tensorflow/compiler/xla/service/liveness_util.cc index 68c99256a246edcf43a8358f667fc4458b9b4fea..79dfd1e409f1556a50e9ba6c845cbf9774fb1a02 100644 --- a/tensorflow/compiler/xla/service/liveness_util.cc +++ b/tensorflow/compiler/xla/service/liveness_util.cc @@ -173,9 +173,9 @@ bool HasUniqueFusedUseOfOperandAt( // (2) Is a loop fusion instruction where the only use of 'operand' at 'index' // in the set 'user.fused_instructions' is a DynamicUpdateSlice fused root // at operand 0. Or... -// (3) Is a kDot -> kAdd (or fused kTransposeDot -> kAdd) output fusion -// instruction where the only use of 'operand' at 'index' in the set -// 'user.fused_instructions' is a kAdd fused root at operand 0 or 1. Or... +// (3) Is a kDot -> kAdd output fusion instruction where the only use of +// 'operand' at 'index' in the set 'user.fused_instructions' is a kAdd fused +// root at operand 0 or 1. Or... // (4) The 'user' of 'operand' is DynamicUpdateSlice or While at operand index // 0. // @@ -209,17 +209,13 @@ bool CanShareOperandBufferWithUser( user->fused_expression_root()->opcode() == HloOpcode::kAdd) { // Output fusion with kAdd fused root. - // Check if one operand of kAdd fused root is either kDot, or nested - // kFusion of kind kTransposeDot. + // Check if one operand of kAdd fused root is kDot or kConvolution. 
auto* add = user->fused_expression_root(); auto add_operand_it = std::find_if(add->operands().begin(), add->operands().end(), [&](HloInstruction* operand) { return operand->opcode() == HloOpcode::kConvolution || - operand->opcode() == HloOpcode::kDot || - (operand->opcode() == HloOpcode::kFusion && - operand->fusion_kind() == - HloInstruction::FusionKind::kTransposeDot); + operand->opcode() == HloOpcode::kDot; }); if (add_operand_it == add->operands().end()) { return false; @@ -314,17 +310,13 @@ bool CanShareOperandBufferWithUser(HloInstruction* operand, user->fused_expression_root()->opcode() == HloOpcode::kAdd) { // Output fusion with kAdd fused root. - // Check if one operand of kAdd fused root is either kDot, or nested - // kFusion of kind kTransposeDot. + // Check if one operand of kAdd fused root is kDot, or kConvolution. auto* add = user->fused_expression_root(); auto add_operand_it = std::find_if(add->operands().begin(), add->operands().end(), [&](HloInstruction* operand) { return operand->opcode() == HloOpcode::kConvolution || - operand->opcode() == HloOpcode::kDot || - (operand->opcode() == HloOpcode::kFusion && - operand->fusion_kind() == - HloInstruction::FusionKind::kTransposeDot); + operand->opcode() == HloOpcode::kDot; }); if (add_operand_it == add->operands().end()) { return false; diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc index f8b309488eeb5391b1cad5db760934ec1f7e3521..c01b52df62ee67eb2c6249bfa0baf8366dd3c331 100644 --- a/tensorflow/compiler/xla/service/liveness_util_test.cc +++ b/tensorflow/compiler/xla/service/liveness_util_test.cc @@ -303,48 +303,6 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedDotAdd) { *dataflow_analysis_)); } -TEST_F(CanShareOperandBufferWithUserTest, FusedTransposeDotAdd) { - auto builder = HloComputation::Builder(TestName()); - Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); - - auto a = 
builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR2({{1.0, 0.0}, {0.0, 1.0}}))); - auto b = builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR2({{2.0, 2.0}, {2.0, 2.0}}))); - auto b_t = builder.AddInstruction( - HloInstruction::CreateTranspose(data_shape, b, {1, 0})); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - auto dot = builder.AddInstruction( - HloInstruction::CreateDot(data_shape, a, b_t, dot_dnums)); - - auto one = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto add_operand = builder.AddInstruction( - HloInstruction::CreateBroadcast(data_shape, one, {1})); - - auto add = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape, HloOpcode::kAdd, dot, add_operand)); - - BuildModule(builder.Build()); - - auto nested_fusion = computation_->CreateFusionInstruction( - {dot, b_t}, HloInstruction::FusionKind::kTransposeDot); - - auto fusion = computation_->CreateFusionInstruction( - {add, nested_fusion}, HloInstruction::FusionKind::kOutput); - RunAnalysis(); - - // Output fused transpose-dot-add should be share buffer with 'add_operand'. 
- EXPECT_TRUE(CanShareOperandBufferWithUser(add_operand, {}, fusion, {}, - *points_to_analysis_)); - - EXPECT_TRUE(CanShareOperandBufferWithUser(add_operand, {}, fusion, {}, - *dataflow_analysis_)); -} - TEST_F(CanShareOperandBufferWithUserTest, OutputFusionCantAliasOperandBuffer) { auto builder = HloComputation::Builder(TestName()); Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 3312a888443233139841ce7a5e3173f907605e1d..7323abeb2077154f82828bcda3e90eb45a67138a 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -333,18 +333,7 @@ llvm::Value* IrArray::EmitArrayElementAddress( } CHECK_EQ(index.size(), ShapeUtil::Rank(*shape_)); - std::vector actual_index; - bool is_implicit_broadcast = false; - // We perform broadcasting when the operand shape has dimension(s) of size - // 1. In this case we fix the index value for that dimension to zero. This - // effectively broadcasts along this dimension. - for (int64 i = 0; i < index.size(); ++i) { - auto dim = shape_->dimensions(i); - actual_index.push_back(dim == 1 ? ir_builder->getInt64(0) : index[i]); - is_implicit_broadcast |= dim == 1; - } - - if (!is_implicit_broadcast && index.LinearValidOnShape(*shape_)) { + if (index.LinearValidOnShape(*shape_)) { llvm::Module* module = ir_builder->GetInsertBlock()->getParent()->getParent(); return ir_builder->CreateInBoundsGEP( @@ -354,6 +343,15 @@ llvm::Value* IrArray::EmitArrayElementAddress( {index.linear()}, llvm_ir::AsStringRef(name)); } + std::vector actual_index; + for (int64 i = 0; i < index.size(); ++i) { + // When dimension i is of size 1, LLVM optimization is able to replace + // index[i] with 0. However, setting index[i] to 0 here still allows LLVM to + // produce better code in some cases. + auto dim = shape_->dimensions(i); + actual_index.push_back(dim == 1 ? 
ir_builder->getInt64(0) : index[i]); + } + // "base_ptr_" has the type of "*" // (e.g. [3 x [2 x float]]*). Therefore, the address of the indexed element // should be computed by diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 7b227ce294176cfbbf7308bbf65afe21814f3dea..497b48ff227d7d1f158080529372df44b6932b24 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -36,8 +36,8 @@ ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, bool prevent_unrolling, bool prevent_vectorization) - : prefix_(prefix.ToString()), - suffix_(suffix.ToString()), + : prefix_(std::string(prefix)), + suffix_(std::string(suffix)), start_index_(start_index), end_index_(end_index), step_(step), diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index 20069ce5a28184a5a9216d1a3751d1cee547727d..d915f95db134918a173a9711936bb1e2f1ea0d95 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -174,7 +174,7 @@ class ForLoopNest { : ForLoopNest(/*name=*/"", ir_builder) {} ForLoopNest(tensorflow::StringPiece name, llvm::IRBuilder<>* ir_builder) - : name_(name.ToString()), + : name_(std::string(name)), outer_loop_preheader_bb_(nullptr), outer_loop_exit_bb_(nullptr), inner_loop_body_bb_(nullptr), diff --git a/tensorflow/compiler/xla/service/logical_buffer.cc b/tensorflow/compiler/xla/service/logical_buffer.cc index 68553bed121917850aaae41c6154f7895ed1add9..c742d35a7bcafa66692195a513992c9cfbb39335 100644 --- a/tensorflow/compiler/xla/service/logical_buffer.cc +++ b/tensorflow/compiler/xla/service/logical_buffer.cc @@ -15,9 +15,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/logical_buffer.h" -#include -#include - #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/types.h" @@ -28,43 +25,20 @@ namespace xla { LogicalBuffer::LogicalBuffer(HloInstruction* instruction, const ShapeIndex& index, Id id) - : instruction_(instruction), id_(id), color_(kInvalidColor), index_(index) { - const auto& s = shape(); - is_array_ = ShapeUtil::IsArray(s); - is_tuple_ = ShapeUtil::IsTuple(s); -} + : BufferValue(instruction, index, id), + instruction_(instruction), + index_(index) {} + +LogicalBuffer::~LogicalBuffer() {} string LogicalBuffer::ToString() const { + string color_string; + if (has_color()) { + color_string = tensorflow::strings::StrCat(" @", color().value()); + } return tensorflow::strings::StrCat(instruction_->name(), "[", tensorflow::str_util::Join(index_, ","), - "](#", id_, " @", color_.value(), ")"); -} - -std::ostream& operator<<(std::ostream& out, const LogicalBuffer& buffer) { - out << buffer.ToString(); - return out; -} - -/*static*/ LogicalBufferProto::Location LogicalBuffer::ToLocationProto( - const HloInstruction& instruction, const ShapeIndex& index) { - LogicalBufferProto::Location proto; - proto.set_computation_name(instruction.parent()->name()); - proto.set_instruction_name(instruction.name()); - for (const int64 index_entry : index) { - proto.add_shape_index(index_entry); - } - return proto; -} - -LogicalBufferProto LogicalBuffer::ToProto(const SizeFunction& size_fn) const { - LogicalBufferProto proto; - proto.set_id(id_); - proto.set_size(size_fn(*this)); - LogicalBufferProto::Location proto_location = - ToLocationProto(*instruction_, index_); - proto.mutable_defined_at()->Swap(&proto_location); - proto.set_color(color_.value()); - return proto; + "](#", id(), color_string, ")"); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/logical_buffer.h 
b/tensorflow/compiler/xla/service/logical_buffer.h index 67b205e289e626f4db16c39a0a9ddf8618678c3a..f9ba5a554740c9d4cc2643fe59d18ba76c30d03b 100644 --- a/tensorflow/compiler/xla/service/logical_buffer.h +++ b/tensorflow/compiler/xla/service/logical_buffer.h @@ -16,11 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LOGICAL_BUFFER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LOGICAL_BUFFER_H_ -#include -#include #include -#include +#include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -33,133 +31,30 @@ limitations under the License. namespace xla { -// Class describing a contiguous sequence of elements (ie, C array) which form -// the components of Shaped values in XLA. XLA arrays are trivially a -// single LogicalBuffer. Tuple values are made up of more than one -// LogicalBuffer: a LogicalBuffer for the pointers to elements, and a -// LogicalBuffer for each child element. -// -// Every buffer is defined by a particular instruction and most instructions -// define only a single buffer. Instructions which define a single buffer -// include array-shaped instructions such as Add but also includes Tuple-shaped -// instructions such as Tuple. The Tuple instruction defines a single buffer -// which is a vector of pointers to the buffers containing the Tuple -// instruction's operands. Though the result of the Tuple instruction includes -// multiple buffers only the top-level buffer (the vector of pointers) is -// defined by the Tuple instruction. The buffers containing the tuple elements -// are defined by earlier instructions, usually the operands of the Tuple -// instruction. -// -// Instructions which construct both the tuple *and* the tuple elements define -// more than one buffer. This includes (at least) tuple-shaped Constant, -// Parameter, Infeed and While instructions. 
The tuple-shaped instructions do -// not assemble a tuple from existing buffers like the Tuple instruction does, -// but rather define the entire tuple. -// -// Some instructions, such as Bitcast, define no buffers. These instructions -// simply forward buffers from their operands. -// -// The LogicalBuffer object describes which HLO instruction defines a buffer and -// where within that instruction's output shape the buffer is defined. The -// location within the output shape is indicated by LogicalBuffer::index() which -// is defined identically to the index used in -// ShapeUtil::GetSubshape(). Examples: -// -// %add = Add(%foo, %bar) -// %tuple_constant = Constant({1, {42, 43}}) -// -// %add defines a single array-shaped buffer LogicalBuffer(%add, {}) which holds -// the array result of the add operation. The nested-tuple-shaped -// %tuple_constant defines 5 buffers described by the following LogicalBuffer -// objects: -// -// LogicalBuffer(%tuple_constant, {}) // "Top-level" buffer: vector of -// // pointers to LogicalBuffers at -// // indices {0} and {1} -// LogicalBuffer(%tuple_constant, {0}) // Holds value "1" -// LogicalBuffer(%tuple_constant, {1}) // Holds nested tuple: vector of -// // pointers to LogicalBuffers at -// // indices {1, 0} and {1, 1} -// LogicalBuffer(%tuple_constant, {1, 0}) // Holds value "42" -// LogicalBuffer(%tuple_constant, {1, 1}) // Holds value "43" -class LogicalBuffer { +// TuplePointsToAnalysis uses this subclass of BufferValue. +class LogicalBuffer : public BufferValue { public: - TF_LIB_GTL_DEFINE_INT_TYPE(Color, int64); - - // Id is a unique identifier for the LogicalBuffer to facilitate efficient - // collections of LogicalBuffers with stable iteration order. - // LogicalBuffers are typically created and accessed through - // TuplePointsToAnalysis, and points-to analysis assigns each LogicalBuffer a - // unique value. - using Id = int64; - - // Functions which return the size and alignment of a logical buffer in bytes. 
- using SizeFunction = std::function; - using AlignmentFunction = std::function; - LogicalBuffer(HloInstruction* instruction, const ShapeIndex& index, Id id); - - Id id() const { return id_; } + ~LogicalBuffer() override; // Return the instruction that defines the buffer. - HloInstruction* instruction() const { return instruction_; } + HloInstruction* instruction() const override { return instruction_; } // Return the index within the output of the instruction where the buffer is // defined. Index used defined as in ShapeUtil::GetSubshape() - const ShapeIndex& index() const { return index_; } - - // Return the color of the logical buffer. Differently colored buffers can - // not be parts of the same allocation. - Color color() const { - CHECK_NE(color_, kInvalidColor) - << "Should not query the color of a buffer that was never colored"; - return color_; - } - - void set_color(Color color) { - CHECK_NE(color, kInvalidColor) - << "Should not set the color of a buffer to the invalid color"; - color_ = color; - } - - bool has_color() const { return color_ != kInvalidColor; } + const ShapeIndex& index() const override { return index_; } // Return the shape of the buffer. This reference points into the shape field // of the instruction defining the buffer. Therefore, the returned shape will // contain the layout of instruction, if any. - const Shape& shape() const { + const Shape& shape() const override { return ShapeUtil::GetSubshape(instruction_->shape(), index_); } - // Returns true if this buffer is the top-level output buffer of the defining - // HLO instruction. This is equivalent to index == {}. - bool IsTopLevel() const { return index_.empty(); } - - // Whether this buffer contains a tuple. - bool IsTuple() const { return is_tuple_; } - - // Whether this buffer contains an array. - bool IsArray() const { return is_array_; } - - // operator< is required for std::set. 
- bool operator<(const LogicalBuffer& other) const { return id_ < other.id_; } - - string ToString() const; - LogicalBufferProto ToProto(const SizeFunction& size_fn) const; - - // Returns the LogicalBufferProto::Location that serializes the given - // instruction and index. - static LogicalBufferProto::Location ToLocationProto( - const HloInstruction& instruction, const ShapeIndex& index); - - const Color kInvalidColor = Color(-1); + string ToString() const override; private: HloInstruction* instruction_; - Id id_ : 62; - bool is_array_ : 1; - bool is_tuple_ : 1; - Color color_; ShapeIndex index_; // Similar to HLO constructs (HloInstruction, etc), pointers are used for @@ -167,8 +62,6 @@ class LogicalBuffer { TF_DISALLOW_COPY_AND_ASSIGN(LogicalBuffer); }; -std::ostream& operator<<(std::ostream& out, const LogicalBuffer& buffer); - } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_SERVICE_LOGICAL_BUFFER_H_ diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc index f74bcb0b79355c8e69890487266cbc5f2a4500be..3a6a7c25f4b727c7112dbcbcb4f3d892679a0011 100644 --- a/tensorflow/compiler/xla/service/name_uniquer.cc +++ b/tensorflow/compiler/xla/service/name_uniquer.cc @@ -53,7 +53,7 @@ NameUniquer::NameUniquer(const string& separator) { } string NameUniquer::GetUniqueName(tensorflow::StringPiece prefix) { - string root = GetSanitizedName(prefix.empty() ? "name" : prefix.ToString()); + string root = GetSanitizedName(prefix.empty() ? "name" : std::string(prefix)); // Strip away numeric suffix (if any). Only recognize separator if it is in // the middle of the name. 
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index 586f6ef7a9c4f17f69340e77be17aec2f677a791..d3bc47e61e0e75fa2ef181988700f88cec9c1d76 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -702,6 +702,30 @@ class HloInstructionPatternOperandImpl { HloInstructionPattern operand_; }; +// An HloInstructionPattern implementation that matches only if the instruction +// is a fusion node with a particular kind. +template +class HloInstructionPatternFusionKindImpl { + public: + explicit constexpr HloInstructionPatternFusionKindImpl( + const Previous& previous, ::xla::HloInstruction::FusionKind kind) + : previous_(previous), kind_(kind) {} + + bool Match(const ::xla::HloInstruction* inst) const { + return previous_.Match(inst) && inst->opcode() == HloOpcode::kFusion && + inst->fusion_kind() == kind_; + } + + bool Match(::xla::HloInstruction* inst) const { + return previous_.Match(inst) && inst->opcode() == HloOpcode::kFusion && + inst->fusion_kind() == kind_; + } + + private: + Previous previous_; + ::xla::HloInstruction::FusionKind kind_; +}; + // A pattern that matches HloInstructions. template class HloInstructionPattern { @@ -807,6 +831,16 @@ class HloInstructionPattern { matched_inst_); } + // Modifies the pattern to match only if the instruction is a fusion node with + // the given kind. 
+ constexpr HloInstructionPattern> + WithFusionKind(HloInstruction::FusionKind kind) const { + return HloInstructionPattern>( + HloInstructionPatternFusionKindImpl(impl_, kind), matched_inst_); + } + private: Impl impl_; HloInstructionType** matched_inst_; diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc index c88157c312524fb273e6df368d2ef61d679d1d8b..204e8c99209fa95adb868a676bb9e5144fed432c 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc +++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc @@ -170,5 +170,28 @@ TEST(PatternMatcherTest, TupleShape) { Match(&tuple_shape, match::Shape().WithSubshape({0, 0}, match::Shape()))); } +TEST(PatternMatcherTest, FusionKind) { + constexpr char kModuleStr[] = R"( + HloModule test_module + + fused_computation { + ROOT fp0 = f32[] parameter(0) + } + + ENTRY while.v11 { + p0 = f32[] parameter(0) + ROOT fusion = f32[] fusion(p0), kind=kLoop, calls=fused_computation + })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, tools::Parse(kModuleStr)); + + auto* root = hlo_module->entry_computation()->root_instruction(); + EXPECT_TRUE(Match( + root, match::Op().WithFusionKind(HloInstruction::FusionKind::kLoop))); + EXPECT_FALSE(Match( + root, match::Op().WithFusionKind(HloInstruction::FusionKind::kInput))); + EXPECT_FALSE(Match(root->operand(0), match::Op().WithFusionKind( + HloInstruction::FusionKind::kLoop))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc index e2c07e38271df8b8875b2c9291f18ba41a9e6acd..688cceff0cd10df62a4093f00ad3331ca77652e0 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc @@ -75,7 +75,7 @@ StatusOr ReducePrecisionInsertion::insert_after( return false; } - // Check that we haven't already inserted 
an equivalant reduce-precision + // Check that we haven't already inserted an equivalent reduce-precision // operation after this instruction. (The zero-user case occurs when this is // the root instruction.) if (instruction->user_count() > 0) { diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 6e0d07a12f906b4b95d521e957ac28c84dd28774..495f8801ba82ecbcf9f6e5db5507ef8785c752d6 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -91,6 +91,34 @@ tensorflow::Status RecordResult(const ShapedBuffer& result, return tensorflow::Status::OK(); } +// Records the arguments used to invoke a computation in an HloSnapshot proto. +tensorflow::Status RecordArguments( + const tensorflow::gtl::ArraySlice arguments, + se::StreamExecutor* executor, TransferManager* transfer_manager, + HloSnapshot* module) { + module->clear_arguments(); + for (const ShapedBuffer* argument : arguments) { + TF_ASSIGN_OR_RETURN( + std::unique_ptr literal, + transfer_manager->TransferLiteralFromDevice(executor, *argument)); + *module->add_arguments() = literal->ToProto(); + } + return tensorflow::Status::OK(); +} + +// Records the result of a computation in a HloSnapshot proto. 
+tensorflow::Status RecordResult(const ShapedBuffer& result, + se::StreamExecutor* executor, + TransferManager* transfer_manager, + HloSnapshot* module) { + module->clear_result(); + TF_ASSIGN_OR_RETURN( + std::unique_ptr literal, + transfer_manager->TransferLiteralFromDevice(executor, result)); + *module->mutable_result() = literal->ToProto(); + return tensorflow::Status::OK(); +} + } // namespace ServiceOptions& ServiceOptions::set_platform(se::Platform* platform) { @@ -268,8 +296,10 @@ StatusOr> Service::CreateModuleConfig( const ExecutionOptions* execution_options, const UserComputation* user_computation) { auto config = MakeUnique(program_shape); - auto* computation_layout = config->mutable_entry_computation_layout(); - + ComputationLayout* host_computation_layout = + config->mutable_host_entry_computation_layout(); + ComputationLayout* device_computation_layout = + config->mutable_device_entry_computation_layout(); if (program_shape.parameters_size() != argument_shapes.size()) { return InvalidArgument("computation takes %d parameters, but %zu given", program_shape.parameters_size(), @@ -294,9 +324,10 @@ StatusOr> Service::CreateModuleConfig( i, ShapeUtil::HumanString(program_shape.parameters(i)).c_str(), ShapeUtil::HumanString(*argument_shapes[i]).c_str()); } - TF_RETURN_IF_ERROR( - computation_layout->mutable_parameter_layout(i)->CopyLayoutFromShape( - *argument_shapes[i])); + TF_RETURN_IF_ERROR(host_computation_layout->mutable_parameter_layout(i) + ->CopyLayoutFromShape(*argument_shapes[i])); + TF_RETURN_IF_ERROR(device_computation_layout->mutable_parameter_layout(i) + ->CopyLayoutFromShape(*argument_shapes[i])); } if (execution_options != nullptr && execution_options->has_shape_with_output_layout()) { @@ -305,10 +336,17 @@ StatusOr> Service::CreateModuleConfig( TF_RETURN_IF_ERROR(ValidateResultShapeWithLayout(shape_with_output_layout, program_shape.result())); TF_RETURN_IF_ERROR( - computation_layout->mutable_result_layout()->CopyLayoutFromShape( + 
host_computation_layout->mutable_result_layout()->CopyLayoutFromShape( + shape_with_output_layout)); + TF_RETURN_IF_ERROR( + device_computation_layout->mutable_result_layout()->CopyLayoutFromShape( shape_with_output_layout)); } else { - computation_layout->mutable_result_layout()->Clear(); + // If the result layout is not set, then choose the default. + // TODO(b/29118294): Allow the compiler to choose a better layout in this + // case. + host_computation_layout->mutable_result_layout()->SetToDefaultLayout(); + device_computation_layout->mutable_result_layout()->SetToDefaultLayout(); } config->set_replica_count(options_.number_of_replicas()); @@ -409,6 +447,28 @@ StatusOr>> Service::BuildExecutables( DeviceMemoryAllocator* device_allocator) { VLOG(1) << Printf("BuildExecutable on service %p", this); + // Dump computation proto state if flag is set; slot i belongs to module i. + std::vector> hlo_snapshots(module_protos.size()); + for (int64 i = 0; i < module_protos.size(); ++i) { + const string& directory_path = + module_configs[i]->debug_options().xla_dump_computations_to(); + const string& execution_directory_path = + module_configs[i]->debug_options().xla_dump_executions_to(); + if (directory_path.empty() && execution_directory_path.empty()) { + continue; + } + auto hlo_snapshot = MakeUnique(); + *hlo_snapshot->mutable_hlo()->mutable_hlo_module() = *module_protos[i]; + if (!directory_path.empty()) { + string filename = + Printf("computation_%lld__%s", module_protos[i]->id(), + module_protos[i]->entry_computation_name().c_str()); + TF_RETURN_IF_ERROR( + Executable::DumpToDirectory(directory_path, filename, *hlo_snapshot)); + } + hlo_snapshots[i] = std::move(hlo_snapshot); + } + VLOG(1) << "Computations:"; for (const HloModuleProto* proto : module_protos) { VLOG(1) << proto->name(); @@ -429,9 +489,31 @@ StatusOr>> Service::BuildExecutables( backend->compiler()->Compile(std::move(modules), std::move(executors), device_allocator)); + for (size_t i = 0; i < module_protos.size(); ++i) { + if 
(!module_configs[i]->debug_options().xla_dump_executions_to().empty()) { + executables[i]->set_hlo_snapshot(std::move(hlo_snapshots[i])); + } + } + return std::move(executables); } +Status Service::ValidateEntryComputationLayout(HloModule* module) { + const ComputationLayout& on_device = + module->device_entry_computation_layout(); + for (int64 i = 0; i < on_device.parameter_count(); ++i) { + TF_RET_CHECK(ShapeUtil::Equal( + on_device.parameter_shape(i), + execute_backend_->transfer_manager()->HostShapeToDeviceShape( + module->host_entry_computation_layout().parameter_shape(i)))); + } + TF_RET_CHECK(ShapeUtil::Equal( + module->device_entry_computation_layout().result_shape(), + execute_backend_->transfer_manager()->HostShapeToDeviceShape( + module->host_entry_computation_layout().result_shape()))); + return tensorflow::Status::OK(); +} + StatusOr> Service::BuildExecutable( const VersionedComputationHandle& versioned_handle, std::unique_ptr module_config, Backend* backend, @@ -470,6 +552,8 @@ StatusOr> Service::BuildExecutable( TF_ASSIGN_OR_RETURN( module, backend->compiler()->RunHloPasses(std::move(module), executor, device_allocator)); + // Check that on-host and on-device shapes are consistent. + TF_RETURN_IF_ERROR(ValidateEntryComputationLayout(module.get())); TF_ASSIGN_OR_RETURN(std::unique_ptr executable, backend->compiler()->RunBackend( @@ -826,7 +910,7 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, CreateModuleConfig(*program_shape, replicated_arguments.front(), request.execution_options(), user_computation)); VLOG(3) << "ExecuteParallel created HloModuleConfig computation layout: " - << module_config->entry_computation_layout().ToString(); + << module_config->host_entry_computation_layout().ToString(); // Adds to the vectors to build and execute the computations after the loop. 
all_arguments.push_back(replicated_arguments); @@ -929,7 +1013,7 @@ tensorflow::Status Service::ExecuteGraphParallel( /*user_computation=*/nullptr)); VLOG(3) << "ExecuteGraphParallel created HloModuleConfig computation layout: " - << module_config->entry_computation_layout().ToString(); + << module_config->host_entry_computation_layout().ToString(); // Adds to the vectors to build and execute the computations after the loop. all_arguments.push_back(replicated_arguments); @@ -1079,7 +1163,7 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, arg->execution_options(), user_computation)); VLOG(3) << "Execute created HloModuleConfig computation layout: " - << module_config->entry_computation_layout().ToString(); + << module_config->host_entry_computation_layout().ToString(); TF_ASSIGN_OR_RETURN( std::shared_ptr executable, @@ -1125,6 +1209,22 @@ StatusOr> Service::BuildExecutable( "BuildExecutable on service %p with serialized module proto: %s", this, module_proto.name().c_str()); + // Dump computation proto state if flag is set. 
+ auto hlo_snapshot = MakeUnique(); + const string& directory_path = + module_config->debug_options().xla_dump_computations_to(); + const string& execution_directory_path = + module_config->debug_options().xla_dump_executions_to(); + if (!directory_path.empty() || !execution_directory_path.empty()) { + *hlo_snapshot->mutable_hlo()->mutable_hlo_module() = module_proto; + if (!directory_path.empty()) { + string filename = Printf("computation_%lld__%s", module_proto.id(), + module_proto.entry_computation_name().c_str()); + TF_RETURN_IF_ERROR( + Executable::DumpToDirectory(directory_path, filename, *hlo_snapshot)); + } + } + TF_ASSIGN_OR_RETURN(std::unique_ptr module, HloModule::CreateFromProto(module_proto, *module_config)); @@ -1133,6 +1233,8 @@ StatusOr> Service::BuildExecutable( TF_ASSIGN_OR_RETURN( module, backend->compiler()->RunHloPasses(std::move(module), executor, device_allocator)); + // Check that on-host and on-device shapes are consistent. + TF_RETURN_IF_ERROR(ValidateEntryComputationLayout(module.get())); TF_ASSIGN_OR_RETURN(std::unique_ptr executable, backend->compiler()->RunBackend( @@ -1175,12 +1277,31 @@ tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, execute_backend_->default_stream_executor(), /*device_allocator=*/nullptr)); + if (executable->dumping_snapshot()) { + executable->hlo_snapshot()->set_execution_platform( + execute_backend_->platform()->Name()); + TF_RETURN_IF_ERROR(RecordArguments( + replicated_arguments.front(), + execute_backend_->default_stream_executor(), + execute_backend_->transfer_manager(), executable->hlo_snapshot())); + } + TF_ASSIGN_OR_RETURN( *result->mutable_output(), ExecuteAndRegisterResult( executable.get(), replicated_arguments, execute_backend_.get(), "result of " + arg->computation().name(), result->mutable_profile())); + if (executable->dumping_snapshot()) { + TF_ASSIGN_OR_RETURN( + const ShapedBuffer* result_buffer, + allocation_tracker_.ResolveForReplica(result->output(), 0)); + 
TF_RETURN_IF_ERROR(RecordResult( + *result_buffer, execute_backend_->default_stream_executor(), + execute_backend_->transfer_manager(), executable->hlo_snapshot())); + TF_RETURN_IF_ERROR(executable->DumpHloSnapshot()); + } + VLOG(1) << "successfully completed 'execute-graph' request"; return tensorflow::Status::OK(); } @@ -1215,7 +1336,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, arg->execution_options(), user_computation)); VLOG(3) << "ExecuteAsync created HloModuleConfig computation layout: " - << module_config->entry_computation_layout().ToString(); + << module_config->host_entry_computation_layout().ToString(); ExecutionProfile profile; diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 476bd0597de735a9f777be78f5ab01dac1188525..f84fe407e05da371da66ba33efd6e8165198cf2c 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -295,6 +295,9 @@ class Service : public ServiceInterface { const ExecutionOptions& execution_options, tensorflow::gtl::ArraySlice arguments); + // Assert that host- and device-shapes are in a consistent state. 
+ Status ValidateEntryComputationLayout(HloModule* module); + protected: friend class LocalExecutable; diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 48b2922e77b78719e5d3469cbaa4fc15969de91b..c493547d9e83e19c09329f32873de3a9a330b460 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -172,11 +172,11 @@ tensorflow::Status ExpectNotTupleOrOpaque(const Shape& shape, tensorflow::StringPiece op_type) { if (ShapeUtil::IsTuple(shape)) { return InvalidArgument("Expected non-tuple argument for %s, but got %s.", - op_type.ToString().c_str(), + std::string(op_type).c_str(), ShapeUtil::HumanString(shape).c_str()); } else if (ShapeUtil::IsOpaque(shape)) { return InvalidArgument("Expected non-opaque argument for %s, but got %s.", - op_type.ToString().c_str(), + std::string(op_type).c_str(), ShapeUtil::HumanString(shape).c_str()); } else { return tensorflow::Status::OK(); diff --git a/tensorflow/compiler/xla/service/source_map_util.h b/tensorflow/compiler/xla/service/source_map_util.h index a776d745f4e56ca4f3d2480740259832bbc85011..18e2651abb1600a7b9ffb79de887b8795717e55e 100644 --- a/tensorflow/compiler/xla/service/source_map_util.h +++ b/tensorflow/compiler/xla/service/source_map_util.h @@ -23,7 +23,7 @@ limitations under the License. namespace xla { namespace source_map_util { -// Creates an INVALID_ARUGMENT status with the given format string. +// Creates an INVALID_ARGUMENT status with the given format string. // // Also, attempts to extract the OpMetadata for parameter_number on executable // and append it to the status message for source mapping to user code. 
diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc index 3efd38ce0daa3e3f3398b32463019df6cd10a009..f7a5512fec47f75a72d31464ebac556ae41b36b9 100644 --- a/tensorflow/compiler/xla/service/transpose_folding.cc +++ b/tensorflow/compiler/xla/service/transpose_folding.cc @@ -35,7 +35,8 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoDot( const HloInstruction& dot, const TransposeFolding::TransposableGemmOperandsFn& transposable_gemm_operands) { - if (HloOpcode::kDot != dot.opcode()) { + if (HloOpcode::kDot != dot.opcode() || + dot.dot_dimension_numbers().lhs_batch_dimensions_size() != 0) { return {}; } @@ -44,6 +45,8 @@ TransposeFolding::OperandIndices CanFoldOperandsIntoDot( auto& operand = *dot.operand(i); if (operand.IsRank2Transpose()) { operand_set.push_back(i); + } else if (ShapeUtil::Rank(operand.shape()) != 2) { + return {}; } } @@ -74,23 +77,39 @@ using InstructionOperandsPair = // Folds the operands of `dot` that are foldable transposes. `computation` is // the parent HLO computation of `dot`. -// -// Returns whether the module is changed. -bool FoldTransposeIntoDot(InstructionOperandsPair pair) { - auto* dot = pair.first; - std::vector instructions_to_fuse(1, dot); - for (const int64 operand_index : pair.second) { - instructions_to_fuse.push_back(dot->mutable_operand(operand_index)); - } - - // Early-exit if no operands are foldable. 
- if (instructions_to_fuse.size() == 1) { - return false; +Status FoldTransposeIntoDot(InstructionOperandsPair pair) { + HloInstruction* dot = pair.first; + + DotDimensionNumbers new_dim_numbers = dot->dot_dimension_numbers(); + HloInstruction* new_lhs = dot->mutable_operand(0); + HloInstruction* new_rhs = dot->mutable_operand(1); + + CHECK_EQ(new_dim_numbers.lhs_batch_dimensions_size(), 0); + CHECK_EQ(new_dim_numbers.rhs_batch_dimensions_size(), 0); + CHECK_EQ(new_dim_numbers.lhs_contracting_dimensions_size(), 1); + CHECK_EQ(new_dim_numbers.rhs_contracting_dimensions_size(), 1); + + for (int64 operand_index : pair.second) { + // We've checked that there aren't any batch dimensions and that the inputs + // are rank 2, and shape inference guarantees that there is exactly one + // contracting dimension. + if (operand_index == 0) { + CHECK_EQ(new_lhs->opcode(), HloOpcode::kTranspose); + new_dim_numbers.set_lhs_contracting_dimensions( + 0, 1 - new_dim_numbers.lhs_contracting_dimensions(0)); + new_lhs = new_lhs->mutable_operand(0); + } else { + CHECK_EQ(operand_index, 1); + CHECK_EQ(new_rhs->opcode(), HloOpcode::kTranspose); + new_dim_numbers.set_rhs_contracting_dimensions( + 0, 1 - new_dim_numbers.rhs_contracting_dimensions(0)); + new_rhs = new_rhs->mutable_operand(0); + } } - dot->parent()->CreateFusionInstruction( - instructions_to_fuse, HloInstruction::FusionKind::kTransposeDot); - return true; + std::unique_ptr new_dot = HloInstruction::CreateDot( + dot->shape(), new_lhs, new_rhs, new_dim_numbers); + return dot->parent()->ReplaceWithNewInstruction(dot, std::move(new_dot)); } // Folds the operands of `convolution` that are foldable transposes. 
@@ -205,7 +224,8 @@ StatusOr TransposeFolding::Run(HloModule* module) { bool changed = false; for (InstructionOperandsPair& pair : foldable_dots) { - changed |= FoldTransposeIntoDot(pair); + TF_RETURN_IF_ERROR(FoldTransposeIntoDot(pair)); + changed = true; } for (InstructionOperandsPair& pair : foldable_convolutions) { changed |= FoldTransposeIntoConvolution(pair); diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc index c7c41603459189d11a0399740bf3df2507e621a5..f73f1227aaf1630a9e7c43bb508732c5518ef929 100644 --- a/tensorflow/compiler/xla/service/transpose_folding_test.cc +++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc @@ -19,11 +19,12 @@ limitations under the License. #include #include -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/shape_inference.h" @@ -31,9 +32,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/logging.h" +namespace op = xla::testing::opcode_matchers; + namespace xla { namespace { @@ -54,83 +58,102 @@ class TransposeFoldingTest : public HloTestBase { }; TEST_F(TransposeFoldingTest, FoldDotTranspose) { - auto builder = HloComputation::Builder("entry_computation"); - HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/0, ShapeUtil::MakeShape(F32, {2, 3}), - /*name=*/"x")); - HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/1, ShapeUtil::MakeShape(F32, {2, 3}), - /*name=*/"y")); - HloInstruction* transpose_y = - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0})); - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x, - /*rhs=*/transpose_y, dot_dnums)); + string hlo_string = R"( +HloModule FoldDotTranspose + +ENTRY entry_computation { + x = f32[2,3]{1,0} parameter(0) + y = f32[2,3]{1,0} parameter(1) + transpose = f32[3,2]{1,0} transpose(y), dimensions={1,0} + ROOT dot = f32[2,2]{1,0} dot(x, transpose), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); - auto module = CreateNewModule("test_module"); - HloComputation* entry_computation = - module->AddEntryComputation(builder.Build(dot)); FoldTranspose(module.get()); - // Instructions after folding: x, y, and the fusion. 
- std::unordered_set instruction_set( - entry_computation->instructions().begin(), - entry_computation->instructions().end()); - CHECK_EQ(1, instruction_set.erase(x)) << "x is not in entry_computation."; - CHECK_EQ(1, instruction_set.erase(y)) << "y is not in entry_computation."; - CHECK_EQ(1, instruction_set.size()) - << "entry_computation should contain exactly 3 instructions."; - HloInstruction* fusion = *instruction_set.begin(); - EXPECT_EQ(HloOpcode::kFusion, fusion->opcode()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Dot(op::Parameter(0), op::Parameter(1), + /*lhs_contracting_dim=*/1, /*rhs_contracting_dim=*/1)); +} + +TEST_F(TransposeFoldingTest, DontFoldTransposeOfBatchDim) { + string hlo_string = R"( +HloModule FoldDotTranspose - // The fusion instruction should contain two parameters, one transpose and - // one dot. - EXPECT_EQ(4, fusion->fused_instruction_count()); +ENTRY entry_computation { + x = f32[2,3] parameter(0) + y = f32[3,2] parameter(1) + transpose = f32[2,3] transpose(y), dimensions={1,0} + ROOT dot = f32[2] dot(x, transpose), lhs_batch_dims={0}, rhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TransposeFolding transpose_folding( + [](const HloInstruction& dot, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }, + [](const HloInstruction& convolution, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }); + TF_ASSERT_OK_AND_ASSIGN(bool changed, transpose_folding.Run(module.get())); + EXPECT_FALSE(changed); +} + +TEST_F(TransposeFoldingTest, DontFoldTransposeOfRank1Dot) { + string hlo_string = R"( +HloModule FoldDotTranspose + +ENTRY entry_computation { + x = f32[3] parameter(0) + y = f32[3,2] parameter(1) + transpose = f32[2,3] transpose(y), dimensions={1,0} + ROOT dot = f32[2] dot(x, transpose), lhs_batch_dims={}, 
rhs_batch_dims={0}, lhs_contracting_dims={0}, rhs_contracting_dims={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TransposeFolding transpose_folding( + [](const HloInstruction& dot, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }, + [](const HloInstruction& convolution, + const TransposeFolding::OperandIndices& candidate_operands) { + return candidate_operands; + }); + TF_ASSERT_OK_AND_ASSIGN(bool changed, transpose_folding.Run(module.get())); + EXPECT_FALSE(changed); } TEST_F(TransposeFoldingTest, FoldDotTransposeConstant) { - auto builder = HloComputation::Builder("entry_computation"); - // 2x1 - HloInstruction* const0 = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2({{1}, {2}}))); - // 3x2 - HloInstruction* const1 = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR2({{1, 2}, {3, 4}, {5, 6}}))); - HloInstruction* transpose0 = - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {1, 2}), const0, {1, 0})); - HloInstruction* transpose1 = - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {2, 3}), const1, {1, 0})); - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateDot( - ShapeUtil::MakeShape(F32, {1, 3}), - /*lhs=*/transpose0, /*rhs=*/transpose1, dot_dnums)); + string hlo_string = R"( +HloModule FoldDotTransposeConstant + +ENTRY entry_computation { + constant = f32[2,1]{1,0} constant(f32[2,1] { { 1 }, { 2 } }) + transpose = f32[1,2]{1,0} transpose(constant), dimensions={1,0} + constant.1 = f32[3,2]{1,0} constant(f32[3,2] { { 1, 2 }, { 3, 4 }, { 5, 6 } }) + transpose.1 = f32[2,3]{1,0} transpose(constant.1), dimensions={1,0} + ROOT dot = f32[1,3]{1,0} dot(transpose, transpose.1), lhs_contracting_dims={1}, 
rhs_contracting_dims={0} +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); - auto module = CreateNewModule("test_module"); - HloComputation* entry_computation = - module->AddEntryComputation(builder.Build(dot)); FoldTranspose(module.get()); - for (auto* instruction : entry_computation->instructions()) { - if (instruction->opcode() == HloOpcode::kFusion) { - CHECK_EQ(2, instruction->operand_count()); - EXPECT_EQ(const0, instruction->operand(0)); - EXPECT_EQ(const1, instruction->operand(1)); - } - } - - // The created fusion instruction should contain two parameters, two - // transposes (one for each parameter) and one dot. - EXPECT_EQ(5, - entry_computation->root_instruction()->fused_instruction_count()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Dot(op::Constant(), op::Constant(), + /*lhs_contracting_dim=*/0, /*rhs_contracting_dim=*/1)); } TEST_F(TransposeFoldingTest, FuseDotWithConstantOperands) { @@ -164,50 +187,32 @@ TEST_F(TransposeFoldingTest, FuseDotWithConstantOperands) { EXPECT_EQ(6, callee_computation->instruction_count()); } -TEST_F(TransposeFoldingTest, FoldDotTransposeInWhile) { - auto builder = HloComputation::Builder("entry_computation"); - HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/0, ShapeUtil::MakeShape(F32, {2, 3}), - /*name=*/"x")); - HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/1, ShapeUtil::MakeShape(F32, {2, 3}), - /*name=*/"y")); - HloInstruction* transpose_y = - builder.AddInstruction(HloInstruction::CreateTranspose( - ShapeUtil::MakeShape(F32, {3, 2}), y, {1, 0})); - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateDot(ShapeUtil::MakeShape(F32, {2, 2}), /*lhs=*/x, - /*rhs=*/transpose_y, dot_dnums)); - - auto module = 
CreateNewModule("test_module"); - HloComputation* entry_computation = - module->AddEntryComputation(builder.Build(dot)); +TEST_F(TransposeFoldingTest, FoldDotTransposeInCall) { + string hlo_string = R"( +HloModule FoldDotTransposeInCall - HloInstruction* call = module->OutlineExpressionFromComputation( - {transpose_y, dot}, "outlined", entry_computation); +callee { + name.0 = f32[2,3]{1,0} parameter(0) + name.1 = f32[2,3]{1,0} parameter(1) + transpose.clone = f32[3,2]{1,0} transpose(name.0), dimensions={1,0} + ROOT dot.clone = f32[2,2]{1,0} dot(name.1, transpose.clone), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +ENTRY entry_computation { + y = f32[2,3]{1,0} parameter(1) + x = f32[2,3]{1,0} parameter(0) + ROOT call = f32[2,2]{1,0} call(y, x), to_apply=callee +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); FoldTranspose(module.get()); - // Instructions after folding: x, y, and the fusion. - std::unordered_set instruction_set( - entry_computation->instructions().begin(), - entry_computation->instructions().end()); - CHECK_EQ(1, instruction_set.erase(x)) << "x is not in entry_computation."; - CHECK_EQ(1, instruction_set.erase(y)) << "y is not in entry_computation."; - CHECK_EQ(1, instruction_set.erase(call)) - << "call is not in entry_computation."; - CHECK(instruction_set.empty()) - << "entry_computation should contain exactly 3 instructions."; - HloInstruction* fusion = - call->called_computations().front()->root_instruction(); - EXPECT_EQ(HloOpcode::kFusion, fusion->opcode()); - - // The fusion instruction should contain two parameters, one transpose and - // one dot. 
- EXPECT_EQ(4, fusion->fused_instruction_count()); + const HloComputation* callee = module->GetComputationWithName("callee"); + ASSERT_NE(callee, nullptr); + EXPECT_THAT(callee->root_instruction(), + op::Dot(op::Parameter(1), op::Parameter(0), + /*lhs_contracting_dim=*/1, /*rhs_contracting_dim=*/1)); } // Test that a two dimension swap of the kernel gets folded into convolution. @@ -222,7 +227,7 @@ TEST_F(TransposeFoldingTest, FoldConvDimSwapTransposeRhs) { HloInstruction* transpose_y = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), y, {1, 0, 2, 3})); - auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers(); + auto dnums = XlaBuilder::CreateDefaultConvDimensionNumbers(); Window window; for (int i = 0; i < 2; ++i) { WindowDimension* dim = window.add_dimensions(); @@ -275,7 +280,7 @@ TEST_F(TransposeFoldingTest, FoldConvComplexTransposeRhs) { HloInstruction* transpose_y = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), y, {1, 3, 0, 2})); - auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers(); + auto dnums = XlaBuilder::CreateDefaultConvDimensionNumbers(); Window window; for (int i = 0; i < 2; ++i) { WindowDimension* dim = window.add_dimensions(); @@ -334,7 +339,7 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) { HloInstruction* transpose_x = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), x, {1, 0, 2, 3})); - auto dnums = ComputationBuilder::CreateDefaultConvDimensionNumbers(); + auto dnums = XlaBuilder::CreateDefaultConvDimensionNumbers(); Window window; for (int i = 0; i < 2; ++i) { WindowDimension* dim = window.add_dimensions(); @@ -398,7 +403,7 @@ TEST_F(TransposeFoldingTest, FoldConvComplexTransposeLhs) { HloInstruction* transpose_x = builder.AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(F32, {2, 3, 1, 1}), x, {1, 0, 3, 2})); - auto dnums = 
ComputationBuilder::CreateDefaultConvDimensionNumbers(); + auto dnums = XlaBuilder::CreateDefaultConvDimensionNumbers(); Window window; for (int i = 0; i < 2; ++i) { WindowDimension* dim = window.add_dimensions(); diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc new file mode 100644 index 0000000000000000000000000000000000000000..10fc4958fae06414dbe7a3a0a798cb5c6e0f35c2 --- /dev/null +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc @@ -0,0 +1,128 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" +#include "tensorflow/compiler/xla/service/while_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" + +namespace xla { + +// Replaces all uses of old_instr with new_instr except the use at +// `while_body_root` (which must be a tuple instruction) at index `tuple_index`. +// This utility helps us replace an instruction in the while body with a +// constant while still keeping it trivially loop invariant. 
+static Status ReplaceUsesWhileKeepingLoopInvariance( + HloInstruction* old_instr, HloInstruction* new_instr, + HloInstruction* while_body_root, int64 tuple_index) { + CHECK_EQ(while_body_root->opcode(), HloOpcode::kTuple); + + std::vector users; + users.reserve(old_instr->user_count()); + c_copy(old_instr->users(), std::back_inserter(users)); + + for (auto* user : users) { + for (int64 i = 0, e = user->operand_count(); i < e; i++) { + if (user->operand(i) == old_instr && + !(user == while_body_root && i == tuple_index)) { + TF_RETURN_IF_ERROR(user->ReplaceOperandWith(i, new_instr)); + } + } + } + + return Status::OK(); +} + +StatusOr WhileLoopConstantSinking::TrySinkingConstantsIntoWhileBody( + HloInstruction* while_instr) { + HloComputation* while_body = while_instr->while_body(); + + const HloInstruction& init_value = *while_instr->operand(0); + if (init_value.opcode() != HloOpcode::kTuple) { + return false; + } + + bool changed = false; + + for (HloInstruction* invariant_gte : + WhileUtil::GetInvariantGTEsForWhileBody(*while_body)) { + int64 index = invariant_gte->tuple_index(); + const HloInstruction& invariant_value = *init_value.operand(index); + if (invariant_value.opcode() == HloOpcode::kConstant) { + auto* constant_instr = + while_body->AddInstruction(invariant_value.Clone(/*suffix=*/".sunk")); + TF_RETURN_IF_ERROR(ReplaceUsesWhileKeepingLoopInvariance( + invariant_gte, constant_instr, while_body->root_instruction(), + index)); + changed = true; + } + } + + return changed; +} + +StatusOr WhileLoopConstantSinking::Run(HloModule* module) { + VLOG(2) << "HLO module before WhileLoopConstantSinking:"; + XLA_VLOG_LINES(2, module->ToString()); + + bool changed = false; + std::vector while_instrs; + for (auto* comp : module->MakeNonfusionComputations()) { + // Right now we don't particularly care about optimizing while-of-while + // patterns.
If/When we do, we'll want to visit the outer while (while_0) + // before we visit the inner while (while_1): + // + // while_1_body(state) { + // val = gte(state, 0) // Loop invariant + // use(val) + // } + // + // while_0_body(state) { + // val = gte(state, 0) // Loop invariant + // while_1 = while(init=tuple(val, ...), body=while_1_body, ...) + // ... + // } + // + // main { + // while_0 = while(init=(constant, ...), body=while_0_body, ...) + // } + // + // This will let us sink the constant into the outer while first and then + // into the inner while in a single run of this pass. + c_copy_if(comp->instructions(), std::back_inserter(while_instrs), + [](const HloInstruction* instr) { + return instr->opcode() == HloOpcode::kWhile; + }); + } + + for (HloInstruction* while_instr : while_instrs) { + // We only sink into while loop bodies, but this can be extended to + // transform conditions as well. + TF_ASSIGN_OR_RETURN(bool result, + TrySinkingConstantsIntoWhileBody(while_instr)); + changed |= result; + } + + if (changed) { + VLOG(2) << "HLO module after WhileLoopConstantSinking:"; + XLA_VLOG_LINES(2, module->ToString()); + } else { + VLOG(2) << "HLO module unchanged after WhileLoopConstantSinking"; + } + + return changed; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.h b/tensorflow/compiler/xla/service/while_loop_constant_sinking.h new file mode 100644 index 0000000000000000000000000000000000000000..21fb8568a84985692026e145c363500a154a1599 --- /dev/null +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.h @@ -0,0 +1,68 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_CONSTANT_SINKING_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_CONSTANT_SINKING_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// Sinks while loop invariant values that happen to be constants into the while +// loop body. This is probably not a win in isolation but may unlock further +// optimizations like constant folding. +// +// state = (..., const, ...) +// while (pred(state)) { +// (..., v, ...) = state +// use(v) +// state = (..., v, ...) +// } +// +// => +// +// state = (..., const, ...) +// while (pred(state)) { +// (..., v, ...) = state +// use(const) +// state = (..., v, ...) +// } +// +// Note that it leaves the `v` in place to keep that component of the state +// tuple trivially loop invariant. WhileLoopSimplifier will later get rid of +// `v`. +// +// We only sink into while loop bodies, but this can be extended to transform +// conditions as well. +// +// TODO(b/79121449): We should also sink broadcasts of constants. 
+class WhileLoopConstantSinking : public HloPassInterface { + public: + ~WhileLoopConstantSinking() override = default; + + tensorflow::StringPiece name() const override { + return "while-loop-constant-sinking"; + } + + StatusOr Run(HloModule* module) override; + + private: + StatusOr TrySinkingConstantsIntoWhileBody(HloInstruction* while_instr); +}; +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_WHILE_LOOP_CONSTANT_SINKING_H_ diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..0d2288d8ea6ebb0ac4ac9468a211b161438fc5f1 --- /dev/null +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc @@ -0,0 +1,200 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" + +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +namespace op = xla::testing::opcode_matchers; +using ::testing::_; + +class WhileLoopConstantSinkingTest : public ::testing::Test {}; + +TEST_F(WhileLoopConstantSinkingTest, SinkOneConstant) { + const char* const hlo_string = R"( +HloModule ModuleWithWhile + +body { + p_body = (f32[2],f32[2]) parameter(0) + p_body.0 = f32[2] get-tuple-element((f32[2],f32[2]) p_body), index=0 + p_body.1 = f32[2] get-tuple-element((f32[2],f32[2]) p_body), index=1 + + add.0 = f32[2] add(p_body.0, p_body.1) + ROOT root = (f32[2],f32[2]) tuple(add.0, p_body.1) +} + +condition { + p_cond = (f32[2],f32[2]) parameter(0) + ROOT result = pred[] constant(true) +} + +ENTRY entry { + const_0 = f32[2] constant({1, 2}) + const_1 = f32[2] constant({2, 1}) + while_init = (f32[2],f32[2]) tuple(const_0, const_1) + ROOT while = (f32[2],f32[2]) while(while_init), condition=condition, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TF_ASSERT_OK_AND_ASSIGN(bool changed, + WhileLoopConstantSinking{}.Run(module.get())); + ASSERT_TRUE(changed); + + auto* while_body = module->GetComputationWithName("body"); + EXPECT_THAT(while_body->root_instruction(), + op::Tuple(op::Add(_, op::Constant()), _)); +} + +TEST_F(WhileLoopConstantSinkingTest, KeepConstantsLoopInvariant) { + const char* const hlo_string = R"( +HloModule ModuleWithWhile + +body { + p_body = (f32[2],f32[2],f32[2]) parameter(0) + p_body.0 = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_body), index=0 + p_body.1 = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_body), index=1 + 
p_body.2 = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_body), index=2 + + add.0 = f32[2] add(p_body.1, p_body.2) + ROOT root = (f32[2],f32[2],f32[2]) tuple(add.0, p_body.1, p_body.2) +} + +condition { + p_cond = (f32[2],f32[2],f32[2]) parameter(0) + ROOT result = pred[] constant(true) +} + +ENTRY entry { + const_0 = f32[2] constant({1, 2}) + const_1 = f32[2] constant({2, 1}) + const_2 = f32[2] constant({3, 1}) + while_init = (f32[2],f32[2],f32[2]) tuple(const_0, const_1, const_2) + ROOT while = (f32[2],f32[2],f32[2]) while(while_init), condition=condition, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TF_ASSERT_OK_AND_ASSIGN(bool changed, + WhileLoopConstantSinking{}.Run(module.get())); + ASSERT_TRUE(changed); + + auto* while_body = module->GetComputationWithName("body"); + EXPECT_THAT(while_body->root_instruction(), + op::Tuple(op::Add(op::Constant(), op::Constant()), + op::GetTupleElement(op::Parameter(0)), + op::GetTupleElement(op::Parameter(0)))); +} + +TEST_F(WhileLoopConstantSinkingTest, TupleShapedConstants) { + const char* const hlo_string = R"( +HloModule ModuleWithWhile + +body { + p_b = (f32[2],(f32[2],f32[2])) parameter(0) + p_b.0 = f32[2] get-tuple-element((f32[2],(f32[2],f32[2])) p_b), index=0 + p_b.1 = (f32[2],f32[2]) get-tuple-element((f32[2],(f32[2],f32[2])) p_b), index=1 + + p_b.1.1 = f32[2] get-tuple-element(p_b.1), index=0 + + ROOT root = (f32[2],(f32[2],f32[2])) tuple(p_b.1.1, p_b.1) +} + +condition { + p_cond = (f32[2],(f32[2],f32[2])) parameter(0) + ROOT result = pred[] constant(true) +} + +ENTRY entry { + const_0 = f32[2] constant({1, 2}) + const_1 = (f32[2], f32[2]) constant((f32[2], f32[2]) ({2, 1},{3,1})) + while_init = (f32[2],(f32[2],f32[2])) tuple(const_0, const_1) + ROOT while = (f32[2],(f32[2],f32[2])) while(while_init), condition=condition, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TF_ASSERT_OK_AND_ASSIGN(bool
changed, + WhileLoopConstantSinking{}.Run(module.get())); + ASSERT_TRUE(changed); + + auto* while_body = module->GetComputationWithName("body"); + EXPECT_THAT(while_body->root_instruction(), + op::Tuple(op::GetTupleElement(op::Constant(), 0), + op::GetTupleElement(op::Parameter(0)))); +} + +TEST_F(WhileLoopConstantSinkingTest, DuplicateGTEs) { + // This test shows that the pass fails to optimize non-canonical IR. + // + // Even though the input IR has a constant value for p_b.2.dup, + // WhileLoopConstantSinking doesn't try to detect this. Instead, it relies on + // prior runs of HLO CSE to have commoned these identical GTE instructions. + + const char* const hlo_string = R"( +HloModule ModuleWithWhile + +body { + p_b = (f32[2],f32[2],f32[2]) parameter(0) + + p_b.1 = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_b), index=1 + p_b.2 = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_b), index=2 + p_b.2.dup = f32[2] get-tuple-element((f32[2],f32[2],f32[2]) p_b), index=2 + + add.0 = f32[2] add(p_b.1, p_b.2.dup) + ROOT root = (f32[2],f32[2],f32[2]) tuple(add.0, p_b.1, p_b.2) +} + +condition { + p_cond = (f32[2],f32[2],f32[2]) parameter(0) + ROOT result = pred[] constant(true) +} + +ENTRY entry { + const_0 = f32[2] constant({1, 2}) + const_1 = f32[2] constant({2, 1}) + const_2 = f32[2] constant({3, 1}) + while_init = (f32[2],f32[2],f32[2]) tuple(const_0, const_1, const_2) + ROOT while = (f32[2],f32[2],f32[2]) while(while_init), condition=condition, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + TF_ASSERT_OK_AND_ASSIGN(bool changed, + WhileLoopConstantSinking{}.Run(module.get())); + ASSERT_TRUE(changed); + + auto* while_body = module->GetComputationWithName("body"); + EXPECT_THAT(while_body->root_instruction(), + op::Tuple(op::Add(op::Constant(), ::testing::Not(op::Constant())), + op::GetTupleElement(op::Parameter(0)), + op::GetTupleElement(op::Parameter(0)))); +} +} // namespace +} // namespace xla diff --git 
a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 3ef0cdff6751258e4489ce350deb0931fdf69ef9..321fdeb1ea313d2bc00b0210b422f36915f41453 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -115,25 +115,6 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { } } -// Populates `gte_set` with the GetTupleElement instructions in `while_body` -// that access elements in the parameter tuple that don't change across -// iterations. Assumes `while_body` is the body computation of the while loop -// in question. -static void GatherInvariantGTEs(HloComputation* while_body, - FlatSet* gte_set) { - const HloInstruction::InstructionVector root_operands = - while_body->root_instruction()->operands(); - for (int i = 0; i < root_operands.size(); i++) { - HloInstruction* instr = root_operands[i]; - if (instr->opcode() == HloOpcode::kGetTupleElement && - instr->tuple_index() == i && - instr->operand(0) == while_body->parameter_instruction(0) && - ShapeUtil::IsArray(instr->shape())) { - InsertOrDie(gte_set, instr); - } - } -} - static StatusOr TryHoistingInvariantInstructionsFromWhileBody( HloInstruction* while_instr) { auto print_no_metadata = HloPrintOptions{}.set_print_metadata(false); @@ -172,7 +153,13 @@ static StatusOr TryHoistingInvariantInstructionsFromWhileBody( // unhoisted_invariant_instructions -- they can be legally hoisted, but there // is no benefit to hoisting them unless something that uses it is also // hoisted. - GatherInvariantGTEs(while_body, &unhoisted_invariant_instructions); + for (auto* instr : WhileUtil::GetInvariantGTEsForWhileBody(*while_body)) { + if (ShapeUtil::IsArray(instr->shape())) { + // TODO(b/79147885): We should try to generalize this to tuples for + // uniformity's sake, if nothing else. 
+ InsertOrDie(&unhoisted_invariant_instructions, instr); + } + } if (unhoisted_invariant_instructions.empty()) { // There are no obviously loop invariant elements in the state being diff --git a/tensorflow/compiler/xla/service/while_util.cc b/tensorflow/compiler/xla/service/while_util.cc index bd0794184328b7926543c4275b3b915f51e7b812..ed20b36292a7f24385603627d74fc72ba6b3b724 100644 --- a/tensorflow/compiler/xla/service/while_util.cc +++ b/tensorflow/compiler/xla/service/while_util.cc @@ -244,4 +244,21 @@ static Shape MakeLoopStateShape(const WhileUtil::LoopStateTy& init_values) { } return result; } + +/*static*/ std::vector WhileUtil::GetInvariantGTEsForWhileBody( + const HloComputation& while_body) { + std::vector result; + const HloInstruction::InstructionVector root_operands = + while_body.root_instruction()->operands(); + for (int i = 0; i < root_operands.size(); i++) { + HloInstruction* instr = root_operands[i]; + if (instr->opcode() == HloOpcode::kGetTupleElement && + instr->tuple_index() == i && + instr->operand(0) == while_body.parameter_instruction(0)) { + result.push_back(instr); + } + } + return result; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/while_util.h b/tensorflow/compiler/xla/service/while_util.h index 1688d4674269c36c5b356f262dbd5d958572e101..322d27b88cae60cb051f5fafdde70e2aafedbc1e 100644 --- a/tensorflow/compiler/xla/service/while_util.h +++ b/tensorflow/compiler/xla/service/while_util.h @@ -74,6 +74,12 @@ class WhileUtil { HloComputation* computation, int32 trip_count, const LoopStateTy& init_values, const LoopBodyGeneratorTy& loop_body_generator); + + // Returns the GetTupleElement instructions in `while_body` that access + // elements in the parameter tuple that don't change across iterations. + // Assumes `while_body` is the body computation of the while loop in question. 
+ static std::vector GetInvariantGTEsForWhileBody( + const HloComputation& while_body); }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/while_util_test.cc b/tensorflow/compiler/xla/service/while_util_test.cc index cf0d0db99bd92b6b364b4e28e56a0902d4065963..974bc542a34d0af6d41ed29f36df87f4c164a360 100644 --- a/tensorflow/compiler/xla/service/while_util_test.cc +++ b/tensorflow/compiler/xla/service/while_util_test.cc @@ -126,5 +126,42 @@ TEST(WhileUtilTest, MakeTwoInstructionsLive) { op::GetTupleElement(op::Parameter(0), 3))); } +TEST(WhileUtilTest, GetInvariantGTEsForWhileBody) { + const char* const hlo_string = R"( +HloModule ModuleWithWhile + +body { + param.b = (s32[], s32[]) parameter(0) + gte.0 = s32[] get-tuple-element(param.b), index=0 + gte.1 = s32[] get-tuple-element(param.b), index=1 + add = s32[] add(gte.0, gte.1) + ROOT tuple = (s32[], s32[]) tuple(gte.0, add) +} + +cond { + param.c = (s32[], s32[]) parameter(0) + ROOT constant = pred[] constant(true) +} + +ENTRY main { + init = (s32[], s32[]) parameter(0) + ROOT while = (s32[], s32[]) while(init), condition=cond, body=body +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + HloComputation* while_body = module->GetComputationWithName("body"); + + ASSERT_NE(while_body, nullptr) + << "Expected exactly one while_body computation"; + + std::vector gte_list = + WhileUtil::GetInvariantGTEsForWhileBody(*while_body); + + ASSERT_EQ(gte_list.size(), 1); + EXPECT_EQ((*gte_list.begin())->name(), "gte.0"); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc index a4e67cc9d9b8eea195ec409e8c502c5c2fbe1352..f5331280ee9f252aa5717baab88f2c203be5c372 100644 --- a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc +++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination_test.cc @@ -19,7 +19,6 @@ limitations 
under the License. #include #include -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" diff --git a/tensorflow/compiler/xla/service_interface.h b/tensorflow/compiler/xla/service_interface.h index 5b44c26b7c7b082556d9533cf3b3b1b98e5e4b09..4f64fe8f835017c3c7093988ae947fe21c377406 100644 --- a/tensorflow/compiler/xla/service_interface.h +++ b/tensorflow/compiler/xla/service_interface.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_INTERFACE_H_ #include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" namespace xla { diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index d58baa3220a73fcb09354bd16465629bc73fd428..7a897f6f8f99e65285e1be0757a55f703fc81c72 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -32,6 +32,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/iterator_range.h" #include "tensorflow/core/lib/gtl/optional.h" +#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -510,7 +511,7 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { break; } else if (must_end) { return InvalidArgument("Expected end of tuple; got: \"%s\"", - s->ToString().c_str()); + std::string(*s).c_str()); } shapes.emplace_back(); TF_ASSIGN_OR_RETURN(shapes.back(), ParseShapeStringInternal(s)); @@ -540,7 +541,7 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { if (!tensorflow::strings::safe_strto64(input.c_str(), &element)) { return InvalidArgument( "Invalid s64 value in parsed shape string: \"%s\" in \"%s\"", - input.c_str(), s->ToString().c_str()); + input.c_str(), std::string(*s).c_str()); } return element; }; @@ -593,7 +594,7 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { } return InvalidArgument("Invalid shape string to parse: \"%s\"", - s->ToString().c_str()); + std::string(*s).c_str()); } } // namespace @@ -602,7 +603,7 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { TF_ASSIGN_OR_RETURN(Shape shape, ParseShapeStringInternal(&s)); if (!s.empty()) { return InvalidArgument("Invalid shape string to parse: \"%s\"", - s.ToString().c_str()); + std::string(s).c_str()); } return shape; } @@ -1472,4 +1473,26 @@ std::ostream& operator<<(std::ostream& out, const Shape& shape) { return out; } +/*static*/ size_t ShapeUtil::Hash(const Shape& shape) { + using tensorflow::hash; + using tensorflow::Hash64Combine; + + size_t hash_value = hash()(shape.element_type()); + + if (shape.tuple_shapes().empty()) { + for (int64 dim : shape.dimensions()) { + hash_value = Hash64Combine(hash_value, hash()(dim)); + } + + hash_value = Hash64Combine(hash_value, 
LayoutUtil::Hash(shape.layout())); + } else { + hash_value = 0; + for (const Shape& subshape : shape.tuple_shapes()) { + hash_value = Hash64Combine(hash_value, ShapeUtil::Hash(subshape)); + } + } + + return hash_value; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 5fa728e7c2fa5faf6ba347198fdc99e56ca4c324..cb8bf5a2b9e5d06f73e2116ed08630249ae8f970 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -650,6 +650,9 @@ class ShapeUtil { .ok()); } + // Compute a hash for `shape`. + static size_t Hash(const Shape& shape); + private: // Validates all of the non-layout properties of the shape -- this is a helper // used by both the layout-optional and layout-required public method. diff --git a/tensorflow/compiler/xla/statusor.h b/tensorflow/compiler/xla/statusor.h index cccbce5fc83af87396f4d51eb9e785cea93aba0b..0e1387c93938fa520562fcd63ac107a82b089a51 100644 --- a/tensorflow/compiler/xla/statusor.h +++ b/tensorflow/compiler/xla/statusor.h @@ -13,13 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// StatusOr is the union of a Status object and a T -// object. StatusOr models the concept of an object that is either a -// usable value, or an error Status explaining why such a value is -// not present. To this end, StatusOr does not allow its Status -// value to be Status::OK. Furthermore, the value of a StatusOr -// must not be null. This is enforced by a debug check in most cases, -// but even when it is not, clients must not set the value to null. +// StatusOr is the union of a Status object and a T object. StatusOr models +// the concept of an object that is either a value, or an error Status +// explaining why such a value is not present. To this end, StatusOr does not +// allow its Status value to be Status::OK. 
// // The primary use-case for StatusOr is as the return value of a // function which may fail. diff --git a/tensorflow/compiler/xla/statusor_test.cc b/tensorflow/compiler/xla/statusor_test.cc index f9d25945bc617507735fb6c4d011c39723497f69..7d76370e85d57fd6e27ee2d1ca1df068ccb5405a 100644 --- a/tensorflow/compiler/xla/statusor_test.cc +++ b/tensorflow/compiler/xla/statusor_test.cc @@ -75,6 +75,14 @@ TEST(StatusOr, ElementType) { static_assert(std::is_same::element_type, char>(), ""); } +TEST(StatusOr, NullPointerStatusOr) { + // As a very special case, null-plain-pointer StatusOr used to be an + // error. Test that it no longer is. + StatusOr null_status(nullptr); + EXPECT_TRUE(null_status.ok()); + EXPECT_EQ(null_status.ValueOrDie(), nullptr); +} + TEST(StatusOr, TestNoDefaultConstructorInitialization) { // Explicitly initialize it with an error code. StatusOr statusor(tensorflow::errors::Cancelled("")); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 840292010d50fde3d36983de9f6f4f0e4cfc7ed6..b982cf0dbc4ed00b9c0b0d98c1ec4e5584860717 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -632,6 +632,7 @@ xla_test( "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", @@ -1866,7 +1867,10 @@ xla_test( xla_test( name = "local_client_execute_test", + # TODO(b/79375911): Test times out in LLVM at normal size. 
+ size = "large", srcs = ["local_client_execute_test.cc"], + shard_count = 30, tags = ["optonly"], deps = [ "//tensorflow/compiler/xla:literal_util", @@ -1932,24 +1936,6 @@ xla_test( ], ) -xla_test( - name = "set_return_value_test", - srcs = ["set_return_value_test.cc"], - deps = [ - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla/client:computation_builder", - "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/compiler/xla/client/xla_client:xla_builder", - "//tensorflow/compiler/xla/client/xla_client:xla_computation", - "//tensorflow/compiler/xla/tests:client_library_test_base", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:lib", - "//tensorflow/core:test", - ], -) - xla_test( name = "reshape_motion_test", srcs = ["reshape_motion_test.cc"], diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 22660c35dcaa0ebbb553aa2d5e2412043a2bb300..41f9a5f66649dd0d697287c5e2af322fc63c1396 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -94,27 +94,13 @@ string ClientLibraryTestBase::TestName() const { return ::testing::UnitTest::GetInstance()->current_test_info()->name(); } -template StatusOr> ClientLibraryTestBase::Execute( - BuilderT* builder, tensorflow::gtl::ArraySlice arguments) { + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { // Build the computation, as a convenience. 
TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); return client_->Execute(computation, arguments, &execution_options_); } -StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( - const Computation& computation, - tensorflow::gtl::ArraySlice arguments, - const Shape* shape_with_output_layout) { - ExecutionOptions execution_options = execution_options_; - if (shape_with_output_layout != nullptr) { - *execution_options.mutable_shape_with_output_layout() = - *shape_with_output_layout; - } - return client_->ExecuteAndTransfer(computation, arguments, - &execution_options); -} - StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( const XlaComputation& computation, tensorflow::gtl::ArraySlice arguments, @@ -128,17 +114,6 @@ StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( &execution_options); } -template <> -StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments, - const Shape* shape_with_output_layout) { - // Build the computation, as a convenience. 
- TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); - return ExecuteAndTransfer(computation, arguments, shape_with_output_layout); -} - -template <> StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout) { @@ -162,18 +137,6 @@ ClientLibraryTestBase::ExecuteAndTransferReference( &execution_options); } -std::unique_ptr ClientLibraryTestBase::ExecuteOrDie( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments) { - return Execute(builder, arguments).ConsumeValueOrDie(); -} - -std::unique_ptr ClientLibraryTestBase::ExecuteAndTransferOrDie( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments) { - return ExecuteAndTransfer(builder, arguments).ConsumeValueOrDie(); -} - string ClientLibraryTestBase::ExecuteToString( XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { auto computation_status = builder->Build(); @@ -191,32 +154,6 @@ string ClientLibraryTestBase::ExecuteToString( } } -string ClientLibraryTestBase::ExecuteToString( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments) { - auto computation_status = builder->Build(); - if (!computation_status.ok()) { - return computation_status.status().ToString(); - } - auto computation = computation_status.ConsumeValueOrDie(); - - auto result = - client_->ExecuteAndTransfer(computation, arguments, &execution_options_); - if (!result.ok()) { - return result.status().ToString(); - } else { - return result.ValueOrDie()->ToString(); - } -} - -void ClientLibraryTestBase::ComputeAndCompareR1( - ComputationBuilder* builder, const tensorflow::core::Bitmap& expected, - tensorflow::gtl::ArraySlice arguments) { - std::unique_ptr expected_literal = Literal::CreateR1(expected); - ClientLibraryTestBase::ComputeAndCompareLiteral(builder, *expected_literal, - arguments); -} - void ClientLibraryTestBase::ComputeAndCompareR1( XlaBuilder* builder, const tensorflow::core::Bitmap& 
expected, tensorflow::gtl::ArraySlice arguments) { @@ -225,18 +162,16 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments); } -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout) { EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, shape_with_layout)); } -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout) { EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, @@ -245,7 +180,7 @@ void ClientLibraryTestBase::ComputeAndCompareLiteral( tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithAllOutputLayouts( - const xla::Computation& computation, const Literal& expected, + const xla::XlaComputation& computation, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const std::function& verify_output) { @@ -271,7 +206,7 @@ ClientLibraryTestBase::ComputeAndCompareLiteralWithAllOutputLayouts( tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts( - const xla::Computation& computation, const Literal& expected, + const xla::XlaComputation& computation, const Literal& /*expected*/, tensorflow::gtl::ArraySlice arguments, const std::function& verify_output, @@ -334,28 +269,8 @@ ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts( return choose(0); } -tensorflow::Status -ClientLibraryTestBase::ComputeAndCompareLiteralWithAllOutputLayouts( - const xla::XlaComputation& /*computation*/, const Literal& /*expected*/, - tensorflow::gtl::ArraySlice /*arguments*/, - const std::function& /*verify_output*/) { - return Unimplemented("not yet implemented for XlaComputation"); -} - 
-tensorflow::Status -ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts( - const xla::XlaComputation& /*computation*/, const Literal& /*expected*/, - tensorflow::gtl::ArraySlice /*arguments*/, - const std::function& /*verify_output*/, - const Shape* /*output_with_layout*/) { - return Unimplemented("not yet implemented for XlaComputation"); -} - -template tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments_passed_in, const Shape* shape_with_layout) { std::vector arguments(arguments_passed_in.begin(), @@ -412,9 +327,8 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( return tensorflow::Status::OK(); } -template tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments_passed_in, ErrorSpec error, const Shape* shape_with_layout) { std::vector arguments(arguments_passed_in.begin(), @@ -484,9 +398,8 @@ void ClientLibraryTestBase::ComputeAndCompareR1U8( EXPECT_EQ(expected, actual->GetR1U8AsString()); } -template void ClientLibraryTestBase::ComputeAndCompareTuple( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments) { auto actual_status = ExecuteAndTransfer(builder, arguments); EXPECT_IS_OK(actual_status.status()); @@ -497,9 +410,8 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectEqual(expected, *actual); } -template void ClientLibraryTestBase::ComputeAndCompareTuple( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { auto actual_status = ExecuteAndTransfer(builder, arguments); EXPECT_IS_OK(actual_status.status()); 
@@ -510,60 +422,6 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectNear(expected, *actual, error); } -void ClientLibraryTestBase::ComputeAndCompare( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments) { - auto status_or_data = ComputeValueAndReference(builder, operand, arguments); - EXPECT_IS_OK(status_or_data); - if (!status_or_data.ok()) { - return; - } - std::unique_ptr reference, result; - std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); - LiteralTestUtil::ExpectEqual(*reference, *result); -} - -void ClientLibraryTestBase::ComputeAndCompare( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { - auto status_or_data = ComputeValueAndReference(builder, operand, arguments); - EXPECT_IS_OK(status_or_data); - if (!status_or_data.ok()) { - return; - } - std::unique_ptr reference, result; - std::tie(reference, result) = status_or_data.ConsumeValueOrDie(); - LiteralTestUtil::ExpectNear(*reference, *result, error); -} - -StatusOr, std::unique_ptr>> -ClientLibraryTestBase::ComputeValueAndReference( - ComputationBuilder* builder, const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments) { - // Transfer the arguments to the executor service. We put the unique_ptr's - // into a vector to keep the data alive on the service until the end of this - // function. - std::vector> argument_data; - for (const auto& arg : arguments) { - TF_ASSIGN_OR_RETURN(auto data, client_->TransferToServer(arg)); - argument_data.push_back(std::move(data)); - } - - // Create raw pointers to the GlobalData for the rest of the call stack. 
- std::vector argument_data_ptr; - std::transform( - argument_data.begin(), argument_data.end(), - std::back_inserter(argument_data_ptr), - [](const std::unique_ptr& data) { return data.get(); }); - - TF_ASSIGN_OR_RETURN( - auto reference, - builder->ComputeConstant(operand, /*output_layout=*/nullptr, arguments)); - TF_ASSIGN_OR_RETURN(auto result, - ExecuteAndTransfer(builder, argument_data_ptr)); - return std::make_pair(std::move(reference), std::move(result)); -} - void ClientLibraryTestBase::ComputeAndCompare( XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { auto status_or_data = ComputeValueAndReference(builder, arguments); @@ -651,8 +509,8 @@ XlaComputation ClientLibraryTestBase::CreateScalarMax() { return computation_status.ConsumeValueOrDie(); } -Computation ClientLibraryTestBase::CreateScalarReluSensitivity() { - ComputationBuilder builder(client_, "relu_sensitivity"); +XlaComputation ClientLibraryTestBase::CreateScalarReluSensitivity() { + XlaBuilder builder("relu_sensitivity"); auto shape = ShapeUtil::MakeShape(use_bfloat16_ ? 
BF16 : F32, {}); auto activation = builder.Parameter(0, shape, "activation"); auto backprop = builder.Parameter(1, shape, "backprop"); @@ -693,14 +551,6 @@ ClientLibraryTestBase::CreatePatternedMatrixWithZeroPadding(int rows, int cols, return array; } -ComputationDataHandle ClientLibraryTestBase::AddParam( - const Literal& argument, ComputationBuilder* builder) { - ComputationDataHandle data_handle; - arguments_.push_back(CreateParameterAndTransferLiteral( - arguments_.size(), argument, "", builder, &data_handle)); - return data_handle; -} - XlaOp ClientLibraryTestBase::AddParam(const Literal& argument, XlaBuilder* builder) { XlaOp data_handle; @@ -709,59 +559,39 @@ XlaOp ClientLibraryTestBase::AddParam(const Literal& argument, return data_handle; } -ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral( - const Literal& literal, ComputationBuilder* builder) { - return builder->ConstantLiteral( - use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal); -} - XlaOp ClientLibraryTestBase::CreateConstantFromLiteral(const Literal& literal, XlaBuilder* builder) { return builder->ConstantLiteral( use_bfloat16_ ? 
*LiteralTestUtil::ConvertF32ToBF16(literal) : literal); } -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, - const Shape* shape_with_layout); - -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - XlaBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, - const Shape* shape_with_layout); - -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error, - const Shape* shape_with_layout); - -template void ClientLibraryTestBase::ComputeAndCompareLiteral( - XlaBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error, - const Shape* shape_with_layout); - -template void ClientLibraryTestBase::ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments); - -template void ClientLibraryTestBase::ComputeAndCompareTuple( - XlaBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments); - -template void ClientLibraryTestBase::ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - -template void ClientLibraryTestBase::ComputeAndCompareTuple( - XlaBuilder* builder, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - -template StatusOr> ClientLibraryTestBase::Execute( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments); +std::unique_ptr +ClientLibraryTestBase::CreateParameterAndTransferLiteral(int64 parameter_number, + const Literal& literal, + const string& name, + XlaBuilder* builder, + XlaOp* data_handle) { + return CreateParameterAndTransferLiteral(parameter_number, literal, name, + nullptr, builder, data_handle); +} -template StatusOr> 
ClientLibraryTestBase::Execute( - XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments); +std::unique_ptr +ClientLibraryTestBase::CreateParameterAndTransferLiteral( + int64 parameter_number, const Literal& literal, const string& name, + const DeviceHandle* device_handle, XlaBuilder* builder, + XlaOp* data_handle) { + const Literal* param_literal = &literal; + std::unique_ptr converted_literal; + if (use_bfloat16_) { + converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal); + param_literal = converted_literal.get(); + } + std::unique_ptr data = + client_->TransferToServer(*param_literal, device_handle) + .ConsumeValueOrDie(); + *data_handle = + builder->Parameter(parameter_number, param_literal->shape(), name); + return data; +} } // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 32eea7c2f3a65d2b4a83435ec6258ea9cf6aaf6a..16e838e60ffbd7b22878ac21c760ade599f33594 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -25,10 +25,9 @@ limitations under the License. #include "tensorflow/compiler/xla/array3d.h" #include "tensorflow/compiler/xla/array4d.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -91,21 +90,11 @@ class ClientLibraryTestBase : public ::testing::Test { // Convenience methods for building and running a computation with the member // execution options. 
Modify execution_options_ in your test if you want to // customize the options. - template StatusOr> Execute( - BuilderT* builder, tensorflow::gtl::ArraySlice arguments); + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments); - // TODO(b/74197823): Remove the template type 'BuilderT' in all methods once - // the migration to XlaBuilder is complete. - - template StatusOr> ExecuteAndTransfer( - BuilderT* builder, tensorflow::gtl::ArraySlice arguments, - const Shape* shape_with_output_layout = nullptr); - - StatusOr> ExecuteAndTransfer( - const Computation& computation, - tensorflow::gtl::ArraySlice arguments, + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout = nullptr); StatusOr> ExecuteAndTransfer( @@ -121,101 +110,90 @@ class ClientLibraryTestBase : public ::testing::Test { tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout = nullptr); - // Convenience OrDie variants of above methods. - std::unique_ptr ExecuteOrDie( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments); - std::unique_ptr ExecuteAndTransferOrDie( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments); - // Run a computation and return its value as a string. If an error // occurs, then instead return the error as a string. string ExecuteToString(XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments); - string ExecuteToString(ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments); // Convenience methods for building and running a computation, transferring // the result, and comparing it to the expected value(s). Methods are // templated on the native host type which maps to specific XLA types (See - // ComputationBuilder/XlaBuilder for details). For each rank, two forms are + // XlaBuilder for details). For each rank, two forms are // provided: one for floating point types with an ErrorSpec parameter, and one // for integral types without the ErrorSpec parameter. 
- template - void ComputeAndCompareR0(BuilderT* builder, NativeT expected, + template + void ComputeAndCompareR0(XlaBuilder* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR0(BuilderT* builder, NativeT expected, + template + void ComputeAndCompareR0(XlaBuilder* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR1(BuilderT* builder, + template + void ComputeAndCompareR1(XlaBuilder* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR1(BuilderT* builder, + template + void ComputeAndCompareR1(XlaBuilder* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); // As above, but uses a bitmap to hold the predicate vector to avoid // deficiencies of vector. - void ComputeAndCompareR1(ComputationBuilder* builder, - const tensorflow::core::Bitmap& expected, - tensorflow::gtl::ArraySlice arguments); void ComputeAndCompareR1(XlaBuilder* builder, const tensorflow::core::Bitmap& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR2(BuilderT* builder, const Array2D& expected, + template + void ComputeAndCompareR2(XlaBuilder* builder, + const Array2D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR2(BuilderT* builder, const Array2D& expected, + template + void ComputeAndCompareR2(XlaBuilder* builder, + const Array2D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR3(BuilderT* builder, const Array3D& expected, + template + void ComputeAndCompareR3(XlaBuilder* builder, + const Array3D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR3(BuilderT* builder, const Array3D& expected, + template + void ComputeAndCompareR3(XlaBuilder* builder, + const Array3D& expected, 
tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR4(BuilderT* builder, const Array4D& expected, + template + void ComputeAndCompareR4(XlaBuilder* builder, + const Array4D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR4(BuilderT* builder, const Array4D& expected, + template + void ComputeAndCompareR4(XlaBuilder* builder, + const Array4D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); // Build and run the computation and compare the result with the given // literal. shape_with_layout indicates the result layout to request when // calling Execute. - template void ComputeAndCompareLiteral( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout = nullptr); - template void ComputeAndCompareLiteral( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout = nullptr); // ComputeAndCompare variant which returns an error status. - template tensorflow::Status ComputeAndCompareLiteralWithStatus( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout = nullptr); - template tensorflow::Status ComputeAndCompareLiteralWithStatus( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout = nullptr); @@ -227,25 +205,13 @@ class ClientLibraryTestBase : public ::testing::Test { // Convenience method for running a built computation, transferring the // result, and comparing it to the expected tuple literal. 
- template void ComputeAndCompareTuple( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments); - template void ComputeAndCompareTuple( - BuilderT* builder, const Literal& expected, + XlaBuilder* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - // Convenience method for running a built computation and comparing the result - // with the HloEvaluator. - void ComputeAndCompare(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments); - void ComputeAndCompare(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments, - ErrorSpec error); - // Convenience method for running a built computation and comparing the result // with the reference result. void ComputeAndCompare(XlaBuilder* builder, @@ -257,7 +223,7 @@ class ClientLibraryTestBase : public ::testing::Test { // Create scalar operations for use in reductions. XlaComputation CreateScalarRelu(); XlaComputation CreateScalarMax(); - Computation CreateScalarReluSensitivity(); + XlaComputation CreateScalarReluSensitivity(); // Special case convenience functions for creating filled arrays. @@ -297,34 +263,25 @@ class ClientLibraryTestBase : public ::testing::Test { // server, then stores into "data_handle" the global handle for that // parameter. When the use_bfloat16 flag is set but the literal has F32 // elements, the literal will be converted to BF16 before being transferred. - template std::unique_ptr CreateParameterAndTransferLiteral( int64 parameter_number, const Literal& literal, const string& name, - BuilderT* builder, HandleT* data_handle); + XlaBuilder* builder, XlaOp* data_handle); // As above, but the caller can specify the device that the literal is // transferred to. If device_handle is nullptr, the literal will be // transferred to the default device. 
- template std::unique_ptr CreateParameterAndTransferLiteral( int64 parameter_number, const Literal& literal, const string& name, - const DeviceHandle* device_handle, BuilderT* builder, - HandleT* data_handle); + const DeviceHandle* device_handle, XlaBuilder* builder, + XlaOp* data_handle); // Creates a parameter instruction and sets the value that will be passed to // the computation as specified. This function must be used for all parameters // or none and no parameters must be passed when invoking the computation if // using this mechanism. If using this mechanism, then each parameter must be // set exactly once. The first added parameter gets index 0, then 1 and so on. - ComputationDataHandle AddParam(const Literal& argument, - ComputationBuilder* builder); XlaOp AddParam(const Literal& argument, XlaBuilder* builder); - template - ComputationDataHandle AddParam(const Array& argument, - ComputationBuilder* builder) { - return AddParam(*Literal::CreateFromArray(argument), builder); - } template XlaOp AddParam(const Array& argument, XlaBuilder* builder) { return AddParam(*Literal::CreateFromArray(argument), builder); @@ -333,18 +290,11 @@ class ClientLibraryTestBase : public ::testing::Test { // Creates a constant instruction with the given literal. When the // use_bfloat16 flag is set but the literal has F32 elements, the elements // will be converted to BF16s. - ComputationDataHandle CreateConstantFromLiteral(const Literal& literal, - ComputationBuilder* builder); XlaOp CreateConstantFromLiteral(const Literal& literal, XlaBuilder* builder); // Creates a constant instruction with the given array. When the use_bfloat16 // flag is set but the array has float elements, the elements will be // converted to bfloat16s. 
- template - ComputationDataHandle CreateConstantFromArray(const Array& array, - ComputationBuilder* builder) { - return CreateConstantFromLiteral(*Literal::CreateFromArray(array), builder); - } template XlaOp CreateConstantFromArray(const Array& array, @@ -353,13 +303,6 @@ class ClientLibraryTestBase : public ::testing::Test { } // Same as CreateConstantFromArray, but for scalars. - template - ComputationDataHandle CreateConstantFromScalar(NativeT value, - ComputationBuilder* builder) { - return CreateConstantFromLiteral(*Literal::CreateR0(value), - builder); - } - template XlaOp CreateConstantFromScalar(NativeT value, XlaBuilder* builder) { return CreateConstantFromLiteral(*Literal::CreateR0(value), @@ -374,12 +317,12 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR0Parameter(NativeT value, int64 parameter_number, const string& name, - BuilderT* builder, - HandleT* data_handle); + XlaBuilder* builder, + XlaOp* data_handle); // Creates a parameter instruction that wraps the given values and then stores // into "data_handle" the global handle for that parameter. @@ -389,10 +332,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR1Parameter( tensorflow::gtl::ArraySlice values, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle); + const string& name, XlaBuilder* builder, XlaOp* data_handle); // Creates a parameter instruction that wraps the given constant array // "array_2d" and then stores to "data_handle" the global handle for that @@ -403,10 +346,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. 
- template + template std::unique_ptr CreateR2Parameter( const Array2D& array_2d, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle); + const string& name, XlaBuilder* builder, XlaOp* data_handle); // Creates a parameter instruction that wraps the given constant array // "array_3d" and then stores to "data_handle" the global handle for that @@ -417,10 +360,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR3Parameter( const Array3D& array_3d, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle); + const string& name, XlaBuilder* builder, XlaOp* data_handle); // Getter and setter for the use_bfloat16 flag, which indicates whether to run // tests with all float-type input/output converted to bfloat16. @@ -435,21 +378,6 @@ class ClientLibraryTestBase : public ::testing::Test { ExecutionOptions execution_options_; private: - // Build and run the computation with all permutations of output layouts. - tensorflow::Status ComputeAndCompareLiteralWithAllOutputLayouts( - const xla::Computation& computation, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, - const std::function& verify_output); - // Build and run the computation with all permutations of layouts of all input - // arguments. 
- tensorflow::Status ComputeAndCompareLiteralWithAllInputLayouts( - const xla::Computation& computation, const Literal& expected, - tensorflow::gtl::ArraySlice arguments, - const std::function& verify_output, - const Shape* output_with_layout = nullptr); - tensorflow::Status ComputeAndCompareLiteralWithAllOutputLayouts( const xla::XlaComputation& computation, const Literal& expected, tensorflow::gtl::ArraySlice arguments, @@ -462,13 +390,6 @@ class ClientLibraryTestBase : public ::testing::Test { const string& error_message)>& verify_output, const Shape* output_with_layout = nullptr); - // Executes the computation and calculates the expected reference value using - // the HloEvaluator. Returns two literals in the order of (expected, actual). - StatusOr, std::unique_ptr>> - ComputeValueAndReference(ComputationBuilder* builder, - const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice arguments); - // Executes the computation and calculates the expected reference value using // the reference client. Returns two literals in the order of (expected, // actual). 
@@ -484,9 +405,9 @@ class ClientLibraryTestBase : public ::testing::Test { std::vector> arguments_; }; -template +template void ClientLibraryTestBase::ComputeAndCompareR0( - BuilderT* builder, NativeT expected, + XlaBuilder* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR0(expected); @@ -494,9 +415,9 @@ void ClientLibraryTestBase::ComputeAndCompareR0( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR0( - BuilderT* builder, NativeT expected, + XlaBuilder* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -510,9 +431,9 @@ void ClientLibraryTestBase::ComputeAndCompareR0( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR1( - BuilderT* builder, tensorflow::gtl::ArraySlice expected, + XlaBuilder* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR1(expected); @@ -520,9 +441,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR1( - BuilderT* builder, tensorflow::gtl::ArraySlice expected, + XlaBuilder* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -536,9 +457,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR2( - BuilderT* builder, const Array2D& expected, + XlaBuilder* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR2FromArray2D(expected); @@ -546,9 +467,9 @@ void ClientLibraryTestBase::ComputeAndCompareR2( arguments); } -template +template void 
ClientLibraryTestBase::ComputeAndCompareR2( - BuilderT* builder, const Array2D& expected, + XlaBuilder* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -562,9 +483,9 @@ void ClientLibraryTestBase::ComputeAndCompareR2( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR3( - BuilderT* builder, const Array3D& expected, + XlaBuilder* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR3FromArray3D(expected); @@ -572,9 +493,9 @@ void ClientLibraryTestBase::ComputeAndCompareR3( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR3( - BuilderT* builder, const Array3D& expected, + XlaBuilder* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -588,9 +509,9 @@ void ClientLibraryTestBase::ComputeAndCompareR3( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR4( - BuilderT* builder, const Array4D& expected, + XlaBuilder* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR4FromArray4D(expected); @@ -598,9 +519,9 @@ void ClientLibraryTestBase::ComputeAndCompareR4( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR4( - BuilderT* builder, const Array4D& expected, + XlaBuilder* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -614,10 +535,10 @@ void ClientLibraryTestBase::ComputeAndCompareR4( arguments, error); } -template +template std::unique_ptr ClientLibraryTestBase::CreateR0Parameter( NativeT value, int64 parameter_number, const string& name, - BuilderT* builder, HandleT* data_handle) { + 
XlaBuilder* builder, XlaOp* data_handle) { std::unique_ptr literal = Literal::CreateR0(value); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -628,10 +549,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR0Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR1Parameter( tensorflow::gtl::ArraySlice values, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle) { + const string& name, XlaBuilder* builder, XlaOp* data_handle) { std::unique_ptr literal = Literal::CreateR1(values); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -642,10 +563,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR1Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( const Array2D& array_2d, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle) { + const string& name, XlaBuilder* builder, XlaOp* data_handle) { std::unique_ptr literal = Literal::CreateR2FromArray2D(array_2d); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -656,10 +577,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR3Parameter( const Array3D& array_3d, int64 parameter_number, - const string& name, BuilderT* builder, HandleT* data_handle) { + const string& name, XlaBuilder* builder, XlaOp* data_handle) { std::unique_ptr literal = Literal::CreateR3FromArray3D(array_3d); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -695,37 +616,6 @@ std::unique_ptr> ClientLibraryTestBase::CreatePseudorandomR2( return result; } -template -std::unique_ptr 
-ClientLibraryTestBase::CreateParameterAndTransferLiteral(int64 parameter_number, - const Literal& literal, - const string& name, - BuilderT* builder, - HandleT* data_handle) { - return CreateParameterAndTransferLiteral(parameter_number, literal, name, - nullptr, builder, data_handle); -} - -template -std::unique_ptr -ClientLibraryTestBase::CreateParameterAndTransferLiteral( - int64 parameter_number, const Literal& literal, const string& name, - const DeviceHandle* device_handle, BuilderT* builder, - HandleT* data_handle) { - const Literal* param_literal = &literal; - std::unique_ptr converted_literal; - if (use_bfloat16_) { - converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal); - param_literal = converted_literal.get(); - } - std::unique_ptr data = - client_->TransferToServer(*param_literal, device_handle) - .ConsumeValueOrDie(); - *data_handle = - builder->Parameter(parameter_number, param_literal->shape(), name); - return data; -} - } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_CLIENT_LIBRARY_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 6b3efba4f80e45d230d3df9274d0fd40c6fb8c42..efa5aed2d1af8ede6de7ec5273bc229be8901f1c 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -798,5 +798,250 @@ XLA_TYPED_TEST(DotOperationTest_F16F32F64, this->error_spec_); } +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + XlaBuilder builder(TestName()); + auto lhs_constant = 
builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{96.0, 105.0, 114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{105.0}, {105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstRHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{105.0, 105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstLHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{96.0}, {105.0}, {114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{126.0, 129.0, 132.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{129.0}, {129.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{56.0, 168.0, 91.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + XlaBuilder builder(TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{168.0}, {168.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/filecheck.cc b/tensorflow/compiler/xla/tests/filecheck.cc index a5f6872c46c7800b8b76a571a2546795f8814fb5..93d1c921c4a138cda55ed7338b8e3aa82518d114 100644 --- a/tensorflow/compiler/xla/tests/filecheck.cc +++ b/tensorflow/compiler/xla/tests/filecheck.cc @@ -38,7 +38,7 @@ StatusOr RunFileCheck(const string& input, const string& pattern) { TF_RETURN_IF_ERROR(tensorflow::WriteStringToFile(env, pattern_path, pattern)); // Invoke FileCheck to check whether input matches `pattern`. 
- const char* file_check_path_suffix = "external/llvm/FileCheck"; + const char* file_check_path_suffix = "org_tensorflow/external/llvm/FileCheck"; string file_check_path; if (const char* test_srcdir = getenv("TEST_SRCDIR")) { file_check_path = JoinPath(test_srcdir, file_check_path_suffix); @@ -66,6 +66,11 @@ StatusOr RunFileCheck(const string& input, const string& pattern) { // the error message generated by FileCheck and the inputs. bool succeeded = (exit_status == 0); if (!succeeded) { + LOG(WARNING) << "Tried to execute FileCheck at " << file_check_path; + if (!env->FileExists(file_check_path).ok()) { + LOG(WARNING) << "NOTE: FileCheck binary does not exist!"; + } + LOG(WARNING) << "FileCheck error: " << standard_error; LOG(WARNING) << "FileCheck input was:"; XLA_LOG_LINES(tensorflow::WARNING, input); diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 8b64f2e6315bc424b4a0eeb266e2e5ff16efdf6e..12598579c7032e954c4a4875ab8e6475b112f5ae 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -95,7 +95,10 @@ HloTestBase::HloTestBase(se::Platform* test_platform, /* static */ std::unique_ptr HloTestBase::CreateNewModule(const string& name) { HloModuleConfig config; - config.set_debug_options(GetDebugOptionsForTest()); + auto debug_options = HloTestBase::GetDebugOptionsForTest(); + debug_options.set_xla_gpu_max_kernel_unroll_factor(1); + config.set_debug_options(debug_options); + return MakeUnique(name, VersionedComputationHandle(), config); } diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 6491208895f9ecf15c938ed480f90bce0114ac30..9539ae06801628baedaea69024b7760ebefa6e3a 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -177,9 +177,13 @@ class HloTestBase : public ::testing::Test { // 'layout'. 
void ForceParameterLayout(HloModule* module, int64 param_no, const Layout& layout) { - ASSERT_LT(param_no, - module->mutable_entry_computation_layout()->parameter_count()); - module->mutable_entry_computation_layout() + ASSERT_LT( + param_no, + module->mutable_host_entry_computation_layout()->parameter_count()); + module->mutable_host_entry_computation_layout() + ->mutable_parameter_layout(param_no) + ->ResetLayout(layout); + module->mutable_device_entry_computation_layout() ->mutable_parameter_layout(param_no) ->ResetLayout(layout); } @@ -187,7 +191,10 @@ class HloTestBase : public ::testing::Test { // Convenience method to force the layout of the computation result in a // module. The result layout of 'module' is set to 'layout'. void ForceResultLayout(HloModule* module, const Layout& layout) { - module->mutable_entry_computation_layout() + module->mutable_host_entry_computation_layout() + ->mutable_result_layout() + ->ResetLayout(layout); + module->mutable_device_entry_computation_layout() ->mutable_result_layout() ->ResetLayout(layout); } @@ -195,7 +202,10 @@ class HloTestBase : public ::testing::Test { // Convenience method to clear the layout of the computation result in // 'module'. 
void ForceClearResultLayout(HloModule* module) { - module->mutable_entry_computation_layout() + module->mutable_host_entry_computation_layout() + ->mutable_result_layout() + ->Clear(); + module->mutable_device_entry_computation_layout() ->mutable_result_layout() ->Clear(); } diff --git a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc index 3023df47cda33f5d11abc921fd0355d48f761107..2c45f19c090d2690878430363bf0d20252b2f3df 100644 --- a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc +++ b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc @@ -62,8 +62,8 @@ void LLVMIRGenTestBase::CompileAheadOfTimeAndVerifyIr( std::unique_ptr hlo_module, const AotCompilationOptions& options, const string& pattern, bool match_optimized_ir) { SetIrHook(match_optimized_ir); - ASSERT_TRUE( - CompileToAotCompilationResult(std::move(hlo_module), options).ok()); + TF_ASSERT_OK( + CompileToAotCompilationResult(std::move(hlo_module), options).status()); ResetIrHook(); StatusOr filecheck_result = RunFileCheck(ir_, pattern); diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc index 3704ddd8010bf727b75ff81b63605e8b7ffe2ca8..a366afe8262e1f537b225e395bba9cb2fc22683a 100644 --- a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc +++ b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc @@ -21,7 +21,8 @@ limitations under the License. 
#include "llvm/ADT/Triple.h" #include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/types.h" @@ -29,27 +30,31 @@ limitations under the License. #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" +namespace { + using xla::string; -xla::Computation Doubler(xla::Client* client) { - xla::ComputationBuilder builder(client, "doubler"); +xla::XlaComputation Doubler() { + xla::XlaBuilder builder("doubler"); auto r0f32 = xla::ShapeUtil::MakeShape(xla::F32, {}); auto x = builder.Parameter(0, r0f32, "x"); builder.Mul(x, builder.ConstantR0(2.0)); return std::move(builder.Build().ValueOrDie()); } +} // namespace + int main(int argc, char** argv) { tensorflow::port::InitMain(argv[0], &argc, &argv); auto client = xla::ClientLibrary::GetOrCreateCompileOnlyClient().ValueOrDie(); - xla::ComputationBuilder builder(client, "aot_test_helper"); + xla::XlaBuilder builder("aot_test_helper"); auto opaque_shape = xla::ShapeUtil::MakeOpaqueShape(); auto opaque_param = builder.Parameter(0, opaque_shape, "x"); auto r0f32 = xla::ShapeUtil::MakeShape(xla::F32, {}); auto sum = builder.CustomCall("SumStructElements", {opaque_param}, r0f32); - builder.Call(Doubler(client), {sum}); + builder.Call(Doubler(), {sum}); if (argc != 2) { LOG(FATAL) << "local_client_aot_test_helper TARGET_CPU"; @@ -71,8 +76,8 @@ int main(int argc, char** argv) { llvm::Triple triple(xla::llvm_ir::AsStringRef(triple_string)); - xla::Computation computation = builder.Build().ConsumeValueOrDie(); - xla::CompileOnlyClient::AotComputationInstance instance{ + xla::XlaComputation computation = 
builder.Build().ConsumeValueOrDie(); + xla::CompileOnlyClient::AotXlaComputationInstance instance{ &computation, /*argument_layouts=*/{&opaque_shape}, &r0f32}; xla::cpu::CpuAotCompilationOptions options( diff --git a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc index 7fa61eb33c2930ac8192ac965a71122001f808d3..464cc012140d4838de88c5bf5b3b2f1372c2c19b 100644 --- a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc @@ -52,12 +52,7 @@ class MatOpsSimpleTest : public ClientLibraryTestBase {}; template class MatOpsSimpleTest_F16F32 : public MatOpsSimpleTest {}; -// TODO(bixia): This test for F16 failed on GPU 02-25-2018. -#ifdef XLA_TEST_BACKEND_GPU -TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, ::testing::Types); -#else TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, TypesF16F32); -#endif XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, ExpTwoByTwoValues) { using T = TypeParam; @@ -72,8 +67,7 @@ XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, ExpTwoByTwoValues) { Literal::CreateR2FromArray2D({{2.71828f, 1.00000f}, // row 0 {0.36788f, 1.64872f}}); // row 1 - this->template ComputeAndCompareLiteral(&builder, *expected, {}, - ErrorSpec(1e-5)); + this->ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); } XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MapTwoByTwo) { @@ -101,8 +95,7 @@ XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MapTwoByTwo) { std::unique_ptr expected = Literal::CreateR2FromArray2D({{1.5f, 0.5f}, // row 0 {-0.5f, 1.0f}}); // row 1 - this->template ComputeAndCompareLiteral(&builder, *expected, {}, - ErrorSpec(1e-5)); + this->ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); } XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MaxTwoByTwoValues) { @@ -121,8 +114,7 @@ XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MaxTwoByTwoValues) { std::unique_ptr expected = Literal::CreateR2FromArray2D({{7.0f, 6.0f}, // row 0 {3.0f, -4.0f}}); // row 1 - 
this->template ComputeAndCompareLiteral(&builder, *expected, {}, - ErrorSpec(1e-6)); + this->ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-6)); } struct TestLinspaceMaxParam { @@ -171,11 +163,8 @@ string PrintTestLinspaceMaxParam( } #ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 -// TODO(bixia): This test failed on GPU 02-25-2018 -#ifdef XLA_TEST_BACKEND_CPU XLA_TEST_P(TestLinspaceMaxParametric, TestF16) { TestImpl(); } #endif -#endif XLA_TEST_P(TestLinspaceMaxParametric, TestF32) { TestImpl(); } INSTANTIATE_TEST_CASE_P( diff --git a/tensorflow/compiler/xla/tests/set_return_value_test.cc b/tensorflow/compiler/xla/tests/set_return_value_test.cc deleted file mode 100644 index 29f79ec28a1ae6fcd5299846e85eec992ad2e46f..0000000000000000000000000000000000000000 --- a/tensorflow/compiler/xla/tests/set_return_value_test.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include "tensorflow/compiler/xla/client/computation_builder.h" -#include "tensorflow/compiler/xla/client/local_client.h" -#include "tensorflow/compiler/xla/tests/client_library_test_base.h" -#include "tensorflow/compiler/xla/tests/literal_test_util.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/test.h" - -namespace xla { -namespace { - -class SetReturnValueTest : public ClientLibraryTestBase {}; - -TEST_F(SetReturnValueTest, NoSetValue) { - ComputationBuilder builder(client_, "no_set_value"); - auto alpha = builder.ConstantR0(1.0); - auto x = builder.ConstantR1( - {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); - auto ax = builder.Add(alpha, x); - auto aax = builder.Add(alpha, ax); - - std::vector expected = {1.0, 3.0, 4.0, 0.0, -1.0, - 5.0, 6.0, -2.0, -3.0, 7.0}; - - ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); -} - -TEST_F(SetReturnValueTest, SetValue) { - ComputationBuilder builder(client_, "set_value"); - auto alpha = builder.ConstantR0(1.0); - auto x = builder.ConstantR1( - {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); - auto ax = builder.Add(alpha, x); - auto aax = builder.Add(alpha, ax); - auto builder_status = builder.SetReturnValue(ax); - EXPECT_TRUE(builder_status.ok()); - - std::vector expected = {0.0, 2.0, 3.0, -1.0, -2.0, - 4.0, 5.0, -3.0, -4.0, 6.0}; - - ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); -} - -TEST_F(SetReturnValueTest, SetValueAndModify) { - ComputationBuilder builder(client_, "set_value_and_modify"); - auto alpha = builder.ConstantR0(1.0); - auto x = builder.ConstantR1( - {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); - auto ax = builder.Add(alpha, x); - auto aax = builder.Add(alpha, ax); - auto builder_status = builder.SetReturnValue(ax); - EXPECT_TRUE(builder_status.ok()); - auto aaax = builder.Add(alpha, aax); - - std::vector 
expected = {0.0, 2.0, 3.0, -1.0, -2.0, - 4.0, 5.0, -3.0, -4.0, 6.0}; - - ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); -} - -TEST_F(SetReturnValueTest, SetValueMultipleTimesAndModify) { - ComputationBuilder builder(client_, "set_value_multiple_times_and_modify"); - auto alpha = builder.ConstantR0(1.0); - auto x = builder.ConstantR1( - {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); - auto ax = builder.Add(alpha, x); - auto aax = builder.Add(alpha, ax); - auto builder_status = builder.SetReturnValue(aax); - EXPECT_TRUE(builder_status.ok()); - auto aaax = builder.Add(alpha, aax); - builder_status = builder.SetReturnValue(ax); - EXPECT_TRUE(builder_status.ok()); - auto aaaax = builder.Add(alpha, aaax); - - std::vector expected = {0.0, 2.0, 3.0, -1.0, -2.0, - 4.0, 5.0, -3.0, -4.0, 6.0}; - - ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); -} - -} // namespace -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc index 997a1d8273736af31994ebbd07ff3857d1e8e0b5..810cc25f1b5b1199984a3229909a70f9548c7dd2 100644 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -339,8 +339,7 @@ StatusOr>> MakeFakeArguments( return std::move(arguments); } -Status VerifyHloModule(const se::Platform& platform, HloModule* const module, - bool allow_mixed_precision) { +Status VerifyHloModule(HloModule* const module, bool allow_mixed_precision) { return HloVerifier(allow_mixed_precision).Run(module).status(); } diff --git a/tensorflow/compiler/xla/tests/test_utils.h b/tensorflow/compiler/xla/tests/test_utils.h index 30c147910cae85e1ebdddc22e637a6c1fd577c20..f483cdebea5c7c8a43e73ab57748a93c97bb78d7 100644 --- a/tensorflow/compiler/xla/tests/test_utils.h +++ b/tensorflow/compiler/xla/tests/test_utils.h @@ -68,7 +68,7 @@ StatusOr>> MakeFakeArguments( // Check that a given module satisfies various constraints before trying to // execute 
it. -Status VerifyHloModule(const se::Platform& platform, HloModule* const module, +Status VerifyHloModule(HloModule* const module, bool allow_mixed_precision = false); } // namespace xla diff --git a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc index 3dded3f7157195b2c7aaac2ff9aac79ca4611d05..5cce7a2bf82c1a8403536a91e67910f949ef185a 100644 --- a/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/vector_ops_simple_test.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/local_client.h" @@ -350,7 +349,7 @@ XLA_TEST_F(VecOpsSimpleTest, ClampTenValuesConstantNonzeroLower) { } XLA_TEST_F(VecOpsSimpleTest, ClampValuesConstantS64) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto zero = builder.ConstantR0(0); auto one = builder.ConstantR0(10); auto x = builder.ConstantR1({-3, 3, 9, 13}); diff --git a/tensorflow/compiler/xla/text_literal_reader.cc b/tensorflow/compiler/xla/text_literal_reader.cc index 44f874cd2ae8e6f65dc282b8675f195ec9c09415..56702feab9a4e8d00df3a165ab994aef2d42d830 100644 --- a/tensorflow/compiler/xla/text_literal_reader.cc +++ b/tensorflow/compiler/xla/text_literal_reader.cc @@ -42,7 +42,7 @@ StatusOr> TextLiteralReader::ReadPath( << "TextLiteralReader no longer supports reading .gz files"; std::unique_ptr file; Status s = - tensorflow::Env::Default()->NewRandomAccessFile(path.ToString(), &file); + tensorflow::Env::Default()->NewRandomAccessFile(std::string(path), &file); if (!s.ok()) { return s; } @@ -92,7 +92,7 @@ StatusOr> TextLiteralReader::ReadAllLines() { tensorflow::StringPiece sp(shape_string); if 
(tensorflow::str_util::RemoveWhitespaceContext(&sp) > 0) { - string tmp = sp.ToString(); + string tmp = std::string(sp); shape_string = tmp; } TF_ASSIGN_OR_RETURN(Shape shape, ShapeUtil::ParseShapeString(shape_string)); @@ -124,10 +124,10 @@ StatusOr> TextLiteralReader::ReadAllLines() { line.c_str()); } float value; - if (!tensorflow::strings::safe_strtof(value_string.ToString().c_str(), + if (!tensorflow::strings::safe_strtof(std::string(value_string).c_str(), &value)) { return InvalidArgument("could not parse value as float: \"%s\"", - value_string.ToString().c_str()); + std::string(value_string).c_str()); } SplitByDelimToStringPieces(coordinates_string, ',', &coordinates); coordinate_values.clear(); @@ -136,7 +136,7 @@ StatusOr> TextLiteralReader::ReadAllLines() { if (!tensorflow::strings::safe_strto64(piece, &coordinate_value)) { return InvalidArgument( "could not parse coordinate member as int64: \"%s\"", - piece.ToString().c_str()); + std::string(piece).c_str()); } coordinate_values.push_back(coordinate_value); } diff --git a/tensorflow/compiler/xla/text_literal_writer.cc b/tensorflow/compiler/xla/text_literal_writer.cc index 3fee467594d8423c707abf07a0622a738437830a..6e3061b78a554f028b2ffae2e0590d91a4fe48e2 100644 --- a/tensorflow/compiler/xla/text_literal_writer.cc +++ b/tensorflow/compiler/xla/text_literal_writer.cc @@ -33,7 +33,7 @@ namespace xla { /* static */ tensorflow::Status TextLiteralWriter::WriteToPath( const Literal& literal, tensorflow::StringPiece path) { std::unique_ptr f; - auto s = tensorflow::Env::Default()->NewWritableFile(path.ToString(), &f); + auto s = tensorflow::Env::Default()->NewWritableFile(std::string(path), &f); if (!s.ok()) { return s; } diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 0bc4045a5490319994b6cf24daf99fe856167507..78ab2dccafc37aa4f93da0b8d5b39a779ddd5db8 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -88,6 +88,7 @@ cc_library( 
"//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:testing", + "//tensorflow/compiler/xla/service:hlo_proto", "//tensorflow/compiler/xla/service:session_proto", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:framework_internal", diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc index fc0e4444521247734fc240a03da669244fe1a6a4..350db126535e418cbfa914edd958f47ba90a3ee5 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc @@ -230,7 +230,7 @@ TokKind HloLexer::LexIdentifier() { } } - str_val_ = identifier.ToString(); + str_val_ = std::string(identifier); return TokKind::kIdent; } diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index fdbfc0210ea63ac4350ba48ac3354d23c53c69a7..156a06c596c3f1550213cb5ac5d11834a80b7181 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -30,6 +30,7 @@ namespace { using tensorflow::StringPiece; using tensorflow::gtl::optional; +using tensorflow::str_util::Join; using tensorflow::str_util::Split; using tensorflow::str_util::SplitAndParseAsInts; using tensorflow::strings::Printf; @@ -53,7 +54,7 @@ class HloParser { std::unique_ptr ConsumeHloModule() { return std::move(module_); } // Returns the error information. - string GetError() const { return tensorflow::str_util::Join(error_, "\n"); } + string GetError() const { return Join(error_, "\n"); } private: // ParseXXX returns false if an error occurred. 
@@ -242,10 +243,10 @@ bool HloParser::Error(LocTy loc, StringPiece msg) { std::vector error_lines; error_lines.push_back( StrCat("was parsing ", line, ":", col, ": error: ", msg)); - error_lines.push_back(lexer_.GetLine(loc).ToString()); + error_lines.push_back(std::string(lexer_.GetLine(loc))); error_lines.push_back(col == 0 ? "" : StrCat(string(col - 1, ' '), "^")); - error_.push_back(tensorflow::str_util::Join(error_lines, "\n")); + error_.push_back(Join(error_lines, "\n")); VLOG(1) << "Error: " << error_.back(); return false; } @@ -303,12 +304,18 @@ bool HloParser::ParseComputations() { // set the layouts to what the hlo text says. for (int p = 0; p < computation->num_parameters(); p++) { const Shape& param_shape = computation->parameter_instruction(p)->shape(); - TF_CHECK_OK(module_->mutable_entry_computation_layout() + TF_CHECK_OK(module_->mutable_host_entry_computation_layout() + ->mutable_parameter_layout(p) + ->CopyLayoutFromShape(param_shape)); + TF_CHECK_OK(module_->mutable_device_entry_computation_layout() ->mutable_parameter_layout(p) ->CopyLayoutFromShape(param_shape)); } const Shape& result_shape = computation->root_instruction()->shape(); - TF_CHECK_OK(module_->mutable_entry_computation_layout() + TF_CHECK_OK(module_->mutable_host_entry_computation_layout() + ->mutable_result_layout() + ->CopyLayoutFromShape(result_shape)); + TF_CHECK_OK(module_->mutable_device_entry_computation_layout() ->mutable_result_layout() ->CopyLayoutFromShape(result_shape)); } @@ -433,6 +440,10 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional metadata; attrs["metadata"] = {/*required=*/false, AttrTy::kMetadata, &metadata}; + optional backend_config; + attrs["backend_config"] = {/*required=*/false, AttrTy::kString, + &backend_config}; + HloInstruction* instruction; switch (opcode) { case HloOpcode::kParameter: { @@ -1087,8 +1098,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, instruction->set_name(name); - // Add common 
attrs (sharding, control predecessors) to the instruction, if - // they were seen. + // Add shared attributes like metadata to the instruction, if they were seen. if (sharding) { instruction->set_sharding( HloSharding::FromProto(sharding.value()).ValueOrDie()); @@ -1105,6 +1115,9 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (metadata) { instruction->set_metadata(*metadata); } + if (backend_config) { + instruction->set_backend_config(std::move(*backend_config)); + } return AddInstruction(name, instruction, name_loc); } // NOLINT(readability/fn_size) @@ -1482,11 +1495,10 @@ bool HloParser::ParseDenseLiteral(std::unique_ptr* literal, std::vector elems_seen_until_dim(elems_seen_per_dim.begin(), elems_seen_per_dim.begin() + dim); return StrCat("[", - tensorflow::str_util::Join( - elems_seen_until_dim, ",", - [](string* out, const int64& num_elems) { - tensorflow::strings::StrAppend(out, num_elems - 1); - }), + Join(elems_seen_until_dim, ",", + [](string* out, const int64& num_elems) { + tensorflow::strings::StrAppend(out, num_elems - 1); + }), "]"); }; do { @@ -1674,7 +1686,7 @@ bool HloParser::ParseSparseLiteralHelper(std::unique_ptr* literal, return Error( index_loc, StrCat("invalid multi-dimension index for shape with rank ", rank, - ": [", tensorflow::str_util::Join(index, ", "), "]")); + ": [", Join(index, ", "), "]")); } } if (!ParseToken(TokKind::kColon, @@ -1842,7 +1854,19 @@ bool HloParser::ParseAttributeHelper( } auto attr_it = attrs.find(name); if (attr_it == attrs.end()) { - return Error(loc, Printf("unexpected attribute %s", name.c_str())); + string allowed_attrs; + if (attrs.empty()) { + allowed_attrs = "No attributes are allowed here."; + } else { + allowed_attrs = StrCat( + "Allowed attributes: ", + Join(attrs, ", ", + [&](string* out, const std::pair& kv) { + StrAppend(out, kv.first); + })); + } + return Error(loc, Printf("unexpected attribute \"%s\". 
%s", name.c_str(), + allowed_attrs.c_str())); } AttrTy attr_type = attr_it->second.attr_type; void* attr_out_ptr = attr_it->second.result; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index adc8b1d620eb65fdca19072831360b71847abf9e..e100d8cda14eabbec3942bf442aa99cc04daada4 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -65,7 +65,7 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { R"(HloModule constant_pred_module ENTRY %constant_pred () -> pred[] { - ROOT %constant = pred[] constant(true), metadata={op_type="const" op_name="\"it\'s not a problem\n" source_file="path/to/test.cc" source_line=68} + ROOT %constant = pred[] constant(true), metadata={op_type="const" op_name="\"it\'s not a problem\n" source_file="path/to/test.cc" source_line=68}, backend_config="foo\" bar" } )" @@ -81,13 +81,14 @@ ENTRY %constant_s32 () -> s32[] { )" }, -// f32 constant, but the value is not a decimal +// f32 constant, but the value is not a decimal and there is a backend +// configuration { "ConstantF32", R"(HloModule ConstantF32_module ENTRY %ConstantF32.v4 () -> f32[] { - ROOT %constant = f32[] constant(42) + ROOT %constant = f32[] constant(42), backend_config="this is a configuration" } )" @@ -1013,6 +1014,19 @@ ENTRY %SelectScalarS32True.v4 () -> s32[] { // but the constant names will not be exactly the same. 
} +TEST_F(HloParserTest, ConfigurationField) { + const string original = R"(HloModule AModule +ENTRY %configuration_test() -> s32[] { + %constant = s32[] constant(42), backend_config="foo bar" +})"; + auto result = Parse(original); + TF_ASSERT_OK(result.status()); + EXPECT_EQ("foo bar", result.ValueOrDie() + ->entry_computation() + ->root_instruction() + ->backend_config()); +} + TEST_F(HloParserTest, LiteralDimensionsMismatch_1) { const string original = R"(HloModule some_2_module @@ -1092,7 +1106,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 %input = f32[1,2,1]{2,1,0} parameter(0) %copy = f32[1,2,1]{2,0,1} copy(f32[1,2,1]{2,1,0} %input) %filter = f32[1,1,1]{2,1,0} parameter(1) - ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), sharding={maximal device=1}, dim_labels=b0f_0io->b0f, window={pad=1_1 size=2} + ROOT %convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), sharding={maximal device=1}, backend_config="foo", dim_labels=b0f_0io->b0f, window={pad=1_1 size=2} } )"; @@ -1138,7 +1152,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { )"; ExpectHasSubstr(Parse(original).status().error_message(), - "unexpected attribute calls"); + "unexpected attribute \"calls\""); } TEST_F(HloParserTest, MissingAttribute) { @@ -1239,7 +1253,7 @@ ENTRY %Reduce (input: f32[8,16,256]) -> f32[8,16] { auto module = Parse(original); TF_ASSERT_OK(module.status()); - auto program_layout = module.ValueOrDie()->entry_computation_layout(); + auto program_layout = module.ValueOrDie()->host_entry_computation_layout(); ASSERT_EQ(program_layout.parameter_count(), 1); auto param_layout = program_layout.parameter_layout(0).layout(); auto result_layout = program_layout.result_layout().layout(); diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 
62a353ad09af009e4abf47664a5c5f7bd70a049e..d8cedad65ea68ef86b94394a1accf2c08517c0b2 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -42,6 +42,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/session.pb.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -75,9 +76,14 @@ struct Options { // // Similarly, infeeds fake data of shape fake_infeed_shape if it is provided; // otherwise, no infeed is performed. -StatusOr> ReplayComputation( - const SessionModule& module, Client* client, const Options& opts) { - TF_ASSIGN_OR_RETURN(Computation computation, client->LoadSnapshot(module)); +template +StatusOr> ReplayComputation(const ModuleT& module, + Client* client, + const Options& opts) { + static_assert(std::is_same::value || + std::is_same::value, + "Proto must be in HloSnapshot or SessionModule format"); + TF_ASSIGN_OR_RETURN(auto computation, client->LoadSnapshot(module)); std::vector> arguments; if (opts.use_fake_data) { @@ -153,6 +159,38 @@ int RealMain(tensorflow::gtl::ArraySlice args, const Options& opts) { tensorflow::Env* env = tensorflow::Env::Default(); int exit_status = EXIT_SUCCESS; for (char* arg : args) { + HloSnapshot snapshot; + auto status = tensorflow::ReadBinaryProto(env, arg, &snapshot); + if (status.ok()) { + StatusOr> result_status = + ReplayComputation(snapshot, client, opts); + if (!result_status.ok()) { + fprintf(stderr, "%s: error: %s\n", arg, + result_status.status().ToString().c_str()); + exit_status = EXIT_FAILURE; + continue; + } + + std::unique_ptr result = result_status.ConsumeValueOrDie(); + if (result != nullptr) { + fprintf(stdout, "%s: %s :: %s:%s\n", arg, + 
snapshot.hlo().hlo_module().name().c_str(), + ShapeUtil::HumanString(result->shape()).c_str(), + result->ToString().c_str()); + if (snapshot.has_result()) { + std::unique_ptr literal = + Literal::CreateFromProto(snapshot.result()).ConsumeValueOrDie(); + fprintf(stdout, "was %s:%s\n", + ShapeUtil::HumanString(snapshot.result().shape()).c_str(), + literal->ToString().c_str()); + } + } + + continue; + } + fprintf(stderr, "%s: is not HloSnapshot: %s. Trying as SessionModule...\n", + arg, status.ToString().c_str()); + SessionModule module; TF_CHECK_OK(tensorflow::ReadBinaryProto(env, arg, &module)); StatusOr> result_status = diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index d23f9e5918f54c4f385f3b16fd84bbee51ed5a95..750d72d797b4f8680e13597ac02f6f9fa6e37bcd 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -134,6 +134,8 @@ enum Format { // example, Convert) are ignored. // // See the XLA documentation for more information on shapes and layouts. +// +// LINT.IfChange message Layout { // The method used to store the data in memory. The format determines which of // the other fields are used by the layout. @@ -159,9 +161,12 @@ message Layout { // memory. This field must be unset unless the format is SPARSE. int64 max_sparse_elements = 5; - // Important: if any field is added, be sure to modify ShapeUtil::Equal() - // appropriately to account for the new field. + // Important: if any field is added, be sure to modify ShapeUtil::Equal() and + // LayoutUtil::Hash appropriately to account for the new field. } +// LINT.ThenChange( \ +// https://www.tensorflow.org/code/tensorflow/compiler/xla/shape_util.cc, \ +// https://www.tensorflow.org/code/tensorflow/compiler/xla/layout_util.cc) // A shape describes the number of dimensions in the array, the size of each // dimension, and the primitive component type. @@ -170,6 +175,8 @@ message Layout { // defined. 
// // See the XLA documentation for more information on shapes and layouts. +// +// LINT.IfChange message Shape { reserved 1; reserved "rank"; @@ -190,9 +197,12 @@ message Shape { // The layout used to back this shape. Layout layout = 5; - // Important: if any field is added, be sure to modify ShapeUtil::Equal() and - // ShapeUtil::Compatible() appropriately to account for the new field. + // Important: if any field is added, be sure to modify ShapeUtil::Equal(), + // ShapeUtil::Compatible() and ShapeUtil::Hash() appropriately to account for + // the new field. } +// LINT.ThenChange( \ +// https://www.tensorflow.org/code/tensorflow/compiler/xla/shape_util.cc) // Shape of the parameters and output of a computation (like a traditional // function signature). diff --git a/tensorflow/contrib/autograph/converters/asserts.py b/tensorflow/contrib/autograph/converters/asserts.py index 2d9e2c58e3afcef5c18f477a7a29e518e98e672e..3b0db677ce5e417e7afea8d8fe4121a0352bb6d7 100644 --- a/tensorflow/contrib/autograph/converters/asserts.py +++ b/tensorflow/contrib/autograph/converters/asserts.py @@ -33,7 +33,7 @@ class AssertsTransformer(transformer.Base): # Note: The lone tf.Assert call will be wrapped with control_dependencies # by side_effect_guards. 
template = """ - tf.Assert(test, [msg]) + tf.Assert(test, (msg,)) """ if node.msg is None: diff --git a/tensorflow/contrib/autograph/converters/break_statements.py b/tensorflow/contrib/autograph/converters/break_statements.py index 91de82f0a78ccae711298d78364810dd099a5c38..1be1c96dd31bf05b746fae6a2b02774e20ca0c4f 100644 --- a/tensorflow/contrib/autograph/converters/break_statements.py +++ b/tensorflow/contrib/autograph/converters/break_statements.py @@ -114,9 +114,9 @@ class BreakStatementTransformer(transformer.Base): template, var_name=break_var, for_stmt=node) - extra_cond = templates.replace_as_expression( + extra_test = templates.replace_as_expression( 'not var_name', var_name=break_var) - anno.setanno(node[1], 'extra_cond', extra_cond) + anno.setanno(node[1], 'extra_test', extra_test) return node diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py index 2e26cdb3d9387d358e0225555506f199e9945d0b..935a2786db0289c67860be2da97e3f554f12500c 100644 --- a/tensorflow/contrib/autograph/converters/control_flow.py +++ b/tensorflow/contrib/autograph/converters/control_flow.py @@ -207,7 +207,7 @@ class ControlFlowTransformer(transformer.Base): def body_name(state_ssf): body return state_ssf, - state_ast_tuple = ag__.while_loop( + state_ast_tuple = ag__.while_stmt( test_name, body_name, (state,), (extra_deps,)) """ node = templates.replace( @@ -252,31 +252,31 @@ class ControlFlowTransformer(transformer.Base): state_ast_tuple = gast.Tuple([n.ast() for n in state], None) node_body = ast_util.rename_symbols(node.body, ssf_map) - if anno.hasanno(node, 'extra_cond'): - extra_cond = anno.getanno(node, 'extra_cond') - extra_cond = ast_util.rename_symbols(extra_cond, ssf_map) + if anno.hasanno(node, 'extra_test'): + extra_test = anno.getanno(node, 'extra_test') + extra_test = ast_util.rename_symbols(extra_test, ssf_map) else: - extra_cond = parser.parse_expression('True') + extra_test = 
parser.parse_expression('True') template = """ - def extra_cond_name(state_ssf): - return extra_cond_expr + def extra_test_name(state_ssf): + return extra_test_expr def body_name(iterate, state_ssf): body return state_ssf, - state_ast_tuple = ag__.for_loop( - iterated, extra_cond_name, body_name, (state,)) + state_ast_tuple = ag__.for_stmt( + iter_, extra_test_name, body_name, (state,)) """ node = templates.replace( template, state=state, state_ssf=state_ssf, state_ast_tuple=state_ast_tuple, - iterated=node.iter, + iter_=node.iter, iterate=node.target, - extra_cond_name=self.context.namer.new_symbol('extra_cond', + extra_test_name=self.context.namer.new_symbol('extra_test', all_referenced), - extra_cond_expr=extra_cond, + extra_test_expr=extra_test, body_name=self.context.namer.new_symbol('loop_body', all_referenced), body=node_body) diff --git a/tensorflow/contrib/autograph/impl/config.py b/tensorflow/contrib/autograph/impl/config.py index 2600088595a12761b1138c4649c06882bd8fd000..878bb7e12f2b39a0ec40004ff2c7ac3ab8031e14 100644 --- a/tensorflow/contrib/autograph/impl/config.py +++ b/tensorflow/contrib/autograph/impl/config.py @@ -33,7 +33,7 @@ DEFAULT_UNCOMPILED_MODULES = set(( (utils.__name__,), # All of tensorflow's subpackages. Unlike the root tf module, they don't - # have well-known names. Not refering to the module directly to avoid + # have well-known names. Not referring to the module directly to avoid # circular imports. ( utils.__name__[:-len('.contrib.autograph.utils')],), diff --git a/tensorflow/contrib/autograph/operators/__init__.py b/tensorflow/contrib/autograph/operators/__init__.py index 04b4734551d3227a1c611d668f006a157c2c2dd3..38b761d97d54bdaee4da91269964469b482895ae 100644 --- a/tensorflow/contrib/autograph/operators/__init__.py +++ b/tensorflow/contrib/autograph/operators/__init__.py @@ -19,11 +19,19 @@ conditionals and loops, implemented in functional form, using for example closures for the body. 
""" +# Naming conventions: +# * operator names match the name usually used for the respective Python +# idiom; examples: for_stmt, list_append +# * operator arguments match either of: +# - the corresponding Python AST attribute (e.g. the condition of an if +# statement is called test) if the operator represents an AST construct +# - the names used in the Python docs, if the operator is a function (e.g. +# list_ and x for append, see +# https://docs.python.org/3.7/tutorial/datastructures.html) + from __future__ import absolute_import from __future__ import division from __future__ import print_function -# TODO(mdan): Add a container for implementation-specific toggles (throughout). - -from tensorflow.contrib.autograph.operators.control_flow import for_loop -from tensorflow.contrib.autograph.operators.control_flow import while_loop +from tensorflow.contrib.autograph.operators.control_flow import for_stmt +from tensorflow.contrib.autograph.operators.control_flow import while_stmt diff --git a/tensorflow/contrib/autograph/operators/control_flow.py b/tensorflow/contrib/autograph/operators/control_flow.py index d9d8b0d593e5372942ca6423d10022f0f56d78ce..671c9ccc13eaa887522cfc248a6d56d7ab9719ca 100644 --- a/tensorflow/contrib/autograph/operators/control_flow.py +++ b/tensorflow/contrib/autograph/operators/control_flow.py @@ -25,44 +25,55 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_math_ops -# TODO(mdan): Rename _loop to _stmt to follow Python nomenclature. -# TODO(mdan): Rename arguments to match the AST names. - -def for_loop(iterated, extra_cond, loop_body, init_state): +def for_stmt(iter_, extra_test, body, init_state): """Functional form of a for statement. - The loop operates on a so-called state, which includes all symbols that are - variant across loop iterations, excluding the iterate. 
In what follows we - refer to state as either a tuple of entities that represent an actual state, - or a list of arguments of the corresponding types. + The loop operates on a state, which includes all symbols that are + variant across loop iterations, excluding the iterate as well as the + variables local to the loop. + + For example, given the loop below that calculates the geometric and + arithmetic means of some numbers: + + geo_mean = 1 + arith_mean = 0 + for i in range(n): + a = numbers[i] + geo_mean *= a + arith_mean += a + + The state is represented by the variables geo_mean and arith_mean. The + argument for init_state may contain the tuple (1, 0), the body will + include the arguments geo_mean and arith_mean and will return a tuple + representing the new values for geo_mean and respectively arith_mean. Args: - iterated: The entity being iterated over. - extra_cond: Callable with the state as arguments, and boolean return type. + iter_: The entity being iterated over. + extra_test: Callable with the state as arguments, and boolean return type. An additionnal loop condition. - loop_body: Callable with the iterate and the state as arguments, and + body: Callable with the iterate and the state as arguments, and state as return type. The actual loop body. init_state: Tuple containing the initial state. Returns: Tuple containing the final state. 
""" - if tensor_util.is_tensor(iterated): - return _known_len_for_loop(iterated, extra_cond, loop_body, init_state) - elif isinstance(iterated, dataset_ops.Dataset): - return _dataset_for_loop(iterated, extra_cond, loop_body, init_state) + if tensor_util.is_tensor(iter_): + return _known_len_for_stmt(iter_, extra_test, body, init_state) + elif isinstance(iter_, dataset_ops.Dataset): + return _dataset_for_stmt(iter_, extra_test, body, init_state) else: - return _py_for_loop(iterated, extra_cond, loop_body, init_state) + return _py_for_stmt(iter_, extra_test, body, init_state) -def _py_for_loop(iterated, extra_cond, loop_body, init_state): - """Overload of for_loop that executes a Python for loop.""" +def _py_for_stmt(iter_, extra_test, body, init_state): + """Overload of for_stmt that executes a Python for loop.""" state = init_state - for iterate in iterated: - if not extra_cond(*state): + for target in iter_: + if not extra_test(*state): break - state = loop_body(iterate, *state) + state = body(target, *state) # TODO(mdan): Remove this special case. 
if len(state) == 1: @@ -70,23 +81,23 @@ def _py_for_loop(iterated, extra_cond, loop_body, init_state): return state -def _known_len_for_loop(iterated, extra_cond, loop_body, init_state): - """Overload of for_loop that iterates over objects that define a length.""" - n = builtins.dynamic_len(iterated) +def _known_len_for_stmt(iter_, extra_test, body, init_state): + """Overload of for_stmt that iterates over objects that define a length.""" + n = builtins.dynamic_len(iter_) def while_body(iterate_index, *state): - iterate = iterated[iterate_index] - new_state = loop_body(iterate, *state) + iterate = iter_[iterate_index] + new_state = body(iterate, *state) return (iterate_index + 1,) + new_state def while_cond(iterate_index, *state): - return gen_math_ops.logical_and(iterate_index < n, extra_cond(*state)) + return gen_math_ops.logical_and(iterate_index < n, extra_test(*state)) - results = while_loop( + results = while_stmt( while_cond, while_body, init_state=(0,) + init_state, - extra_deps=(iterated,), + extra_deps=(iter_,), opts=dict(maximum_iterations=n)) # Dropping the iteration index because it's not syntactically visible. 
results = results[1:] @@ -97,8 +108,8 @@ def _known_len_for_loop(iterated, extra_cond, loop_body, init_state): return results -def _dataset_for_loop(ds, extra_cond, loop_body, init_state): - """Overload of for_loop that iterates over TF Datasets.""" +def _dataset_for_stmt(ds, extra_test, body, init_state): + """Overload of for_stmt that iterates over TF Datasets.""" # Because Datsets only expose get_next, in the style of Python iterators, # we are forced to unpack the loop as: # @@ -117,15 +128,15 @@ def _dataset_for_loop(ds, extra_cond, loop_body, init_state): epoch_number, iterate = iterator.get_next() def while_body(epoch_number, iterate, *state): - new_state = loop_body(iterate, *state) + new_state = body(iterate, *state) epoch_number, iterate = iterator.get_next() return (epoch_number, iterate) + new_state def while_cond(epoch_number, iterate, *state): del iterate - return gen_math_ops.logical_and(epoch_number < 1, extra_cond(*state)) + return gen_math_ops.logical_and(epoch_number < 1, extra_test(*state)) - results = while_loop( + results = while_stmt( while_cond, while_body, init_state=(epoch_number, iterate) + init_state, @@ -140,7 +151,7 @@ def _dataset_for_loop(ds, extra_cond, loop_body, init_state): return results -def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None): +def while_stmt(test, body, init_state, extra_deps, opts=None): """Functional form of a while statement. The loop operates on a so-called state, which includes all symbols that are @@ -149,13 +160,13 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None): of the corresponding types. Args: - loop_cond: Callable with the state as arguments, and boolean return type. + test: Callable with the state as arguments, and boolean return type. The loop condition. - loop_body: Callable with the state as arguments, and state as return type. + body: Callable with the state as arguments, and state as return type. The actual loop body. 
init_state: Tuple containing the initial state. extra_deps: Tuple containing additional entities on which the loop may - depend, such as loop invariants referenced by loop_cond. Used + depend, such as loop invariants referenced by test. Used exclusively for dispatch control. opts: Optional dict of extra loop parameters. @@ -163,27 +174,27 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None): Tuple containing the final state. """ # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch. - # That could be somethins as simple as a collection of dispatch rules, with + # That could be something as simple as a collection of dispatch rules, with # some prioritization. if any(tensor_util.is_tensor(v) for v in init_state + extra_deps): - return _tf_while_loop(loop_cond, loop_body, init_state, opts) + return _tf_while_stmt(test, body, init_state, opts) else: - return _py_while_loop(loop_cond, loop_body, init_state, opts) + return _py_while_stmt(test, body, init_state, opts) -def _tf_while_loop(loop_cond, loop_body, init_state, opts): - """Overload of while_loop that stages a TF while_loop.""" +def _tf_while_stmt(test, body, init_state, opts): + """Overload of while_stmt that stages a TF while_loop.""" if opts is None: opts = {} - return control_flow_ops.while_loop(loop_cond, loop_body, init_state, **opts) + return control_flow_ops.while_loop(test, body, init_state, **opts) -def _py_while_loop(loop_cond, loop_body, init_state, opts): - """Overload of while_loop that executes a Python while loop.""" +def _py_while_stmt(test, body, init_state, opts): + """Overload of while_stmt that executes a Python while loop.""" del opts state = init_state - while loop_cond(*state): - state = loop_body(*state) + while test(*state): + state = body(*state) return state diff --git a/tensorflow/contrib/autograph/operators/control_flow_test.py b/tensorflow/contrib/autograph/operators/control_flow_test.py index 
a0cd0bfa82bb052d55dfe30f8700fc33a794a59f..b14d7edba38461692d9e999a6ce80a5fd84ba80d 100644 --- a/tensorflow/contrib/autograph/operators/control_flow_test.py +++ b/tensorflow/contrib/autograph/operators/control_flow_test.py @@ -29,28 +29,28 @@ from tensorflow.python.platform import test class ForLoopTest(test.TestCase): def test_tensor(self): - s = control_flow.for_loop( + s = control_flow.for_stmt( constant_op.constant([1, 2, 3, 4]), - extra_cond=lambda s: True, - loop_body=lambda i, s: (s + i,), + extra_test=lambda s: True, + body=lambda i, s: (s + i,), init_state=(0,)) with self.test_session() as sess: self.assertEqual((10,), sess.run(s)) def test_python(self): - s = control_flow.for_loop( + s = control_flow.for_stmt( range(5), - extra_cond=lambda s: True, - loop_body=lambda i, s: (s + i,), + extra_test=lambda s: True, + body=lambda i, s: (s + i,), init_state=(0,)) self.assertEqual(10, s) def test_dataset(self): to_int32 = lambda i: math_ops.cast(i, dtypes.int32) - s = control_flow.for_loop( + s = control_flow.for_stmt( dataset_ops.Dataset.range(5).map(to_int32), - extra_cond=lambda s: True, - loop_body=lambda i, s: (s + i,), + extra_test=lambda s: True, + body=lambda i, s: (s + i,), init_state=(0,)) with self.test_session() as sess: self.assertEqual((10,), sess.run(s)) @@ -60,9 +60,9 @@ class WhileLoopTest(test.TestCase): def test_tensor(self): n = constant_op.constant(5) - results = control_flow.while_loop( - loop_cond=lambda i, s: i < n, - loop_body=lambda i, s: (i + 1, s + i,), + results = control_flow.while_stmt( + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i,), init_state=(0, 0), extra_deps=(n,)) with self.test_session() as sess: @@ -70,9 +70,9 @@ class WhileLoopTest(test.TestCase): def test_python(self): n = 5 - results = control_flow.while_loop( - loop_cond=lambda i, s: i < n, - loop_body=lambda i, s: (i + 1, s + i), + results = control_flow.while_stmt( + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i), init_state=(0, 0), 
extra_deps=(n,)) self.assertEqual((5, 10), results) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/BUILD b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD index 83f3bafc4217649db6499566d548c1657428ad0b..8064a967cd389e88d3febbeb21cac87b0fef9e18 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/BUILD +++ b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD @@ -19,6 +19,7 @@ py_library( srcs = [ "activity.py", "annos.py", + "cfg.py", "live_values.py", "type_info.py", ], @@ -43,6 +44,19 @@ py_test( ], ) +py_test( + name = "cfg_test", + srcs = ["cfg_test.py"], + srcs_version = "PY2AND3", + tags = ["no_windows"], + deps = [ + ":static_analysis", + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/python:client_testlib", + "@gast_archive//:gast", + ], +) + py_test( name = "live_values_test", srcs = ["live_values_test.py"], diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py index 2c14c2c8c23810c64446eb9e7ffc5402ce9a2298..4d7b0cbb7b8f6ee5bd64553644dc3ec9b8bca95b 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py @@ -23,11 +23,12 @@ import copy import gast from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import qual_names from tensorflow.contrib.autograph.pyct import transformer -from tensorflow.contrib.autograph.pyct.qual_names import QN from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno # TODO(mdan): Add support for PY3 (e.g. Param vs arg). +# TODO(alexbw): Ignore named literals (e.g. None) class Scope(object): @@ -43,16 +44,20 @@ class Scope(object): used: identifiers referenced in this scope """ - def __init__(self, parent, isolated=True): + def __init__(self, parent, isolated=True, add_unknown_symbols=False): """Create a new scope. Args: parent: A Scope or None. 
isolated: Whether the scope is isolated, that is, whether variables created in this scope should be visible to the parent scope. + add_unknown_symbols: Whether to handle attributes and subscripts + without having first seen the base name. + E.g., analyzing the statement 'x.y = z' without first having seen 'x'. """ self.isolated = isolated self.parent = parent + self.add_unknown_symbols = add_unknown_symbols self.modified = set() self.created = set() self.used = set() @@ -134,13 +139,17 @@ class Scope(object): self.params.add(name) def mark_creation(self, name, writes_create_symbol=False): + """Mark a qualified name as created.""" if name.is_composite(): parent = name.parent - if self.has(parent): - if not writes_create_symbol: - return + if not writes_create_symbol: + return else: - raise ValueError('Unknown symbol "%s".' % parent) + if not self.has(parent): + if self.add_unknown_symbols: + self.mark_read(parent) + else: + raise ValueError('Unknown symbol "%s".' % parent) self.created.add(name) def mark_write(self, name): @@ -163,17 +172,25 @@ class Scope(object): class ActivityAnalyzer(transformer.Base): - """Annotates nodes with local scope information. See Scope.""" + """Annotates nodes with local scope information. - def __init__(self, context, parent_scope): + See Scope. + + The use of this class requires that qual_names.resolve() has been called on + the node. This class will ignore nodes that have not been + annotated with their qualified names. 
+ """ + + def __init__(self, context, parent_scope=None, add_unknown_symbols=False): super(ActivityAnalyzer, self).__init__(context) - self.scope = Scope(parent_scope) + self.scope = Scope(parent_scope, None, add_unknown_symbols) self._in_return_statement = False + self._in_aug_assign = False @property def _in_constructor(self): - innermost = self.enclosing_entities[-1] if len(self.enclosing_entities) > 1: + innermost = self.enclosing_entities[-1] parent = self.enclosing_entities[-2] return isinstance(parent, gast.ClassDef) and innermost.name == '__init__' return False @@ -184,6 +201,7 @@ class ActivityAnalyzer(transformer.Base): # TODO(mdan): The 'self' argument is not guaranteed to be called 'self'. if qn.has_attr and qn.parent.qn == ('self',): return True + return False def _track_symbol(self, node, @@ -201,12 +219,14 @@ class ActivityAnalyzer(transformer.Base): self.scope.mark_write(qn.parent) if writes_create_symbol: self.scope.mark_creation(qn, writes_create_symbol=True) + if self._in_aug_assign: + self.scope.mark_read(qn) elif isinstance(node.ctx, gast.Load): self.scope.mark_read(qn) elif isinstance(node.ctx, gast.Param): # Param contexts appear in function defs, so they have the meaning of # defining a variable. - # TODO(mdan): This bay be incorrect with nested functions. + # TODO(mdan): This may be incorrect with nested functions. # For nested functions, we'll have to add the notion of hiding args from # the parent scope, not writing to them. self.scope.mark_creation(qn) @@ -222,6 +242,14 @@ class ActivityAnalyzer(transformer.Base): if self._in_return_statement: self.scope.mark_returned(qn) + def visit_AugAssign(self, node): + # Special rules for AugAssign. In Assign, the target is only written, + # but in AugAssign (e.g. a += b), the target is both read and written. 
+ self._in_aug_assign = True + self.generic_visit(node) + self._in_aug_assign = False + return node + def visit_Name(self, node): self.generic_visit(node) self._track_symbol(node) @@ -295,7 +323,7 @@ class ActivityAnalyzer(transformer.Base): def visit_FunctionDef(self, node): if self.scope: - qn = QN(node.name) + qn = qual_names.QN(node.name) self.scope.mark_write(qn) current_scope = self.scope body_scope = Scope(current_scope, isolated=True) @@ -355,5 +383,32 @@ class ActivityAnalyzer(transformer.Base): return node +def get_read(node, context): + """Return the variable names as QNs (qual_names.py) read by this statement.""" + analyzer = ActivityAnalyzer(context, None, True) + analyzer.visit(node) + return analyzer.scope.used + + +def get_updated(node, context): + """Return the variable names created or mutated by this statement. + + This function considers assign statements, augmented assign statements, and + the targets of for loops, as well as function arguments. + For example, `x[0] = 2` will return `x`, `x, y = 3, 4` will return `x` and + `y`, `for i in range(x)` will return `i`, etc. + Args: + node: An AST node + context: An EntityContext instance + + Returns: + A set of variable names (QNs, see qual_names.py) of all the variables + created or mutated. 
+ """ + analyzer = ActivityAnalyzer(context, None, True) + analyzer.visit(node) + return analyzer.scope.created | analyzer.scope.modified + + def resolve(node, context, parent_scope=None): return ActivityAnalyzer(context, parent_scope).visit(node) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py index ef79a295bfa3940705d2f341edd4eda74d7d7068..fdbd349af9d3325af114a7206d89617134278f14 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py @@ -123,7 +123,7 @@ class ActivityAnalyzerTest(test.TestCase): recursive=True) node = qual_names.resolve(node) node = activity.resolve(node, ctx) - return node + return node, ctx def test_local_markers(self): @@ -133,7 +133,7 @@ class ActivityAnalyzerTest(test.TestCase): b -= 1 return b - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) self.assertFalse( anno.getanno(node.body[0].body[0].value, NodeAnno.IS_LOCAL)) # c in b = c @@ -156,6 +156,7 @@ class ActivityAnalyzerTest(test.TestCase): expected - actual, actual - expected)) def assertScopeIsRmc(self, scope, used, modified, created): + """Assert the scope contains specific used, modified & created variables.""" self.assertSymbolSetsAre(used, scope.used, 'read') self.assertSymbolSetsAre(modified, scope.modified, 'modified') self.assertSymbolSetsAre(created, scope.created, 'created') @@ -168,7 +169,7 @@ class ActivityAnalyzerTest(test.TestCase): print(a, b) return c - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) print_node = node.body[0].body[2] if isinstance(print_node, gast.Print): # Python 2 @@ -191,7 +192,7 @@ class ActivityAnalyzerTest(test.TestCase): foo(a, b) # pylint:disable=undefined-variable return c - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) call_node = 
node.body[0].body[2].value # We basically need to detect which variables are captured by the call # arguments. @@ -208,7 +209,7 @@ class ActivityAnalyzerTest(test.TestCase): foo(a.b, a.c) return a.d - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) call_node = node.body[0].body[1].value self.assertScopeIsRmc( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), @@ -234,7 +235,7 @@ class ActivityAnalyzerTest(test.TestCase): foo(a[0], a[b]) return a[c] - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) call_node = node.body[0].body[2].value self.assertScopeIsRmc( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), @@ -258,7 +259,7 @@ class ActivityAnalyzerTest(test.TestCase): b -= 1 return b, c - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) while_node = node.body[0].body[1] self.assertScopeIsRmc( anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), @@ -278,7 +279,7 @@ class ActivityAnalyzerTest(test.TestCase): b -= 1 return b, c - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) for_node = node.body[0].body[1] self.assertScopeIsRmc( anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',)) @@ -299,7 +300,7 @@ class ActivityAnalyzerTest(test.TestCase): u = -y return z, u - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'), @@ -326,7 +327,7 @@ class ActivityAnalyzerTest(test.TestCase): d = 1 return d - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE), @@ -358,7 +359,7 @@ class ActivityAnalyzerTest(test.TestCase): d = 1 return d - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) 
if_node = node.body[0].body[0] self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE), @@ -390,7 +391,7 @@ class ActivityAnalyzerTest(test.TestCase): a = b * b return a - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) inner_if_node = node.body[0].body[0].body[0] self.assertScopeIsRmc( anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',), @@ -413,7 +414,7 @@ class ActivityAnalyzerTest(test.TestCase): b -= f(i) return b, c - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) fn_def_node = node.body[0].body[0] self.assertScopeIsRmc( @@ -434,7 +435,7 @@ class ActivityAnalyzerTest(test.TestCase): self.b = a self.b.c = 1 - node = self._parse_and_analyze(TestClass) + node, _ = self._parse_and_analyze(TestClass) init_node = node.body[0].body[0] self.assertScopeIsRmc( anno.getanno(init_node, NodeAnno.BODY_SCOPE), @@ -448,15 +449,118 @@ class ActivityAnalyzerTest(test.TestCase): def test_fn(a): a[0] += 1 - node = self._parse_and_analyze(test_fn) + node, _ = self._parse_and_analyze(test_fn) fn_node = node.body[0] self.assertScopeIsRmc( anno.getanno(fn_node, NodeAnno.BODY_SCOPE), - ('a',), + ('a', 'a[0]'), ('a', 'a[0]'), ('a',), ) + def test_return_vars_are_read(self): + + def test_fn(a, b, c): # pylint: disable=unused-argument + return c + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + self.assertScopeIsRmc( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), + ('c',), + (), + ( + 'a', + 'b', + 'c', + ), + ) + + def test_aug_assign(self): + + def test_fn(a, b): + a += b + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + self.assertScopeIsRmc( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), + ('a', 'b'), + ('a'), + ('a', 'b'), + ) + + def test_aug_assign_rvalues(self): + + a = dict(bar=3) + + def foo(): + return a + + def test_fn(x): + foo()['bar'] += x + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + 
self.assertScopeIsRmc( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), + ('foo', 'x'), + (), + ('x',), + ) + + def test_params_created(self): + + def test_fn(a, b): # pylint: disable=unused-argument + return b + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node.body[0] + self.assertScopeIsRmc( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('b',), (('')), + (('a', 'b'))) + + def test_get_read(self): + + def test_fn(x, y): + z = test_fn(x, y) + return z + + node, ctx = self._parse_and_analyze(test_fn) + node = node.body[0].body[0] + read_vars = activity.get_read(node, ctx) + self.assertEqual(read_vars, set(map(qual_names.QN, ('test_fn', 'x', 'y')))) + + def test_fn2(x, y, z): + z += test_fn2(x, y, z) + return z + + node, ctx = self._parse_and_analyze(test_fn2) + node = node.body[0].body[0] + read_vars = activity.get_read(node, ctx) + self.assertEqual(read_vars, + set(map(qual_names.QN, ('test_fn2', 'x', 'y', 'z')))) + + def test_get_updated(self): + + def test_fn(x, y): + z = test_fn(x, y) + return z + + node, ctx = self._parse_and_analyze(test_fn) + node = node.body[0].body[0] + updated_vars = activity.get_updated(node, ctx) + self.assertEqual(updated_vars, set(map(qual_names.QN, ('z')))) + + def test_fn2(x, y, z): + z += test_fn2(x, y, z) + return z + + node, ctx = self._parse_and_analyze(test_fn2) + node = node.body[0].body[0] + updated_vars = activity.get_updated(node, ctx) + self.assertEqual(updated_vars, set(map(qual_names.QN, ('z')))) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py b/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..230e4cc0f3311ac5ad1e80c2591896ee48866280 --- /dev/null +++ b/tensorflow/contrib/autograph/pyct/static_analysis/cfg.py @@ -0,0 +1,431 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Control flow graph analysis. + +Given a Python AST we construct a control flow graph, with edges both to the +next and previous statements (so it can easily walk the graph both ways). Its +nodes contain the AST of the statements. It can then perform forward or backward +analysis on this CFG. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import namedtuple +import functools +import operator + +import gast + +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct.static_analysis import activity + + +class CfgNode(object): + """A node in the CFG.""" + __slots__ = ['next', 'value', 'prev'] + + def __init__(self, value): + self.next = set() + self.prev = set() + self.value = value + + +class Cfg(namedtuple('Cfg', ['entry', 'exit'])): + """A Control Flow Graph. + + Each statement is represented as a node. For control flow statements such + as conditionals and loops the conditional itself is a node which either + branches or cycles, respectively. + Attributes: + entry: The entry node, which contains the `gast.arguments` node of the + function definition. + exit: The exit node. This node is special because it has no value (i.e. no + corresponding AST node). 
This is because Python functions can have + multiple return statements. + """ + pass + + +class CfgBuilder(gast.NodeVisitor): + """Construct a control flow graph. + + Construct a CFG starting from a FunctionDef node. + Usage: + cfg_obj = CfgBuilder().build_cfg(fndef_node) + """ + + def __init__(self): + # The current leaves of the CFG + self.current_leaves = [] + # TODO(alexbw): generalize to break, return, continue, yield, etc. + # A stack of lists, tracking continue statements + self.continue_ = [] + # A stack of lists tracking break nodes + self.break_ = [] + + def set_current_leaves(self, cfg_node): + """Link this cfg_node to the current leaves. + + This is the central function for building the CFG. It links the current + head cfg_nodes to the passed cfg_node. It then resets the head to the + passed cfg_node. + + Args: + cfg_node: A CfgNode instance. + """ + for head in self.current_leaves: + head.next.add(cfg_node) + # While we're linking the CFG forward, add backlinks + cfg_node.prev.add(head) + self.current_leaves = [cfg_node] + + def build_cfg(self, node): + """Build a CFG for a function. + + Implementation of building a CFG for dataflow analysis. See, e.g.: + https://www.seas.harvard.edu/courses/cs252/2011sp/slides/Lec02-Dataflow.pdf + + Args: + node: A function definition the body of which to analyze. + Returns: + A CFG object. + Raises: + TypeError: If the input is not a function definition. 
+ """ + if not isinstance(node, gast.FunctionDef): + raise TypeError('input must be a function definition') + entry_cfg_node = CfgNode(node.args) + self.current_leaves = [entry_cfg_node] + self.visit_statements(node.body) + exit_cfg_node = CfgNode(None) + self.set_current_leaves(exit_cfg_node) + return Cfg(entry_cfg_node, exit_cfg_node) + + def visit_statements(self, nodes): + for node in nodes: + # Check for control flow + if isinstance(node, (gast.For, gast.While, gast.If, gast.Try, gast.Break, + gast.Continue, gast.With)): + self.visit(node) + else: + expr = CfgNode(node) + self.set_current_leaves(expr) + + def generic_visit(self, node): + raise ValueError('unknown control flow') + + def visit_If(self, node): + # TODO(alexbw): change this to use immutable tuples instead of lists + # The current head will hold the conditional + test = CfgNode(node.test) + self.set_current_leaves(test) + # Handle the body + self.visit_statements(node.body) + body_exit = self.current_leaves + self.current_leaves = [] + self.current_leaves.append(test) + # Handle the orelse + self.visit_statements(node.orelse) + self.current_leaves.extend(body_exit) + + def visit_While(self, node): + test = CfgNode(node.test) + self.set_current_leaves(test) + # Start a new level of nesting + self.break_.append([]) + self.continue_.append([]) + # Handle the body + self.visit_statements(node.body) + self.current_leaves.extend(self.continue_.pop()) + self.set_current_leaves(test) + # Handle the orelse + self.visit_statements(node.orelse) + # The break statements and the test go to the next node + self.current_leaves.extend(self.break_.pop()) + + def visit_For(self, node): + iter_ = CfgNode(node.iter) + self.set_current_leaves(iter_) + self.break_.append([]) + self.continue_.append([]) + self.visit_statements(node.body) + self.current_leaves.extend(self.continue_.pop()) + self.set_current_leaves(iter_) + self.current_leaves.extend(self.break_.pop()) + + def visit_Break(self, node): + 
self.break_[-1].extend(self.current_leaves) + self.current_leaves[:] = [] + + def visit_Continue(self, node): + self.continue_[-1].extend(self.current_leaves) + self.current_leaves[:] = [] + + def visit_Try(self, node): + self.visit_statements(node.body) + body = self.current_leaves + handlers = [] + for handler in node.handlers: + self.current_leaves = body[:] + self.visit_statements(handler.body) + handlers.extend(self.current_leaves) + self.current_leaves = body + self.visit_statements(node.orelse) + self.current_leaves = handlers + self.current_leaves + self.visit_statements(node.finalbody) + + def visit_With(self, node): + for item in node.items: + self.set_current_leaves(CfgNode(item)) + self.visit_statements(node.body) + + +# TODO(alexbw): once CFG analysis occurs at a block level, +# this extra class will not be necessary +class PropagateAnalysis(gast.NodeVisitor): + """Port analysis annotations from statements to their enclosing blocks.""" + + def __init__(self, analysis): + self.transfer_fn = analysis.transfer_fn + self.in_label = analysis.in_label + self.out_label = analysis.out_label + super(PropagateAnalysis, self).__init__() + + def visit_If(self, node): + # Depth-first. 
+ self.generic_visit(node) + incoming = anno.getanno(node.body[0], self.in_label) + incoming |= anno.getanno(node.test, self.in_label) + outgoing = anno.getanno(node.body[-1], self.out_label) + outgoing |= anno.getanno(node.test, self.out_label) + if node.orelse: + orelse_outgoing = anno.getanno(node.orelse[-1], self.out_label) + outgoing = self.transfer_fn(outgoing, orelse_outgoing) + anno.setanno(node, self.in_label, incoming) + anno.setanno(node, self.out_label, outgoing) + + def visit_For(self, node): + self.generic_visit(node) + incoming = set(anno.getanno(node.body[0], self.in_label)) + incoming -= set((anno.getanno(node.target, anno.Basic.QN),)) + outgoing = anno.getanno(node.body[-1], self.out_label) + if node.orelse: + orelse_outgoing = anno.getanno(node.orelse[-1], self.out_label) + outgoing = self.transfer_fn(outgoing, orelse_outgoing) + anno.setanno(node, self.in_label, frozenset(incoming)) + anno.setanno(node, self.out_label, outgoing) + + def visit_While(self, node): + self.generic_visit(node) + incoming = anno.getanno(node.body[0], self.in_label) + incoming |= anno.getanno(node.test, self.in_label) + outgoing = anno.getanno(node.body[-1], self.out_label) + if node.orelse: + orelse_outgoing = anno.getanno(node.orelse[-1], self.out_label) + outgoing = self.transfer_fn(outgoing, orelse_outgoing) + anno.setanno(node, self.in_label, incoming) + anno.setanno(node, self.out_label, outgoing) + + def visit_With(self, node): + self.generic_visit(node) + incoming = anno.getanno(node.body[0], self.in_label) + for item in node.items: + incoming |= anno.getanno(item, self.in_label) + outgoing = anno.getanno(node.body[-1], self.out_label) + anno.setanno(node, self.in_label, incoming) + anno.setanno(node, self.out_label, outgoing) + + +# TODO(alexbw): Abstract the CFG walking machinery into a superclass +# which is parameterized on which fields it selects when walking. 
+# TODO(alexbw): Abstract the application of dataflow analysis +class Forward(object): + """Forward analysis on CFG. + + Args: + label: A name for this analysis e.g. 'active' for activity analysis. The AST + nodes in the CFG will be given annotations 'name_in', 'name_out', + 'name_gen' and 'name_kill' which contain the incoming values, outgoing + values, values generated by the statement, and values deleted by the + statement respectively. + transfer_fn: Either the AND or OR operator. If the AND operator is used it + turns into forward must analysis (i.e. a value will only be carried + forward if it appears on all incoming paths). The OR operator means that + forward may analysis is done (i.e. the union of incoming values will be + taken). + """ + + def __init__(self, label, context, transfer_fn=operator.or_): + self.transfer_fn = transfer_fn + self.context = context + self.out_label = label + '_out' + self.in_label = label + '_in' + self.gen_label = label + '_gen' + self.kill_label = label + '_kill' + + # TODO(alexbw): see if we can simplify by visiting breadth-first + def visit(self, node): + """Walk the CFG depth-first, applying dataflow information propagation.""" + # node.value is None only for the exit CfgNode. 
+ if not node.value: + return + + if anno.hasanno(node.value, self.out_label): + before = hash(anno.getanno(node.value, self.out_label)) + else: + before = None + preds = [ + anno.getanno(pred.value, self.out_label) + for pred in node.prev + if anno.hasanno(pred.value, self.out_label) + ] + if preds: + incoming = functools.reduce(self.transfer_fn, preds[1:], preds[0]) + else: + incoming = frozenset() + anno.setanno(node.value, self.in_label, incoming) + gen, kill = self.get_gen_kill(node, incoming) + anno.setanno(node.value, self.gen_label, gen) + anno.setanno(node.value, self.kill_label, kill) + anno.setanno(node.value, self.out_label, (incoming - kill) | gen) + + if hash(anno.getanno(node.value, self.out_label)) != before: + for succ in node.next: + self.visit(succ) + + def get_gen_kill(self, cfg_node, incoming): + """Calculate Gen and Kill properties of a CFG node in dataflow analysis. + + A function which takes the CFG node as well as a set of incoming + values. It must return a set of newly generated values by the statement as + well as a set of deleted (killed) values. + + Args: + cfg_node: A CfgNode instance. 
+ incoming: The set of values flowing into this node. + """ + raise NotImplementedError() + + +class Backward(Forward): + """Backward analysis on CFG.""" + + def visit(self, cfg_node): + # cfg_node.value is None for the exit node, which will be visited only once + if not cfg_node.value: + for pred in cfg_node.prev: + self.visit(pred) + return + + if anno.hasanno(cfg_node.value, self.in_label): + before = hash(anno.getanno(cfg_node.value, self.in_label)) + else: + before = None + succs = [ + anno.getanno(succ.value, self.in_label) + for succ in cfg_node.next + if anno.hasanno(succ.value, self.in_label) + ] + if succs: + incoming = functools.reduce(self.transfer_fn, succs[1:], succs[0]) + else: + incoming = frozenset() + anno.setanno(cfg_node.value, self.out_label, incoming) + gen, kill = self.get_gen_kill(cfg_node, incoming) + anno.setanno(cfg_node.value, self.gen_label, gen) + anno.setanno(cfg_node.value, self.kill_label, kill) + anno.setanno(cfg_node.value, self.in_label, (incoming - kill) | gen) + if hash(anno.getanno(cfg_node.value, self.in_label)) != before: + for pred in cfg_node.prev: + self.visit(pred) + + +def run_analyses(node, analyses): + """Perform dataflow analysis on all functions within an AST. + + Args: + node: An AST node on which to run dataflow analysis. + analyses: Either an instance of the Forward or Backward dataflow analysis + class, or a list or tuple of them. + + Returns: + node: The node, but now with annotations on the AST nodes containing the + results of the dataflow analyses. 
+ """ + if not isinstance(analyses, (tuple, list)): + analyses = (analyses,) + for analysis in analyses: + if not isinstance(analysis, (Forward, Backward)): + raise TypeError('not a valid forward analysis object') + + for child_node in gast.walk(node): + if isinstance(child_node, gast.FunctionDef): + cfg_obj = CfgBuilder().build_cfg(child_node) + for analysis in analyses: + if isinstance(analysis, Backward): + analysis.visit(cfg_obj.exit) + elif isinstance(analysis, Forward): + analysis.visit(cfg_obj.entry) + for analysis in analyses: + PropagateAnalysis(analysis).visit(node) + return node + + +class Liveness(Backward): + """Perform a liveness analysis. + + Each statement is annotated with a set of variables that may be used + later in the program. + """ + + def __init__(self, context): + super(Liveness, self).__init__('live', context) + + def get_gen_kill(self, node, _): + gen = activity.get_read(node.value, self.context) + kill = activity.get_updated(node.value, self.context) + return gen, kill + + +class ReachingDefinitions(Forward): + """Perform reaching definition analysis. + + Each statement is annotated with a set of (variable, definition) pairs. + """ + + def __init__(self, context): + super(ReachingDefinitions, self).__init__('definitions', context) + + def get_gen_kill(self, node, incoming): + definitions = activity.get_updated(node.value, self.context) + gen = frozenset((id_, node.value) for id_ in definitions) + kill = frozenset(def_ for def_ in incoming if def_[0] in definitions) + return gen, kill + + +class Defined(Forward): + """Perform defined variable analysis. + + Each statement is annotated with a set of variables which are guaranteed to + be defined at that point. 
+ """ + + def __init__(self, context): + super(Defined, self).__init__('defined', context, transfer_fn=operator.and_) + + def get_gen_kill(self, node, _): + gen = activity.get_updated(node.value, self.context) + return gen, frozenset() diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/cfg_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/cfg_test.py new file mode 100644 index 0000000000000000000000000000000000000000..af7eaf30e8d403acc18d79ac1dd9e98673c333a2 --- /dev/null +++ b/tensorflow/contrib/autograph/pyct/static_analysis/cfg_test.py @@ -0,0 +1,252 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for cfg module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools + +import gast + +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.static_analysis import cfg +from tensorflow.python.platform import test + + +class CFGTest(test.TestCase): + + def _parse_and_analyze(self, test_fn, namespace, arg_types=None): + arg_types = arg_types or {} + node, source = parser.parse_entity(test_fn) + ctx = context.EntityContext( + namer=None, + source_code=source, + source_file=None, + namespace=namespace, + arg_values=None, + arg_types=arg_types, + owner_type=None, + recursive=True) + node = qual_names.resolve(node) + return node, ctx + + def _check_anno_matches(self, node, anno_name, var_names): + if isinstance(var_names, str): + var_names = (var_names,) + qual_vars = set() + for var_name in var_names: + if isinstance(var_name, str): + if '[' in var_name or ']' in var_name: + raise ValueError('Annotation matching not supported with subscript.') + if '.' 
not in var_name: + qual_vars.add(qual_names.QN(var_name)) + else: + attrs = var_name.split('.') + this_qn = functools.reduce(qual_names.QN, attrs[1:], + qual_names.QN(attrs[0])) + qual_vars.add(this_qn) + self.assertEqual(anno.getanno(node, anno_name), qual_vars) + + def test_reaching(self): + + def f(x): + print(x) + while True: + x = x + x = x + return x + + node, ctx = self._parse_and_analyze(f, {}) + cfg.run_analyses(node, cfg.ReachingDefinitions(ctx)) + body = node.body[0].body + # Only the argument reaches the expression + def_in = anno.getanno(body[0], 'definitions_in') + # One element, x, from arguments + self.assertEqual(set(type(d[1]) for d in def_in), set((gast.arguments,))) + + while_body = body[1].body + def_in = anno.getanno(while_body[0], 'definitions_in') + # One definition, two possible sources. + # - One from an assignment (if the loop is entered) + # - The other from the arguments (if loop is not entered) + self.assertEqual( + set(type(d[1]) for d in def_in), set((gast.arguments, gast.Assign))) + + def_in = anno.getanno(while_body[1], 'definitions_in') + # If we've reached this line, the only reaching definition of x is the + # Assign node in previous line + self.assertEqual(set(type(d[1]) for d in def_in), set((gast.Assign,))) + + def_in = anno.getanno(body[2], 'definitions_in') + # Same situation as while_body[0] + self.assertEqual( + set(type(d[1]) for d in def_in), set((gast.arguments, gast.Assign))) + + def test_defined(self): + + def f(x): + if x: + y = 2 # pylint: disable=unused-variable + return x + + node, ctx = self._parse_and_analyze(f, {}) + cfg.run_analyses(node, cfg.Defined(ctx)) + body = node.body[0].body + # only x is for sure defined at the end + self._check_anno_matches(body[1], 'defined_in', 'x') + # at the end of the if body both x and y are defined + if_body = body[0].body + self._check_anno_matches(if_body[0], 'defined_out', ('x', 'y')) + + # TODO(alexbw): b/73926938 split this test up + def test_live(self): + + def 
get_live_annotated_fnbody(f): + node, ctx = self._parse_and_analyze(f, {}) + cfg.run_analyses(node, cfg.Liveness(ctx)) + body = node.body[0].body + return body + + def f1(x): + a = g(x) # pylint: disable=undefined-variable + b = h(a) # pylint: disable=undefined-variable, unused-variable + return x + + def f2(x, a): # pylint: disable=unused-argument + if a > 0: # x should not be live + x = 0 + if a > 1: + x = 1 + else: + x = 2 + + def f3(x, a): + if a > 0: # x and a should be live + x = 0 + if a > 1: # x and a should be live_in + x = 1 + return x # x should be live + + def f4(x, a): + if a > 0: # x should be live + x = 0 + x += 1 + + def f5(x, a): + if a > 0: # x.y should be live + x.y = 0 + return x.y + + def f6(x): + return x # should this cause x.* to be live? + + def f7(x, n): + for i in range(n): + x += i + return x + + def f8(x, f): + with f: + x += 1 + + body = get_live_annotated_fnbody(f1) + self._check_anno_matches(body[1], 'live_in', ('a', 'h', 'x')) + self._check_anno_matches(body[2], 'live_in', ('x')) + self._check_anno_matches(body[0], 'live_in', ('g', 'h', 'x')) + self._check_anno_matches(body[2], 'live_out', ()) + + body = get_live_annotated_fnbody(f2) + self._check_anno_matches(body[0], 'live_in', ('a')) + self._check_anno_matches(body[1], 'live_in', ('a')) + + body = get_live_annotated_fnbody(f3) + self._check_anno_matches(body[0], 'live_in', ('a', 'x')) + self._check_anno_matches(body[1], 'live_in', ('a', 'x')) + self._check_anno_matches(body[2], 'live_in', ('x')) + + body = get_live_annotated_fnbody(f4) + self._check_anno_matches(body[0], 'live_in', ('x', 'a')) + self._check_anno_matches(body[1], 'live_in', ('x')) + + body = get_live_annotated_fnbody(f5) + self._check_anno_matches(body[0], 'live_in', ('x', 'x.y', 'a')) + + body = get_live_annotated_fnbody(f6) + self._check_anno_matches(body[0], 'live_in', ('x')) + + body = get_live_annotated_fnbody(f7) + self._check_anno_matches(body[0], 'live_in', ('x', 'n', 'range')) + 
self._check_anno_matches(body[1], 'live_in', ('x')) + + body = get_live_annotated_fnbody(f8) + self._check_anno_matches(body[0], 'live_in', ('f', 'x')) + + def test_node_equality(self): + node_a = gast.parse('y = x').body[0] + node_b = gast.parse('y = x').body[0] + self.assertNotEqual(node_a, node_b) + + def test_nested_functions_defined(self): + + def f(x): + y = x * 2 + + def g(z): + return z + y + + return g(x) + + node, ctx = self._parse_and_analyze(f, {}) + cfg.run_analyses(node, cfg.Defined(ctx)) + + body = node.body[0].body + self.assertEqual( + anno.getanno(body[2], 'defined_in'), + frozenset(map(qual_names.QN, ('g', 'x', 'y')))) + + # TODO(alexbw): CFG analysis doesn't currently cross FunctionDef boundaries. + # NOTE: 'z' is easy to find, but 'y' is not identified as + # defined, because CFG analysis is applied with each function separately. + # fndef_body = body[1].body + # self.assertEqual( + # anno.getanno(fndef_body[0], 'defined_in'), + # frozenset(map(qual_names.QN, ('z', 'y')))) + + def test_nested_functions_dont_leak_definitions(self): + + def f(x): + print(x) + + def g(): + y = 2 + return y + + return g() # y is not defined here + + node, ctx = self._parse_and_analyze(f, {}) + cfg.run_analyses(node, cfg.Defined(ctx)) + body = node.body[0].body + self.assertEqual( + anno.getanno(body[2], 'defined_in'), + frozenset(map(qual_names.QN, ('x', 'g')))) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index 9d6cc9245aa463d0c8cfc7ad209736357b6c0323..f06b73c00d0bebb2717a79b7894e2addf914daba 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -501,11 +501,18 @@ def sparse_make_stats_update( example_partition_ids) # Compute aggregate stats for each partition. 
+ # Since unsorted_segment_sum can be numerically unstable, use 64bit + # operation. + gradients64 = math_ops.cast(gradients, dtypes.float64) + hessians64 = math_ops.cast(hessians, dtypes.float64) per_partition_gradients = math_ops.unsorted_segment_sum( - gradients, mapped_partitions, array_ops.size(unique_partitions)) + gradients64, mapped_partitions, array_ops.size(unique_partitions)) per_partition_hessians = math_ops.unsorted_segment_sum( - hessians, mapped_partitions, array_ops.size(unique_partitions)) - + hessians64, mapped_partitions, array_ops.size(unique_partitions)) + per_partition_gradients = math_ops.cast(per_partition_gradients, + dtypes.float32) + per_partition_hessians = math_ops.cast(per_partition_hessians, + dtypes.float32) # Prepend a bias feature per partition that accumulates the stats for all # examples in that partition. bias_feature_ids = array_ops.fill( diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py index 1b184d296b329cee481db67992e77d1e33e18035..50cc00afdcc77fedc9bf8c94a9a6fcf2a28ebde9 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py @@ -187,7 +187,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): stamp_token: Expected current token. next_stamp_token: Next value for the token. Returns: - A list of quantiles or approximate boundaries. + The flush operation. 
""" return gen_quantile_ops.quantile_accumulator_flush( quantile_accumulator_handle=self._quantile_accumulator_handle, diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 08c1dcdd028829e6ef290965347d184ed42f416d..e53d86ec612f299c800753d67ceee79acb5db497 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -369,7 +369,7 @@ class GradientBoostedDecisionTreeModel(object): Returns: a dictionary of prediction results - ENSEMBLE_STAMP, PREDICTION, PARTITION_IDS, - NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPED. + NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPTED. """ ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle, ensemble_stamp) diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py index 1192cc44a17823f69db28947308a8b839a83e57e..e529b25b3caa1e9f7e08522de9e08401ef639eca 100644 --- a/tensorflow/contrib/checkpoint/__init__.py +++ b/tensorflow/contrib/checkpoint/__init__.py @@ -16,7 +16,10 @@ For creating and managing dependencies: +@@CheckpointableObjectGraph @@dot_graph_from_checkpoint +@@object_metadata +@@NoDependency @@split_dependency """ @@ -26,6 +29,9 @@ from __future__ import print_function from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint +from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import CheckpointableObjectGraph +from tensorflow.python.training.checkpointable import NoDependency +from tensorflow.python.training.checkpointable_utils import object_metadata from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/checkpoint/python/visualize.py b/tensorflow/contrib/checkpoint/python/visualize.py index 
86fbdb41d2c37803f2bd71b5aa2f72845c87d448..9a3b23bb2c30ee601f5f94da31ad182399a04e4f 100644 --- a/tensorflow/contrib/checkpoint/python/visualize.py +++ b/tensorflow/contrib/checkpoint/python/visualize.py @@ -17,10 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.core.protobuf import checkpointable_object_graph_pb2 from tensorflow.python import pywrap_tensorflow -from tensorflow.python.framework import errors_impl from tensorflow.python.training import checkpointable +from tensorflow.python.training import checkpointable_utils def dot_graph_from_checkpoint(save_path): @@ -52,20 +51,9 @@ def dot_graph_from_checkpoint(save_path): A graph in DOT format as a string. """ reader = pywrap_tensorflow.NewCheckpointReader(save_path) - try: - object_graph_string = reader.get_tensor( - checkpointable.OBJECT_GRAPH_PROTO_KEY) - except errors_impl.NotFoundError: - raise ValueError( - ('The specified checkpoint "%s" does not appear to be object-based (it ' - 'is missing the key "%s"). 
Likely it was created with a name-based ' - 'saver and does not contain an object dependency graph.') % ( - save_path, checkpointable.OBJECT_GRAPH_PROTO_KEY)) + object_graph = checkpointable_utils.object_metadata(save_path) shape_map = reader.get_variable_to_shape_map() dtype_map = reader.get_variable_to_dtype_map() - object_graph = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph()) - object_graph.ParseFromString(object_graph_string) graph = 'digraph {\n' def _escape(name): return name.replace('"', '\\"') diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index d81f6a0ae8a445de86b505ddd103b9b704f3443b..0708d6b7b9f0ba549aea091a265f42890e50d223 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -172,19 +172,20 @@ if (tensorflow_OPTIMIZE_FOR_NATIVE_ARCH) endif() endif() +include(CheckCXXCompilerFlag) + +# OpenMP Support +CHECK_CXX_COMPILER_FLAG("-fopenmp" GCC_OPENMP_SUPPORT) +if (GCC_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") +endif() +CHECK_CXX_COMPILER_FLAG("/openmp" MSVC_OPENMP_SUPPORT) +if (MSVC_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") +endif() + # MSVC SIMD instructions if (tensorflow_WIN_CPU_SIMD_OPTIONS) - include(CheckCXXCompilerFlag) - if (tensorflow_ENABLE_MKL_SUPPORT) - add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) - if (NOT tensorflow_ENABLE_MKLDNN_SUPPORT) - add_definitions(-DINTEL_MKL_ML) - endif() - endif() - CHECK_CXX_COMPILER_FLAG("-fopenmp" COMPILER_OPT_OPENMP_SUPPORT) - if (COMPILER_OPT_OPENMP_SUPPORT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") - endif() if (WIN32) CHECK_CXX_COMPILER_FLAG(${tensorflow_WIN_CPU_SIMD_OPTIONS} COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) if(COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) @@ -323,7 +324,9 @@ if(HAIKU) list(APPEND tensorflow_EXTERNAL_LIBRARIES network) endif() +# MKL Support if (tensorflow_ENABLE_MKL_SUPPORT) + add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) 
if (WIN32) find_path(MKL_HOME_PLATFORM mkl PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ @@ -359,6 +362,8 @@ if (tensorflow_ENABLE_MKL_SUPPORT) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn) include_directories(${mkldnn_INCLUDE_DIRS}) + else (tensorflow_ENABLE_MKLDNN_SUPPORT) + add_definitions(-DINTEL_MKL_ML) endif() endif (tensorflow_ENABLE_MKL_SUPPORT) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index 116d42309394b92407cef79c9d3a975f494bc3ff..8942f3eecf07fff893884795a104422529357bf8 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -31,7 +31,8 @@ else (systemlib_ZLIB) set(ZLIB_URL https://github.com/madler/zlib) set(ZLIB_BUILD ${CMAKE_CURRENT_BINARY_DIR}/zlib/src/zlib) set(ZLIB_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/zlib/install) - set(ZLIB_TAG 50893291621658f355bc5b4d450a8d06a563053d) + # Match zlib version in tensorflow/workspace.bzl + set(ZLIB_TAG v1.2.11) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index f38c9e05135f9f8d2fb3e2efedb7223e06e4983a..1505d3e2083b5a3446a7f85d59c73816e65e1a2a 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -68,6 +68,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" 
"${tensorflow_source_dir}/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc" diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 92f2ab6dea8e7da5dd8481639eda24e31c06848f..5942ff3363a96de70df7e13d0857e4ad82e35fee 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -267,6 +267,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/py_func_test.py" + # Flaky on Windows cpu with py36 (b/73556968) + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/sparse_reshape_op_test.py" # Windows file management related issues. "${tensorflow_source_dir}/tensorflow/python/training/evaluation_test.py" # training tests diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index d2beff849eb8d177b9a4e8e6fea1943a17748fd3..2d2cbdc1990ed9d8e58c0032cbc141a52271838f 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -52,6 +52,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops @@ -147,7 +148,9 @@ def crf_log_norm(inputs, sequence_lengths, transition_params): # partition function. forward_cell = CrfForwardRnnCell(transition_params) # Sequence length is not allowed to be less than zero. 
- sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1) + sequence_lengths_less_one = math_ops.maximum( + constant_op.constant(0, dtype=sequence_lengths.dtype), + sequence_lengths - 1) _, alphas = rnn.dynamic_rnn( cell=forward_cell, inputs=rest_of_input, diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index d59dd17aea42618075e69516bcfa4ee2b9eafc81..6017e27e731e3e8bcdee516ea291b17cd0782e63 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -32,7 +32,7 @@ py_test( py_test( name = "bucketing_test", - size = "small", + size = "medium", srcs = ["bucketing_test.py"], srcs_version = "PY2AND3", deps = [ @@ -320,11 +320,15 @@ py_test( deps = [ "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index a4a0ce79b6013d8813f2d8d294168ea8189d53ef..2568b899d7ea1be685036ad8af93f584f861c951 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -427,7 +427,9 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None], dataset.output_shapes[1][0].as_list()) self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list()) - def _testMapAndBatchDatasetHelper(self, num_parallel_batches=1): + def _testMapAndBatchDatasetHelper(self, + num_parallel_calls=None, + num_parallel_batches=None): """Test a dataset that maps a TF function 
across its input elements.""" # The pipeline is TensorSliceDataset -> # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size). @@ -446,6 +448,7 @@ class BatchDatasetTest(test.TestCase): batching.map_and_batch( map_func=_map_fn, batch_size=batch_size, + num_parallel_calls=num_parallel_calls, num_parallel_batches=num_parallel_batches)) .make_initializable_iterator()) init_op = iterator.initializer @@ -497,12 +500,18 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) - def testMapAndBatchDataset(self): + def testMapAndBatch(self): return self._testMapAndBatchDatasetHelper() - def testMapAndBatchDatasetWithParallelBatching(self): + def testMapAndBatchWithParallelBatches(self): return self._testMapAndBatchDatasetHelper(num_parallel_batches=10) + def testMapAndBatchWithSequentialCalls(self): + return self._testMapAndBatchDatasetHelper(num_parallel_calls=1) + + def testMapAndBatchWithParallelCalls(self): + return self._testMapAndBatchDatasetHelper(num_parallel_calls=2) + def _testMapAndBatchPartialBatchHelper(self, drop_remainder=False): iterator = ( dataset_ops.Dataset.range(10).apply( @@ -630,9 +639,7 @@ class BatchDatasetSerializationTest( lambda x: array_ops.fill([x], x)).apply( batching.dense_to_sparse_batch(4, [12])) - # TODO(b/70988345): Re-enable when sparse tensors are properly supported by - # the DatasetSerializationTestBase. 
- def _testDenseToSparseBatchDatasetCore(self): + def testDenseToSparseBatchDatasetCore(self): components = np.random.randint(5, size=(40,)).astype(np.int32) diff_comp = np.random.randint(2, size=(100,)).astype(np.int32) @@ -684,7 +691,7 @@ class UnbatchDatasetSerializationTest( class MapAndBatchDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): - def testSerializationCore(self): + def testNumParallelBatches(self): range_size = 11 num_repeats = 2 batch_size = 5 @@ -711,6 +718,33 @@ class MapAndBatchDatasetSerializationTest( self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True), num_outputs_drop_remainder) + def testNumParallelCalls(self): + range_size = 11 + num_repeats = 2 + batch_size = 5 + total_outputs = range_size * num_repeats + num_outputs_drop_remainder = total_outputs // batch_size + num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size)) + num_parallel_calls = 7 + + def build_ds(range_start, drop_remainder=False): + + def _map_fn(x): + return math_ops.square(x) + + return dataset_ops.Dataset.range( + range_start, range_start + range_size).repeat(num_repeats).apply( + batching.map_and_batch( + map_func=_map_fn, + batch_size=batch_size, + num_parallel_calls=num_parallel_calls, + drop_remainder=drop_remainder)) + + self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15), + num_outputs_keep_remainder) + self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True), + num_outputs_drop_remainder) + class PaddedBatchDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index 55a56b83a8efba899c6b296264d766839a824da5..bd3e034211c4aa454e4f8f6b09f14935d7a3b35c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py 
@@ -28,6 +28,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -35,6 +36,179 @@ from tensorflow.python.ops import string_ops from tensorflow.python.platform import test +class GroupByReducerTest(test.TestCase): + + def checkResults(self, dataset, shapes, values): + self.assertEqual(shapes, dataset.output_shapes) + get_next = dataset.make_one_shot_iterator().get_next() + with self.test_session() as sess: + for expected in values: + got = sess.run(get_next) + self.assertEqual(got, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSum(self): + reducer = grouping.Reducer( + init_func=lambda _: np.int64(0), + reduce_func=lambda x, y: x + y, + finalize_func=lambda x: x) + for i in range(1, 11): + dataset = dataset_ops.Dataset.range(2 * i).apply( + grouping.group_by_reducer(lambda x: x % 2, reducer)) + self.checkResults( + dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i]) + + def testAverage(self): + + def reduce_fn(x, y): + return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / ( + x[1] + 1), x[1] + 1 + + reducer = grouping.Reducer( + init_func=lambda _: (0.0, 0.0), + reduce_func=reduce_fn, + finalize_func=lambda x: x[0]) + for i in range(1, 11): + dataset = dataset_ops.Dataset.range(2 * i).apply( + grouping.group_by_reducer( + lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer)) + self.checkResults( + dataset, shapes=tensor_shape.scalar(), values=[i - 1, i]) + + def testConcat(self): + components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray) + reducer = grouping.Reducer( + init_func=lambda x: "", + reduce_func=lambda x, y: x + y[0], + 
finalize_func=lambda x: x) + for i in range(1, 11): + dataset = dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensor_slices(components), + dataset_ops.Dataset.range(2 * i))).apply( + grouping.group_by_reducer(lambda x, y: y % 2, reducer)) + self.checkResults( + dataset, + shapes=tensor_shape.scalar(), + values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]]) + + def testSparseSum(self): + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1], dtype=np.int64)), + dense_shape=np.array([1, 1])) + + reducer = grouping.Reducer( + init_func=lambda _: _sparse(np.int64(0)), + reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]), + finalize_func=lambda x: x.values[0]) + for i in range(1, 11): + dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply( + grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer)) + self.checkResults( + dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i]) + + def testChangingStateShape(self): + + def reduce_fn(x, _): + # Statically known rank, but dynamic length. + larger_dim = array_ops.concat([x[0], x[0]], 0) + # Statically unknown rank. 
+ larger_rank = array_ops.expand_dims(x[1], 0) + return larger_dim, larger_rank + + reducer = grouping.Reducer( + init_func=lambda x: ([0], 1), + reduce_func=reduce_fn, + finalize_func=lambda x: x) + + for i in range(1, 11): + dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply( + grouping.group_by_reducer(lambda x: x, reducer)) + self.assertEqual([None], dataset.output_shapes[0].as_list()) + self.assertIs(None, dataset.output_shapes[1].ndims) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + with self.test_session() as sess: + x, y = sess.run(get_next) + self.assertAllEqual([0] * (2**i), x) + self.assertAllEqual(np.array(1, ndmin=i), y) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testTypeMismatch(self): + reducer = grouping.Reducer( + init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32), + reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64), + finalize_func=lambda x: x) + + dataset = dataset_ops.Dataset.range(10) + with self.assertRaisesRegexp( + TypeError, + "The element types for the new state must match the initial state."): + dataset.apply( + grouping.group_by_reducer(lambda _: np.int64(0), reducer)) + + # TODO(b/78665031): Remove once non-scalar keys are supported. + def testInvalidKeyShape(self): + reducer = grouping.Reducer( + init_func=lambda x: np.int64(0), + reduce_func=lambda x, y: x + y, + finalize_func=lambda x: x) + + dataset = dataset_ops.Dataset.range(10) + with self.assertRaisesRegexp( + ValueError, "`key_func` must return a single tf.int64 tensor."): + dataset.apply( + grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer)) + + # TODO(b/78665031): Remove once non-int64 keys are supported. 
+ def testInvalidKeyType(self): + reducer = grouping.Reducer( + init_func=lambda x: np.int64(0), + reduce_func=lambda x, y: x + y, + finalize_func=lambda x: x) + + dataset = dataset_ops.Dataset.range(10) + with self.assertRaisesRegexp( + ValueError, "`key_func` must return a single tf.int64 tensor."): + dataset.apply( + grouping.group_by_reducer(lambda _: "wrong", reducer)) + + +class GroupByReducerSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, components): + reducer = grouping.Reducer( + init_func=lambda _: np.int64(0), + reduce_func=lambda x, y: x + y, + finalize_func=lambda x: x) + + return dataset_ops.Dataset.from_tensor_slices(components).apply( + grouping.group_by_reducer(lambda x: x % 5, reducer)) + + def testCoreGroupByReducer(self): + components = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + self.verify_unused_iterator( + lambda: self._build_dataset(components), 5, verify_exhausted=True) + self.verify_init_before_restore( + lambda: self._build_dataset(components), 5, verify_exhausted=True) + self.verify_multiple_breaks( + lambda: self._build_dataset(components), 5, verify_exhausted=True) + self.verify_reset_restored_iterator( + lambda: self._build_dataset(components), 5, verify_exhausted=True) + self.verify_restore_in_empty_graph( + lambda: self._build_dataset(components), 5, verify_exhausted=True) + diff_components = np.array([5, 4, 3, 2, 1, 0], dtype=np.int64) + self.verify_restore_in_modified_graph( + lambda: self._build_dataset(components), + lambda: self._build_dataset(diff_components), + 5, + verify_exhausted=True) + + class GroupByWindowTest(test.TestCase): def testSimple(self): diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 5f47dcb33999119a690bd633f0c97a12a1ae1c84..bdc003a8a5bd646e1d5c598befa2694da512d0a9 100644 --- 
a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -18,6 +18,9 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import time +from absl.testing import parameterized from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops @@ -30,52 +33,98 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat -class ResampleTest(test.TestCase): +def _time_resampling( + test_obj, data_np, target_dist, init_dist, num_to_sample): + dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat() - def testInitialKnownDistribution(self): - self._testDistribution(initial_known=True) + # Reshape distribution via rejection sampling. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist, + seed=142)) - def testInitialNotKnownDistribution(self): - self._testDistribution(initial_known=False) + get_next = dataset.make_one_shot_iterator().get_next() - def _testDistribution(self, initial_known): + with test_obj.test_session() as sess: + start_time = time.time() + for _ in xrange(num_to_sample): + sess.run(get_next) + end_time = time.time() + + return end_time - start_time + + +class ResampleTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ("InitialDistributionKnown", True), + ("InitialDistributionUnknown", False)) + def testDistribution(self, initial_known): classes = np.random.randint(5, size=(20000,)) # Uniformly sampled target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] initial_dist = [0.2] * 5 if initial_known else None - iterator = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle( - 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply( - resampling.rejection_resample( - 
target_dist=target_dist, - initial_dist=initial_dist, - class_func=lambda c, _: c, - seed=27)).make_one_shot_iterator()) - get_next = iterator.get_next() + classes = math_ops.to_int64(classes) # needed for Windows build. + dataset = dataset_ops.Dataset.from_tensor_slices(classes).shuffle( + 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).repeat() + + get_next = dataset.apply( + resampling.rejection_resample( + target_dist=target_dist, + initial_dist=initial_dist, + class_func=lambda c, _: c, + seed=27)).make_one_shot_iterator().get_next() with self.test_session() as sess: returned = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - returned.append(sess.run(get_next)) + while len(returned) < 4000: + returned.append(sess.run(get_next)) returned_classes, returned_classes_and_data = zip(*returned) _, returned_data = zip(*returned_classes_and_data) self.assertAllEqual([compat.as_bytes(str(c)) for c in returned_classes], returned_data) total_returned = len(returned_classes) - # Subsampling rejects a large percentage of the initial data in - # this case. - self.assertGreater(total_returned, 20000 * 0.2) class_counts = np.array([ len([True for v in returned_classes if v == c]) for c in range(5)]) returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + @parameterized.named_parameters( + ("OnlyInitial", True), + ("NotInitial", False)) + def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist): + init_dist = [0.5, 0.5] + target_dist = [0.5, 0.5] if only_initial_dist else [0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test that this works. + num_samples = 100 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Reshape distribution. 
+ dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + def testRandomClasses(self): init_dist = [0.25, 0.25, 0.25, 0.25] target_dist = [0.0, 0.0, 0.0, 1.0] num_classes = len(init_dist) - # We don't need many samples to test a dirac-delta target distribution + # We don't need many samples to test a dirac-delta target distribution. num_samples = 100 data_np = np.random.choice(num_classes, num_samples, p=init_dist) @@ -109,5 +158,23 @@ class ResampleTest(test.TestCase): self.assertAllClose(target_dist, bincount, atol=1e-2) + +class ResampleDatasetBenchmark(test.Benchmark): + + def benchmarkResamplePerformance(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test a dirac-delta target distribution + num_samples = 1000 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + resample_time = _time_resampling( + self, data_np, target_dist, init_dist, num_to_sample=1000) + + self.report_benchmark( + iters=1000, wall_time=resample_time, name="benchmark_resample") + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py index 1a97a84b2cba13e82c8af9c4c8ee413ee8264a5e..eb2ceff893543f710d4f0246adf4e6367a2deeb0 100644 --- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from 
tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -35,15 +36,19 @@ from tensorflow.python.platform import test class ScanDatasetTest(test.TestCase): - def _count(self, start, step): - return dataset_ops.Dataset.from_tensors(0).repeat(None).apply( - scan_ops.scan(start, lambda state, _: (state + step, state))) + def _counting_dataset(self, start, scan_fn): + return dataset_ops.Dataset.from_tensors(0).repeat().apply( + scan_ops.scan(start, scan_fn)) def testCount(self): + def make_scan_fn(step): + return lambda state, _: (state + step, state) + start = array_ops.placeholder(dtypes.int32, shape=[]) step = array_ops.placeholder(dtypes.int32, shape=[]) take = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = self._count(start, step).take(take).make_initializable_iterator() + iterator = self._counting_dataset( + start, make_scan_fn(step)).take(take).make_initializable_iterator() next_element = iterator.get_next() with self.test_session() as sess: @@ -78,6 +83,37 @@ class ScanDatasetTest(test.TestCase): self.assertEqual(5, self.evaluate(next_element())) self.assertEqual(8, self.evaluate(next_element())) + def testSparseCount(self): + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + + def make_scan_fn(step): + return lambda state, _: (_sparse(state.values[0] + step), state) + + start = array_ops.placeholder(dtypes.int32, shape=[]) + step = array_ops.placeholder(dtypes.int32, shape=[]) + take = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = self._counting_dataset( + _sparse(start), + make_scan_fn(step)).take(take).make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + + for start_val, step_val, take_val in [(0, 1, 
10), (0, 1, 0), (10, 1, 10), + (10, 2, 10), (10, -1, 10), + (10, -2, 10)]: + sess.run(iterator.initializer, + feed_dict={start: start_val, step: step_val, take: take_val}) + for expected, _ in zip( + itertools.count(start_val, step_val), range(take_val)): + self.assertEqual(expected, sess.run(next_element).values[0]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testChangingStateShape(self): # Test the fixed-point shape invariant calculations: start with # initial values with known shapes, and use a scan function that @@ -132,7 +168,7 @@ class ScanDatasetTest(test.TestCase): scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn)) -class ScanDatasetSerialzationTest( +class ScanDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): def _build_dataset(self, num_elements): diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 5b04c5316cfbb7577b3f8b3b6d364fc665d14c21..7a3e42cc72755c67b910db99c0238f6ba780a942 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -193,6 +193,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", + ":interleave_ops", ":scan_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -202,6 +203,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python/data/ops:dataset_ops", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 2152bcde84aae6b0c2b368e43750aafab3a04bf2..b9393de4e90ae2597045b29070934b94e18cfcbd 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -364,7 +364,7 @@ class _RestructuredDataset(dataset_ops.Dataset): with the structure of `dataset`. 
""" super(_RestructuredDataset, self).__init__() - self._dataset = dataset + self._input_dataset = dataset if not allow_unsafe_cast: # Validate that the types are compatible. @@ -408,7 +408,7 @@ class _RestructuredDataset(dataset_ops.Dataset): self._output_classes = output_classes def _as_variant_tensor(self): - return self._dataset._as_variant_tensor() # pylint: disable=protected-access + return self._input_dataset._as_variant_tensor() # pylint: disable=protected-access @property def output_classes(self): @@ -466,14 +466,14 @@ def assert_element_shape(expected_shapes): class _MapAndBatchDataset(dataset_ops.MapDataset): """A `Dataset` that maps a function over a batch of elements.""" - def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches, + def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls, drop_remainder): """See `Dataset.map()` for details.""" super(_MapAndBatchDataset, self).__init__(input_dataset, map_func) self._batch_size_t = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") - self._num_parallel_batches_t = ops.convert_to_tensor( - num_parallel_batches, dtype=dtypes.int64, name="num_parallel_batches") + self._num_parallel_calls_t = ops.convert_to_tensor( + num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls") self._drop_remainder_t = ops.convert_to_tensor( drop_remainder, dtype=dtypes.bool, name="drop_remainder") @@ -483,12 +483,12 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): def _as_variant_tensor(self): # pylint: disable=protected-access input_resource = self._input_dataset._as_variant_tensor() - return gen_dataset_ops.map_and_batch_dataset( + return gen_dataset_ops.map_and_batch_dataset_v2( input_resource, self._map_func.captured_inputs, f=self._map_func, batch_size=self._batch_size_t, - num_parallel_batches=self._num_parallel_batches_t, + num_parallel_calls=self._num_parallel_calls_t, drop_remainder=self._drop_remainder_t, output_types=nest.flatten( 
sparse.as_dense_types(self.output_types, self.output_classes)), @@ -511,8 +511,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): def map_and_batch(map_func, batch_size, - num_parallel_batches=1, - drop_remainder=False): + num_parallel_batches=None, + drop_remainder=False, + num_parallel_calls=None): """Fused implementation of `map` and `batch`. Maps `map_func` across `batch_size` consecutive elements of this dataset @@ -528,21 +529,37 @@ def map_and_batch(map_func, nested structure of tensors. batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of consecutive elements of this dataset to combine in a single batch. - num_parallel_batches: A `tf.int64` scalar `tf.Tensor`, representing the - number of batches to create in parallel. On one hand, higher values can - help mitigate the effect of stragglers. On the other hand, higher values - can increase contention if CPU is scarce. - drop_remainder: A `tf.bool` scalar `tf.Tensor`, representing whether the - last batch should be dropped in case its size is smaller than desired; - the default behavior is not to drop the smaller batch. + num_parallel_batches: (Optional.) A `tf.int64` scalar `tf.Tensor`, + representing the number of batches to create in parallel. On one hand, + higher values can help mitigate the effect of stragglers. On the other + hand, higher values can increase contention if CPU is scarce. + drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing + whether the last batch should be dropped in case its size is smaller than + desired; the default behavior is not to drop the smaller batch. + num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`, + representing the number of elements to process in parallel. If not + specified, `batch_size * num_parallel_batches` elements will be + processed in parallel. Returns: A `Dataset` transformation function, which can be passed to @{tf.data.Dataset.apply}. 
+ + Raises: + ValueError: If both `num_parallel_batches` and `num_parallel_calls` are + specified. """ + if num_parallel_batches is None and num_parallel_calls is None: + num_parallel_calls = batch_size + elif num_parallel_batches is not None and num_parallel_calls is None: + num_parallel_calls = batch_size * num_parallel_batches + elif num_parallel_batches is not None and num_parallel_calls is not None: + raise ValueError("The `num_parallel_batches` and `num_parallel_calls` " + "arguments are mutually exclusive.") + def _apply_fn(dataset): return _MapAndBatchDataset(dataset, map_func, batch_size, - num_parallel_batches, drop_remainder) + num_parallel_calls, drop_remainder) return _apply_fn diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 0531f9cbb9da6e6df85fa46940ab1661ad742eb4..ea229b5b27b117984e508fa4edc6f1cf713008b4 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops @@ -33,6 +34,35 @@ from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import math_ops +def group_by_reducer(key_func, reducer): + """A transformation that groups elements and performs a reduction. + + This transformation maps element of a dataset to a key using `key_func` and + groups the elements by key. 
The `reducer` is used to process each group; its + `init_func` is used to initialize state for each group when it is created, the + `reduce_func` is used to update the state every time an element is mapped to + the matching group, and the `finalize_func` is used to map the final state to + an output value. + + Args: + key_func: A function mapping a nested structure of tensors + (having shapes and types defined by `self.output_shapes` and + `self.output_types`) to a scalar `tf.int64` tensor. + reducer: An instance of `Reducer`, which captures the reduction logic using + the `init_func`, `reduce_func`, and `finalize_func` functions. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + return GroupByReducerDataset(dataset, key_func, reducer) + + return _apply_fn + + def group_by_window(key_func, reduce_func, window_size=None, @@ -227,6 +257,250 @@ class _VariantDataset(dataset_ops.Dataset): return self._output_types +class GroupByReducerDataset(dataset_ops.Dataset): + """A `Dataset` that groups its input and performs a reduction.""" + + def __init__(self, input_dataset, key_func, reducer): + """See `group_by_reducer()` for details.""" + super(GroupByReducerDataset, self).__init__() + + self._input_dataset = input_dataset + + self._make_key_func(key_func, input_dataset) + self._make_init_func(reducer.init_func) + self._make_reduce_func(reducer.reduce_func, input_dataset) + self._make_finalize_func(reducer.finalize_func) + + def _make_key_func(self, key_func, input_dataset): + """Make wrapping Defun for key_func.""" + + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) + def tf_key_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + # Pass in shape information from the input_dataset. 
+ dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): + arg.set_shape(shape) + + nested_args = nest.pack_sequence_as(input_dataset.output_types, args) + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) + # pylint: disable=protected-access + if dataset_ops._should_unpack_args(nested_args): + ret = key_func(*nested_args) + # pylint: enable=protected-access + else: + ret = key_func(nested_args) + ret = ops.convert_to_tensor(ret) + if ret.dtype != dtypes.int64 or ret.get_shape() != tensor_shape.scalar(): + raise ValueError( + "`key_func` must return a single tf.int64 tensor. " + "Got type=%s and shape=%s" % (ret.dtype, ret.get_shape())) + return ret + + self._key_func = tf_key_func + self._key_func.add_to_graph(ops.get_default_graph()) + + def _make_init_func(self, init_func): + """Make wrapping Defun for init_func.""" + + @function.Defun(dtypes.int64) + def tf_init_func(key): + """A wrapper for Defun that facilitates shape inference.""" + key.set_shape([]) + ret = init_func(key) + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. + ret = nest.pack_sequence_as(ret, [ + sparse_tensor.SparseTensor.from_value(t) + if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) + ]) + + self._state_classes = sparse.get_classes(ret) + self._state_shapes = nest.pack_sequence_as( + ret, [t.get_shape() for t in nest.flatten(ret)]) + self._state_types = nest.pack_sequence_as( + ret, [t.dtype for t in nest.flatten(ret)]) + + # Serialize any sparse tensors. 
+ ret = nest.pack_sequence_as( + ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) + return nest.flatten(ret) + + self._init_func = tf_init_func + self._init_func.add_to_graph(ops.get_default_graph()) + + def _make_reduce_func(self, reduce_func, input_dataset): + """Make wrapping Defun for reduce_func.""" + + # Iteratively rerun the reduce function until reaching a fixed point on + # `self._state_shapes`. + need_to_rerun = True + while need_to_rerun: + + # Create a list in which `tf_reduce_func` will store the new shapes. + flat_new_state_shapes = [] + + @function.Defun(*(nest.flatten( + sparse.as_dense_types( + self._state_types, self._state_classes)) + nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes)))) + def tf_reduce_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + for arg, shape in zip( + args, + nest.flatten( + sparse.as_dense_shapes(self._state_shapes, self._state_classes)) + + nest.flatten( + sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes))): + arg.set_shape(shape) + + pivot = len(nest.flatten(self._state_shapes)) + nested_state_args = nest.pack_sequence_as(self._state_types, + args[:pivot]) + nested_state_args = sparse.deserialize_sparse_tensors( + nested_state_args, self._state_types, self._state_shapes, + self._state_classes) + nested_input_args = nest.pack_sequence_as(input_dataset.output_types, + args[pivot:]) + nested_input_args = sparse.deserialize_sparse_tensors( + nested_input_args, input_dataset.output_types, + input_dataset.output_shapes, input_dataset.output_classes) + + ret = reduce_func(nested_state_args, nested_input_args) + + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. 
+ ret = nest.pack_sequence_as(ret, [ + sparse_tensor.SparseTensor.from_value(t) + if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) + ]) + + # Extract shape information from the returned values. + flat_new_state = nest.flatten(ret) + flat_new_state_shapes.extend([t.get_shape() for t in flat_new_state]) + + # Extract and validate type information from the returned values. + for t, dtype in zip(flat_new_state, nest.flatten(self._state_types)): + if t.dtype != dtype: + raise TypeError( + "The element types for the new state must match the initial " + "state. Expected %s; got %s." % + (self._state_types, + nest.pack_sequence_as(self._state_types, + [t.dtype for t in flat_new_state]))) + + # Serialize any sparse tensors. + ret = nest.pack_sequence_as( + ret, + [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) + return nest.flatten(ret) + + # Use the private method that will execute `tf_reduce_func` but delay + # adding it to the graph in case we need to rerun the function. 
+ tf_reduce_func._create_definition_if_needed() # pylint: disable=protected-access + + flat_state_shapes = nest.flatten(self._state_shapes) + weakened_state_shapes = [ + old.most_specific_compatible_shape(new) + for old, new in zip(flat_state_shapes, flat_new_state_shapes) + ] + + need_to_rerun = False + for old_shape, weakened_shape in zip(flat_state_shapes, + weakened_state_shapes): + if old_shape.ndims is not None and ( + weakened_shape.ndims is None or + old_shape.as_list() != weakened_shape.as_list()): + need_to_rerun = True + break + + if need_to_rerun: + self._state_shapes = nest.pack_sequence_as(self._state_shapes, + weakened_state_shapes) + + self._reduce_func = tf_reduce_func + self._reduce_func.add_to_graph(ops.get_default_graph()) + + def _make_finalize_func(self, finalize_func): + """Make wrapping Defun for finalize_func.""" + + @function.Defun(*(nest.flatten( + sparse.as_dense_types(self._state_types, self._state_classes)))) + def tf_finalize_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + for arg, shape in zip( + args, + nest.flatten( + sparse.as_dense_shapes(self._state_shapes, self._state_classes))): + arg.set_shape(shape) + + nested_args = nest.pack_sequence_as(self._state_types, args) + nested_args = sparse.deserialize_sparse_tensors( + nested_args, self._state_types, self._state_shapes, + self._state_classes) + + ret = finalize_func(nested_args) + + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. + ret = nest.pack_sequence_as(ret, [ + sparse_tensor.SparseTensor.from_value(t) + if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) + ]) + + self._output_classes = sparse.get_classes(ret) + self._output_shapes = nest.pack_sequence_as( + ret, [t.get_shape() for t in nest.flatten(ret)]) + self._output_types = nest.pack_sequence_as( + ret, [t.dtype for t in nest.flatten(ret)]) + + # Serialize any sparse tensors. 
+ ret = nest.pack_sequence_as( + ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) + return nest.flatten(ret) + + self._finalize_func = tf_finalize_func + self._finalize_func.add_to_graph(ops.get_default_graph()) + + @property + def output_classes(self): + return self._output_classes + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + def _as_variant_tensor(self): + return gen_dataset_ops.group_by_reducer_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._key_func.captured_inputs, + self._init_func.captured_inputs, + self._reduce_func.captured_inputs, + self._finalize_func.captured_inputs, + key_func=self._key_func, + init_func=self._init_func, + reduce_func=self._reduce_func, + finalize_func=self._finalize_func, + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + class GroupByWindowDataset(dataset_ops.Dataset): """A `Dataset` that groups its input and performs a windowed reduction.""" @@ -336,3 +610,30 @@ class GroupByWindowDataset(dataset_ops.Dataset): sparse.as_dense_types(self.output_types, self.output_classes)), output_shapes=nest.flatten( sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + +class Reducer(object): + """A reducer is used for reducing a set of elements. 
+ + A reducer is represented as a tuple of the three functions: + 1) initialization function: key => initial state + 2) reduce function: (old state, input) => new state + 3) finalization function: state => result + """ + + def __init__(self, init_func, reduce_func, finalize_func): + self._init_func = init_func + self._reduce_func = reduce_func + self._finalize_func = finalize_func + + @property + def init_func(self): + return self._init_func + + @property + def reduce_func(self): + return self._reduce_func + + @property + def finalize_func(self): + return self._finalize_func diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index a182dddd38d23d096979eebb8de29f07573833dd..bad6edd5147d832228c412919f1e6e782aafc40f 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -20,10 +20,12 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops @@ -50,79 +52,182 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): A `Dataset` transformation function, which can be passed to @{tf.data.Dataset.apply}. """ - def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - dist_estimation_batch_size = 32 target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") class_values_ds = dataset.map(class_func) + + # Get initial distribution. 
if initial_dist is not None: initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") - acceptance_dist = _calculate_acceptance_probs(initial_dist_t, - target_dist_t) + acceptance_dist, prob_of_original = ( + _calculate_acceptance_probs_with_mixing(initial_dist_t, + target_dist_t)) initial_dist_ds = dataset_ops.Dataset.from_tensors( initial_dist_t).repeat() acceptance_dist_ds = dataset_ops.Dataset.from_tensors( acceptance_dist).repeat() + prob_of_original_ds = dataset_ops.Dataset.from_tensors( + prob_of_original).repeat() + else: + initial_dist_ds = _estimate_initial_dist_ds( + target_dist_t, class_values_ds) + acceptance_and_original_prob_ds = initial_dist_ds.map( + lambda initial: _calculate_acceptance_probs_with_mixing( + initial, target_dist_t)) + acceptance_dist_ds = acceptance_and_original_prob_ds.map( + lambda accept_prob, _: accept_prob) + prob_of_original_ds = acceptance_and_original_prob_ds.map( + lambda _, prob_original: prob_original) + filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, + class_values_ds, seed) + # Prefetch filtered dataset for speed. 
+ filtered_ds = filtered_ds.prefetch(3) + + prob_original_static = _get_prob_original_static( + initial_dist_t, target_dist_t) if initial_dist is not None else None + if prob_original_static == 1: + return dataset_ops.Dataset.zip((class_values_ds, dataset)) + elif prob_original_static == 0: + return filtered_ds else: - num_classes = (target_dist_t.shape[0].value or - array_ops.shape(target_dist_t)[0]) - smoothing_constant = 10 - initial_examples_per_class_seen = array_ops.fill( - [num_classes], np.int64(smoothing_constant)) - - def update_estimate_and_tile(num_examples_per_class_seen, c): - updated_examples_per_class_seen, dist = _estimate_data_distribution( - c, num_examples_per_class_seen) - tiled_dist = array_ops.tile( - array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) - return updated_examples_per_class_seen, tiled_dist - - initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) - .apply(scan_ops.scan(initial_examples_per_class_seen, - update_estimate_and_tile)) - .apply(batching.unbatch())) - acceptance_dist_ds = initial_dist_ds.map( - lambda initial: _calculate_acceptance_probs(initial, target_dist_t)) - - def maybe_warn_on_large_rejection(accept_dist, initial_dist): - proportion_rejected = math_ops.reduce_sum( - (1 - accept_dist) * initial_dist) - return control_flow_ops.cond( - math_ops.less(proportion_rejected, .5), - lambda: accept_dist, - lambda: logging_ops.Print( # pylint: disable=g-long-lambda - accept_dist, [proportion_rejected, initial_dist, accept_dist], - message="Proportion of examples rejected by sampler is high: ", - summarize=100, - first_n=10)) - - acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, - initial_dist_ds)) - .map(maybe_warn_on_large_rejection)) - - def _gather_and_copy(class_val, acceptance_prob, data): - return (class_val, array_ops.gather(acceptance_prob, class_val), data) - current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, 
dataset)).map(_gather_and_copy) - filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) - return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + return interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds], + weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]), + seed=seed) return _apply_fn -def _calculate_acceptance_probs(initial_probs, target_probs): - """Calculate the per-class acceptance rates. +def _get_prob_original_static(initial_dist_t, target_dist_t): + """Returns the static probability of sampling from the original. + + `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters + an Op that it isn't defined for. We have some custom logic to avoid this. + + Args: + initial_dist_t: A tensor of the initial distribution. + target_dist_t: A tensor of the target distribution. + + Returns: + The probability of sampling from the original distribution as a constant, + if it is a constant, or `None`. + """ + init_static = tensor_util.constant_value(initial_dist_t) + target_static = tensor_util.constant_value(target_dist_t) + + if init_static is None or target_static is None: + return None + else: + return np.min(target_static / init_static) + + +def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds, + seed): + """Filters a dataset based on per-class acceptance probabilities. Args: - initial_probs: The class probabilities of the data. - target_probs: The desired class proportion in minibatches. + dataset: The dataset to be filtered. + acceptance_dist_ds: A dataset of acceptance probabilities. + initial_dist_ds: A dataset of the initial probability distribution, given or + estimated. + class_values_ds: A dataset of the corresponding classes. + seed: (Optional.) Python integer seed for the resampler. 
+ Returns: - A list of the per-class acceptance probabilities. + A dataset of (class value, data) after filtering. + """ + def maybe_warn_on_large_rejection(accept_dist, initial_dist): + proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist) + return control_flow_ops.cond( + math_ops.less(proportion_rejected, .5), + lambda: accept_dist, + lambda: logging_ops.Print( # pylint: disable=g-long-lambda + accept_dist, [proportion_rejected, initial_dist, accept_dist], + message="Proportion of examples rejected by sampler is high: ", + summarize=100, + first_n=10)) + + acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, + initial_dist_ds)) + .map(maybe_warn_on_large_rejection)) + + def _gather_and_copy(class_val, acceptance_prob, data): + return class_val, array_ops.gather(acceptance_prob, class_val), data + + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + filtered_ds = ( + current_probabilities_and_class_and_data_ds + .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) + return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + + +def _estimate_initial_dist_ds( + target_dist_t, class_values_ds, dist_estimation_batch_size=32, + smoothing_constant=10): + num_classes = (target_dist_t.shape[0].value or + array_ops.shape(target_dist_t)[0]) + initial_examples_per_class_seen = array_ops.fill( + [num_classes], np.int64(smoothing_constant)) + + def update_estimate_and_tile(num_examples_per_class_seen, c): + updated_examples_per_class_seen, dist = _estimate_data_distribution( + c, num_examples_per_class_seen) + tiled_dist = array_ops.tile( + array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) + return updated_examples_per_class_seen, tiled_dist - This method is based on solving the following analysis: + initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) + 
.apply(scan_ops.scan(initial_examples_per_class_seen, + update_estimate_and_tile)) + .apply(batching.unbatch())) + + return initial_dist_ds + + +def _get_target_to_initial_ratio(initial_probs, target_probs): + # Add tiny to initial_probs to avoid divide by zero. + denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) + return target_probs / denom + + +def _estimate_data_distribution(c, num_examples_per_class_seen): + """Estimate data distribution as labels are seen. + + Args: + c: The class labels. Type `int32`, shape `[batch_size]`. + num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, + containing counts. + + Returns: + num_examples_per_class_seen: Updated counts. Type `int64`, shape + `[num_classes]`. + dist: The updated distribution. Type `float32`, shape `[num_classes]`. + """ + num_classes = num_examples_per_class_seen.get_shape()[0].value + # Update the class-count based on what labels are seen in batch. + num_examples_per_class_seen = math_ops.add( + num_examples_per_class_seen, math_ops.reduce_sum( + array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) + init_prob_estimate = math_ops.truediv( + num_examples_per_class_seen, + math_ops.reduce_sum(num_examples_per_class_seen)) + dist = math_ops.cast(init_prob_estimate, dtypes.float32) + return num_examples_per_class_seen, dist + + +def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs): + """Calculates the acceptance probabilities and mixing ratio. + + In this case, we assume that we can *either* sample from the original data + distribution with probability `m`, or sample from a reshaped distribution + that comes from rejection sampling on the original distribution. This + rejection sampling is done on a per-class basis, with `a_i` representing the + probability of accepting data from class `i`. + + This method is based on solving the following analysis for the reshaped + distribution: Let F be the probability of a rejection (on any example). 
Let p_i be the proportion of examples in the data in class i (init_probs) @@ -151,39 +256,39 @@ def _calculate_acceptance_probs(initial_probs, target_probs): 0 <= t_i <= 1, sum_i(t_i) = 1 ``` - A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` - """ - # Add tiny to initial_probs to avoid divide by zero. - denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) - ratio_l = target_probs / denom - # Calculate list of acceptance probabilities. - max_ratio = math_ops.reduce_max(ratio_l) - return ratio_l / max_ratio + If we try to minimize the amount of data rejected, we get the following: + M_max = max_i [ t_i / p_i ] + M_min = min_i [ t_i / p_i ] -def _estimate_data_distribution(c, num_examples_per_class_seen): - """Estimate data distribution as labels are seen. + The desired probability of accepting data if it comes from class `i`: + + a_i = (t_i/p_i - m) / (M_max - m) + + The desired probability of pulling a data element from the original dataset, + rather than the filtered one: + + m = M_min Args: - c: The class labels. Type `int32`, shape `[batch_size]`. - num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, - containing counts. + initial_probs: A Tensor of the initial probability distribution, given or + estimated. + target_probs: A Tensor of the target probability distribution. Returns: - num_examples_per_lass_seen: Updated counts. Type `int64`, shape - `[num_classes]`. - dist: The updated distribution. Type `float32`, shape `[num_classes]`. + (A 1D Tensor with the per-class acceptance probabilities, the desired + probability of pulling from the original distribution.) """ - num_classes = num_examples_per_class_seen.get_shape()[0].value - # Update the class-count based on what labels are seen in batch. 
- num_examples_per_class_seen = math_ops.add( - num_examples_per_class_seen, math_ops.reduce_sum( - array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) - init_prob_estimate = math_ops.truediv( - num_examples_per_class_seen, - math_ops.reduce_sum(num_examples_per_class_seen)) - dist = math_ops.cast(init_prob_estimate, dtypes.float32) - return num_examples_per_class_seen, dist + ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs) + max_ratio = math_ops.reduce_max(ratio_l) + min_ratio = math_ops.reduce_min(ratio_l) + + # Target prob to sample from original distribution. + m = min_ratio + + # TODO(joelshor): Simplify fraction, if possible. + a_i = (ratio_l - m) / (max_ratio - m) + return a_i, m \ No newline at end of file diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 60ef7efba4bb2bc281bc624ec3f58117ffa9a824..e911ad0fa0541f2d8b991d66182dd002c2ecaab0 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -24,6 +24,7 @@ from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import gen_dataset_ops @@ -36,18 +37,22 @@ class _ScanDataset(dataset_ops.Dataset): self._input_dataset = input_dataset with ops.name_scope("initial_state"): + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. self._initial_state = nest.pack_sequence_as(initial_state, [ - ops.convert_to_tensor(t, name="component_%d" % i) + sparse_tensor.SparseTensor.from_value(t) + if sparse_tensor.is_sparse(t) else ops.convert_to_tensor( + t, name="component_%d" % i) for i, t in enumerate(nest.flatten(initial_state)) ]) - # Compute initial values for the state shapes and types based on - # the initial state. 
These will be refined by running - # `tf_scan_func` one or more times below. - # TODO(b/68937811): Allow the initial state to be a tf.SparseTensor. + # Compute initial values for the state classes, shapes and types based on + # the initial state. The shapes may be refined by running `tf_scan_func` one + # or more times below. + self._state_classes = sparse.get_classes(self._initial_state) self._state_shapes = nest.pack_sequence_as( self._initial_state, - [t.shape for t in nest.flatten(self._initial_state)]) + [t.get_shape() for t in nest.flatten(self._initial_state)]) self._state_types = nest.pack_sequence_as( self._initial_state, [t.dtype for t in nest.flatten(self._initial_state)]) @@ -62,67 +67,102 @@ class _ScanDataset(dataset_ops.Dataset): need_to_rerun = True while need_to_rerun: - flat_state_shapes = nest.flatten(self._state_shapes) - flat_state_types = nest.flatten(self._state_types) - - # Create a list in which `tf_scan_func` will store the s + # Create a list in which `tf_scan_func` will store the new shapes. flat_new_state_shapes = [] - @function.Defun(*(flat_state_types + nest.flatten( - sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes)))) + @function.Defun(*(nest.flatten( + sparse.as_dense_types( + self._state_types, self._state_classes)) + nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes)))) def tf_scan_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the state and input_dataset. - # TODO(b/69424092): Check that neither inputs nor outputs are sparse. 
- dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) - for arg, shape in zip(args, - flat_state_shapes + nest.flatten(dense_shapes)): + for arg, shape in zip( + args, + nest.flatten( + sparse.as_dense_shapes(self._state_shapes, self._state_classes)) + + nest.flatten( + sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes))): arg.set_shape(shape) - pivot = len(flat_state_shapes) - old_state = nest.pack_sequence_as(self._initial_state, args[:pivot]) - input_value = nest.pack_sequence_as(input_dataset.output_types, - args[pivot:]) - - ret = scan_func(old_state, input_value) + pivot = len(nest.flatten(self._state_shapes)) + print(self._state_classes) + nested_state_args = nest.pack_sequence_as(self._state_types, + args[:pivot]) + nested_state_args = sparse.deserialize_sparse_tensors( + nested_state_args, self._state_types, self._state_shapes, + self._state_classes) + print(input_dataset.output_classes) + nested_input_args = nest.pack_sequence_as(input_dataset.output_types, + args[pivot:]) + nested_input_args = sparse.deserialize_sparse_tensors( + nested_input_args, input_dataset.output_types, + input_dataset.output_shapes, input_dataset.output_classes) + + ret = scan_func(nested_state_args, nested_input_args) if not isinstance(ret, collections.Sequence) or len(ret) != 2: raise TypeError("The scan function must return a pair comprising the " "new state and the output value.") + + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. 
+ ret = nest.pack_sequence_as(ret, [ + sparse_tensor.SparseTensor.from_value(t) + if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(t) + for t in nest.flatten(ret) + ]) new_state, output_value = ret - flat_new_state = [ - ops.convert_to_tensor(t) for t in nest.flatten(new_state) - ] - flat_output_value = [ - ops.convert_to_tensor(t) for t in nest.flatten(output_value) - ] + # Extract and validate class information from the returned values. + for t, clazz in zip( + nest.flatten(new_state), nest.flatten(self._state_classes)): + if not isinstance(t, clazz): + raise TypeError( + "The element classes for the new state must match the initial " + "state. Expected %s; got %s." % + (self._state_classes, + nest.pack_sequence_as( + self._state_types, + [type(t) for t in nest.flatten(new_state)]))) + self._output_classes = sparse.get_classes(output_value) # Extract shape information from the returned values. - flat_new_state_shapes.extend([t.shape for t in flat_new_state]) + flat_new_state_shapes.extend( + [t.get_shape() for t in nest.flatten(new_state)]) self._output_shapes = nest.pack_sequence_as( - output_value, [t.shape for t in flat_output_value]) + output_value, [t.get_shape() for t in nest.flatten(output_value)]) # Extract and validate type information from the returned values. - for t, dtype in zip(flat_new_state, flat_state_types): + for t, dtype in zip( + nest.flatten(new_state), nest.flatten(self._state_types)): if t.dtype != dtype: raise TypeError( "The element types for the new state must match the initial " "state. Expected %s; got %s." 
% - (self._state_types, nest.pack_sequence_as( - self._state_types, [t.dtype for t in flat_new_state]))) - self._output_classes = nest.pack_sequence_as( - output_value, [ops.Tensor for _ in flat_output_value]) + (self._state_types, + nest.pack_sequence_as( + self._state_types, + [t.dtype for t in nest.flatten(new_state)]))) self._output_types = nest.pack_sequence_as( - output_value, [t.dtype for t in flat_output_value]) - - return flat_new_state + flat_output_value + output_value, [t.dtype for t in nest.flatten(output_value)]) + + # Serialize any sparse tensors. + new_state = nest.pack_sequence_as(new_state, [ + t for t in nest.flatten(sparse.serialize_sparse_tensors(new_state)) + ]) + output_value = nest.pack_sequence_as(output_value, [ + t for t in nest.flatten( + sparse.serialize_sparse_tensors(output_value)) + ]) + return nest.flatten(new_state) + nest.flatten(output_value) # Use the private method that will execute `tf_scan_func` but delay # adding it to the graph in case we need to rerun the function. 
tf_scan_func._create_definition_if_needed() # pylint: disable=protected-access + flat_state_shapes = nest.flatten(self._state_shapes) weakened_state_shapes = [ original.most_specific_compatible_shape(new) for original, new in zip(flat_state_shapes, flat_new_state_shapes) @@ -150,7 +190,7 @@ class _ScanDataset(dataset_ops.Dataset): input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access return gen_dataset_ops.scan_dataset( input_t, - nest.flatten(self._initial_state), + nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)), self._scan_func.captured_inputs, f=self._scan_func, output_types=nest.flatten( diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index c2834d822664b9d60690c5d5dd527bbbd01a106f..8dfcaf6032e1602ed76a8a995553c5d398c4a778 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -21,6 +21,7 @@ py_library( srcs = ["values.py"], visibility = ["//tensorflow:internal"], deps = [ + ":input_ops", ":prefetching_ops_v2", "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/contrib/eager/python:datasets", @@ -42,6 +43,7 @@ cuda_py_test( srcs = ["values_test.py"], additional_deps = [ ":mirrored_strategy", + ":multi_worker_test_base", ":values", "//tensorflow/core:protos_all_py", "//tensorflow/python/data/ops:dataset_ops", @@ -57,6 +59,9 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/estimator:model_fn", ], + tags = [ + "no_pip", + ], ) py_library( @@ -81,6 +86,19 @@ py_library( ], ) +py_library( + name = "multi_worker_strategy", + srcs = ["multi_worker_strategy.py"], + visibility = ["//tensorflow:internal"], + deps = [ + ":mirrored_strategy", + ":values", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:training", + "//tensorflow/python:util", + ], +) + py_library( name = "one_device_strategy", srcs = ["one_device_strategy.py"], @@ -216,6 +234,24 @@ cuda_py_test( ], ) 
+py_library( + name = "multi_worker_test_base", + testonly = 1, + srcs = ["multi_worker_test_base.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + "//tensorflow/core:protos_all_py", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python:session", + "//tensorflow/python:training", + "//tensorflow/python/eager:test", + ], +) + py_library( name = "step_fn", srcs = ["step_fn.py"], @@ -479,3 +515,34 @@ cuda_py_test( "//tensorflow/python/data/ops:iterator_ops", ], ) + +py_library( + name = "input_ops", + srcs = ["input_ops.py"], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/util:nest", + ], +) + +cuda_py_test( + name = "input_ops_test", + srcs = ["input_ops_test.py"], + additional_deps = [ + ":input_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:batching", + "//tensorflow/contrib/data/python/ops:interleave_ops", + "//tensorflow/python:errors", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:io_ops", + "//tensorflow/python/data/ops:readers", + "//tensorflow/python:util", + ], + tags = [ + "no_pip", + ], +) diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 946310aa6fc2101d75e86d3ff2e9f3284e6c6625..45d191127ee7349a59a7e3efa29baeda6445c44a 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -265,6 +265,10 @@ class NamedDistribution(object): one_device_strategy = NamedDistribution( "OneDeviceCPU", one_device_strategy.OneDeviceStrategy("/cpu:0"), None) +tpu_strategy_single_iteration = NamedDistribution( + "TPUSingleIteration", + tpu_strategy.TPUStrategy(iterations_per_step=1), + required_tpu=True) tpu_strategy = NamedDistribution( 
"TPU", tpu_strategy.TPUStrategy(), required_tpu=True) mirrored_strategy_with_gpu_and_cpu = NamedDistribution( diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops.py b/tensorflow/contrib/distribute/python/cross_tower_ops.py index cff717db80f0bdd377b3c9c7e8ca3578ff273930..c6a1bf6a9f65828c45617ae18a1b0989f9d46225 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_ops.py +++ b/tensorflow/contrib/distribute/python/cross_tower_ops.py @@ -53,15 +53,14 @@ def _validate_value_destination_pairs(value_destination_pairs): return True +# TODO(yuefengz): consider calling this function in the caller of CrossTowerOps. def _get_devices_from(destinations): if isinstance(destinations, value_lib.DistributedValues): return list(destinations.devices) elif isinstance(destinations, six.string_types): - return [device_util.canonicalize(destinations)] + return [device_util.resolve(destinations)] else: - return [ - device_util.canonicalize(destination) for destination in destinations - ] + return [device_util.resolve(destination) for destination in destinations] def _devices_match(left, right): diff --git a/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py b/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py index b87224251ca3844fc81c6f32a893d2c71664a955..2b05884b9b93470ef9a764cbedbc91bd3912c611 100644 --- a/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py +++ b/tensorflow/contrib/distribute/python/examples/simple_tfkeras_example.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""An example tf.keras model that is trained using MirroredStrategy.""" +"""An example of training tf.keras Model using MirroredStrategy.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from sys import argv + +import sys + import numpy as np import tensorflow as tf @@ -33,30 +35,37 @@ def input_fn(): def main(args): if len(args) < 2: - print('You must specify model_dir for checkpoints such as' - ' /tmp/tfkeras_example./') + print('You must specify model_dir for checkpoints such as' + ' /tmp/tfkeras_example/.') return - print('Using %s to store checkpoints.' % args[1]) - - strategy = tf.contrib.distribute.MirroredStrategy( - ['/device:GPU:0', '/device:GPU:1']) - config = tf.estimator.RunConfig(train_distribute=strategy) - optimizer = tf.train.GradientDescentOptimizer(0.2) + model_dir = args[1] + print('Using %s to store checkpoints.' % model_dir) + # Define tf.keras Model. model = tf.keras.Sequential() model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(10,))) model.add(tf.keras.layers.Dense(1, activation='sigmoid')) + # Compile tf.keras Model. + optimizer = tf.train.GradientDescentOptimizer(0.2) model.compile(loss='binary_crossentropy', optimizer=optimizer) model.summary() tf.keras.backend.set_learning_phase(True) + + # Define a DistributionStrategy and convert the tf.keras Model to a + # tf.Estimator that utilizes the DistributionStrategy. + strategy = tf.contrib.distribute.MirroredStrategy( + ['/device:GPU:0', '/device:GPU:1']) + config = tf.estimator.RunConfig(train_distribute=strategy) keras_estimator = tf.keras.estimator.model_to_estimator( - keras_model=model, config=config, model_dir=args[1]) + keras_model=model, config=config, model_dir=model_dir) + # Train and evaluate the tf.Estimator. 
keras_estimator.train(input_fn=input_fn, steps=10) eval_result = keras_estimator.evaluate(input_fn=input_fn) print('Eval result: {}'.format(eval_result)) + if __name__ == '__main__': - tf.app.run(argv=argv) + tf.app.run(argv=sys.argv) diff --git a/tensorflow/contrib/distribute/python/input_ops.py b/tensorflow/contrib/distribute/python/input_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..1f24f629479b6ae93bbb8a6dfe0b33c4f6a7da35 --- /dev/null +++ b/tensorflow/contrib/distribute/python/input_ops.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Input-pipeline utilities for Distribution strategies.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import readers +from tensorflow.python.data.util import nest +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import tf_logging + +# TODO(priyag): Any other reader datasets to consider here? 
+_READER_DATASET_OPS = [ + "TextLineDataset", + "TFRecordDataset", + "FixedLengthRecordDataset" +] + + +# pylint: disable=protected-access +def auto_shard_dataset(dataset, num_shards, index): + """Shard the input pipeline by sharding the underlying list of files. + + Args: + dataset: A `tf.data.Dataset` instance, typically the result of a bunch of + dataset transformations. + num_shards: A `tf.int64` scalar `tf.Tensor`, representing the number of + shards operating in parallel. Same usage as in `Dataset.shard`. + index: A `tf.int64` scalar `tf.Tensor`, representing the worker index. + Same usage as in `Dataset.shard`. + + Returns: + A modified `Dataset` obtained by updating the pipeline sharded by the + files. + + Raises: + NotImplementedError: If we cannot automatically determine a good way to + shard the input dataset. + """ + + # TODO(priyag): Clone datasets instead of updating in place, similar to the + # clone method for TFRecordDataset. + def _auto_shard_impl(dataset, found_reader_op): + """Recursive implementation of auto sharding.""" + + if not found_reader_op: + # TODO(priyag): Make this check more robust by enforcing some common + # property on reader datasets. + if (isinstance(dataset, readers.TextLineDataset) or + isinstance(dataset, readers.FixedLengthRecordDataset)): + filenames_tensor = dataset._filenames + num_files = array_ops.size(filenames_tensor) + sharded_filenames_tensor = array_ops.gather( + filenames_tensor, math_ops.range(index, num_files, num_shards)) + dataset._filenames = sharded_filenames_tensor + return dataset + elif isinstance(dataset, readers.TFRecordDataset): + # `TFRecordDataset` needs to be handled separately than other readers + # because it converts filenames to a dataset first. Also, we clone it + # instead of updating in place because it has special logic in the + # constructor. Eventually we will change all cases to clone datasets + # instead of updating in-place. 
+ return dataset._clone( + filenames=dataset._filenames.shard(num_shards, index)) + elif hasattr(dataset, "_map_func"): + # TODO(priyag): Make this check more robust by enforcing some common + # property on all map/flatmap/interleave datasets. + map_func_def = dataset._map_func.definition + for node in map_func_def.node_def: + if node.op in _READER_DATASET_OPS: + found_reader_op = True + break + elif node.op == "FlatMapDataset": + # TODO(priyag): Should this check for other map datasets? Should it + # be recursive? It is too specific to implementation of + # TFRecordDataset right now. + nested_func_name = node.attr["f"].func.name + nested_func = ops.get_default_graph()._functions[nested_func_name] + for nested_node in nested_func.definition.node_def: + if nested_node.op in _READER_DATASET_OPS: + found_reader_op = True + break + if found_reader_op: + break + if found_reader_op: + dataset._input_dataset = _auto_shard_impl( + dataset._input_dataset, found_reader_op) + return dataset + + # TODO(priyag): Make _input_dataset(s) a common property of all datasets to + # make this check more robust. + if hasattr(dataset, "_input_dataset"): + dataset._input_dataset = _auto_shard_impl( + dataset._input_dataset, found_reader_op) + if hasattr(dataset, "_dataset_to_concatenate"): + # Special case for `ConcatentateDataset`. We want to shard all input + # datasets. + dataset._dataset_to_concatenate = _auto_shard_impl( + dataset._dataset_to_concatenate, found_reader_op) + return dataset + + if hasattr(dataset, "_datasets"): + # Special case for `ZipDataset`. + dataset._datasets = nest.pack_sequence_as(dataset._datasets, [ + _auto_shard_impl(ds, found_reader_op) + for ds in nest.flatten(dataset._datasets) + ]) + return dataset + + if not found_reader_op: + tf_logging.warn( + "Could not find a standard reader in the input pipeline" + "(one of TextLineDataset, TFRecordDataset, FixedLengthRecordDataset)." + "Falling back to sharding the dataset anyway. 
Please verify" + "correctness of auto-sharding for your input.") + + # TODO(priyag): What do we want to do if the number of filenames is + # uneven in the number of shards? By default, this will just return as + # many items it can before throwing OutOfRangeError. + # TODO(priyag): This will shard the filenames before any shuffling of the + # filename dataset. It might be desirable to shard after shuffling + # filenames? If so, how do we achieve that? + return dataset.shard(num_shards, index) + + return _auto_shard_impl(dataset=dataset, found_reader_op=False) diff --git a/tensorflow/contrib/distribute/python/input_ops_test.py b/tensorflow/contrib/distribute/python/input_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..16179c3a4903c8149800d411853af734c1633466 --- /dev/null +++ b/tensorflow/contrib/distribute/python/input_ops_test.py @@ -0,0 +1,265 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for input pipeline modifications for distribution strategies.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.contrib.distribute.python import input_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import errors +from tensorflow.python.lib.io import python_io +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class AutoShardDatasetTest(test.TestCase): + + def setUp(self): + super(AutoShardDatasetTest, self).setUp() + self._num_files = 10 + self._num_records = 4 + self._num_shards = 2 + self._shard_index = 0 + self._record_bytes = 10 + + def _record(self, r, f): + return compat.as_bytes("Record %d of file %d" % (r, f)) + + def _text_line(self, r, f): + return compat.as_bytes("Text line %d of file %d" % (r, f)) + + def _fixed_length_record(self, r, f): + return compat.as_bytes(str((r * f) % 10) * self._record_bytes) + + def _createTFRecordFiles(self): + filenames = [] + for i in range(self._num_files): + fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i) + filenames.append(fn) + writer = python_io.TFRecordWriter(fn) + for j in range(self._num_records): + record = self._record(j, i) + writer.write(record) + writer.close() + return filenames + + def _createTextFiles(self): + filenames = [] + for i in range(self._num_files): + fn = os.path.join(self.get_temp_dir(), "text_line.%d.txt" % i) + filenames.append(fn) + contents = [] + for j in range(self._num_records): + contents.append(self._text_line(j, i)) + if j + 1 != self._num_records or i == 0: + contents.append(b"\r\n") + contents = b"".join(contents) + + with open(fn, 
"wb") as f: + f.write(contents) + return filenames + + def _createFixedLengthRecordFiles(self): + filenames = [] + for i in range(self._num_files): + fn = os.path.join(self.get_temp_dir(), "fixed_length_record.%d.txt" % i) + filenames.append(fn) + with open(fn, "wb") as f: + for j in range(self._num_records): + f.write(self._fixed_length_record(j, i)) + return filenames + + def _verifySimpleShardingOutput(self, dataset, record_fn): + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + for f in range(self._shard_index, self._num_files, self._num_shards): + for r in range(self._num_records): + self.assertAllEqual(record_fn(r, f), sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testTFRecordDataset(self): + dataset = readers.TFRecordDataset(self._createTFRecordFiles()) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._record) + + def testFlatMap(self): + dataset = dataset_ops.Dataset.from_tensor_slices( + self._createTFRecordFiles()) + dataset = dataset.flat_map(readers.TFRecordDataset) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._record) + + def testInterleave(self): + dataset = dataset_ops.Dataset.from_tensor_slices( + self._createTFRecordFiles()) + dataset = dataset.interleave( + readers.TFRecordDataset, cycle_length=4, block_length=self._num_records) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + # Since block_length == num records in each file, the output will still + # contain records in order of files. 
+ self._verifySimpleShardingOutput(dataset, self._record) + + def testParallelInterleave(self): + dataset = dataset_ops.Dataset.from_tensor_slices( + self._createTFRecordFiles()) + dataset = dataset.apply(interleave_ops.parallel_interleave( + readers.TFRecordDataset, + cycle_length=4, + block_length=self._num_records)) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + # Since block_length == num records in each file, the output will still + # contain records in order of files. + self._verifySimpleShardingOutput(dataset, self._record) + + def testListfiles(self): + filenames = self._createTFRecordFiles() + file_pattern = filenames[0].rsplit("/", 1)[0] + "/tf_record.*.txt" + dataset = dataset_ops.Dataset.list_files(file_pattern, shuffle=False) + dataset = dataset.flat_map(readers.TFRecordDataset) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + actual, expected = [], [] + for f in range(self._shard_index, self._num_files, self._num_shards): + for r in range(self._num_records): + actual.append(sess.run(next_element)) + expected.append(self._record(r, f)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self.assertAllEqual(expected, actual) + + def testComplexPipeline(self): + # Setup a complex input pipeline. 
+ batch_size = 2 + num_epochs = 5 + dataset = dataset_ops.Dataset.from_tensor_slices( + self._createTFRecordFiles()) + dataset = dataset.shuffle(buffer_size=self._num_files) + dataset = dataset.flat_map(readers.TFRecordDataset) + dataset = dataset.prefetch(buffer_size=batch_size) + dataset = dataset.shuffle(2 * self._num_files * self._num_records) + dataset = dataset.repeat(num_epochs) + dataset = dataset.apply(batching.map_and_batch( + lambda x: x, batch_size=batch_size)) + dataset = dataset.prefetch(buffer_size=None) + + # Auto shard. + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + # Verify output. + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + actual = [] + num_iterations = (self._num_files * self._num_records * num_epochs) // ( + self._num_shards * batch_size) + for _ in range(num_iterations): + actual.extend(sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + expected = [] + for f in range(0, self._num_files, self._num_shards): + for r in range(self._num_records): + expected.append(self._record(r, f)) + expected *= num_epochs + + self.assertAllEqual(sorted(expected), sorted(actual)) + + def testZip(self): + dataset1 = readers.TFRecordDataset(self._createTFRecordFiles()) + dataset2 = readers.TextLineDataset(self._createTextFiles()) + dataset = dataset_ops.Dataset.zip((dataset1, dataset2)) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + record_fn = lambda r, f: (self._record(r, f), self._text_line(r, f)) + self._verifySimpleShardingOutput(dataset, record_fn) + + def testConcat(self): + dataset1 = readers.TFRecordDataset(self._createTFRecordFiles()) + dataset2 = readers.TextLineDataset(self._createTextFiles()) + dataset = dataset1.concatenate(dataset2) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + 
iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + for f in range(self._shard_index, self._num_files, self._num_shards): + for r in range(self._num_records): + self.assertAllEqual(self._record(r, f), sess.run(next_element)) + for f in range(self._shard_index, self._num_files, self._num_shards): + for r in range(self._num_records): + self.assertAllEqual(self._text_line(r, f), sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testTextLineReader(self): + dataset = readers.TextLineDataset(self._createTextFiles()) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._text_line) + + def testTextLineReaderWithFlatMap(self): + dataset = dataset_ops.Dataset.from_tensor_slices(self._createTextFiles()) + dataset = dataset.flat_map(readers.TextLineDataset) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._text_line) + + def testFixedLengthReader(self): + dataset = readers.FixedLengthRecordDataset( + self._createFixedLengthRecordFiles(), self._record_bytes) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._fixed_length_record) + + def testFixedLengthReaderWithFlatMap(self): + dataset = dataset_ops.Dataset.from_tensor_slices( + self._createFixedLengthRecordFiles()) + dataset = dataset.flat_map( + lambda f: readers.FixedLengthRecordDataset(f, self._record_bytes)) + dataset = input_ops.auto_shard_dataset( + dataset, self._num_shards, self._shard_index) + + self._verifySimpleShardingOutput(dataset, self._fixed_length_record) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py 
b/tensorflow/contrib/distribute/python/minimize_loss_test.py index e134fe34e10be402f028db986b8cbf14222db07f..d2054715f11c47b8fc3bd73288fd13c0fd5e71e8 100644 --- a/tensorflow/contrib/distribute/python/minimize_loss_test.py +++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py @@ -44,13 +44,16 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): combinations.distributions_and_v1_optimizers(), combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + combinations.combine(mode=["eager"], use_callable_loss=[True]), - combinations.combine(is_tpu=[False])) + - combinations.combine( - distribution=[combinations.tpu_strategy], - optimizer_fn=[combinations.adam_optimizer_v1_fn], - mode=["graph"], - use_callable_loss=[False], - is_tpu=[True])) + combinations.combine(is_tpu=[False])) + combinations.combine( + distribution=[combinations.tpu_strategy], + optimizer_fn=[ + combinations.adam_optimizer_v1_fn, + # TODO(isaprykin): Make Adam v2 work with while_loops + # and TPUs. 
+ ], + mode=["graph"], + use_callable_loss=[False], + is_tpu=[True])) def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss, is_tpu): with distribution.scope(): @@ -101,7 +104,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): distribution=[combinations.tpu_strategy], optimizer_fn=[ combinations.adam_optimizer_v1_fn, - combinations.gradient_descent_optimizer_v1_fn + combinations.gradient_descent_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v2_fn, ], mode=["graph"], is_tpu=[True])) @@ -171,13 +175,28 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): set(created_variables)) @combinations.generate( - combinations.times(combinations.distributions_and_v1_optimizers(), - combinations.combine( - mode=["graph", "eager"], - momentum=[0.8, 0.9, 0.99], - renorm=[False, True]))) + combinations.times( + combinations.combine(momentum=[0.8, 0.9, 0.99], renorm=[False, True]), + combinations.times( + combinations.distributions_and_v1_optimizers(), + combinations.combine( + mode=["graph", "eager"], + is_tpu=[False], + # TODO(isaprykin): Allow False here. Currently subsequent + # towers will re-execute UPDATE_OPS of previous towers. 
+ update_ops_in_cross_tower_mode=[True])) + + combinations.combine( + distribution=[combinations.tpu_strategy_single_iteration], + optimizer_fn=[ + combinations.gradient_descent_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v2_fn + ], + mode=["graph"], + is_tpu=[True], + update_ops_in_cross_tower_mode=[False]))) def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum, - renorm): + renorm, is_tpu, + update_ops_in_cross_tower_mode): """Verifies that moving mean updates are reduced across towers.""" with distribution.scope(): num_towers = len(distribution.worker_devices) @@ -185,7 +204,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): optimizer_fn, batch_per_epoch=num_towers, momentum=momentum, - renorm=renorm) + renorm=renorm, + update_ops_in_tower_mode=not update_ops_in_cross_tower_mode) # Disable prefetching since that makes the specific input on each device # to be non deterministic, and this test relies on specific input being @@ -196,16 +216,18 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): dataset_fn).make_one_shot_iterator() def run_step(): - return control_flow_ops.group( - distribution.unwrap( - distribution.call_for_each_tower( - model_fn, - iterator.get_next(), - run_concurrently=batchnorm.built)) + - ops.get_collection(ops.GraphKeys.UPDATE_OPS)) + fetches = distribution.unwrap( + distribution.call_for_each_tower( + model_fn, iterator.get_next(), + run_concurrently=batchnorm.built)) + if update_ops_in_cross_tower_mode: + fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS) + return control_flow_ops.group(fetches) if not context.executing_eagerly(): with self.test_session() as sess: + if is_tpu: + sess.run(tpu.initialize_system()) run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) @@ -229,22 +251,40 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): expected_moving_mean - averaged_batch_mean(i)) * (1.0 - 
momentum)) self.assertNear(expected_moving_means[i], moving_means[i], 0.0001) + if is_tpu: + with self.test_session() as sess: + sess.run(tpu.shutdown_system()) + @combinations.generate( combinations.times( combinations.combine( - distribution=[combinations.one_device_strategy, - combinations.mirrored_strategy_with_gpu_and_cpu, - combinations.mirrored_strategy_with_two_gpus], - optimizer_fn=[combinations.gradient_descent_optimizer_v1_fn, - combinations.gradient_descent_optimizer_v2_fn], - loss_reduction=[losses_impl.Reduction.SUM, - losses_impl.Reduction.MEAN, - losses_impl.Reduction.SUM_OVER_BATCH_SIZE, - losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS]), - combinations.combine(mode=["graph"], use_callable_loss=[True, False]) - + combinations.combine(mode=["eager"], use_callable_loss=[True]))) + optimizer_fn=[ + combinations.gradient_descent_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v2_fn + ], + loss_reduction=[ + losses_impl.Reduction.SUM, losses_impl.Reduction.MEAN, + losses_impl.Reduction.SUM_OVER_BATCH_SIZE, + losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS + ]), + combinations.times( + combinations.combine( + distribution=[ + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus + ], + is_tpu=[False]), + combinations.combine( + mode=["graph"], use_callable_loss=[True, False]) + + combinations.combine(mode=["eager"], use_callable_loss=[True])) + + combinations.combine( + distribution=[combinations.tpu_strategy_single_iteration], + is_tpu=[True], + mode=["graph"], + use_callable_loss=[True, False]))) def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction, - use_callable_loss): + use_callable_loss, is_tpu): with distribution.scope(): all_vars = [] @@ -280,12 +320,13 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): if not context.executing_eagerly(): with self.test_session() as sess: + if is_tpu: + sess.run(tpu.initialize_system()) 
run_step = sess.make_callable(run_step()) self.evaluate(variables_lib.global_variables_initializer()) run_step() - self.assertEqual(distribution.num_towers, len(all_vars)) v = all_vars[0] self.assertTrue(all([v is vi for vi in all_vars[1:]])) weight = numpy.squeeze(self.evaluate(distribution.fetch(v))) @@ -312,6 +353,10 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): # One of the mean loss reductions. self.assertNear(weight, 2 + 10.6, 0.0001) + if is_tpu: + with self.test_session() as sess: + sess.run(tpu.shutdown_system()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 6efd578a775da7bf326826289bd5bd50a57be892..8237b23dbbdb10c053de53880d6838113b99be2d 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -80,6 +80,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): dict((d, i) for i, d in enumerate(devices))) self._cross_tower_ops = cross_tower_ops self._prefetch_on_device = prefetch_on_device + # TODO(yuefengz): consider setting the default device. def _create_variable(self, next_creator, *args, **kwargs): """Create a mirrored variable. 
See `DistributionStrategy.scope`.""" @@ -321,7 +322,6 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): def _fetch(self, val, destination, fn): """Return a copy of `val` or `fn(val)` on `destination`.""" - assert isinstance(destination, six.string_types) if isinstance(val, values.TowerLocalVariable): val = self.reduce(val.reduce_method, val, destinations=destination) with ops.device(destination): diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index 6c5c055070c0fc88ed8f3a459e3f346596f077a6..3635bd2e34f88ab05a3ddce1728fd53c5b7149b3 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -370,22 +370,27 @@ class MirroredStrategyVariableCreationTest(test.TestCase): expected_sum = 0.0 expected_mean = 0.0 for i, d in enumerate(dist.worker_devices): - # Test access within a device scope, should see different values. - with ops.device(d): - v_sum_value = self.evaluate(ret_v_sum.read_value()) - v_mean_value = self.evaluate(ret_v_mean.read_value()) - expected = i + 3.0 - self.assertEqual(expected, v_sum_value) - expected_sum += expected - expected = i * 6.0 - self.assertEqual(expected, v_mean_value) - expected_mean += expected - - # fetch() should return the value you get by applying the - # reduction across all towers. - self.assertEqual(expected_sum, self.evaluate(dist.fetch(ret_v_sum))) + # Should see different values on different devices. 
+ v_sum_value = self.evaluate(ret_v_sum.get(d).read_value()) + v_mean_value = self.evaluate(ret_v_mean.get(d).read_value()) + expected = i + 3.0 + self.assertEqual(expected, v_sum_value) + expected_sum += expected + expected = i * 6.0 + self.assertEqual(expected, v_mean_value) + expected_mean += expected expected_mean /= len(dist.worker_devices) + + # Without get(device), should return the value you get by + # applying the reduction across all towers (whether you use + # fetch(), get(), or nothing). + self.assertEqual(expected_sum, self.evaluate(dist.fetch(ret_v_sum))) self.assertEqual(expected_mean, self.evaluate(dist.fetch(ret_v_mean))) + self.assertEqual(expected_sum, self.evaluate(ret_v_sum.get())) + self.assertEqual(expected_mean, self.evaluate(ret_v_mean.get())) + if not context.executing_eagerly(): + self.assertEqual(expected_sum, self.evaluate(ret_v_sum)) + self.assertEqual(expected_mean, self.evaluate(ret_v_mean)) # NOTE(priyag): Names and name scopes are ignored in eager, hence we are not # testing this in eager mode. diff --git a/tensorflow/contrib/distribute/python/multi_worker_strategy.py b/tensorflow/contrib/distribute/python/multi_worker_strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..a552b370ebf359464afcaf3211119e73434e0dfb --- /dev/null +++ b/tensorflow/contrib/distribute/python/multi_worker_strategy.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Classes implementing a mirrored DistributionStrategy for multiple workers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +from tensorflow.contrib.distribute.python import values +from tensorflow.contrib.distribute.python.mirrored_strategy import MirroredStrategy +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.python.training import device_util +from tensorflow.python.training import server_lib +from tensorflow.python.util import nest + + +# TODO(yuefengz): support between-graph replication. +# TODO(yuefengz): merge this class into its base class. +# TODO(yuefengz): in some cases, we probably want to use configure method to +# configure this class. +# TODO(yuefengz): MirroredStrategy.worker_devices may be confusing after the +# class is introduced. +class MultiWorkerMirroredStrategy(MirroredStrategy): + """Mirrored strategy that works on multiple workers with in-graph replication. + + There are several important concepts for distributed TensorFlow, e.g. + `client`, `job`, `task`, `cluster`, `in-graph replication` and + `synchronous training` and they have already been defined in the + [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed). + The distribution strategy inherits these concepts as well and in addition to + that we also clarify several more concepts: + * **In-graph replication**: the `client` creates a single `tf.Graph` that + specifies tasks for devices on all workers. The `client` then creates a + client session which will talk to the `master` service of a `worker`. Then + the `master` will partition the graph and distribute the work to all + participating workers.
+ * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one + physical machine. We will have multiple `worker`s with different `task` + index. They all do similar things except for one worker checkpointing model + variables, writing summaries, etc. in addition to its ordinary work. + + This class maps one tower to one device on a worker. It mirrors all model + variables on all towers. For example, if you have two `worker`s and each + `worker` has 4 GPUs, it will create 8 copies of the model variables on these 8 + GPUs. Then like in MirroredStrategy, each tower performs their computation + with their own copy of variables unless in cross-tower mode where variable or + tensor reduction happens. + """ + + def __init__(self, + num_gpus_per_worker=1, + worker_job_name=None, + num_workers=None, + cluster=None, + cross_tower_ops=None, + prefetch_on_device=None): + """Initialize the strategy object. + + Args: + num_gpus_per_worker: number of GPUs per worker. If it is zero, the local + CPU will be used. + worker_job_name: the job name for `worker`, typically just 'worker'. + num_workers: the number of workers. If it is 0, it degenerates to + single-worker MirroredStrategy. + cluster: a `tf.train.ClusterSpec` object or a dict that can be used to + construct a `tf.train.ClusterSpec` object or a `tf.train.ClusterDef` + proto buffer. It is an alternative way to initialize this object. + cross_tower_ops: the cross tower ops to use. If None, a default one will + be used. If configure method is called, a best one for the configuration + will be chosen. + prefetch_on_device: a boolean to specify whether to prefetch input to + each worker's devices. + + Raises: + ValueError: if got an unexpected `cluster`.
+ """ + if cluster is None: + self._workers = [ + '/job:%s/task:%d' % (worker_job_name, task_index) + for task_index in range(num_workers) + ] + else: + if isinstance(cluster, (dict, cluster_pb2.ClusterDef)): + cluster_spec = server_lib.ClusterSpec(cluster) + elif isinstance(cluster, server_lib.ClusterSpec): + cluster_spec = cluster + else: + raise ValueError( + "`cluster_spec' should be dict or a `tf.train.ClusterSpec` or a " + '`tf.train.ClusterDef` object') + + self._workers = [] + for job in sorted(cluster_spec.jobs): + for task in range(cluster_spec.num_tasks(job)): + self._workers.append('/job:%s/task:%d' % (job, task)) + + self._num_gpus_per_worker = num_gpus_per_worker + if num_gpus_per_worker > 0: + self._worker_device_map = { + worker: [ + device_util.canonicalize(worker + '/device:GPU:%d' % gpu) + for gpu in range(num_gpus_per_worker) + ] for worker in self._workers + } + else: + self._worker_device_map = { + worker: [device_util.canonicalize(worker, '/device:CPU:0')] + for worker in self._workers + } + self._devices = nest.flatten(self._worker_device_map.values()) + + super(MultiWorkerMirroredStrategy, self).__init__( + devices=self._devices, prefetch_on_device=prefetch_on_device) + + # Setting `_default_device` will add a device scope in the + # distribution.scope. We set the default device to the first worker. When + # users specify device under distribution.scope by + # with tf.device("/cpu:0"): + # ... + # their ops will end up on the cpu device of its first worker, e.g. + # "/job:worker/task:0/device:CPU:0". Note this is not used in tower mode. 
+ self._default_device = self._workers[0] + + def distribute_dataset(self, dataset_fn): + return values.MultiWorkerDataset( + partial(self._call_dataset_fn, dataset_fn), self._worker_device_map, + self._prefetch_on_device) diff --git a/tensorflow/contrib/distribute/python/multi_worker_strategy_test.py b/tensorflow/contrib/distribute/python/multi_worker_strategy_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ee7588163e42ee3c31dd9fd25fc53e3483f0fbee --- /dev/null +++ b/tensorflow/contrib/distribute/python/multi_worker_strategy_test.py @@ -0,0 +1,64 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for MultiWorkerMirroredStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import multi_worker_strategy +from tensorflow.contrib.distribute.python import multi_worker_test_base +from tensorflow.contrib.distribute.python import strategy_test_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.training import server_lib + + +@test_util.with_c_api +class MultiWorkerStrategyTest(multi_worker_test_base.MultiWorkerTestBase, + strategy_test_lib.DistributionTestBase): + + def _get_distribution_strategy(self): + return multi_worker_strategy.MultiWorkerMirroredStrategy( + cluster=server_lib.ClusterSpec({ + 'worker': ['/job:worker/task:0', '/job:worker/task:1'] + }), + num_gpus_per_worker=context.num_gpus()) + + def testMinimizeLossGraph(self): + self._test_minimize_loss_graph(self._get_distribution_strategy()) + + +class DeviceScopeTest(test.TestCase): + """Test the device scope of MultiWorkerMirroredStrategy.""" + + def testDeviceScope(self): + with context.graph_mode(): + strategy = multi_worker_strategy.MultiWorkerMirroredStrategy( + cluster={'worker': ['/job:worker/task:0', '/job:worker/task:1']}, + num_gpus_per_worker=context.num_gpus()) + with strategy.scope(): + a = constant_op.constant(1.) + with ops.device('/cpu:0'): + b = constant_op.constant(1.) 
+ self.assertEqual(a.device, '/job:worker/task:0') + self.assertEqual(b.device, '/job:worker/task:0/device:CPU:0') + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/distribute/python/multi_worker_test_base.py b/tensorflow/contrib/distribute/python/multi_worker_test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..f659be5f42594b275af06435cb0c228e5d594ac9 --- /dev/null +++ b/tensorflow/contrib/distribute/python/multi_worker_test_base.py @@ -0,0 +1,90 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base testing class for strategies that require multiple nodes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import copy + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.client import session +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util + + +class MultiWorkerTestBase(test.TestCase): + """Base class for testing multi node strategy and dataset.""" + + @classmethod + def setUpClass(cls): + """Create a local cluster with 2 workers.""" + num_workers = 2 + # Leave some memory for cuda runtime. 
+ gpu_mem_frac = 0.7 / num_workers + default_config = config_pb2.ConfigProto() + default_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac + + # The local cluster takes some portion of the local GPUs and there is no way + # for the cluster to terminate unless using multiple processes. Therefore, + # we can create only one cluster throughout a test process. + workers, _ = test_util.create_local_cluster( + num_workers, num_ps=0, worker_config=default_config) + cls._master_target = workers[0].target + + @contextlib.contextmanager + def test_session(self, graph=None, config=None): + """Create a test session with master target set to the testing cluster. + + This overrides the base class' method, removes arguments that are not needed + by the multi-node case and creates a test session that connects to the local + testing cluster. + + Args: + graph: Optional graph to use during the returned session. + config: An optional config_pb2.ConfigProto to use to configure the + session. + + Yields: + A Session object that should be used as a context manager to surround + the graph building and execution code in a test case. 
+ """ + if self.id().endswith('.test_session'): + self.skipTest('Not a test.') + + if config is None: + config = config_pb2.ConfigProto(allow_soft_placement=True) + else: + config = copy.deepcopy(config) + # Don't perform optimizations for tests so we don't inadvertently run + # gpu ops on cpu + config.graph_options.optimizer_options.opt_level = -1 + config.graph_options.rewrite_options.constant_folding = ( + rewriter_config_pb2.RewriterConfig.OFF) + + if graph is None: + if self._cached_session is None: # pylint: disable=access-member-before-definition + self._cached_session = session.Session( + graph=None, config=config, target=self._master_target) + sess = self._cached_session + with sess.graph.as_default(), sess.as_default(): + yield sess + else: + with session.Session( + graph=graph, config=config, target=self._master_target) as sess: + yield sess diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py index 646d2a5c3b3b0bfcce6f89be0e588baacc6b9237..09b6d4a515ab46879520f304cd5ef60469512380 100644 --- a/tensorflow/contrib/distribute/python/one_device_strategy.py +++ b/tensorflow/contrib/distribute/python/one_device_strategy.py @@ -36,9 +36,11 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy): # doing something that won't work with other DistributionStrategy # implementations? 
- def __init__(self, device): + def __init__(self, device, prefetch_on_device=None): super(OneDeviceStrategy, self).__init__() self._device = device + self._prefetch_on_device = prefetch_on_device + self._default_device = device def _create_variable(self, next_creator, *args, **kwargs): # No need to distinguish tower-local variables when not mirroring, @@ -61,7 +63,9 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy): return next_creator(*args, **kwargs) def distribute_dataset(self, dataset_fn): - return self._call_dataset_fn(dataset_fn) + return values.PerDeviceDataset( + self._call_dataset_fn(dataset_fn), [self._device], + self._prefetch_on_device) def _broadcast(self, tensor, destinations): return tensor diff --git a/tensorflow/contrib/distribute/python/single_loss_example.py b/tensorflow/contrib/distribute/python/single_loss_example.py index 0db0b59fcacee2785eb8191bb84ed5216a79b081..d1fdb3279cf2a7cba6e2282d58eedccf38bd38a3 100644 --- a/tensorflow/contrib/distribute/python/single_loss_example.py +++ b/tensorflow/contrib/distribute/python/single_loss_example.py @@ -22,6 +22,7 @@ from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.distribute.python import step_fn from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops from tensorflow.python.layers import core from tensorflow.python.layers import normalization from tensorflow.python.ops import array_ops @@ -59,7 +60,7 @@ def minimize_loss_example(optimizer_fn, # TODO(isaprykin): map_and_batch with drop_remainder causes shapes to be # fully defined for TPU. Remove this when XLA supports dynamic shapes. return dataset.apply( - batching.map_and_batch(lambda x: x, batch_size=2, drop_remainder=True)) + batching.map_and_batch(lambda x: x, batch_size=1, drop_remainder=True)) # An Optimizer instance is created either outside or inside model_fn. 
outer_optimizer = None @@ -68,11 +69,10 @@ def minimize_loss_example(optimizer_fn, layer = core.Dense(1, use_bias=use_bias) - def model_fn(xs): + def model_fn(x): """A very simple model written by the user.""" def loss_fn(): - x = math_ops.reduce_mean(xs, keepdims=True) y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) return y * y @@ -89,7 +89,8 @@ def minimize_loss_example(optimizer_fn, def batchnorm_example(optimizer_fn, batch_per_epoch=1, momentum=0.9, - renorm=False): + renorm=False, + update_ops_in_tower_mode=False): """Example of non-distribution-aware legacy code with batch normalization.""" def dataset_fn(): @@ -103,12 +104,19 @@ def batchnorm_example(optimizer_fn, optimizer = optimizer_fn() batchnorm = normalization.BatchNormalization( renorm=renorm, momentum=momentum, fused=False) + layer = core.Dense(1, use_bias=False) def model_fn(x): + """A model that uses batchnorm.""" def loss_fn(): - y = math_ops.reduce_sum(batchnorm(x, training=True), axis=1) - loss = math_ops.reduce_mean(y - constant_op.constant(1.)) + y = batchnorm(x, training=True) + with ops.control_dependencies( + ops.get_collection(ops.GraphKeys.UPDATE_OPS) + if update_ops_in_tower_mode else []): + loss = math_ops.reduce_mean( + math_ops.reduce_sum(layer(y)) - constant_op.constant(1.)) + # `x` and `y` will be fetched by the gradient computation, but not `loss`. return loss # Callable loss. diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py index a7e4fe80f3e65907fa4b48c5fe0fcfd422ba033f..75441786a615fc0d87b4c4b0b45b9384d678c1d3 100644 --- a/tensorflow/contrib/distribute/python/tpu_strategy.py +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.util import nest -# TODO(isaprykin): Consider whether inheriting is really appropriate. 
class TPUStrategy(one_device_strategy.OneDeviceStrategy): """Experimental TPU distribution strategy implementation.""" @@ -73,7 +72,6 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): def infeed_input(i): """Get input, split it and then enqueue.""" iteration_inputs = [f.get(i) for f in feeds()] - infeed_inputs = [[inputs_per_core[core_id] for inputs_per_core in iteration_inputs] for core_id in range(self._num_cores_per_host)] @@ -117,3 +115,14 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): iterate_on_tpu, [], num_shards=self._num_cores_per_host) return control_flow_ops.group(tpu_result, enqueue_ops) + + def _reduce(self, method_string, value, destinations): + del destinations # TPU is graph mode only. Rely on implicit Send/Recv. + if method_string == 'mean': + # TODO(jhseu): Revisit once we support model-parallelism. + value *= (1. / self._num_cores_per_host) + return tpu_ops.cross_replica_sum(value) + + @property + def num_towers(self): + return self._num_cores_per_host diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index 8cb5276579f48f9ea5781c5351cbf9bf3db16e6c..759f3c359975bae6c892b65d3ce24c59e9f74116 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -27,11 +27,14 @@ import weakref import six from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.distribute.python import input_ops from tensorflow.contrib.distribute.python import prefetching_ops_v2 from tensorflow.python.eager import context +from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops from tensorflow.python.training import checkpointable from tensorflow.python.training import device_util from tensorflow.python.training import distribute as 
distribute_lib @@ -58,7 +61,7 @@ class DistributedValues(object): else: device = distribute_lib.get_update_device() if device is None: - device = device_util.current() + return self._get_cross_tower() device = device_util.canonicalize(device) try: return self._index[device] @@ -312,6 +315,18 @@ class MirroredVariable(DistributedVariable, Mirrored, def assign(self, *args, **kwargs): return self.get(device=_get_update_device()).assign(*args, **kwargs) + def _get_cross_tower(self): + device = device_util.canonicalize(device_util.current()) + if device in self._index: + return array_ops.identity(self._index[device]) + return array_ops.identity(self._primary_var) + + def _as_graph_element(self): + # pylint: disable=protected-access + if distribute_lib.get_cross_tower_context(): + return self._primary_var._as_graph_element() + return self.get()._as_graph_element() + def _gather_saveables_for_checkpoint(self): """Overrides CheckpointableBase method. @@ -356,6 +371,12 @@ class _TowerLocalSaveable(saver.BaseSaverBuilder.SaveableObject): for d, v in six.iteritems(self._tower_local_variable._index)]) # pylint: disable=protected-access +def _assert_tower_context(): + if not distribute_lib.get_tower_context(): + raise RuntimeError( + "Tower-local variables may only be assigned in a tower context.") + + class TowerLocalVariable(DistributedVariable, PerDevice, checkpointable.CheckpointableBase): """Holds a map from device to variables whose values are reduced on save.""" @@ -366,18 +387,35 @@ class TowerLocalVariable(DistributedVariable, PerDevice, super(TowerLocalVariable, self).__init__(index) def assign_sub(self, *args, **kwargs): + _assert_tower_context() return self.get().assign_sub(*args, **kwargs) def assign_add(self, *args, **kwargs): + _assert_tower_context() return self.get().assign_add(*args, **kwargs) def assign(self, *args, **kwargs): + _assert_tower_context() return self.get().assign(*args, **kwargs) @property def reduce_method(self): return self._reduce_method + 
def _get_cross_tower(self): + all_components = tuple(self._index.values()) + # TODO(josh11b): Use a strategy-specific method. + total = math_ops.add_n(all_components) + if self._reduce_method == "mean": + return total * (1./ len(all_components)) + return total + + def _as_graph_element(self): + # pylint: disable=protected-access + if distribute_lib.get_cross_tower_context(): + return self._get_cross_tower() + return self.get()._as_graph_element() + def _gather_saveables_for_checkpoint(self): """Overrides CheckpointableBase method. @@ -570,11 +608,106 @@ class PerDeviceDataset(object): dataset_iterator, self._devices, self._prefetch_on_device) -class PerIteration(object): - """Holds input for multiple iterations at once.""" +class MultiWorkerDataIterator(object): + """An iterator (like `tf.data.Iterator`) into a `MultiWorkerDataset`.""" - def __init__(self, index): - self._index = index + def __init__(self, iterators, worker_device_map): + """Initialize the MultiWorkerDataIterator object. + + Args: + iterators: a dict mapping from each worker to an iterator for + that worker. + worker_device_map: a dict mapping from each worker to a list of + devices that belong to this worker. + + Raises: + ValueError: if iterators and worker_device_map are not compatible. 
+ """ + self._iterators = iterators + self._worker_device_map = worker_device_map + if set(self._iterators) != set(self._worker_device_map): + raise ValueError("iterators and worker_device_map are not compatible.") + + @property + def initializer(self): + return control_flow_ops.group( + [iterator.initializer for iterator in self._iterators.values()]) + + def get_next(self, name=None): + """Scatter the input across hosts and devices.""" + index = {} + for worker, iterator in six.iteritems(self._iterators): + if name is not None: + d = tf_device.DeviceSpec.from_string(worker) + new_name = "%s_%s_%d" % (name, d.job, d.task) + else: + new_name = None + with ops.device(worker): + data_per_worker = iterator.get_next(name=new_name) + + worker_devices = self._worker_device_map[worker] + # Ungroup these per-device values so as to get a flat map from devices to + # values. + for d in worker_devices: + v = select_device(d, data_per_worker) + if d in index: + raise ValueError("Duplicated devices in worker_device_map: %r" % d) + index[d] = v + + return regroup(index) + + +class MultiWorkerDataset(object): + """Like a `tf.data.Dataset` that distributes data to different workers. + + Each worker gets one shard of the input dataset. It is currently not working + in + eager mode. + """ + + def __init__(self, dataset_fn, worker_device_map, prefetch_on_device=None): + """Initialize the MultiWorkerDataset object. + + Args: + dataset_fn: a function that returns a `tf.data.Dataset`. + worker_device_map: a dict mapping from each worker to a list of devices + that belong to this worker. + prefetch_on_device: whether to prefetch to devices. + """ + self._worker_device_map = worker_device_map + self._datasets = {} + # TODO(yuefengz, priyag): support different set of jobs for input + # processing. 
+ for i, (worker, worker_devices) in enumerate( + six.iteritems(worker_device_map)): + with ops.device(worker): + worker_input = dataset_fn() + worker_input = input_ops.auto_shard_dataset( + worker_input, len(worker_device_map), i) + self._datasets[worker] = PerDeviceDataset( + worker_input, worker_devices, prefetch_on_device=prefetch_on_device) + + def make_one_shot_iterator(self): + iterators = {} + for worker, dataset in six.iteritems(self._datasets): + with ops.device(worker): + iterators[worker] = dataset.make_one_shot_iterator() + return MultiWorkerDataIterator(iterators, self._worker_device_map) + + def make_initializable_iterator(self): + iterators = {} + for worker, dataset in six.iteritems(self._datasets): + with ops.device(worker): + iterators[worker] = dataset.make_initializable_iterator() + return MultiWorkerDataIterator(iterators, self._worker_device_map) + + +class _PerKey(object): + """Holds data associated by keys.""" + + def __init__(self, *index): + # pylint: disable=protected-access + self._index = list(index) def get(self, iteration): return array_ops.gather(self._index, iteration) @@ -585,6 +718,24 @@ class PerIteration(object): def get_dtype(self): return self._index[-1][-1].dtype + def __str__(self): + return "%s:%s" % (self.__class__.__name__, self._index) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._index) + + +class PerIteration(_PerKey): + """Holds input for multiple iterations at once.""" + + def __init__(self, *index): + # pylint: disable=protected-access + super(PerIteration, self).__init__(*[batch._index for batch in index]) + + +class Batches(_PerKey): + pass + class MultiIterator(object): """Iterator that returns results of multiple get_next()s.""" @@ -595,11 +746,31 @@ class MultiIterator(object): self._batches_per_iteration = batches_per_iteration def get_next(self, name=None): - return PerIteration([[ - self._dataset_iterator.get_next(name=name) - for _ in range(self._batches_per_iteration) - ] - 
for _ in range(self._iterations)]) + """Return PerIteration with `iterations x batches_per_iteration` inputs.""" + data = [] + for _ in range(self._batches_per_iteration): + batch = [] + for _ in range(self._iterations): + batch.append(self._dataset_iterator.get_next(name=name)) + data.append(batch) + + # Here is an example. Suppose each get_next returns a tuple of two tensors. + # For 3 `iterations` and 2 `batches_per_iteration`, the `data` is: + # [[(a,z), (b,y), (c,x)], [(A,Z), (B,Y), (C,X)]] + # + # After the first `map_structure` it gets transformed to: + # [(Batches(a, A), Batches(z, Z)), + # (Batches(b, B), Batches(y, Y)), + # (Batches(c, C), Batches(x, X))] + # + # After the second `map_structure` it gets transformed to a tuple of: + # (PerIteration([Batches(a, A), Batches(b, B), Batches(c, C)]), + # PerIteration([Batches(z, Z), Batches(y, Y), Batches(x, X)])) + + data = nest.map_structure(Batches, *data) + data = nest.map_structure(PerIteration, *data) + + return data @property def initializer(self): diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py index e96ce547415fcb2bf3da8b6085ee11f51717db8d..9aeef9fa3e86f25ba2544236fd802c7162f4e40e 100644 --- a/tensorflow/contrib/distribute/python/values_test.py +++ b/tensorflow/contrib/distribute/python/values_test.py @@ -18,9 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import os from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python import multi_worker_test_base from tensorflow.contrib.distribute.python import values from tensorflow.core.protobuf import config_pb2 from tensorflow.python.data.ops import dataset_ops @@ -34,8 +36,10 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops 
import variable_scope +from tensorflow.python.ops import variables as variables_lib from tensorflow.python.training import device_util from tensorflow.python.training import saver as saver_lib +from tensorflow.python.util import nest @test_util.with_c_api @@ -436,6 +440,130 @@ class PerDeviceDatasetTest(test.TestCase): self.evaluate(next_element) +class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase): + + def _test_iterator(self, iterator, devices, expected_values): + next_element = iterator.get_next() + for device in devices: + v = values.select_device(device, next_element) + # The `v` here can be a tuple. + for element in nest.flatten(v): + self.assertTrue(element.device in device) + + for expected_value in expected_values: + actual = self.evaluate( + [values.select_device(d, next_element) for d in devices]) + self.assertEqual(expected_value, actual) + + with self.assertRaises(errors.OutOfRangeError): + self.evaluate([values.select_device(d, next_element) for d in devices]) + + def _test_dataset(self, dataset_fn, worker_device_map, devices, + expected_values): + multi_worker_dataset = values.MultiWorkerDataset( + dataset_fn, worker_device_map, prefetch_on_device=False) + multi_worker_iterator = multi_worker_dataset.make_one_shot_iterator() + self._test_iterator(multi_worker_iterator, devices, expected_values) + + def _cpu_devices(self): + worker_device_map = collections.OrderedDict( + [("/job:worker/replica:0/task:0", + ["/job:worker/replica:0/task:0/device:CPU:0"]), + ("/job:worker/replica:0/task:1", + ["/job:worker/replica:0/task:1/device:CPU:0"])]) + devices = [ + "/job:worker/replica:0/task:0/device:CPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ] + return worker_device_map, devices + + def _cpu_and_one_gpu_devices(self): + # The worker_device_map doesn't have to be an OrderedDict object; this is just + # to simplify the testing so that we can pass expected values as a list + # instead of a dict. 
+ worker_device_map = collections.OrderedDict( + [("/job:worker/replica:0/task:0", [ + "/job:worker/replica:0/task:0/device:GPU:0", + "/job:worker/replica:0/task:0/device:CPU:0" + ]), ("/job:worker/replica:0/task:1", [ + "/job:worker/replica:0/task:1/device:GPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ])]) + devices = [ + "/job:worker/replica:0/task:0/device:GPU:0", + "/job:worker/replica:0/task:0/device:CPU:0", + "/job:worker/replica:0/task:1/device:GPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ] + return worker_device_map, devices + + def testDataDistributionOneDevicePerWorker(self): + worker_device_map, devices = self._cpu_devices() + with context.graph_mode(): + dataset_fn = lambda: dataset_ops.Dataset.range(8) + self._test_dataset(dataset_fn, worker_device_map, devices, + [[0, 1], [2, 3], [4, 5], [6, 7]]) + + def testDataDistributionTwoDevicePerWorker(self): + if context.num_gpus() < 1: + self.skipTest("A GPU is not available for this test.") + worker_device_map, devices = self._cpu_and_one_gpu_devices() + with context.graph_mode(): + dataset_fn = lambda: dataset_ops.Dataset.range(8) + self._test_dataset(dataset_fn, worker_device_map, devices, + [[0, 2, 1, 3], [4, 6, 5, 7]]) + + def testTupleDataset(self): + worker_device_map, devices = self._cpu_devices() + + with context.graph_mode(): + + def dataset_fn(): + dataset1 = dataset_ops.Dataset.range(8) + dataset2 = dataset_ops.Dataset.range(8).map(lambda x: x**2) + return dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [ + [(i, i**2), (i + 1, (i + 1)**2)] for i in range(0, 8, 2) + ] + self._test_dataset(dataset_fn, worker_device_map, devices, + expected_values) + + def testInitializableIterator(self): + worker_device_map, devices = self._cpu_devices() + with context.graph_mode(): + dataset_fn = lambda: dataset_ops.Dataset.range(8) + multi_worker_dataset = values.MultiWorkerDataset( + dataset_fn, worker_device_map, prefetch_on_device=False) + multi_worker_iterator = 
multi_worker_dataset.make_initializable_iterator() + + self.evaluate(multi_worker_iterator.initializer) + self._test_iterator(multi_worker_iterator, devices, + [[0, 1], [2, 3], [4, 5], [6, 7]]) + + # After re-initializing the iterator, should be able to iterate again. + self.evaluate(multi_worker_iterator.initializer) + self._test_iterator(multi_worker_iterator, devices, + [[0, 1], [2, 3], [4, 5], [6, 7]]) + + def testValueErrorForIterator(self): + # Incompatible arguments. + with self.assertRaises(ValueError): + values.MultiWorkerDataIterator({"w1": None}, {"w1": "d1", "w2": "d2"}) + + # Test duplicated devices under same worker. + worker_device_map, _ = self._cpu_devices() + worker_device_map["/job:worker/replica:0/task:0"].append( + "/job:worker/replica:0/task:0/device:CPU:0") + with context.graph_mode(): + dataset_fn = lambda: dataset_ops.Dataset.range(8) + multi_worker_dataset = values.MultiWorkerDataset( + dataset_fn, worker_device_map, prefetch_on_device=False) + multi_worker_iterator = multi_worker_dataset.make_initializable_iterator() + with self.assertRaises(ValueError): + multi_worker_iterator.get_next() + + @test_util.with_c_api class MirroredVariableTest(test.TestCase): @@ -582,6 +710,21 @@ class MirroredVariableTest(test.TestCase): save_path = self._save_normal() self._restore_mirrored(save_path) + @test_util.run_in_graph_and_eager_modes(config=config) + def testFetchAMirroredVariable(self): + if context.num_gpus() < 1 or context.executing_eagerly(): + self.skipTest("A GPU is not available for this test or it's eager mode.") + + with self.test_session( + graph=ops.Graph()) as sess, mirrored_strategy.MirroredStrategy( + ["/device:GPU:0"]).scope(): + with ops.device("/device:GPU:0"): + v = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + mirrored = values.MirroredVariable({"/device:GPU:0": v}, v) + sess.run(variables_lib.global_variables_initializer()) + sess.run({"complicated": mirrored}) + _devices = 
["/device:GPU:0", 
"/device:CPU:0"] diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index fad613155d8861a2508fb7aca752b10ff85d35eb..a1d56066b417ddd103d17a528d2922ca5853bd55 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -372,6 +372,7 @@ cuda_py_test( "//tensorflow/python:random_ops", "//tensorflow/python:variables", ], + shard_count = 4, ) cuda_py_test( @@ -459,7 +460,7 @@ cuda_py_test( cuda_py_test( name = "batch_reshape_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/batch_reshape_test.py"], additional_deps = [ ":distributions_py", @@ -578,7 +579,7 @@ cuda_py_test( cuda_py_test( name = "wishart_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/wishart_test.py"], additional_deps = [ ":distributions_py", @@ -866,7 +867,7 @@ cuda_py_test( cuda_py_test( name = "batch_normalization_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/bijectors/batch_normalization_test.py"], additional_deps = [ ":bijectors_py", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py index ca20442c3940664feab7526110229872a6cdc41f..dc45114b1c23b5edb78d68ad4f38f5201d265170 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py @@ -26,6 +26,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.exp import Exp from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered from tensorflow.contrib.distributions.python.ops.bijectors.softplus import Softplus from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops.distributions import bijector from tensorflow.python.ops.distributions.bijector_test_util import 
assert_scalar_congruency from tensorflow.python.platform import test @@ -188,6 +189,15 @@ class ChainBijectorTest(test.TestCase): -np.log(6, dtype=np.float32) - np.sum(x), self.evaluate(chain.inverse_log_det_jacobian(y, event_ndims=1))) + def testChainIldjWithPlaceholder(self): + chain = Chain((Exp(), Exp())) + samples = array_ops.placeholder( + dtype=np.float32, shape=[None, 10], name="samples") + ildj = chain.inverse_log_det_jacobian(samples, event_ndims=0) + self.assertTrue(ildj is not None) + with self.test_session(): + ildj.eval({samples: np.zeros([2, 10], np.float32)}) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py index 7435bcbc684c1660a648cef4ab30c888723853f8..b003526392709b61e9cc46e0ff8e5fa78edc0568 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mvn_full_covariance_test.py @@ -131,8 +131,8 @@ class MultivariateNormalFullCovarianceTest(test.TestCase): return mu, sigma def testKLBatch(self): - batch_shape = (2,) - event_shape = (3,) + batch_shape = [2] + event_shape = [3] with self.test_session(): mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) mu_b, sigma_b = self._random_mu_and_sigma(batch_shape, event_shape) @@ -156,6 +156,33 @@ class MultivariateNormalFullCovarianceTest(test.TestCase): self.assertAllClose(expected_kl_0, kl_v[0]) self.assertAllClose(expected_kl_1, kl_v[1]) + def testKLBatchBroadcast(self): + batch_shape = [2] + event_shape = [3] + with self.test_session(): + mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) + # No batch shape. 
+ mu_b, sigma_b = self._random_mu_and_sigma([], event_shape) + mvn_a = ds.MultivariateNormalFullCovariance( + loc=mu_a, + covariance_matrix=sigma_a, + validate_args=True) + mvn_b = ds.MultivariateNormalFullCovariance( + loc=mu_b, + covariance_matrix=sigma_b, + validate_args=True) + + kl = ds.kl_divergence(mvn_a, mvn_b) + self.assertEqual(batch_shape, kl.get_shape()) + + kl_v = kl.eval() + expected_kl_0 = _compute_non_batch_kl(mu_a[0, :], sigma_a[0, :, :], + mu_b, sigma_b) + expected_kl_1 = _compute_non_batch_kl(mu_a[1, :], sigma_a[1, :, :], + mu_b, sigma_b) + self.assertAllClose(expected_kl_0, kl_v[0]) + self.assertAllClose(expected_kl_1, kl_v[1]) + def _compute_non_batch_kl(mu_a, sigma_a, mu_b, sigma_b): """Non-batch KL for N(mu_a, sigma_a), N(mu_b, sigma_b).""" diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mvn_tril_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mvn_tril_test.py index 685f32883dae5b8513badeb05e1508cd611d6e93..b556d06123800f22f5d9a90dd18f3c745aec90a1 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mvn_tril_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mvn_tril_test.py @@ -235,8 +235,8 @@ class MultivariateNormalTriLTest(test.TestCase): return mu, sigma def testKLNonBatch(self): - batch_shape = () - event_shape = (2,) + batch_shape = [] + event_shape = [2] with self.test_session(): mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) mu_b, sigma_b = self._random_mu_and_sigma(batch_shape, event_shape) @@ -257,8 +257,8 @@ class MultivariateNormalTriLTest(test.TestCase): self.assertAllClose(expected_kl, kl_v) def testKLBatch(self): - batch_shape = (2,) - event_shape = (3,) + batch_shape = [2] + event_shape = [3] with self.test_session(): mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) mu_b, sigma_b = self._random_mu_and_sigma(batch_shape, event_shape) @@ -282,9 +282,36 @@ class MultivariateNormalTriLTest(test.TestCase): 
self.assertAllClose(expected_kl_0, kl_v[0]) self.assertAllClose(expected_kl_1, kl_v[1]) + def testKLBatchBroadcast(self): + batch_shape = [2] + event_shape = [3] + with self.test_session(): + mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) + # No batch shape. + mu_b, sigma_b = self._random_mu_and_sigma([], event_shape) + mvn_a = ds.MultivariateNormalTriL( + loc=mu_a, + scale_tril=np.linalg.cholesky(sigma_a), + validate_args=True) + mvn_b = ds.MultivariateNormalTriL( + loc=mu_b, + scale_tril=np.linalg.cholesky(sigma_b), + validate_args=True) + + kl = ds.kl_divergence(mvn_a, mvn_b) + self.assertEqual(batch_shape, kl.get_shape()) + + kl_v = kl.eval() + expected_kl_0 = _compute_non_batch_kl(mu_a[0, :], sigma_a[0, :, :], + mu_b, sigma_b) + expected_kl_1 = _compute_non_batch_kl(mu_a[1, :], sigma_a[1, :, :], + mu_b, sigma_b) + self.assertAllClose(expected_kl_0, kl_v[0]) + self.assertAllClose(expected_kl_1, kl_v[1]) + def testKLTwoIdenticalDistributionsIsZero(self): - batch_shape = (2,) - event_shape = (3,) + batch_shape = [2] + event_shape = [3] with self.test_session(): mu_a, sigma_a = self._random_mu_and_sigma(batch_shape, event_shape) mvn_a = ds.MultivariateNormalTriL( diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py index 85ad23e4133ef09051cdc8b45e489caeea90fbb3..b158a51bb022b5e2ea3afda74e97b9dc131665a6 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py @@ -20,10 +20,9 @@ from __future__ import print_function import itertools -from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import bijector @@ -36,15 +35,6 @@ def 
_use_static_shape(input_tensor, ndims): return input_tensor.shape.is_fully_defined() and isinstance(ndims, int) -def _maybe_get_event_ndims_statically(event_ndims): - static_event_ndims = (event_ndims if isinstance(event_ndims, int) - else tensor_util.constant_value(event_ndims)) - if static_event_ndims is not None: - return static_event_ndims - - return event_ndims - - def _compute_min_event_ndims(bijector_list, compute_forward=True): """Computes the min_event_ndims associated with the give list of bijectors. @@ -238,13 +228,13 @@ class Chain(bijector.Bijector): return y def _inverse_log_det_jacobian(self, y, **kwargs): - ildj = constant_op.constant( - 0., dtype=y.dtype.base_dtype, name="inverse_log_det_jacobian") + y = ops.convert_to_tensor(y, name="y") + ildj = math_ops.cast(0., dtype=y.dtype.base_dtype) if not self.bijectors: return ildj - event_ndims = _maybe_get_event_ndims_statically( + event_ndims = self._maybe_get_event_ndims_statically( self.inverse_min_event_ndims) if _use_static_shape(y, event_ndims): @@ -258,11 +248,12 @@ class Chain(bijector.Bijector): if _use_static_shape(y, event_ndims): event_shape = b.inverse_event_shape(event_shape) - event_ndims = _maybe_get_event_ndims_statically(event_shape.ndims) + event_ndims = self._maybe_get_event_ndims_statically( + event_shape.ndims) else: event_shape = b.inverse_event_shape_tensor(event_shape) - event_ndims = _maybe_get_event_ndims_statically( - array_ops.rank(event_shape)) + event_ndims = self._maybe_get_event_ndims_statically( + array_ops.size(event_shape)) y = b.inverse(y, **kwargs.get(b.name, {})) return ildj @@ -274,13 +265,12 @@ class Chain(bijector.Bijector): def _forward_log_det_jacobian(self, x, **kwargs): x = ops.convert_to_tensor(x, name="x") - fldj = constant_op.constant( - 0., dtype=x.dtype, name="inverse_log_det_jacobian") + fldj = math_ops.cast(0., dtype=x.dtype.base_dtype) if not self.bijectors: return fldj - event_ndims = _maybe_get_event_ndims_statically( + event_ndims = 
self._maybe_get_event_ndims_statically( self.forward_min_event_ndims) if _use_static_shape(x, event_ndims): @@ -293,13 +283,21 @@ class Chain(bijector.Bijector): x, event_ndims=event_ndims, **kwargs.get(b.name, {})) if _use_static_shape(x, event_ndims): event_shape = b.forward_event_shape(event_shape) - event_ndims = _maybe_get_event_ndims_statically(event_shape.ndims) + event_ndims = self._maybe_get_event_ndims_statically(event_shape.ndims) else: event_shape = b.forward_event_shape_tensor(event_shape) - event_ndims = _maybe_get_event_ndims_statically( - array_ops.rank(event_shape)) + event_ndims = self._maybe_get_event_ndims_statically( + array_ops.size(event_shape)) x = b.forward(x, **kwargs.get(b.name, {})) return fldj + def _maybe_get_event_ndims_statically(self, event_ndims): + event_ndims_ = super(Chain, self)._maybe_get_event_ndims_statically( + event_ndims) + if event_ndims_ is None: + return event_ndims + return event_ndims_ + + diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index ecdb8967f43e5960b2285de05125d0c3dbafe63c..268c8d03426d435dc38412ac1bd05c674bd05d2b 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -53,7 +53,7 @@ class CholeskyOuterProduct(bijector.Bijector): its spectrum), and that the product of two positive-diagonal lower-triangular matrices is another positive-diagonal lower-triangular matrix. - A simple inductive argument (proceding one column of L_3 at a time) shows + A simple inductive argument (proceeding one column of L_3 at a time) shows that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive- diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g. 
diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py index 1ef7651d03a3388e72618b1d9bb8b819bde17e92..eb94760ad71f5babaedaafd3f7990b40aaad85c2 100644 --- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py @@ -128,7 +128,7 @@ The base distribution's `log_cdf` method must be defined on `y - 1`. class QuantizedDistribution(distributions.Distribution): """Distribution representing the quantization `Y = ceiling(X)`. - #### Definition in terms of sampling. + #### Definition in Terms of Sampling ``` 1. Draw X @@ -138,7 +138,7 @@ class QuantizedDistribution(distributions.Distribution): 5. Return Y ``` - #### Definition in terms of the probability mass function. + #### Definition in Terms of the Probability Mass Function Given scalar random variable `X`, we define a discrete random variable `Y` supported on the integers as follows: @@ -170,12 +170,62 @@ class QuantizedDistribution(distributions.Distribution): `P[Y = j]` is still the mass of `X` within the `jth` interval. - #### Caveats + #### Examples + + We illustrate a mixture of discretized logistic distributions + [(Salimans et al., 2017)][1]. This is used, for example, for capturing 16-bit + audio in WaveNet [(van den Oord et al., 2017)][2]. The values range in + a 1-D integer domain of `[0, 2**16-1]`, and the discretization captures + `P(x - 0.5 < X <= x + 0.5)` for all `x` in the domain excluding the endpoints. + The lowest value has probability `P(X <= 0.5)` and the highest value has + probability `P(2**16 - 1.5 < X)`. + + Below we assume a `wavenet` function. It takes as `input` right-shifted audio + samples of shape `[..., sequence_length]`. 
It returns a real-valued tensor of + shape `[..., num_mixtures * 3]`, i.e., each mixture component has a `loc` and + `scale` parameter belonging to the logistic distribution, and a `logits` + parameter determining the unnormalized probability of that component. + + ```python + tfd = tf.contrib.distributions + tfb = tfd.bijectors + + net = wavenet(inputs) + loc, unconstrained_scale, logits = tf.split(net, + num_or_size_splits=3, + axis=-1) + scale = tf.nn.softplus(unconstrained_scale) + + # Form mixture of discretized logistic distributions. Note we shift the + # logistic distribution by -0.5. This lets the quantization capture "rounding" + # intervals, `(x-0.5, x+0.5]`, and not "ceiling" intervals, `(x-1, x]`. + discretized_logistic_dist = tfd.QuantizedDistribution( + distribution=tfd.TransformedDistribution( + distribution=tfd.Logistic(loc=loc, scale=scale), + bijector=tfb.AffineScalar(shift=-0.5)), + low=0., + high=2**16 - 1.) + mixture_dist = tfd.MixtureSameFamily( + mixture_distribution=tfd.Categorical(logits=logits), + components_distribution=discretized_logistic_dist) + + neg_log_likelihood = -tf.reduce_sum(mixture_dist.log_prob(targets)) + train_op = tf.train.AdamOptimizer().minimize(neg_log_likelihood) + ``` + + After instantiating `mixture_dist`, we illustrate maximum likelihood by + calculating its log-probability of audio samples as `target` and optimizing. + + #### References - Since evaluation of each `P[Y = j]` involves a cdf evaluation (rather than - a closed form function such as for a Poisson), computations such as mean and - entropy are better done with samples or approximations, and are not - implemented by this class. + [1]: Tim Salimans, Andrej Karpathy, Xi Chen, and Diederik P. Kingma. + PixelCNN++: Improving the PixelCNN with discretized logistic mixture + likelihood and other modifications. + _International Conference on Learning Representations_, 2017. + https://arxiv.org/abs/1701.05517 + [2]: Aaron van den Oord et al. 
Parallel WaveNet: Fast High-Fidelity Speech + Synthesis. _arXiv preprint arXiv:1711.10433_, 2017. + https://arxiv.org/abs/1711.10433 """ def __init__(self, diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index 9a3b780af888a597d2440b243ffb8dc98d764f18..4384431e7b9c3e6ef259391fa9efa5a35d23c86a 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,6 +1,6 @@ # Eager Execution -Eager execution provides an imperative interface to TensorFlow (similiar to +Eager execution provides an imperative interface to TensorFlow (similar to [NumPy](http://www.numpy.org)). When you enable eager execution, TensorFlow operations execute immediately; you do not execute a pre-constructed graph with [`Session.run()`](https://www.tensorflow.org/api_docs/python/tf/Session). @@ -37,7 +37,7 @@ support for distributed and multi-GPU training and performance. ## Installation -Eager execution is included in TensorFlow versions 1.7 and above. +For eager execution, we recommend using TensorFlow version 1.8 or newer. Installation instructions at https://www.tensorflow.org/install/ ## Documentation @@ -48,12 +48,3 @@ For an introduction to eager execution in TensorFlow, see: - Notebook: [Basic Usage](python/examples/notebooks/1_basics.ipynb) - Notebook: [Gradients](python/examples/notebooks/2_gradients.ipynb) - Notebook: [Importing Data](python/examples/notebooks/3_datasets.ipynb) - -## Changelog - -- 2017/10/31: Initial preview release (in TensorFlow 1.5) -- 2017/12/01: Example of dynamic neural network: - [SPINN: Stack-augmented Parser-Interpreter Neural Network](https://arxiv.org/abs/1603.06021). - See [README.md](python/examples/spinn/README.md) for details. -- 2017/03: Core functionality moved out of the experimental tf.contrib namespace - in TensorFlow 1.7. 
diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 459f2f4a7d2afa153e77069bc3ce0c5360ddd7e2..9fd2d8d1254e32ae75ab5b085986c6e1c05e76f4 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -1,11 +1,27 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Eager Execution Tutorial: Basics", + "version": "0.3.2", + "views": {}, + "default_view": {}, + "provenance": [ + { + "file_id": "0B0kLcpwLFwKEVm9XNkFueGk4bTg", + "timestamp": 1504118841551 + } + ] + } + }, "cells": [ { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "U9i2Dsh-ziXr" + "id": "U9i2Dsh-ziXr", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Eager Execution Tutorial: Basics\n", "\n", @@ -21,11 +37,11 @@ ] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "z1JcS5iBXMRO" + "id": "z1JcS5iBXMRO", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 1: Import Eager\n", "\n", @@ -33,34 +49,34 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "RlIWhyeLoYnG", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "RlIWhyeLoYnG" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "# Import TensorFlow.\n", "import tensorflow as tf\n", "\n", "# Import TensorFlow eager execution support (subject to future changes).\n", - "import tensorflow.contrib.eager as tfe" - ] + "tfe = tf.contrib.eager" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "H9UySOPLXdaw" + "id": "H9UySOPLXdaw", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 2: Enable eager execution\n", "\n", 
@@ -69,30 +85,30 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "WPTUfGq6kJ5w", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "WPTUfGq6kJ5w" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ - "tfe.enable_eager_execution()" - ] + "tf.enable_eager_execution()" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "twBfWd5xyu_d" + "id": "twBfWd5xyu_d", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 3: Interactively Use TensorFlow!\n", "\n", @@ -102,20 +118,18 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "ngUe237Wt48W", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "ngUe237Wt48W" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "print(tf.add(1, 2))\n", "print(tf.add([1, 2], [3, 4]))\n", @@ -131,32 +145,32 @@ "# Most TensorFlow ops are directly usable with eager execution, giving\n", "# results immediately.\n", "print(tf.contrib.signal.hamming_window(x * y + 1))" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "IDY4WsYRhP81" + "id": "IDY4WsYRhP81", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "Numpy arrays are supported, too:" ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "lCUWzso6mbqR", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "lCUWzso6mbqR" + } }, - "outputs": [], + "cell_type": "code", "source": [ "import numpy as np\n", "\n", @@ -168,14 +182,16 @@ "\n", "print(\"Multiplied by 42:\")\n", "print(tf.multiply(ones, 42))" - ] + ], + "execution_count": 0, + 
"outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "PBNP8yTRfu_X" + "id": "PBNP8yTRfu_X", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 4: Define and Print TensorFlow Variables\n", "\n", @@ -183,73 +199,66 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "3Twf_Rw-gQFM", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "3Twf_Rw-gQFM" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ - "x = tf.get_variable(name=\"x\", shape=[], dtype=tf.float32, initializer=tf.zeros_initializer)" - ] + "x = tfe.Variable(0.)" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "45G7094TxsMb" + "id": "45G7094TxsMb", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "## Printing TensorFlow Variables" ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "UJBJeZ5XxuwA", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "UJBJeZ5XxuwA" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "# This does NOT print the Variable's actual value:\n", "print(\"Printing a TensorFlow Variable:\")\n", "print(x)\n", "print(\"\")\n", "\n", - "# A TensorFlow variable represents a reference to a tensor.\n", - "# The `read_value()` method provides access to the current value of the\n", - "# variable. 
Tensorflow Variables are automatically initialized according to the\n", - "# semantics defined in tf.get_variable().\n", - "print(\"Printing a TensorFlow Variable's value using .read_value():\")\n", - "print(x.read_value())\n", - "print(\"\")\n", "\n", - "print(\"Printing a TensorFlow Variable's value using .read_value().numpy():\")\n", - "print(x.read_value().numpy())" - ] + "print(\"Printing a TensorFlow Variable's value as a numpy array:\")\n", + "print(x.numpy())" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "2njjWHcTpBEn" + "id": "2njjWHcTpBEn", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "## Changing a TensorFlow Variable's value\n", "\n", @@ -257,64 +266,64 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "v3wr6Erbo_hB", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "v3wr6Erbo_hB" + } }, - "outputs": [], + "cell_type": "code", "source": [ "x.assign(42)\n", - "print(x.read_value())\n", + "print(x)\n", "\n", "x.assign_add(3)\n", - "print(x.read_value())" - ] + "print(x)" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "uhtynjHVpTB5" + "id": "uhtynjHVpTB5", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "## Use a Variable just like any other Tensor" ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "7PbktdnHoehR", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "7PbktdnHoehR" + } }, - "outputs": [], + "cell_type": "code", "source": [ "print(x + 3)\n", "\n", "# This code will broadcast the value across the list of numbers:\n", "print(x * [1, 2, 4])" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", 
- "id": "GVChqwlwy1SI" + "id": "GVChqwlwy1SI", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 5: Debug Errors with Instant Feedback\n", "\n", @@ -326,60 +335,58 @@ ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "23ap04N0v4k0", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "23ap04N0v4k0" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "vector = tf.constant([10.0, 20.0, 30.0, 40.0])" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "FCUMsIYxxRRa", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "FCUMsIYxxRRa" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "# Works, because the values of `begin` and `size` (the 2nd and 3rd input\n", "# arguments) are within the bound of `vector`.\n", "print(tf.slice(vector, [1], [3]))" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { - "cellView": "code", + "id": "T8me2oCNxpFp", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "colab_type": "code", - "id": "T8me2oCNxpFp" + "cellView": "code" }, - "outputs": [], + "cell_type": "code", "source": [ "# The following does NOT work, because the value of `size` (the 3rd\n", "# argument) causes the indices to go out of the bounds of `vector`. 
The\n", @@ -388,91 +395,90 @@ " print(tf.slice(vector, [1], [4]))\n", "except tf.OpError as e:\n", " print(\"Caught error: %s\" % e)" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "irxJhAgar84v" + "id": "irxJhAgar84v", + "colab_type": "text" }, + "cell_type": "markdown", "source": [ "# Step 6: Using the GPU\n", "\n", - "You can place Tensors on the GPU by calling a Tensor's `.gpu()` method.\n", + "You can explicitly place Tensors on the GPU by calling a Tensor's `.gpu()` method. The `.device` property tells you whether the Tensor is backed by CPU or GPU memory.\n", "\n", "The first operation executing on the GPU may be slow as TensorFlow initializes. Subsequent uses will be much faster." ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "7J4N9baqaKCL", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "7J4N9baqaKCL" + } }, - "outputs": [], + "cell_type": "code", "source": [ - "# The example code from here on will work only if your notebook\n", - "# is running on a machine with a functional CUDA GPU. 
The following\n", - "# line checks that.\n", - "is_gpu_available = tfe.num_gpus() \u003e 0\n", - "\n", "# Create some Tensors\n", "SIZE = 1000\n", - "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", + "tensor = tf.random_normal([SIZE, SIZE])\n", + "print(tensor.device)\n", "\n", - "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()\n", + "\n", + "if tf.test.is_gpu_available():\n", + " gpu_tensor = tensor.gpu()\n", + " cpu_tensor = tensor.cpu()\n", "else:\n", - " print(\"GPU not available.\")" - ] + " print(\"GPU not available.\")\n", + " cpu_tensor = tensor" + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "4E-2n7VbzY1n", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "4E-2n7VbzY1n" + } }, - "outputs": [], + "cell_type": "code", "source": [ "# Time a CPU-based matrix multiplication\n", "\n", "print(\"Time to conduct matmul on CPU:\")\n", "%time tf.matmul(cpu_tensor, cpu_tensor)" - ] + ], + "execution_count": 0, + "outputs": [] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "vbSFW-T5zhZF", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "vbSFW-T5zhZF" + } }, - "outputs": [], + "cell_type": "code", "source": [ "# Time GPU-based matrix multiplications.\n", "\n", - "if is_gpu_available:\n", + "if tf.test.is_gpu_available():\n", " # First use of the GPU will be slow:\n", " print(\"Time to conduct first matmul on GPU:\")\n", " %time tf.matmul(gpu_tensor, gpu_tensor)\n", @@ -481,51 +487,9 @@ " # Subsequent uses are much faster:\n", " print(\"Time to conduct second matmul on GPU:\")\n", " %time tf.matmul(gpu_tensor, gpu_tensor)" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "colab_type": "code", - "id": 
"E5pIOe3Rz7iW" - }, - "outputs": [], - "source": [ - "# Second timing demo for GPUs, after it has been used once:\n", - "\n", - "cpu_tensor = tf.random_normal([SIZE, SIZE])\n", - "print(\"Time to conduct CPU matmul:\")\n", - "%time tf.matmul(cpu_tensor, cpu_tensor)\n", - "print()\n", - "\n", - "if is_gpu_available:\n", - " gpu_tensor = cpu_tensor.gpu()\n", - " print(\"Time to conduct GPU matmul:\")\n", - " %time tf.matmul(gpu_tensor, gpu_tensor)" - ] - } - ], - "metadata": { - "colab": { - "default_view": {}, - "name": "Eager Execution Tutorial: Basics", - "provenance": [ - { - "file_id": "0B0kLcpwLFwKEVm9XNkFueGk4bTg", - "timestamp": 1504118841551 - } ], - "version": "0.3.2", - "views": {} + "execution_count": 0, + "outputs": [] } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index e6c7c117333e1e10aa571dae295e88747bd7d764..1e65b27bc8be8b05fefa38dffae7799b1e503bd3 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -43,11 +43,9 @@ "# Import TensorFlow.\n", "import tensorflow as tf\n", "\n", - "# Import TensorFlow eager execution support (subject to future changes).\n", - "import tensorflow.contrib.eager as tfe\n", "\n", "# Enable eager execution.\n", - "tfe.enable_eager_execution()" + "tf.enable_eager_execution()" ] }, { @@ -106,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 0, "metadata": { "cellView": "code", "colab": { @@ -114,34 +112,30 @@ "startup": false, "wait_interval": 0 }, - "height": 360, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 347 }, "colab_type": "code", "executionInfo": { - "elapsed": 127, + "elapsed": 374, "status": "ok", - "timestamp": 1505502830690, + 
"timestamp": 1525154227149, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, "id": "O4lsC4ckAcar", - "outputId": "2f760690-cafb-4777-b970-91d839f99faf" + "outputId": "f8becb3f-498b-4cb7-9ef3-608a68cb65d0" }, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAesAAAFXCAYAAACC+2avAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXt8VPWd99+TK7kykxtJQIebqZfaqogtrhKNa1ooEKl9\nCrpVn9ZNW6x9VWsbCi7aVUt01NZ9tq21KVZlFey2YkQNohhj3QWK2liCF5RIBCc3yEwmIZnMTOY8\nf/zmzJwzSSBAYibh+369eIU5c87vXLh8zvdu0TRNQxAEQRCEmCVurC9AEARBEISjI2ItCIIgCDGO\niLUgCIIgxDgi1oIgCIIQ44hYC4IgCEKMI2ItCIIgCDHOiIj16tWrufjii1m8eHF4269//Wvmz5/P\n0qVLWbp0Ka+//vpInEoQBEEQTjksI1Fn/eabb5KWlkZFRQWbN28GlFinpaXx7W9/+6QvUhAEQRBO\nZUbEsr7wwgvJzMwcsF36rQiCIAjCyTOqMesnn3ySsrIybr/9drq6ukbzVIIgCIIwYRk1sb722mt5\n5ZVXqK6uJicnh8rKytE6lSAIgiBMaEZNrLOysrBYLAB885vfZPfu3cc8RtzmgiAIgjCQhJFaKFpo\n29vbyc3NBeDll1+mqKjomGtYLBba2yeuuzw3N0Pubxwzke9vIt8byP2Nd06F+zsWIyLWt912Gzt3\n7sTtdnPZZZfxwx/+kJ07d/Lee+8RFxfH1KlTueuuu0biVIIgCIJwyjEiYv3ggw8O2Hb11VePxNKC\nIAiCcMojHcwEQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfE\nWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYR\nsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGCdhrC9AEARBOHXo6HCzcmUt\nTU2Z2O2dOBwl2GzWsb6smEfEWhAEQfjMWLmylurq6wAL9fUasJ6qqqVjfVkxj7jBBUEQhM+MpqZM\nwBL6ZAl9Fo6FiLUgCILwmWG3dwJa6JOG3e4Zy8sZN4gbXBAEQfjMcDhKgPWhmLUHh+Pysb6kcYGI\ntSAIgvCZYbNZJUZ9AogbXBAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFr\nQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfE\nWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYR\nsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhxRKwFQRAEIcYRsRYEQRCEGEfEWhAEQRBiHBFrQRAEQYhx\nRKwFQRAEIcYRsRYEQRCEGCdhrC9AEARBODE6OtysXFmL02mjsLADh6MEm806rGOamjKx2zuHdYww\n9oyIWK9evZrXXnuN7OxsNm/eDEBnZye33norn376KdOmTeOhhx4iIyNjJE4nCIIgACtX1lJdfR1g\nATRgPVVVS037RIuzz9dDTc33AQv19YMfI8QeI+IG//rXv866detM237/+98zb948
XnrpJb70pS/x\nyCOPjMSpBEEQhBBNTZkooQawhD6b0QW9vv4qqquvZ/v27mMeI8QeIyLWF154IZmZ5j/wbdu2sXSp\neltbunQpr7zyykicShAEQQhht3eiLGoADbvdM2CfaEGH7GMeI8Qeoxaz7ujoICcnB4Dc3FxcLtdo\nnUoQBOGUxOEoAdaHYtYuHI7LAbPru61tD1AM2AAXkyY5sVr/CBxi3rwMHI5FY3cDwrCJuQSz3NyJ\nHdeW+xvfTOT7m8j3BhPz/uLi+klOTgQgOTmBnJwMsrIyuPnm5w2x7DKmTbuPgoJzaG7ew8GDt6PH\nuDMyNpKdncFNNz3Pxx+nM2NGFw8/vJCsrNhLOJuIf37Hw6iJdXZ2NocOHSInJ4f29naysrKGdVx7\ne9doXdKYk5ubIfc3jpnI9zeR7w0m7v2Vlz8XFuVduzT6+lSy2N69KRhd3zk5Z/LCC5dRWtrPwYOR\n7Xv3pnDjjYOvEUtM1D8/neG8iIxYnbWmaabPJSUlPPPMMwBs2rSJK664YqROJQiCIDB0gtlQsezB\ntg8nSU0Ye0bEsr7tttvYuXMnbrebyy67jB/+8Id897vf5Uc/+hF/+ctfKCws5D/+4z9G4lSCIAhC\nCLu9M1R+pdzauijrsWxVruUJx7JXrZrDrl2VuFzTsNkOsnr1EtaufWvQNYTYYkTE+sEHHxx0+2OP\nPTYSywuCIAiDMFSCmc1mHdSVXVn5Nk7nKsBCb6/G2rXrhxR2IbaIuQQzQRAEYXjoojxYTHewTmWD\nubyHEnYhthCxFgRBmIAYu5vpncrsdk1c3uMUEWtBEIQYYai+3SfSz3swK/rpp+cgLu/xiYi1IAhC\njDCYNVxVtXTI7UdjsOQzcXmPX0SsBUEQYoShyqhOpLxKEscmFiLWgiAIMcJQpVjm7S7a2t6ltJSw\nS3ywphojYUXLOM3YQcRaEAQhRhjKGjZub2t7F6dzFU6nconX1T1AaelU7r770mEL6XBF+ETc78Lo\nIGItCIIQIwxlDRu3l5aC0xlxibvdZ/KnPy06rjahwxVh6W4WO4xYu1FBEARh9IluGQpqPvXxCOlw\nRXg4IziFzwaxrAVBEMYRuku8ttaPx5MCLAQ0CgoODXuNoWLjQ51LktTGHhFrQRCEcYTuEr/hhv+i\npiYBeBY4xNtvu3G53ANiz4PFp4crwlLqFTuIWAuCIIxDmpsLgF5gOWChtVWjomJg7Hmo+LSI8PhC\nxFoQBGEcEG0hFxT4qK+fwrFiz5IkNjGQBDNBEITPiI4ON+Xlmygt3UZ5+TO4XO5hf69byPX1V1Fd\nfT0QoLBwN8dKAJMksYmBWNaCIAifEdEu6V27KqmtvS4cZz5aSVW0hdzcXEBt7SIqKgaOyDQiSWIT\nAxFrQRCEz4howXU6P09FRe2Qgmx0WRcUNFNf/xSQAXgoKPAcdUSmznCTxKRbWWwjbnBBEIRBOJbL\n+kQwu6RdwLts3Up4/aO7rBOBa4DFwLWhzyNHtJu9oqJ2RNcXTg6xrAVBEAZhNFptOhwl7NpVidP5\neeBdYCW9vRaqq9X6DkcJfX3r2LEjDjiMz5cWLsdqbs7B7AbPOalriUYS0WIbsawFQRAG4XjFaziW\nuM1mpbb2OsrK3KSkFA5Y32azkpychNv9bdzun1JTsyJs4UZb3QUFLeHzLVv21Elb/pKIFtuIZS0I\ngjAIw+3ypROxxDupr3+RurqXKS6OHxD71WPI5eXPhCxq8/pDvSREJ4r5fAkmy/94eoMb0WPV+/Yl\nUFhYSXZ2ETNn9kgiWowhYi0IgjAIx5tFHRHZGmABbvcWqqvT2LXrCWprrx+QrOVwlODzPcL27V1A\nNj5ffzhuPdhLQnSiWGnpNoZr+R8teczo7geNuXNlslYsImItCIIwCMfbajMisunAFvTOYk7n4kE7\ni9lsVpKSUnG7vwdYqKnRSEpaf9SXBKPotrXt
AcoYjuV/PCVhEquOTUSsBUEQBmEoa3So7brI1tW1\n4HafyXAEcDChPNpLgtkKLqawsJK8vLMpKurl7ruHtvyPJsjH6+4XxgYRa0EQhEEYyhodarsusi6X\nm8svfwKnczFDCaAu+Pv3t6CSuoYnlGbRtZGXdzZbt15Bbm4GH3xwgPLyTYO6uo8myNI0ZXwgYi0I\nQswylo06IsLoBmrC9dD79iUQbaV2dLi55ZaXQiVXh5gzJ5kvfnEdzc052O0eVq26wCSkPl8PNTXf\nBzqBDVitXoqLE44plEcT3aO5uo8myDJZa3wgYi0IQswyGrXOwyXSMcwJ3Bauhy4srCTaGl65spYt\nW24Mb9u2bQNlZQG2br0CgPLyTab7sFofCO1rBa5l+vRnqaq6Ilz+NdTLiS66+/bF09HRRGNjEeXl\nz/Doo2VHdXWLII9/RKwFQYhZxjb5Se8Y9rzpGrKzi5g7V1mpBQUt+HwJvPZaErABWIgS4AyamvrD\nK0XfB2SjOphtAdJoa9uDyzXnmC8nuuhef/3TNDSswum0sHu3xo03PoHdjsSeJzAi1oIgxCxjmfwU\n6RjWhdGSnjmzJyygRotZ7fMgUAD00NbWTmkphnGWkTXmzQvyzjsP43SuwpgxPtyXE+Vuj+xXV6ex\nY8cVSOx54iJiLQhCzDKWyU+RF4WFDBVXHmgxfw5YRHLy7TidP8XptFFfr7Fgwe8oKzPex1dYtuwt\nnM7IsVu3gs023HKsQxhfIOCQuLonOCLWgiDELCMtQEdLWIv+bvXqOUReFAI4HFcOSG6LtvyhO/T7\n01Eu7nSgiwMHMnn11SVHPba3N5He3psoLKwkK6uIjo697Ntnp7z8mQGx63nz0qmp2YCawNXF/Pky\nHWuiI2ItCMIpw2Ax4fvuu5yVK2upqwvgdicDl1FfP5nBktmGEvTaWj8eTwrKCteAg4BqdgIaHR2V\nA65F9xps3Qq9vX7Uf8dv0NOTwFlnfUJDw3SczgwaGjz4fM/z+OPfCl8DJGG1eoGDzJuXwaOPXkN/\n/4BTCBMIEWtBEE4ZBosJR7fbhI3ANYPGi4dKALvhhv+ipkYD/gDk4PMlocqyAGpwuQoHWMjmHuEp\nqGQ2C273Il5//U7g1vA1bd/+AKCEuqRkfTjWDarrWVaWdch51sLEQMRaEIRThsES1gbGndOJjhfr\nFvXWrRj27WTz5oMUFf03gcCh0PbbAQuapqGywy3ActMYzGhr3eEooa7uZdzuyDX090+PuqZsQL0s\nqPGa0h70VEPEWhCEU4bBEtYqKl41CbjV+j7FxS5TIlnEot5AJLHrRYLBVSGR1YDHMQusDzWF+OjC\narNZ+fKX+9myJXINOTkHaWszZ4+D7hno5ni6ngkTAxFrQRBOGWw2azhG3dSUSUXFq1GJZB4cjuUD\nEsn27YtHucctwL1YLFPQNLMQQzvmDG0L8Klp2/vvv0lJyRFmzQqYXOIWSwD1IqASxs49N430dHP2\nOOiegSWha0mjsLABh+O6UXteQuwgYi0IQkwQnby1atUcKivfHvFWoyfSFa2jowmIxImTk9fg9Z6F\nWZyt6CIKO1BW9W3AfcDZwBG83ttoaNhCQ8P1pvM2NxcAV4XPd/jws2zYcMWA61Cegc2hZ+LG4bgO\nTYNlyzawd2/KZ96SVfjsELEWBCEmiBbRXbsqw4lUx9NqdLDyrNzcjPD3J9IVbfLkqTidG9FLsU47\nrZDZsz1s3/4A3d2ZBAKTQmumAW8CPwXeAGzAOcBiw2rpA8473OYvg5WyRbcy/SxbsgqfHSLWgiDE\nBNEi6nJN40QSqQaznJ999vrw98cSxsHEvrPzU4yW9ZEjlfzqV9excmUtjY2pHD78AR5PBt3de1Ad\nzGxEOp8ZO6C5gJ1AO3v2OLnhBicPPbT4pJq/yDzqUwMRa0EQYoJoEbXZDtLbG/mcn38oPOSioKAZ\nSAxNtTK7
fo8lXqtWzWHnzntoa8sjPv4Q3d3puFzu8PGDiX12dpGp21h2dtGAkq/k5DXARcAelCir\nzmeZmR34fHfg9Z4P7AXuBiz4/VqosckLJCWlnrC7X+ZRnxqIWAuCMGocz4jLaOty9eolrF0b+ezz\n+amuVpOt1DSsaxisucn+/QHgSeBrwOQB4lVZ+TYtLbOAawgGLWzbplFREXEdDyb2M2d2snu3uT94\n9H59fRcBS1Au7/tISSmktBQcjjKWLXuL+vqrgM2mYyCD7ds/xe3+HsNxYw/2PB2OEpKTN4Zi1tIT\nfKIiYi0IwqhxPMlcg8Vjq6rs4d+Xlm4jInQZGEVv61bYtesJnM6bUC5oNYayuHjKAPFSmd1O1DSt\nLmAhdXUBGhubqKx8m/37W4gujVq1ag67dlXick3DZjvA6tVl3HnndswJZkfC1wNTuOyyI1RVqa5j\nEeu3K+qYLlQN9fDc2EM9z6efvkaaokxwRKwFQRg1oq3PfftSB8xr1jSGZX2b3b0ejKKn+mqvRu8+\nBhamTz+DqqqBGdXRmd2wAbd7Epde+if8/n9HdR4zD+6oqKgNJ7v19mosXVpJd7debuUDmoHvh86g\nAclApP+ncQ71oUP30NMzlbi4w8yblw4khLqfHduNLfHpUxcRa0EQRo3oeGpHx14aGswZ3sCg1uJQ\nfbhVL20f8ERo3URgAZFsbDia6EXHn5XYXoXfHwh9tqLizVU0NZ1BRcWrNDamYRTJSBexxSjX9lVA\nDSrT+wPgX2lufi18zqMNJHG53CQlDS+5TOLTpy4i1oIgjBrRceh9++wGoeykrq6Vvr4pKAt1IWAN\nW4tDuXxVL20Vu1ax6eXo4lVY2EBeXvCoojdz5pFQ/LkTeDG09QXgI4zdydzun1Bfr85dWLiWgS5v\njYgrezLqheFFIAd4gYKCwYV0sLjzcEutxnJkqDC2iFgLgjBqRFuU5eXP0NBgFkTzAI3lYWtxKJev\nUbCUIK4LZYV7cDiuO2YmtX58bW0LHs9PDedfh8redtHb68bvj8S0s7KmM3euOmdb27s4nStQrvh7\ngQySklbS359Mf/9cVDvQBcBfBj3/8cTxT0bYhYmFiLUgCJ8ZRqHdv99rGl6RkuKntHR92FocyuUb\n/QJgFLSKilePWfqkH19auo36eqM73A/00tX1KZr2C4wx7Vmz+sOu+VtvPURPzyaOHPkYv//HgA2f\nL5Kdrr94NDfnDCq2xxN3PpFua8LERMRaEITPDKPQKnd2RIxLSzEJ0VAu32gB7Orq5NVXf4guaD7f\nOh5/fNmAc+vH7dsXT0dHE93d8Zhd25OBa9G05zCKqdXqxeG4ElDiWVNzI2ZvwDVEZ6dDGna7e1Cx\ntdu1YcedJaFM0BGxFgRhTDhW/HWopKxoAUxMXItR0LZvjxtwzOHDxjnQG1HZ4CrrOzPTS3d3C8Hg\nTaG9zVOtiosThmy4EkloM2enT5q0i9Wrl/G9731EtNg+/XT04BBJKBOOjYi1IAhjwtEypI9GtGD2\n9+dgtpAPDzjmpptqQhncnahJWJF4dFzcM+Tnazidk0N7LwDuwGqdQXFxAqtWXRAuN2tr2wMUo2q5\nXUyatAuLxU1m5l407S7a2s5HDez4MZdc8is0bSrwGCpbXDVoOZ77loQyQUfEWhCEMWG43c2i9yso\n8Jmszby8Vlpa9PGSrfT2urDbN2GzHWDTpjJmzLDz8cdqAIfK1r4NYzwaDvPHP17OkiVr6OubgcXy\nMZdcks4f/nAlmgaXXfY4LS0/ALYA55KYuJaUlCx6erLwes8EvkZv72Ss1gdQHcwUfv+Foc9DN2g5\nFif6QiNMPESsBUEYE4abPBW934IFv+OKKx6hrs5CMHiY/v4errjiEIcPp/L++014vSo5rLfXRXHx\nL5k9+4vs21cPfA7owezG9jFvXjq//e1H9PWpnt2appGVtR5Ng5KS9bS0fA
El1KpEzO/vxu83J5Op\nuHU2Q3U0G6pBiyAMFxFrQRDGhOEmT0Xv19xcQFvbuwQCqrlKe7vGe+9VUl9/RSimq++7Ba/3Lhoa\nLMDVKFFNxyioU6Y0AqezdStE13qvXFkbcp13o4+1VEQnk6k1580LkpS0nrq6AG53K8aOZhJrFk6W\nURfrkpIS0tPTiYuLIyEhgT//+c+jfUpBEMaI4xncEZ08ZZyqZTx2sCSrDz4wj89U4zTBZjsQmtTV\nCfQxUFQvJTPzfk4/fSYdHXvp7k4JZXfrDVKeBRIpKPDQ1FRApGb6d6huZQNbnVqt71Nc7MLh+Ao2\nmxWXy80ttzzP9u1/ALKZNy+Iw/GVkXvIwinJqIu1xWJh/fr1TJ48+dg7C4IwrhnKtR0t4qtWzaG7\n20Ni4lr6+3PIzm7i7bcTaWubA3RTX78E2ExV1VJWrDiDmprb8fnsQCtvvHGE9HSLaXympn1ISclL\n+P3dJCSsIRBIAaZjdkt3A5NJTw8wa1ZPqO3p86HvazDXSa8LvSQsAZ4DJmOxrCEjYzpz5/aQlGRs\nxLLc9EJis1l5/PFvfRaPWziFGHWx1jSNYDA42qcRBCEGGMq1HS3iu3ZV4nTmoeK8GbS3dwA/wxgH\nbmrKZN++JhYufJ5gMNKk5PDhDcTH/4NJk9agaTPw+/fh9f6UhgYbEXd3MlBCxPX9D1QG94N0d/tp\nbEwNradPwUrH7GrPCZVYbWbfvgQ6OtxkZ5/HzJlHcDiWhsW5o8NNRcXwPAmCcDJ8Jpb1jTfeiMVi\nYdmyZXzzm98c7VMKgjBGDFUXHC3iym3dBhgbjAxsKnL11c8RDH4u6rsM+vtn0t9fTmFhJU7nl1FC\nrH/vB3YDS1HWsga8AawGLHg8GocP672+F6Ji1fuARabr1jOxy8s30dCwCqfTEuopHvEWqNptFdeu\nr19CX99fSE5OEvEWRpxRF+uNGzeSm5tLR0cH3/72t5k5cyYXXnjhaJ9WEIQxYKi64GgRV7HlwtBn\nN7AntIKKERcWNrBq1RIuvvgg8CEDZ0C3As/jdPaihHmx4ftEYAZKhDOIWM+6ld1FZmYuUGmYnnUd\ncB9wNoWFDTgc14Xv6WjeAn1spr7+jh1xuN3SHlQYeUZdrHNzcwHIysriyiuvZPfu3UcV69zcjNG+\npDFF7m98M5Hv70Tv7fBhNzfdVMPHH6czY0YXjz66hKwsszX56KNlrFixMbRPN2vXfotLLnmMlhYN\nFS+OuMCnTbuPd965iRUraggGVwGfALejYtDtKCs6H7gUaADsqIEaBSj394LQmkYSME7n6u6+j6lT\nz8XpXBzeIzW1kEWLjvDwwzeZrr+oqMf0olFU1EtubgZOp41ob4DF8qlpm9Np+8z+zkzkv5sw8e/v\nWIyqWPf29hIMBklLS6Onp4c33niDm2+++ajHtLd3jeYljSm5uRlyf+OYiXx/J3Nv5eXPhePRu3Zp\n9PUNZk3G8+tfLzJtOf/8PGpqNgD6HGkACzk5Z9LfH8/evSmh7XagAvgD8AVU/PkHRIu8Emz98/6o\n78wtSW222WRkfAw8hbK+D5OW9iF7987lO9+pNrmvf/zjL/DGG5W4XNOw2Q5w221ltLd3UVjYgdHi\nLyxs4ItftFJTY9zm+kz+zkzkv5twatzfsRhVsT506BA333wzFouF/v5+Fi9ezCWXXDKapxQEYYQY\nbhnWiQ6baG4uQLXhfAyj6L3zzm7OO28PZ51lrImeDEwFFpGfX09Ly2Sik8JU0xPlyk5IsBIIGL/L\nMp1j5swedu7sBH4Y3tbevoH29qvCCXB5eWdjt3fi8/nD7u7eXo21a9dTVWUfxOWvXOdJSdIeVBh5\nRlWsTzvtNKqrq0fzFIIgjBLD7TA2VFLZYOValZVvG9qGHgkd14MxvqxpBTidNxIM3kNZ2XoaG1Np\nb3+Pnh6NuLg/cs45mZx//jq2b+/A7Y
4khcFeVCMSK3APA/uFb8Bq9VJcnIDDcTnnnVdLdOKa/nun\n8/M4nUuor9ewWv/IYC8jQ7UClRi1MBpIBzNBEAZluBbzUEllt9zyElu2qGzv+nqNF164g0DgrvDn\nBQvWsWDBOmpqfIbzgJpkZaGz024Yp9kTfnHYts1Fbu6DdHUB3IPFkk1c3Kf09/8EJdQagUAPkYSy\nbiwWK0uWBHA4rgx7B1SSmwvVSjQNleR2KcqKj7QKhUMYhV+6kQljwcBZcoIgCCiLWYkUgMb+/R9S\nXv4MLpfbtJ/NZuW++y7Hbvewb18ql1/+BCUlz7FtWwuqMxiAhUDAjlH8X3stgaSkROLjm4GvEmnr\n+S7gQtP2hs9lfnF4hvb2NPr7LwJmoWnX0N9fBNRgtT5KYWElKhltOSpLfDmTJ3cDsGzZW+F72LSp\njEmTfhnabwnwMzIz/5NJk+5AJao9BbiYNy+DsrL1nHfes5SVrT8h13ZHh5vy8k2Ulm4b9BkKwrEQ\ny1oQhEFxOEro61vHK68ECAS6cLu7qK6eyvbtf+Svf/22KX5tdJmDhtO5EZXBvQG4FiX6jRgt1N7e\nZKqrl2Ox3INxUIYS2Pvwem+jokJ1MTO72l1EN1BRMenFTJv2JJ98YkH913Ynykp2091dSHV1PHBZ\nKCb9MHl5ZzNp0gy83sgLRFzcJLzefwuvXVhYyUMPXXfStdLDDSkIwlCIWAuCMCg2m5Xk5CQCAWPj\nko20ta3hllseISkpNRx/bmxMY2AfbjXVCjaj6qKTgcdR86SnAN9ATbmaiu76jhxfAPyOLVuslJc/\nw+rVc9Bd7Q0NGVHJY16U21qjo6MJj2cFSvwvBHYBd4X214UdnE7V5ASexBzbzjZdR17e2SPS1ORE\nk/AEQUfEWhCEIYkWGV2Et2/vwu3+HrqlOGXKHShhzkANuuhFiV8Lqq92I5oWaRmqLG5QrmY/8L+Y\nG5skAT+jr+9RqqsTqav7G8XF8Tz99Bxuuul5tm0zCmwLaWk9/PM/r6exsQin02ilw8Dr7yISz+4h\nM/NeZs48C7vdg8/Xbyq9Gqn49FBJeIIwXESsBeEURs/YdjptFBZ2DCjPihYZFVceaIEePpyAcRBG\nQsJdBAIbUNnZk5k82YXbbRRNN/BrlKtcubaTk9fg988kGExBNTbRXd7fwe22UF2t3MdJSQA/B+ag\nLOrvEx9fFWoN+gy7dycSEWM9acyGPiHL6+3E6707fK3p6ZVs3apmTbtc7lEpvRoqCU8QhouItSCc\nwkTHmqNjqQ5HCUeOPMJrr2kEAk7i4rK45JKH2Lv3CGoaVTdwMYFAPkbxPuusc5g5s4emptcGtVhV\n1na84RgbfX3TgY+BuahxlQuAHAa6jzNRLvUl4evs6ckIX++WLY/Q16eL8SLgDmy2WcyfH4fDsZxv\nfGMnu3dH1szOLgqvM1Q51skyWusKpw4i1oJwCjBUg5NjxVJtNitPPfUvpm3l5ZtoabkFXXgtltvR\ntHOIxH5d7NnzNh99VITNtodHHilD0+CVV+7E75+NillfS2Lievx+o4A3Ar8wrZuRkYHHE+0+1qiv\nbzadr7+8jtPXAAAgAElEQVT/AKWl27DbO5k58wzee89oxV/A7NkJVFVdBsDMmUdCAzkiDVIEIdYR\nsRaEcc5wOo0NlY18tFjqcAVe07JQFnEVqnd3F8FgJb29quPX0qWVzJ07Db//34kI8wbmz8/kf/7n\nDrzeuSh39udN606ePJudO6/kllseYfv2LiAbn6+fn/98Hps3dxAMRlzdmvYL6uvVveXn/wJz0th7\nfPRRIeXlz+BwlIhLWhiXiFgLwjgnWojr6h6guDjPJNpDWdC6cKmYtcskXMMVeNU0pNLw+bemc7lc\nBQPOHxfnYc8eNxbLLJQrfSHK9R1Zd9IkJwBJSanhZLaaGo2kpPV85Svp1NQsN5wzsnZPTz6RjmgN\nwA
pcLls45l1VtXTYLunhtlwVhNFGxFoQxjnRQuh2n0l19SKM8eehLGg9ljrYoITIum6ghq1bCZdR\n9fWtY8eOOI4c2Y/ff6bp/Kq1Z+RcmtaI3T47dP5O4EWCwSAtLbOAr6FqoTcCC4iLu51g8MvAEVpa\nfkBFxeZBXzSefnoO77xTidM5DeVWj2SS9/a2ohLXQL0IbEHPAt+3L/64BFjqo4VYQcRaEMY5g2ds\nm+PPJ+L6LShopr7+KVRJViK9vUuorp7MSy+t4Z/+yYbb/R3gEeAAZrezF2OrT6/XRm1tVyi2nQXc\nZth3I3ANKSl9XHbZ0/zP/2Tg8fhQXczcvPiik/nzC03rt7Q0cMstzbhc01D/hX0/tE4asAO/f7ph\n//0YG6h89NEdfPnLTtzunzAcAZb6aCFWELEWhHGOLsR1dQHc7kkol7Kyns1WpMbTT885DjduIsZy\nLF1Yvd6LeP31N1EW60qUtfwEaiBHO6qLsdFFvQGP59rQPtEzoPuA5wgEGnj11UmGLO6rgY34/d+n\noeEOpky5k9bWTCCHlpZp1NT0oCzq7xPp7f0ukAt8E3gUVfaVazqf13s+Xm8iwxVgqY8WYgURa0EY\n5+iubJfLTUVFbbhcyuG4nIqKod24RiEvKurh7rsvNQl5c7O5bEpZyhpwhP5+O5GuY1ZUE5PridRG\n3wecg5o9nQc0AR+iyq6Mk7KSgCX4/Xpf8IENWDyeWSQnt2O2yO8AZgPVqBeEbuAW8vP/MzQ+MxX4\nDip2bbT6+1CW//AEWJLRhFhBxFoQJgjRtbwdHW7q6gIMZUVGx2O3blWJafooy/37WzAL3QcoUfwq\nmnY/0EYkVmxsF2pDCfWi0P7LUeJ9F8oK3xD6eRi4OXSMGo9pPp9qwKJp+4AZmIXc+HKgERdXyeLF\nm1m9+uusXbuerVuht9eC8jJsJDXVj9V6EKdzRegY87jM4T5TQRgrRKwFYYKycmUtbncyRgFsa3sX\nl2vOoCVYbvdsqqt7eeGFZwkEfoBqevI4CQkHiY930dcHyjLdiKaBGtDxBMpSNSd5KYu6m0gnsjwi\nVvi1obUnh36BalGqhFUJ//+GjrkTTZvOpEmfmu7DYslG0yLXnpmZHxbVqio75eXPhLK/rcByFi3a\nyN13XxdOWLPbzeMyBSHWEbEWhAmKEuPLiCR7fYDTuWKISVYaKuY7hUAgDlV+dROwhUDgCwQC/wvM\nAv7VsP8TKAs3A9iHxXIP8fF5BAKTQts04K+AhylTPqa11Xiut4F+LJZ7yMgoJDHxQw4f/hg4HdUi\nVG9peit9fRZaWlwUFlaSl3c2druH7m6LqT/4vHlB071Hu68ffngJ/f3xYiUL4xYRa0GYYOix6P37\nA8ALRMqjGgAL+/bFU16+iX37EigsrKS7Ow+PJxXoR8V6VwHPM3Bs5YOYXdFeIq7opSQn34HFkkIg\ncD1qulYkOe3ccx8hGFxDe7sVSEFlmF+IpnnxeBaQmPhbIuVWoCzvFoyu9by8s009vCsqjLHkr5ie\nQbT7OitrYGmaIIwnRKwFYYKgi3RdXWu4NElZqA8CU1GZ0y/S0dFEQ8Oq8PcLFqwjI8PCn/6Ui7KI\nLaj4cXTCVzbmmHIbcC9wJtCL16u3En0KJeSRY1991UJcXAoqSWwjymqPZJn7/YVRax9BxbUHTwST\nWLJwqiFiLQgThEjC2POYRfZzKMsYrFYv2dlFoVnO6vvm5hxefPEqtmypxOPJRAnkQuBhzHHoD4B7\nUOVQn6Cs9dNQ/43obvR7UWIcB/wB1VAlm2CwjWBwNsYs78j1pQE+EhPvxO+/ECXUXwX+jB7DLixs\nwOG4bljPYbCmJ7m5GcN/kIIQg4hYC8I4xihMjY3NKGs0Oqv6g9C2i0lNbaGpqdXwvYuWlgYuuiie\n1FQfHs8hIsM0koG1wNmo+dR+4N8M696DuQ57PxExVo1U4EbD9/eG
fkZf35vArcyf/0fee68Bl6uA\nYPBBEhPzSEhwMW9eBg89dJ0pGexoXcgG6zr27LPXj+RjF4TPHBFrQRjHDBxxuQFlFW/AYulE0yaj\nksImAz/B6ZyDsnZ19/X7tLTcTkuLGiepYtjxeDyRrl9qNvVUVDmWbhF3hn4+jxLfhahY9FMoV/jn\nQvsaLegzQ9fXgYpPzwQ+4owzTufsszfj82XidN4aPm9f30ZgOe+8U3nU+46uH5euY8JEJG6sL0AQ\nhBMnWpiURfssYCEpCVSZlJVIzPkaVLz4Z6i4snnSVV7e2cTFTQltawLuIxA4DTW+8gPUCwGooRv/\nhnKTXwO8SE5OV+j35aiMbo9hfw34O6rL2b+EzvuvQCVnn51OVdXSIZqwdOJ0JvGlL71MefkzuFzu\nQe/bKMh2ux7rVueVrmPCREAsa0EYx+Tnt2N2KX+KsliXk529FqfT+F02A8XQQ3QS1/79h4hY6SsN\nx98R+mVHZY4b1+rC69Vbieq11A+jeocfRjVKuZWMjN/j9f4Wv/8H4WN1oR28x/mLwG243Zbw1Kyf\n/ewC3n//TZStoWq5jYIsXceEiYiItSCMA4aK0VosAZRL+xxUYtZNJCb+ioUL13PTTZdTVnYHXu8Z\nKBHXk8f0lqC7gHxgLSkpUykuDuDz+QkGe1FCrTcyIfTzDOA6VH11E+aXhAz6+oyNS14GvoDKLs9A\nxbxtBAIF5OYewOmcjHLHv0hjo5fzzvt/ZGbmUlhYSUdHPl7vx8BZKE+B2YK++urn8HrvDp970qQ7\ncDi+G35WkikuTERErAVhHDBUjLa5uQBlyR5BWco1zJ49i6qqpZSXb8LrvQtdnJOSKklIuJ2enjhg\nGiqurGqws7PvIzm5kOrqG9HHWMI+zILsDH13AGVdr0G9JAAsJCnpMIHAGjStCJVsdhvKotbLxzR6\nexPp7b2JwsJKenoScbt/gsdjwePRcDo3AuUUFq7F6bwRlQneHzpeXdP+/V48Hv2zcu9bLGdIJzJh\nwiNiLQjjgH374ol0IusKfdZdx06MYyA7OytDx6QSsUq34PPdh893H2bX9qNAKocPF1BX10JEBK8F\nqlCZ4fkogc5CDc643XD8htC+GoFAK5p2t+E7NaVLfc5An1kNVvLyzgagvn7g4I7U1Azi4n5PMHgm\nyoJ/mMREF37/atzugee12Q6OwBMWhNhGxFoQxgEdHU2ozmJKrDo6lCA7HCVs2/Ys3d0RIXc6M7nh\nho20tzehRk0aB20UYnZtu4Dv0NtrobdXL69agcoeTw/9Mo67/H3U8T5SUp6gtBS2bp0V9V1a6Pca\neXlO2toy0NuPFhR4SEpKHSRGrXHwYDvB4C8M2+8jIeE0/P7I2gkJXSQmPoHNdpBNm5aMwBMWhNhG\nxFoQxgHRjUy6u/MpLd2G3d5JWlob3d03YxS3mpofkJFRScQa34PK3Nbjyrqr20qk3MsKTCUh4X7i\n4vz4fBehksOMAtwedbwPTfuE1auXs2tXdUjw9VjyLs48Mxjq5Z3Ntm3Gmux14USwxsZUDh/eS1aW\nnVmz1g8i+oXYbAdMa3/taykSlxZOKUSsBWEcMHPmEXbvjoiVxzOJ+vqrqK/XyMy8n4Edyyx0dWWh\nOoFtQcWoVwE5KDd2GrAas8t6OZBIIHAPSsD/GZXR/RyRCVr5qASzT9AbpHi9LoqLf8mMGbPp6FiD\nxTILm62ZTZuWMWOGHYDS0m2ma2xuzglN7oL4+ATmzp2KwzEfm83Keef9P5Mwx8W9z2OPLeI3v5EM\nb+HURcRaEGIUYwZ4QcERFixYR3NzDvv3f4jbXR7ay0JcXA4DO5Y9h7Ki70fVNH+CyuZuAaaj/ukb\nBb6XSExZjzHXYIyFq45lFtRkrFzD8Vvweu/ivffUfmVl66mq+qHpXqLLsvLzD1FSsh6n8/NAN/X1\nSwA1DWzTpjKKi+/A650LHCEY
/Cm/+c1msaSFUxoRa0GIUaIzwBcseCRUB52NcZqWGg+5jr/+tZfu\n7k+Bi1CW8OmoxiMbMVvRG1ATuIwCvxeoNHzuIjLUg9DPPOC7od8/aTg+zbSfPtXLWGbmcJTQ17eO\nHTvigMP8/e+dtLYas8U3huutZ8ywc+aZc0ICrpAuZMKpjoi1IMQI0bXU+/aZrd/t27twu7+HLqiZ\nmfeTnh7gwAE7s2YFuPRSqKkxCq4+0jJ6cEYGkEpy8hr6+opQJVnXAveRklLIxRd3smePO9yCNLJe\nu2Gdr6EsbTvwIcaBH8apXvX1Gjt33oPXO4nu7kwCgRRUh7PJRJLZrEAaBQVt4WcRbYlLFzLhVEfE\nWhDGEKNAt7Xtwem8CbBRX69RWFiJ2fo1dyCLi8vB6VyK07mFhgYbCQnNmEVZH2kZPTiji4yMOI4c\nyUEN2zgHZWmfTmlpAJhMS8vNqCSyDajGJMmobmcuVAw8DeU670MN67gPOJ1Jk97D5TIniLW0nAbc\nYDi/XtJ1DsrVvhyVABeplT6RLmRHG+4hCOMdEWtBGEPMgzjK0OueIZ3u7ngWLPgdzc0F2O0efL5+\namoiohsMtqISwFTcNxBIxizKHwLrAB8JCXeQmmqnt7cVvz+Prq6bQsdGyrL0TmDLlr2FuW3oY6hE\ntXZUDFwvq1qMst5/HfrcH2rCsiHqOj7GPPAjncjMaj9KvFfQ3Pxa+LmcSBeyW255iS1b1JSv+noN\nn28djz++7LjWEIRYRcRaEMaQgYM4VN0zWPB4FvHOO5U888ylVFa+zYEDqRQWVpKdXcTMmT3s2NGD\nx6N3KFPlUCrTuwg4hEokawb6uPLKqTz++DJKS7dRX38VqtXnFNO5LZaZVFS8SkGBL6r+OQnV4/t7\nqKYo0Znnt6EEWo9xL0QJsB/1wvBjIrHpDSi3ezfqBaAGZWWfvKtbxcONYQOZUyRMHESsBWEM0F22\n+/cHUMlaKlksISGDQCAiOE7n5/n615/D6VyFXtvc3d3K4cOddHbOwFwjnQccxOxyvg/4N/7+919Q\nWrqNtrY9QDHKlW22xHt7J1Fd/VVycx8gLq6SYPBclKguBJ4hMfGXTJ6sceiQUcg7iMTBdXe7FWWx\nbwQuQAm1up+MjB4uuSSN5uYUCgr+Avhpbn52hMqx9AEk+rUdPsn1BCF2ELEWhDEgeg611foAxcVT\n8PniTK5u2EN7+ySU8H0K3IbHsxGP5ybMMWA97mu2llUi1ye0tEyipUUD4oiLewzoIRj8FuamKWnA\nb2lvn40S/UuIWMQp+P134fGsJGJFd6GsZz0uvjD0nQc129ofWidyPxkZbTz00HWjEkueNy+dmprI\ntc2blz7i5xCEsULEWhDGgGj39/TpZ1BVdQUul5va2kiNMXyf/v77gVtRcd9OlGgbY8CHgZ8Dp6Hi\nwy4iItsM/BfG0q1g8FGUqNeiXNyXoBLMsgFzJzRlraeg11/7fJ9HxbHdKBd2H6rZyixUL3Er8+f3\n8NFHHQZvQCRJzelcQUXF6NRMP/TQYpKSamlq6sduD+BwLBrxcwjCWCFiLQhjwGClSR0dbm699QW8\n3lTgXVQf799hsVhD+3Whz3c210w7iSR96f29P48S4FuBNxgqLq72vxPlEg9gdqufTWLiLvx+Y1xc\nb1eqZ3FvBCJWfn7+PVRV/V+WLXsr1B5VT1J7At3CHq2aaRmNKUxkRKwF4SQYrFxI0zhmCdGqVXPY\ntasSl2saNtsBVq8uY+XKWmpqMlGCGBHI/v5VKKH7J1Ss2Si8XSir1rgtK7R/AcrCPow5lpsVtX8i\ng7ce3cP8+Vbee68y1GnsCPA14uJuJxiczWA13IcO5bFs2VuG2Lhu4SeG1tyA3R44mUcuCKckItaC\ncBJEdxnbseNOLJZEWlq+SHQbTSOVlW+H3MRq2tXatetDFmc8oAshqCztIpYsWU9dXStudwCz8L
pQ\n7m/jtkmoGHRC6LNuMetx5vej9j8Ns3gfAdYQF5cNTGLTpq+wdu3bNDVl8v77/43X+wsi5VnmGu5A\noIv6+u8BZRQWqpeR3t5EdDe61erF4bhyJB69IJxSiFgLwkkQHXtubc3E7KbeyL598dxww5Ns394F\nZDNvXj8HD9pMx+lWeH19AsqtHRHA5OSPqaqqoKTkJdzuuURiyZ+gBnTEoVzZXyAu7m0SE0/Dau0h\nP9/PO++sQpVyAVyKcksfQrnN1QuFwije7cDdBIMWtm1TLxL6y4YqrzKWZx1Cud3PRDVJ0T0IFvLy\nzmbu3E6qqyO13MXFCdKoRBBOABFrQTgJomPPaqqV0UpN49Chf9DQMBNVp2yhpkajsHAtRoFsa3uX\nRx5ZwvPPr6e/H2ANMAP4kOeeUz2yOzo+QM2n/hmq3KsIVaOs1igsrKS2dkVYDE8/3YHRnR5xbx9C\nJY3prURdpKTcyec+d0FoSMh0ol8kdDIzP6S39ymUlR4EWiksTCMvz0Jb236czhWhPbVQOdbxdyIT\nBGEgItaCcBI4HCXs2mWM6YJRhAsLG+juzid6KEZW1nSCwXtCrTgP4XTm8POf/5WMjM/hdn8ntJ+b\nxMTf8KMfHaCx8QX6+rJRTU+CqCEdlqg1i/jRj14KNQc5hNc7jYHu7TtQGdr5JCSsITGxCJvtIK+/\nfiOZmVmUl3dSXR003YOxWcm5506ltdU8lzovL4etW6/A5ZpDRcVmkzBL0pcgjAwi1oJwEthsVmpr\nr6OiQh9l6QbWceCAlY6OvWRl2Wlvfx9lyUYEsKOjiba2eIwNTF555U5SUuyoEqgkQMPvn857730F\n+CbKMs4Grg8d86RpzY8+eoeGBqMlvQqze/sQytJ+ArievLxK6uuVkObmZtDe3oXDUUJX1yb++te1\n9PfnkJfXyurVXw/fb0tLtOcgKyzmgwmz9OsWhJFBxFo45TlZQRlMpMrLN9HQsCpUvuQCfoXqo51D\ncvJenM6fAq9hFD6//zz8/q8DT2F0b0cGX6SjMrvNk6+s1qmkprbgdJ6FWUhPJ9L0pBs1IUvPFrdw\n6JCV8877T7KzizjrLB93330pNpuVjAwrfv8PUUM4VMz6vvsms3JlLR988AnGF4BJk/6Ow/HdIZ9N\ndAIerBdLWxBOABFr4ZTnZAVlMLGPTjxLTEwmISEPm+0AaWmz+fBDGwOzsveimo34MIuuPviiG5X8\npR8zGYsFtmy5iNLSF4nUQOvrdaJGUBpFXwM+ADz4fB04nbfjdFrYvVtj69YHKC7OGzCas6kp0/CM\nzE1OZs8+86gvNtHPQeZSC8KJIWItnPKcrKAMJvYFBUeor9cTsRrw+1fj96syrbi421GiOR01ZcuF\nSkzTUIMyEjGKrsXyDzTtDSAXaMNYhlVSMpnKyrfxeH6KEtInAC8WSxslJalYLI/wt78l0Nv7CX7/\n5NCx/4pqQ/p703273WdSXb1owGhOu91jeEZ6k5PNwCJmzVp/1Gcjc6kFYWQQsRZOeU5WUAYT+4IC\nH2ZXduT7YLAIZeUeRDUNKUSJbyJKjL9NxH39Ppr2A5S4bgD+D/AUcXFTyM9vZu3aMr73vY9QQl2D\ncnHXk5CQQnp6DqtWzaGy8m2ami6goaGVQOBaw5V7MFvi3YCFtjYbmZn3Ehc3hXnzgjgcX6Gi4lXT\nM7Ja36e42HXM7G7JBheEkUHEWjjlOVlBGUzsm5qMiVjdmEVxHzAXZSk3oTK0dSt6DZo2GX1spGpu\nYkW5x53AfwM/Ixi04HSqeLLdrlFf/yKq8cgW4Iv4/X+jujqVHTueprVVTzp7LOo6JqFqtnWL/VpU\nYxM3Hk8u8G2SktZjs1kHeUbLhxXXl2xwQRgZRKyFU56TFZTBxN5siS5g0iR9OEcD5vnOD2O0ujVt\nKuaksNND3+k9wfVhHjVAOi+++AlPPTWX6upGlFDrDUgWAx
tob3cb1r8KPclNZZtnYh7c8SAwFfg+\najZ2JCQgoisIY8uoi/Xrr7/O2rVr0TSNq6++mu9+d+jMUUGINYzJY0VFPeGM6ejv7HaNp5+eg6ZB\nRUUtH3zQR1LSSgKByWhaFgkJCeTkvMGhQzMwzneO7tsdH3+Q/v7lKOFNA/4GrEe5rI3DPJSL3e9f\nxHXX3YHqIKYnkaWH9rMQF5dNMOgCnkPVWbtRZWT7UF3QjIlsn0OJPOgxdIkxC0JsMKpiHQwGufvu\nu3nsscfIy8vjG9/4BldccQWzZs0azdMKwogRnTzW1xfJFDd/52LXrofp6cnH7U5GtQA9D11Uu7s1\nurvvZaBLvBdju87MzG5crl+i3OTdKGv6d8TFHSYYfCp0nC7cABb6+magyrgexNyx7A4CgWyUq/si\nlBv9bsP390ZdS1doTY3MzGYuv3y9xJgFIUYYVbH+xz/+gd1uZ+rUqQB87WtfY9u2bSLWwrjhaJni\n5u+2hAdzRFzKUzBbrlNR85/10qckoAKYTGbm/Vx+eT61tfmodqLGcqtzCAZ3EElYM2drJyU10tc3\nGbgg6nznh873o9Dn56K+n0JcXCWZmflcfHEQTfPT3PxsyJX/LWleIggxxKiKdWtrKwUFBeHPU6ZM\nYffu3aN5SkE4LvQZ0sYhGw899NWwUB0tU9z8XRpmIcxhYLb1p6h48JbQfsbM7CyqqpYye/bTUesk\nomZbzyYya/paVO/wmUAj55+vMWXKeurqWnC7jefrwzzCMtqqn0QwuBq3WyM9fSO//vWiE3+QgiCM\nKqMq1pqmjebygnDSRGZIR4ZsJCVFXN3G5LGiol7uvjviFjZ+19a2B6fzUpT1qgGfEB9/iJSU/Xi9\nOaSmdjB3bhJJSX/hwAErDQ31GIWzp6cJgNTUZjweo6C+jZqQFT2M42z07O3333+A555bSmNjE5dd\npiey7UG9GNQYzrMAWE1CwukEgx0Egz8I3YmFl1/uw+VyizUtCDHKqIp1fn4+Tqcz/Lm1tZW8vLyj\nHpObmzGalzTmyP3FFk6nMdlL/Xz5Zbj55s08/PBCiopO49lnrx/02I8//pitWz/C651OUpKb5OT7\n6euLCGt//4P097t5//2vMmuWPXzcsmUbaGiwY8z61rQscnMzyM+fTUuLMRu8ELOl3YNyg98U3max\n5JKbm8HNN+8OCfUSYD5KqA9jjImXlc3i2Wf/lWXLnuJPf5ocWkPD5UpizZo3ePrpa07mccY04+3v\n5vEi9zexGVWxPvfcc/nkk0/49NNPyc3N5YUXXuCXv/zlUY9pb+866vfjGX1YwkTls7y/kRoQUVjY\ngbI8jVauxp/+dA11dXdywQWn09ycg93eyaOPltHfHx8+trj4v/F6VUJXX9/AMiz4HL29izjnnDWc\nddaF4evcuzcF1RAl0go0Le1+PvjgAG1tjcBqIpb0PZhd1wdRLvGI0H75ywHa27tC6+qubiuwHKv1\nAdzuVeFrfuGFRzj33Cc57bROMjPvx+M5K3TMQvbufW3C/v2Uf3vjm1Ph/o7FqIp1fHw8a9as4Tvf\n+Q6apvGNb3xDksuEESE6S9vne4SkpNTjFm+Ho4QdO35Pa2ukhSf4AQutrZnU1NwYPseKFea4rsrC\nNoqzLvzmjmB9fRdRX78k3IpUNTHRO5KloHqEZ1BS8gRO57+gLO40EhPfRNM6CQSM1xYAesOJYfPm\nBXnooa8Aegx9Sfj4KVPexGJJQLnmu4EFBAIZNDRYaGhYQWHhWjyeReHrLShoobx8k0zIEoQYZNTr\nrOfPn8/8+fNH+zTCKUZ0lvb27V243SrufDzDOGw2K7m5X6S19RuGrZtRYmseB/nxx+mmYxMT38Pn\n08up9gNWLJZVaNoUVCb4wtA6R8JrNDVl8vTTc/D5nmf79k85csSH378aj8cSilXrE7bgnHOCfPCB\nJ6pF6BPAdSxePPD+VA
xdnyftxuc7Pfyyoa7j58A09KSzrKzpzJ0bicd3dSXIhCxBiFGkg5kwLonO\n0lZzno8+jGMo13lHxweYLeJ/oCxRv2n71KkdpvXmzTuNurprUAKryq1UUuUToWP+GlrrJlQzkhfZ\nv99LRcWr3HnnpVRWvs3WreD361neVlRWOeiZ521tB+jtjVxDYuJHLFwYqX8+WjigtHQbZsv/QmAR\nqu5aY9as/rAY5+ZmcP75zx7zGQqCMDaIWAvjkugWnz5fPzU1Rx/GMdQozKys6TidxqQuG5BGUtLf\n8fkiLmhN8wMRgfzb3zKJuLKNomhDJXlp5OfXc/75f2H7dhdu909wuy1UV2vs2lUZVZetZ3nvITPz\nAOnpnTQ2FnHWWekEg/fQ2WnHZjvIpk3fZMaMSLLa0cZ75ucbx2lG3PLJyZlkZ1fS2FhEefkzOBwl\n5OZmyIQsQYhhRKyFcUl0r2qXy01SkhLv/PxD+Hx+Skqeo6OjiezsImbOPGKY0+wGati6FcrLn+G0\n047Q0BA993k+gUAzxlpop3MzYBbIwTuBvQtYsFrfp67u/2KzWSkt3UZ9fUTQW1ryMQu8L3TeFfT2\n/gaPZzVOp1qvrGxod/TRmrZYLAHMDViUWz47243TuSo8xxrW8+yz18uELEGIYUSshQmBUbzLyzdR\nXX0jSvwiohSZ01wDLKe3V1m5Cxaso6xsPXV1AdzuScDngQcJBmcCT6JaeU5mxoxuYKBAKsv7DlQ8\nuAOYDnhITvawbNlb2O2dFBT4TFZrMNiIWeCdwCpAw++fynDd0UezhpubC1DDO9TLSUrKc5SWQmNj\nUagP95kAAB2VSURBVOhFwLy+DOsQhNhFxFqYcETE1Ni9y0J2dhFz565n61bo7Y1s37LFD8SRlLSP\nSy9NYceO9/H7jT221wCn8cYbbXz8cdMQ8fJ/Ae4HvoxyN/8Tra1NtLbGU1+vYbM1oAR9BtCIGk/5\nKOACckhI8HHmmU9y8KATtzsXo5C3tb2Ly6WGhETHp49mDUeuU5VxlZYqC728/JmQRS3ubkEYL4hY\nCxOOiEh1YU4QcwNJJCe3mJK21Pzoa+nr0/jb39bQ3z8Ts+V8EbAEp1Nj6dJKamuvo69vHVu39hMM\ndqFqnp/D3GnsTuDfw59drmYiPb9dwC9DP28DLAQCGtOmraOjw4/bnYkS9rMAC07nCioqlAteud87\nqa9/kc2bXyQ//xCbNpWZ4tg6Qwm5uLsFYfwhYi1MOHQx2rcvno6OylDMugefzx9yj3cCG7Bavbjd\nzcC3ULHddPr6JqHKsIyWc6T0yuWahs1mJTk5iWBQj1u7gD9hFvjZUZ+Nru0tqOlYz5v22bEjLtTA\nxAIsxVjGFXGFW1Bu/GsIBi3hF4j6+h8OeA5DubXF3S0I44+4sb4AQTgROjrclJdvorR0G+Xlz+By\nucPf6WL05z/PZ+7cacTHJwAaBw7o7nErcC3Tp2eRnNwN/A8qE/tS1HCM6cDtKOt3FZAMPAW4sNkO\nAkZXuxvVuSwdJewQGdox1Gd96EdX1D6HMQu8uYzLbu8M7Wd277tc007gCQqCMJ4Qy1oYM06mZejR\nSpaG2ieSYBaJ1WZm5vL66z6MFmvEoq4M/VKfU1LuZNOmbwJGV3sNKiFtPpFe3/XAdeidxPLz/8E5\n56Tw1lsPEAza6OvbT1/fYlR2trLwi4sT8PnSTOVn8CbgJjHxQ1avXoamESr5ysKY+Ka/QAiCMHER\nsRbGjOEI7lAcrWRpqH2ys4v4whfWsWNHHHAYny8Nl+t0Iv20zRYrmMurZs/+AmvXvk1T00cUFPhY\nsOB3vPZaGr293ai49TWhdeopK3s93EnM4bjB9BLicrmpqNBjxgEcjiux2azh8jOVld4K3ArY8Ps1\n1q5dD2CqzY6LqyQ/HzZtWjKsZyYIwvhFxFoYM4YjuDC4BR6dkd3W9i6NjbOprHw7FKtu
ort7CkYL\n9PDhvRw4kIjb/RNAjcMsLFwL5KFi1p+iOnzplu1HGC3xDz98h927fwxsob5+Cvn573DxxbBt23J0\nK1qNpnRTVXXLkPetu+n1+9LLuxyOEqqqluJyufnSl17G7Y5MBDPHrNXPL3zhbLZuveL4HrogCOMS\nEWthzBhux6zBLHCHoyTkEv48cASncwVf//rDIctT1Vfr61qtD5Ca6sfpXAG8gVHwsrKm09PTh9t9\nLSr+vBHoBeJJT7fS3R3pbOb1TkElhy0HOmlp6aalxQU4UAlk76EakMwIdwY7mls/+r5eeukOzjjj\ni8yceYR587yDdGTTpMOYIJyiiFgLY8ZwSog6OtzU1bWiMqe7gIXU1QUAyMs7G6cz4gJWiVadKAs5\nsj9kk5WVHJpdbSznctHR0YT6Z2BM9Ipj0qQP+dKXckNWs3EQhje0dgORUiy9i9mtqBj2tVRXR9z6\nQ8Xmoz0LXu9cdu9ewu7dkUYtA5+NlFwJwqmIiLUwZgynhGjlytqw21qJ4gbc7klUVNSGRk1GLE2b\n7QC9vS+i1y4b909N3R/6HEnqSk1tCVnincCDoTOqY71eDYvlEcrK9CYqicBpgHGKlTG+fQ7K6s4I\nb9Nd10PF5gc2V4mUiG3fHsfOnZcPsMyl5EoQTk2kdEuIaQa29vQBC2lqysThKKGsbD3nnfcsZWXr\n2bSpDKvVO+j+2dlFoX1fo6wswM6dV5KXdzaRUq5CoMh07JtvJlFVtZTSUg3l+p5i+F5PSoOI0Kah\nLHe1TXUecw8am+/ocOPz9ZCYeCeqocq9wFfDx+ovJIIgCCCWtRCj6K7j/ftbMDcoSQYmY7d7BrXM\ni4vfCrmgzfvPnNkzYF+zZbsA1S50cfjYI0eacbnchvi4RiQBbQEqLn4xSqi/Sn7+b9C0Plpbn0OP\no1dUbB7gAbDbPaxcWUtNzfdRVv2LZGZm0Nv7K/z+81Gu9oU0Nb02sg9VEIRxi4i1EJNEXMeq21hm\nppf09BaysuzMmrWeVasuoLx804A4sB4Hb2xM5fDhvUPuv2LFGezc+Qlxcb9H09rQtGyU5RwZien3\n9/GlL71McXE8mzYtYfHi/6Kt7V5UMtmnXHppOllZ7tCam3E4bmDZsrdobdXj6CrePm1aIYWFkU5q\nDsflLFv2FsYGLTNnPovdnkF19VVE9wQfbu25IAgTFxFrIWYwJmIpi7oTo5ht3fp/wvuqyVqROPCO\nHXdywQX/v717D66yvvM4/s4dSAI5QIBEuiGAEay2TC11YVxCsY0SwKBopXWkRZuV0sEx7Qw3124t\n3VBTrbZDhyJip1AqWNYkUAhVA4RWKcvWTTEqZYg0CLmS5DQJhlzI2T8eTs41yUlyDufJyef1jyR5\n8jy/x4if/G7f379QVTWelBQb+/bdicVyj5frjbra+/f/DZttKvZtXUbFskSMFd23AGeBWVitVyks\nXAgcYO7cmRQUrMAepnFxO/rorR/qPsMabMye7VhwVlv7IcYsVAuw8PqCMc8V7mvXHtA8tYgorMU8\nPM+Jfg3jPGnPbUru88A1NaMpKjIWf5WW2igpeZ709AleVl4bVcpsNvszXgVGYdTyjsGo8x2O8yEc\nsIeKitFERUW4PLOqarzHO2zYcAenTm2msXEyHR2tdHZ67iNft+6oS3GT5OTN5OU9isWS4LHCvbfj\nMUVk+NACMzEN9wBOSLjavXjMfZuSo0421/853uV7rdYZFBau6F6k1VNdbSOclwMP4Dhw4zxGr95+\nTSy1tR9y7twZl2c6/wJhr1V+773/Q2VlCq2t99HZGef1evf3nDDh1u6hbvf30l5qEQH1rMVE3Lcy\n/eu/dhET00RFxWjWrj3iUmTEfcjY4LywrAXn3qx9LrukpBqr1blKWTze64I7evUjRpyisvJx4G3g\nBSIj4/nqVyPIy3MMs3uOCuwBFpGQ8DyTJ6fS0HCW
8vIUsrPfICmpvcfiJjq+UkS8UViLaTiOthxF\nQ8NZ3n13NE1NI4H5lJaOwbl2uMWSwNGjj/LUUwc5caKZrq6RjBr1X3z6adL178nEOQjtK8dd63I3\n0dLSRXGxZ4979OirhIe/CtTT1TWRq1dPYN9j3dlp429/20xj4z9Zu9Y+x96Ja489DhhDevpE4FPK\nyjZQWRlGWZmNhQt/1UPBEx1fKSLeKazFNOxBlZ2dT1mZY07Xfq6z+/ytxZJAdPQorNYngDCamowg\njI6OoqLimNeeqc3m8hG5uf9Gbu4ujh6toqnJ0eP+9NOP6ezcdP3j3TiOtQQIo7LyNpYuzae6ehoQ\nAdTg3LNPSDhDenqj28pv43urqpJU01tE+kVhLUHR2/GYnoVQjLlfb/O37td6C0LnZ9XWfkBl5SPA\nCUpLLZw6Vcirr36ZoqIyjCpmxtx3Z2eM030XERb2PDabYw82NFJdzfW2NQNfJyrqP/nsZ79w/ZeE\n5S7z0KrpLSKDobCWoOjteEz3cHPupdo5iqZ0Ar/AmKOezJkzZzl/fjqpqSlenwVZwHPAOowe8hKW\nLv0B7e3P4dqTHwf8DmNOu4nY2Gu0tPwEo6zoFYzKaP/h8j2xsVO89pg1Dy0ig6WwlqDo7XhMz3Bb\n7lIYpKHByoIFu64vLmsB/oF9q9XVqzbuv38zpaVrenyWUVrU8XFbW6rb12MxVoR/B8ee6k20tKzC\nqP8dS2Rkk8u2LIhlzhz7QjdXmocWkcFSWEvA+XIetfPQcF/h5r5PGTbjHLaNjZNdnlldfRqoAyYB\nTURHv097u+PZMTEfc/Wq4+Pw8L8QE3Mzra2OeyYm3sq8eYc5e3YkKSlW2tvDXY6wTE4u46WXHvXr\nvyNVLhMRO4W1BFxP51E79557Kh/qjWtP+Z/ANYzDMIxqYBbLRS9D369h1P22MW9eM7Gxjmd/97uZ\nfOtbRiETi+Ui+fnfIDfXtcb41KmfsnfvCurqjIM6GhutREc79/4fHVS49jYtICKisJaA8zbk7d57\ndi8f2ltYTZpUh2Pl9SGc545HjPgB+fkP88QT53Ad2o4HrEAR77wziowMG3v3Oupul5be7vKMvDxj\nq1hP88z+HtrubVpAREQVzCTgfKnK5R5Wb74J2dlv0Nho7b7GXiXs3XerMHrKBzAWejm+b8aMO0hN\nTfFS4awZo/DJclpbV7hUN3O/f0ZG8fUiLF9mz547AFi27CSf+cxmFizY79Euf1DlMhHpjXrWEnB9\nrYb2drBFa2sUhYXLgV0899yXWbfuKCUlnVitMcDNGNXGwFix7Riurq39kIwMSEq6wsKFO6iqGk9S\n0mWgg2PHYl3mod17r84nfZWWHqKk5C1Gjap2mR+/eHEPZWUr8PcwtVaMi0hvFNYyIN4WRCUmxvf6\n9Z7mdD0XjD0HrMIeqJ6lPH+CUdP7MAAjRjzDzTfPor7+LJWV36Gy0kJpqY2srF28+ebd3W2Jiemk\ntXU39pO2ej4cxCg9arWGYbXux3PPt/+HqbViXER6o7CWAfG2IMo4PrLnr/cURp5bq27FOBrTGA72\n/PoM4JcYx1oa27UmT95BRMStVFZauq9zPuXKOewTEp4nPX2i18NBjLY6lx5twbPmuIapReTGUljL\ngHhbEFVfbyU7e7+X86h774m6b+OaNOk0V69eBuppb48lKanNrUjKOVpaEl32OZ84EU56uvftYO5t\nnTLlZrZv77l4iethHwtJTt7MuHFpNDaeIyHhM0yb5jgFTFuuRORGUFjLgHjbJ716dZHP51E7y8tb\nQFvbDv7yl3CgHputDat1JRBGUZG3gy+Wc+edr2G1Ovd468nLM+a43ed9fS332dNhH/ZtWYmJD3Zv\n3bLTlisRuREU1jIg3vZJL1z4v7ifRz1lSkGfC6YslgRiYqKxWpdgzEN3YAR9JpDgtd73nDlxFBW9\nhrElq5k5c+Kw
WBJYv/4LLFu2n7//PYk//nEbqak3M2WKY7GZL4u3+jN/rC1XInIjKKxlQLztk25s\njMJ5fjc9PdLrcLOd8xCyMWy+H1iBo7e8B1jutSf80ktLiI4+SkXFNVJSOsnLWwzAsmX7XRarffTR\nHj76aEX3YrPBcB7m96USm4iIvyisxS+MHuV8jIAdQVTU/1FefgvZ2W/0OI9rDCHbe9MzgCqce6kj\nR3aQkbGrx+pm3nq/jY2TXe7hz9XbzsP8PVVi05YrEQkEhbX4hdHDHIOx//l3dHQ8S1lZGGVlrvO4\nnr3p/wYex3FutKOXmpFB9/nWvs4LWyyf0NoamNXb5887rxL3XolNRCQQFNbiF3l5CwgL28mxY9do\nauqgq8v7PK7nnukXcD43Oioql8jIz2CxXGTjxvsAKC8fhXNIfvzxKI/n238JGDNmOg0Nz2CzJREW\nVk1q6nTS0nb5pcebmtrMqVMa8haRG09hLT5raLDy1FN/vL5q+zJz5sTx0ktLsFgSsFgSiI6Oxmpd\njrE4zHuouS/IioyMp7PTfu0YOjpS6ej4Bq2tNnJzd7F9ewoNDX93uV99/VngHpe2uf4S8DWysnax\nffsK/Gnr1kza2jTkLSI3nsJafLZu3VEOH7YPWdsoKnqN6Oij3cPAjmHiTGDP9TlnXELNfUHWqFEN\nxMUZ+5g/+eQ8Vms29gM39u/v5NSpXxAbm4QxFx4HtDB2bIpH227EquyxYzXkLSLBobAWn3lWEoun\nouJa99cdw8QJwHIyMjznlh2FRzqxWkfQ1PQdmprGMHv2LqZOnUBh4Rjsq8BttjAqK22MGPEMsAl7\nwE+btsujbVqVLSKhTGEtPjMC0V6TOxb4gKQkxypvX4aJ7QuyMjKKKS1d2v35iorR7N17B7CLwsJm\nHD3pZq5ds7gVRfG8r1Zli0goU1iLz/LyFnDy5C+prjZqcsMSYEf31/szTOzeE66t/ZCHH4aUFBsx\nMVW0ta3u/lpExA/Yvv3fe72fVmWLSChTWIvPLJYEJk26jepqx1B4VdX4Ad3LuSdcW/uhy2lZ8fHb\naWtzPCM19TZ/NF9EZMhSWIvPvJ07PdC5YeeecEYGLqdlRURYcV79nZbWNui2i4gMZQpr8Zn7udPJ\nyZvJy3t00Pd1HxKfMyee6GjNP4uI2CmsxWfuq8EnTLgViyWhuyBJZaWF5OSGfh8T6bk4bLGOmRQR\ncaKwHqYGcg5zT9ujPKuS9e+YyIEsDtM50iIynCish6mBnMPc0/aoG3VMpHNA19Z+QGXlasCic6RF\nJOQprIcJ957oxx/H0t+A7akH3FOP29+9X9cefBbGXuyv+9x+EZGhSmE9TLj3pJOTc+mpfndf3EN4\n40ajmIkxZ93Y3eMeSO+9N54V1GKv/1kVy0QktAUsrLds2cLrr7/OuHHjAMjJyWHevHmBepz0wT3o\nxo6dwuzZA1tx3VMIJybGU1fX3OMzB9v7de/BJyeXMWFCl1aMi0jIC2jPeuXKlaxcuTKQjxAfuQfd\ntGnXBtzL9TWE+1uvu69hc88580e1qExEhoWAhrXNZgvk7aUf/Fk729cQ7u8z+xo2V0lRERmuAhrW\nu3fvprCwkNtuu43169cTHx8fyMdJL/wZdL6GcH+feaNWlYuIDDVhtkF0f1euXMnly5c9Pp+Tk8Os\nWbOwWCyEhYXx4osvUldXR25u7qAaKwNXX29l9eoizp+PIzW1ma1bMxk71lxDyA8//Dtef91Y3Q02\nvva1Pezd+/VgN0tEJOgGFda+unTpEqtWreLAgQN9Xuu8QCnUuC/AupGys/NdCpdkZfl/X/Jg36+x\n0cratUddeuxmmpMO5s8v0EL53UDvN9QNh/frS8CGwevq6khMTATgrbfeIi0tLVCPEh84hpitQBFv\nvgnZ2W+YqvKX5qRFRLwLWFj/9Kc/5aOPPiI8PJybbrqJH/3oR4F6lPjAsSisCF
hOa2sYhYUD3/vs\nbeW2L78diohI/wUsrPPy8gJ1axmADRvu4NSpzVRVTcJmG/wiLm8rtwsKVviruSIi4kQVzIaJzZvf\nu3685Wv0VrnM3mMuL4+goaGCcePSmDr1isdwuVZui4jcOArrYcIRrpnAHkaO7CAjA49tV44e8x5g\nA5WVYbz/vudwube91vX1VrKz9+skLBERP1NYDxOOcE0AlpOR4Qhf5/nnf/yjEyOA43DuOZeXjyI7\nO9+jHrh95faGDV9g1qxfcfHiOvpbC1zHXYqI9E5hPUz0VsjE9TSr3RjD5M04D5dfvnyGsrKnsQdx\ne/sOfvObh7vvkZ2dz8WLtzKQoXF/H/ghIhJqFNbDRG/bolznnxeRkPA8kycn09Cw+fqc9accPZqA\ncxCfOBHu5R4tDOQkL81/i4j0TmEtbvPPY0hPn8j27fe5XJOWthXnIIZ6L/e4D2OuO5bk5DLy8h4d\nwPN13KWIiDuFtfhU63vOnDiKil4D4oFm5syJ87hHTMxhzp4dSUqKtV8nYvnzkBERkVB0Q8qN9keo\nl5Qbqu/nSynQofx+vgjl9wvldwO931A3HN6vL+pZB0ioVfhSKVARkeBRWAeIKnyJiIi/hPd9iQyE\nVjiLiIi/KKwDJCXlnxirpkErnEVEZDA0DB4gWuGsymQiIv6isA6Q/i7ICsVgU2UyERH/UFibRCgG\nm+btRUT8Q3PWJhGKwaZ5exER/1DP2iRCseSm5u1FRPxDYW0SoRhsKqQiIuIfCmuTULCJiEhPNGct\nIiJicgprERERk1NYi4iImJzCWkRExOQU1iIiIiansBYRETE5hbWIiIjJKaxFRERMTmEtIiJicgpr\nERERk1NYi4iImJzCWkRExOQU1iIiIiansBYRETE5hbWIiIjJKaxFRERMTmEtIiJicgprERERk1NY\ni4iImJzCWkRExOQU1iIiIiansBYRETE5hbWIiIjJKaxFRERMTmEtIiJicgprERERk1NYi4iImJzC\nWkRExOQU1iIiIiansBYRETE5hbWIiIjJKaxFRERMTmEtIiJicoMK68OHD7N48WJmzpzJBx984PK1\nbdu2kZGRwcKFC/nzn/88qEaKiIgMZ4MK67S0NLZs2cLs2bNdPl9eXk5RURGHDh1i+/btPPvss9hs\ntkE1VEREZLgaVFhPnTqVKVOmeARxcXExmZmZREZGMnnyZFJSUjh9+vSgGioiIjJcBWTOuqamhqSk\npO6PJ06cSE1NTSAeJSIiEvIi+7pg5cqVXL582ePzOTk5LFiwwOv3eBvyDgsLG0DzREREpM+w/vWv\nf93vm06aNImqqqruj6urq5kwYYJP35uYGN/v5w0ler+hLZTfL5TfDfR+Q12ov19f/DYM7tybXrBg\nAYcOHaK9vZ1PPvmECxcu8LnPfc5fjxIRERlWwmyDWKb99ttvs2nTJhobGxk9ejQzZszglVdeAYyt\nW/v27SMyMpKnn36au+66y2+NFhERGU4GFdYiIiISeKpgJiIiYnIKaxEREZNTWIuIiJicacN6x44d\nzJgxA6vVGuym+NXPf/5z7rvvPpYuXcrjjz9OXV1dsJvkV3l5eSxcuJCsrCzWrFlDS0tLsJvkN73V\nwh/Kjh8/zr333ss999zDyy+/HOzm+NXGjRuZO3cuS5YsCXZTAqK6upoVK1aQmZnJkiVL2LlzZ7Cb\n5Dft7e089NBDLF26lCVLlrBly5ZgNykgurq6uP/++1m1alWv15kyrKurq3n33XdJTk4OdlP87tvf\n/jb79++noKCA+fPnh9x/gHfddRcHDx6ksLCQlJQUtm3bFuwm+U1PtfCHsq6uLjZt2sSOHTv4wx/+\nwMGDBykvLw92s/zmgQceYMeOHcFuRsBERESwYcMGDh06xJ49e9i9e3fI/Pyio6PZuXMnBQUFFBQU\ncPz48ZAsW71z506mTZvW53WmDOvc3FzWrl
0b7GYERGxsbPefW1tbCQ835Y9gwObOndv9TrNmzaK6\nujrILfKfnmrhD2WnT58mJSWFm266iaioKBYtWkRxcXGwm+U3X/ziFxk9enSwmxEwiYmJzJw5EzD+\n3zJt2jRqa2uD3Cr/GTlyJGD0sjs7O4PcGv+rrq6mpKSEhx56qM9r+6xgdqMdOXKEpKQkbrnllmA3\nJWBefPFFCgsLiY+PD6lhK3f79u1j0aJFwW6G9MJbHf/3338/iC2Sgbp48SJnzpwJqQJUXV1dPPDA\nA1y4cIFHHnkkpN4NHB3T5ubmPq8NSlj3VG/8qaeeYtu2bbz66qvdnxuKvZi+6qnn5OSQk5PDyy+/\nzG9/+1vWrFkThFYOnC/14rdu3UpUVNSQmyscSC38oWwo/v0ST1euXOHJJ59k48aNLqN3Q114eDgF\nBQW0tLSwevVqzp07x/Tp04PdLL84duwY48ePZ+bMmZw8ebLP64MS1j3VGz979iyXLl0iKysLm81G\nTU0Ny5Yt4/e//z3jxo27wa0cOF/rqS9evJgnnnhiyIV1X++Xn59PSUnJkBw1GEgt/KFs0qRJVFZW\ndn9cU1Pjcx1/MYfOzk6efPJJsrKy+MpXvhLs5gREXFwcX/rSl/jTn/4UMmH93nvvceTIEUpKSmhr\na+PKlSusXbuWvLw8r9ebasI0LS2Nd955h+LiYo4cOcLEiRPJz88fUkHdl4qKiu4/FxcXM3Xq1CC2\nxv+OHz/OK6+8wtatW4mOjg52cwImVHqkt99+OxcuXODSpUu0t7dz8OBB7r777mA3y69C5WfVk40b\nNzJ9+nS++c1vBrspftXQ0NA9PHz16lVOnDgRUv+//N73vsexY8coLi7mZz/7GXfeeWePQQ0mnLN2\nFhYWFnJ/0V544QXOnz9PeHg4ycnJPPvss8Fukl/9+Mc/pqOjg8ceewyAz3/+8/zwhz8MbqP8xLkW\n/qpVq1xq4Q9VERERPPPMMzz22GPYbDYefPBBn1amDhXf//73OXnyJFarlfnz57NmzRqWLVsW7Gb5\nzV//+lcOHDhAWloaS5cuJSwsjJycHObNmxfspg1aXV0d69evp6uri66uLjIzM0lPTw92s4JGtcFF\nRERMzlTD4CIiIuJJYS0iImJyCmsRERGTU1iLiIiYnMJaRETE5BTWIiIiJqewFhERMTmFtYiIiMn9\nPyQ+uNKCpR6MAAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAecAAAFKCAYAAAAnj5dkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xt8VPWdP/7X3M5MkpkkM8mEAAER\nQoICgUBALkUEQ7FucekDEeWL3VZXu121dler39pu1Vbb77b+2m1/3277qNXa2kUptGttt/tDEWqp\nyDWBiC6ES8slXDJJJpfJ3C+/P8JM5nLOmTOTmWQm83r+RebMnJyTAO/z+Xzen/dbFQqFQiAiIqKc\noR7rCyAiIqJYDM5EREQ5hsGZiIgoxzA4ExER5RgGZyIiohzD4ExERJRjtGN9AWE220DWzm02F8Nu\nd2bt/LmukO+/kO8d4P0X8v0X8r0D+XH/VqtJ8lhBjJy1Ws1YX8KYKuT7L+R7B3j/hXz/hXzvQP7f\nf0EEZyIionzC4ExERJRjGJyJiIhyDIMzERFRjmFwJiIiyjEMzkRERDmGwZmIiCjHMDgTERHlGAZn\nIiKiJDy+ADrtTnh8gVH5fjlTvpOIiCjXBIJBbNt9Gq3tNvT0e2Ap1aOxzopNq2uhUWdvfMvgTERE\nJGHb7tPYdfhi5Ovufk/k683NdVn7vpzWJiIiEuHxBdDabhM91treldUpbgZnIiIiEX0OD3r6PaLH\n7ANu9DnEj2UCgzMREZGIMqMellK96DGzyYAyo/ixTGBwJiIiEqHXadBYZxU91lhXCb0ue20pmRBG\nREQkYdPqWgBDa8z2ATfMJgMa6yojr2cLgzMREZEEjVqNzc112LByBvocHpQZ9VkdMYcxOBMRESWh\n12lQZS4ete/HNWciIsqa0a6sNV5w5ExERBk3VpW1xgsGZyIiyrixqqw1XvDxhYiIMmosK2uNFwzO\nRESUUWNZWWu8YHAmIqKMGsvKWuMFgzMREWXUWFbWGi+YEEZERBk3VpW1xgsGZyIiyrixqqw1XjA4\nExFR1ox2Za3xgmvORESUMawIlhmKRs7t7e34x3/8R3zmM5/Bli1bcPnyZTzxxBMIBAKwWq34zne+\nA0EQYj7zzW9+E8eOHYNKpcJTTz2FhoaGrNwAERGNPVYEy6ykPzGn04lvfOMbWLp0aeS1H/zgB9i8\neTO2bt2K6667Djt27Ij5zMGDB3Hu3Dls27YNzz//PJ5//vnMXzkREeWMcEWw7n4PQhiuCLZt9+mx\nvrS8lDQ4C4KAF198EVVVVZHXDhw4gFtvvRUAsGrVKrz//vsxn3n//ffR3NwMAJgxYwb6+vrgcDgy\ned1ERJQjlFQE43R3apJOa2u1Wmi1sW9zuVyRaeyKigrYbLG/lK6uLsyePTvytcVigc1mg9FozMQ1\nExFRCjy+QFYzppNVBHt150mcPG/ndHcKRpytHQqFMvIes7kYWm320uytVlPWzp0PCvn+C/neAd5/\nId+/xVKCl3/3IfYfvwxbrwvW8iIsmTMR962bDY0mc4HRVFYEq7kInXZXwjG9oMW+41ciX4enu4uL\nBDywfm7GrkFMPv/u0wrOxcXFcLvdMBgMuHr1asyUNwBUVVWhq6sr8nVnZyesVvFqMWF2uzOdS1HE\najXBZhvI2vlzXSHffyHfO8D7L+T7t1pN+L+/ao3pDNVpd+HNvWfhdHkz3hmqYUZFzPcKC4WCou9/\n79glfGLxlKztfc6H373cw0Naj07Lli3Dzp07AQBvvfUWVqxYEXN8+fLlkeMffvghqqqqOKVNRDSK\n3F7/qHaG2rS6Fs1NNagoNUCtAipKDVg+pxpur3hwZgMMeUlHzsePH8e//uu/oqOjA1qtFjt37sQL\nL7yA//2//ze2bduGSZMmYf369QCAf/qnf8K3vvUtLFiwALNnz
8bdd98NlUqFp59+Ous3QkREw+z9\nyTtDZaI4SPR6dnxFMAA4cd6ObpHrYAMMeUmD85w5c/Dqq68mvP6zn/0s4bXvfe97kT8//vjjI7w0\nIiJKl7l0qDNUssCYLFlM6rjcvubooN9YZxWd7mYDDHks30lENA54fAHYel1AKASruRhWQSsbGLUa\nFbbuapcsGpKsqEh4X3NYONELQMx6tlgDjIYZFqxqnAyPL8AALYHBmYgojwWCQbz+zim898EVuL1D\n68gGQY3mxdfhzlumAxDvDJUsuMod37Byhsx6tg0bVs6IBN3oBhg9/W7sOnIRbae78MfWS9xWJYPB\nmYgoj23bfRrvHOmIec3tDeL3f/4L3G6faGeoZEVD1i2bJnv85nmTJNezu/s9eHXnSXz29lkxAVev\n02BPawf2tHTEvFdstE1sfEFElLfkgiwAtJy0RaaOq8zFkdFssqIhFzsdsscRCsFSKp3Mte/4lYSy\nnUqqiNEwBmciojE0krKWckEWAOwDHtHtSmVGvWRwNZsMqKkySh4XdBpYyorQWCdfuyI+4CZ7IOC2\nqlic1iYiGgPpdHGKz5wOB1mxjGwAMJv0otuV9DqNbLKYqViQPO72BvDG3rPYtLoWLrcf70VV/4oW\nv11L7lq5rSoRgzMR0RhQmu0MyAdyqSAKAAvqrZLZ0GJZ1OFkMQBYv+J6/LntciTJLFprexc2rJyB\nLWvr8T/netAz4E14T3zATfZAwKztWAzORESjLNn6a3S2MyAfyDetrkUoFIrL1tagefFU/O2y6ySv\nITqLWmwfs8Ppg0ckMAOxo+IF9VWKA26yBwIaxuBMRDTKlKy/hqeDlQTy/7WmHnfeUhuzz7lmUrmi\n2tLhZLF4SqehUwm4yR4IaBiDMxHRKEtl/VVpINfrNKixKu9hkKwymNJp6HQCrtQDAQ1jcCYiGmWp\nrL9mOpHK6fFh69uncOJcD+wDXlhK9WiorUTzwhpYSg0x3zuVUTEDbmYxOBMRjQGlgS9TiVThpLL4\nJK/ufg/2tAwVB6mIyxgXGxUDQHefO/JnTk9nB4MzEdEYSGU6eP2K6+F0+3HinB29Do9oIE82TR2f\nVCZGKmNcr9OgoswQyRjv7vfAIKgBqODxBliGMwsYnImIxlB4v7LSzk9LZ1fjnjV1KNZrJd/TWGfF\nw3c1Rs6TrJJYPCUZ49F9mlmGM/MYnImIRlH0CFerUaXc+em941fg8vrxd7fNgqlYkNxmVVwkYP3y\naQCSVxKLl0rGeDSxoE7pYXAmIsqAZNPKYiPcYoMOFzodkfco7fzU0t6F1vY/Y7K1BE63T/Q9+49f\nxicWT1FUSSye2aSH1xeI1OVWGtzjgzqlj8GZiGgElJbhFBvhSgXLZJ2fACAE4KJtUPJ4V68LZzv6\nMH1ymWxSmZhBtw9Pv3woci/rV0xXFNxZhjNzGJyJiEZASRnOVNd8ozs/KR3tJlAB33n9aCQDO7G3\nsx71U83QaVU4ftYO+4Abgk4DtzcQWU+OvhclwZ1lODOHwZmIKE1Ky3CmuuZbbtQDKhUaaitj+h+n\nIngtXyv+YUEsO9zjC8DW68K//eqoZC3tZ+9fhEAgiNZTXeh1eGEQhj7r9QVYhjMLGJyJiNKktHpX\nqmu+To8fT790EGaTgImWYlzucY74WsMPC2L0Og0ErRp2kQYWwNC9bH37FE6et6PP4YXZqMf8ukps\nWDkdDqeP+5yzgMGZiChNSqt3KV3zFbQqeP2hyOh1qNuTF3qdGh5fUPazydgH3Hh150mcPG8XXRuX\nuxdBp8G+qNaQdsdQ4RKNWsWtU1nC3eJERCny+ALotA+NZhvrrKLviV9/3bS6FsvmVMue1+sPib6u\nUqV5oVEEnRr7jl9Bd78HIQxPd7/+zikAww8Q4sSvq7W9Cx6feOcqGhmOnImIFBLLzJ4/sxKrF07G\nsVPdsmU4NWo17l1bj5Pn7
Sknebm9QSyfU40j7TbRNWElfH7xkfd7H1zBnbfUQq/TYNPq2si6cp/D\nC0upAbOmluO9qFFzNG6dyh4GZyIihcQys9850oHmpho898BNSctwprqlKUytGirh+T/netIKztWW\nIlzpcYkec3uHksEmVhRj2+7TaDvTjT6HF+VGPRpqK7Bh5QyckHig4Nap7OG0NhGNC+Gp5mxNsybL\nzAYQad0oJRAMIhQKRTKdlQqGgE67SzJhKxmXxy//hlAo8uARnvYOryu/sfes4ql7yhyOnIkor8kV\nAckkpZnZcrbtPo13jqS+Ncpi0qOmypj2vue+QR/0WjU8IlPbBmGogpjcg8ez9y+K/DlZ60jKDAZn\nIsprckVAHr1nYca+z0j7KqdaiCRaSZEOGo0K9VPNMVnTSpUbBcybWYl3Wy8lHFs2txouj1/2wcPh\n9CnuoEWZweBMRHkr2VSz25tkOjcFep0GDTMqsEckwCmZ3k21EEm0C50OPP7DffB4AzAIGoRCoZS2\nVjXOrMTmNXXQadRoOWlDz4AHZSU6LKiz4p5bZ8IfkK5GFr8ljMlfoyPt4Lx9+3a8+eabka+PHz+O\n1tbWyNezZ8/GggULIl+/8sor0Gj4pEVEmZNsqtne78nICCQ8dd52phvAUIJWMDQ03bygXnoKPboZ\nRqqFSOKFE8FSTQibaCnG5jV10KjVQ9nYwRCOtneh1+FB25luaDSnsWl1rWSiGteVx0baf283btyI\njRs3AgAOHjyI//7v/445bjQa8eqrr47s6oiIZCSbajaX6jHQJ56lrEQ4uO48dCGmjGbw2rbfeTMr\nRYtwSK2Dz59Zmdaas5jwA4Icg6DGV/6uKdKAY9vu0zH3Eb0EEH7A4LpybsjItPYPf/hDvPDCC5k4\nFRGRYnJbkxrrKmEQtBgQ+Vwq7R27+z1QSxQBaTvdDc+qQMI5tr7dHjP9HQ6CtyyYhBpriWw3KRWk\nSn7EShaYAeBjDZNQrB/6b37A6cXh/+kUfV+4tCfXlXPHiINzW1sbJk6cCKs1NtXe6/XiscceQ0dH\nB9auXYvPfvazsucxm4uh1WbvL4LVasraufNBId9/Id87MP7v/+G7GlFcJGD/8cuw2V0wl+qxZM5E\nPLh+LoDY+w8Egnj5dx8OvbfXBWt5EZbMmYj71s2GRjO8s/TFNz6ICfhSgdA+4IZG0MFaWRI5/0/e\n+ADvHktclwaAAx9ehcsjPy2tJDADgLXcgAX1VXj70PlIk4toRXotHlg/F3pBi5++eRxvHzwHj1d8\nnTr+PmoUXkOuy+e/+yMOzjt27MCnPvWphNefeOIJ3HHHHVCpVNiyZQuampowd+5cyfPY7SMv7C7F\najXBZhN7fi4MhXz/hXzvQOHc/7qlUzEw6MFRXxfs/R4cOH4ZXq8fD9/ViJ6e4VHq1l3tMUG30+7C\nm3vPwunyxrR3fO+Ysqlns8mAgNcX+RnHnz9essAMDK1jz5tZibbT3TFtHOPNq63E8jnVeOvAedHz\neLx+/OWCHbuOXExa9CT+PsaDfPi7L/fwMOLgfODAAXz1q19NeP2ee+6J/HnJkiVob2+XDc5EROmS\nWkstLhKwfvk0ANlp7xidLDWSrVLRFtRbsbm5Dp5VQ1PvxmIBb+w9G7MWPG9mBUKhEP7tV0clR9qV\n5UUo0mvRclJ8KlvqPig3jKhC2NWrV1FSUgJBEGJeP3v2LB577DGEQiH4/X60tLRg5syZI7pQIiIx\nckFx//HLkYphSoqIAMNJZmLUqqEmFBWlBjQ31WDT6tpIZTKb3Zk0qMtVBrOY9JFzAsPblor1Wmxu\nrsNzD9yEbz64BF/7TBM8ngDeOdJxrWuVuCVzJg7tX05SVWzZnGomfeWgEY2cbTYbLBZL5Ouf/OQn\nWLRoERobG1FdXY0777wTarUaq1evRkNDw4gvlojGv2TJWvHkgq7N7sLZjj5Mn1yWkfaOK+d
PwtrF\nU1Fm1EOrUSVkZAs6FTw+8bGsXqfGktlV+GPr5YRjy+dUY8vaetn71WpU2HXkIlpOdsoG3IprmeH3\nrZuNy1f7YTEJku+3lOpx79r6SDY35Y4RBec5c+bgpz/9aeTrBx98MPLnL33pSyM5NREVGLkynHLB\nQy7oqtTAC68fjZxr3sxK7BbZyiTW3hEQ31YUvpb49eVk+5c9viBuXVgDrUYje14p8ZXQxKgAPHpn\nA2qqTNBo1NDrNFhQXyX5uQV1Vk5n5yhWCCOinCBXhlNsL3GY3Eg3nMUcPtetCyejualGci9v9Kg9\nflsRAHT3uSN/Tmd9efeRDty7dlbS7UrxswceX0DR2rGl1ABrXAWvTatrEQyFsO+DK5HEMoOgwfK5\nnM7OZQzORDTmlCZrSYke6fb0u6GSKNBx9FQ3nnvgpoTgGAgGsXVXu+iovaLMkDCir59qTqsUZ9uZ\nHnh8AckymFKzB6saJyddOwbEE7s0ajW2rKnHxltqYbM7AZUK1vIijphzHIMzEY25kXZ80qjVkZHu\n2Y4+vPD6UdH39Qy4I2vQ0eeTG7UHAsGEgiL7jl+BRF0SWcnuRap4idcfkK0IJuhUWNEwSXYkrNdp\nUFOVv/t+Cw2DMxGNuZF2fArT6zSYPrlMeg0awHdePxpJmtq0uhb+QEhy1L637RK8EoU7lBYLiSZ1\nL0Mj91N496h48ZK2092yFcG8vhBUKhUTu8YR/iaJaMyF143FpLoHV+5c4QAXHpFu231adtTu8QbT\nCsJSpO4lvE9bKgD3ObwoNwriB69pbe+KbBsDALfXj067M+Y1yh8cORNRTshk44XwZ9rOdMPW64IK\n4lPCre1dWLds2oi6RSlRbhTQNKtK9F6UFC+xlBrQUFsRU2glXnjKPLxG3namGza7S3HWO+UWBmci\nygnR68YjbbwQPtfnNhTh4LEOfEdiDdo+4IbL45fM9s6EcqOAZ+9bDFOx+MhXSUWy6IeUd1vFR9jh\nKfN0s94pt/AxiohySjiTOZXAHK7SJTaFayrWoUKi4lc4oG1aXYtbF05GNgaWbq8fb+w9i8vdg6LX\nl6wi2c3zqrGqcTL8gRDuWlWLxTdMEH1vY10lAOktXvHT3pTbOHImorwltfXozlumY8cfz0amdvWC\neNSNXgMOBkOi3Z1SMVTeU4VA1NDW7R3K9t7TeikmES08xSy3T3tSZQk+/Isde49dgV7QAAjB7Q3C\nIKgBqOD1BWKm/7v73CPKeqfcweBMRHlLagr35PleXOh0RF53X8u41qiBwLUAbBA0CIVCCASDQxnb\np7pGdC2L6q24Z00dnvv5Ick9yVJTzGLr7cUGbdw9RCd7Dd3EsjnVuDeq7Gemst5p7DE4E1Fekkuk\n6rA5RF8PRI2M3d6h5hHBENBUZ0WvI3mRDznGEgFeXwB2BcVC4gurxK+3F+m1+Porh5Ke5+T53piv\n5Ubh7DyVXxiciSjjUm1ekY6efrdkhrXcnuB477Z2yGZBp3Ieh9MLs0yjiTD7gBu2XhcErTrmZxRe\nb+9U0OEKGPoZxE9VR2eqd/W6RpT1TmOHwZmIMibd5hXp2HVEOrtarppWvFQCebLzHDphg0bBbQo6\nDb63rRV2hw8Wk4AF9VUxPyO56eloekGTMFUdnal+5q/dWX1AouxhtjYRZUx4Dbi734MQYot9ZJLH\nF0Dbaek14urKsUt6CihIKnN7A7A7fACAngEvdh2+iNfeORXJOgcgWUhFKYOgTTnrnXIHR85ElBEj\nbV6RCrkpbQCwlhlwyebMyPfKJLUKQAgQi99/bO3A0XYb7ANeWEr1mDezErcunIyWk12wO8Tv1Xtt\n+YAZ2OMPR85ElBFKmldkytuHz0seU6uAY6d7Mva9MikoEZiBofaWPQPeyIzD7iMdUKlUeOa+RZKl\nO5mBPX4xOBNRRsgV05ALIh5fABc7B3DR5lBUJMPjC2D
/h9K9jTO1hpwLWtu7IOg0aJpVJXqcGdjj\nF6e1iQjAyDOsU93GEwgG8do7p7Dvg8uRfbsGQYPlc6tx960zJRPIbL2umD2/41l4xiGTdccpPzA4\nExW4TGZYpxJEtu0+jd1HYrcwhfceq1QqbG6uE39gCOXe0DiV7PBUhGccMll3nPIDgzNRgctkowSl\nQcTjC6DlpPTUdGu7DYFAEG1nuhMeGKzmYhgEdWS0nQuyNZUeP+MQ3gdN4x/XnIkKWLIMa6WNEuIb\nTyRrXtHn8MgW6uju92BP6yXRLVl6nQY33Sje/GG8ELRqNDfVcNq6gHHkTFTAlGRYy43U0p0SL9Jr\nIWhU8AbEh5xS08QtJ20IBEM4fjY3s7GTUauGZuXNJj2cHr/o2rleq8a3/mEpypmFXdAYnIkKWCqN\nEsTWf1OdEo8O5lKBGZCeJu4Z8GSk1OZYWXzDBKxfcT3KjHr8+t0zoslzK+ZPYmAmBmeiQqYkw1pq\ndLx+xfUpFx2JD+ZSykp06Bv0JbyercSr0WAQNNiyth7F+qH/dpmBTXIYnIkKXLIgITU6drr9KU2J\ny61vR6soNWDuDDP+2Ho54Vi+BmYA+FjDxEhgBpQnz1FhYnAmKnByQUIuoJ44Z1c0JR6eDvf6Aoo6\nLTXWVSKYg9ul0lVRKj8iZgY2iWFwJiIA4kFCLmGs1+HB4hsmoPujqwnH6qeWIxAMYeuu9pjpcL3M\nFihTkRY3Xl+B25dch+d/cXjkN5QDyo0CvvaZJpiKxctvEklJOzgfOHAAjz76KGbOnAkAqKurw7/8\ny79Eju/btw/f/e53odFocPPNN+Ohhx4a+dUS0aiSSxgTdBq0X+wFMLwWrNepgJAK7x+/gpb2zphA\nnKz94YDLjwMfXR1KFvPnzh7mkegf9MLl8TM4U8pGNHJevHgxfvCDH4gee+655/DSSy9hwoQJ2LJl\nC9auXYvaWiY6EOWD6MxsqYQxtzcQ2QoUXgv2+EIAQteOiwdYg6BGKAR4fOLHMxGY9Tq15PnV6qEm\nE+kyCBrF5UPZmILSlZVp7QsXLqCsrAwTJ04EAKxcuRLvv/8+gzNRjhPLzJ4/sxKrF07GsVPdsA+4\nUW6U3qOrRCYre5mNAubXWdF2ujsmmW353Go8+zPxqfFQEBC0Knj9qa1rq1XAyvmToFar8M4RZdu5\nGmZYmORFaRlRcD59+jT+4R/+AX19fXj44YexfPlyAIDNZoPFYom8z2Kx4MKFCyO7UiLKOrHM7HeO\ndKC5qQbPPXDTUGKXP4inXzo4hlc5zHXtAeHhDXOgUathLS+CVqPC1rfbJbddhQD4ZfZYSwmGgOam\nKagyF0GlUqG1vQs9A27ZUt/NTVNS/j5EwAiC87Rp0/Dwww/jE5/4BC5cuIBPf/rTeOuttyAI6a2t\nmM3F0Gqz94RptZqydu58UMj3X8j3Dii/f6fLiz+3JW5fAoDWU134zLo5qJlUDrfXD6u5CJ12VyYv\nMy1ubwB7Wjqwp6UDVnMR5s6ohKBTY0/rJdnPpbsl670Pr+LzG+bh0XsWwu3140r3IL7+0/2w9boT\n3qtWD73/wfVzodGMTaVk/t3P3/tPOzhPmDABt99+OwBg6tSpqKysxNWrVzFlyhRUVVWhq6sr8t6r\nV6+iqkq8H2mY3e5M91KSslpNsNkGsnb+XFfI91/I9w4ov/9AMIiv/fSg5FR1d58bD39nN5pmVWHT\n6lo0zKhQVExkNNnsLuw+nN0Zuv0fXMa6pddFpqpLtGrMq60U/VkEg8Af9v0VXq8/5QYimcC/+7l/\n/3IPD2k/zr355pt46aWXAAxNY3d3d2PChKFi9DU1NXA4HLh48SL8fj/27NkTmfImotwQ3axi665T\nuNwj/4Dc6/BGmk9sWl2L5qYaVJQaoFYNJUllmirjZxy5ngEP+hyxWeebVtdiVeMkqCUuOJUGIkRh\naY+cV69ejccffxz
vvPMOfD4fnnnmGfz+97+HyWTCmjVr8Mwzz+Cxxx4DANx+++24/vrrM3bRRJS+\n+KQvs0nAoMuv+PPh0pzRhUuMxTq8sfcvOHyiE70O6W5TqcjFMiRq1VDTjmgatRprF0/FHyWm0pU0\nECGKl3ZwNhqN+PGPfyx5fNGiRdi2bVu6pyeiLIlP+pJr3SgmOthEFy7Z3FyHdcum4emXD2YsQOea\nYAii+5ZTaSBCpAT7ORMVEKX1reWIdasKT4+bigUUG8Zv4cGKUr1ooA03EBETbiBClIrx+6+IiBLI\nleNUSq5bVZFei0td2UvuHGuNdVbJQMsuU5RJDM5EBaTMqIfZJIhOZet1ahiLdOgZ8KC8RI95MysQ\nCoVw7HQ3+hxeWEqTd6sCRhb4c9nK+ZNkAy27TFEmMTgTFRC9ToOSIvHgXGUuxlP3LoxJ8Gptt6HP\n4UW5UY+G2gpsWl0LjVqdkenxfPOJm6ZCo06+EsguU5QJDM5EBcTjC8Dp9okec7p9sNmdsJqL8et3\nz8SMiu0OD/a0dECjVmFzc11GpsfzidmoY1IXjSoGZ6ICIhdUu/s9+NrLh2AxCXB6xPfltrZ3Yf2K\n6fjDgXNQqSBbunI8MRbLT1FHNwrhVDZlAoMzUQGR2/ITJre1yj7gxvM/P5y0YMl4M+jyweMLJARe\nsaS4xjprZPqfKF3820NUQOS2/Cih1agKLjADQK8jsTIYMJwU193vQQhDsw/hKmpEI8HgTDTORO87\nFhNdelOVYo1Mf7odI/KcWCERuaQ4luykkeK0NlEOS2UtU2yKdfm8yVi3NDbLOLzlZ/2K6/HLnSdx\n4KNOxaUyg5lrxZxXxAqJyK3fs2QnjRSDM1EOSmctU2zf8Zt7z8Lp8op2RXpj71+w/6POrN1DPlOr\nhmp7W2QKibBkJ2UTgzNRDhILtOGvxQJtsinWDStnxIz8CnGfcipWzp+EtYunxsxYDDi9uNjpQE2V\nEaZiIbJ+L9YukiU7aaQYnIlyTKqBFkg+xWrrdUHQqiPBps/hkc3YLiRTqoxwuv0JJTfDMxRevx/P\n/6IFHTYHgqGhUfVkqxFf+fQCluykrGFwJsox6axlyk2xCjoN/u1XR2Ef8Eamx29fMhVq1VCXpZEQ\ntGp4/bm/EF1jLcGDd8zGntYOtJ3uTgik/kBIcm3/+V+04EKnI/J1MARc6HTg+V+04Nn7FrNkJ2UF\ngzNRjklnLVNuitXtDcDtHcocDk+PO93+EQdmAPjnTQ048FEn3j16KSPny4abGybi3tvqoVGrce/H\n6+FZlZhkp1FDNHlrwOlFh82R8DoAdNgcGHB6I1PcTP6iTOJWKqIck077wUAwiFAoBIMwfMwgqGEQ\nxP+Jnzhnh8UkiB5LxSt/OIlFCLMMAAAgAElEQVSb508as0phgjb5XjCNJvY94UCqZIR7sdMh+dAR\nDA0dJ8oGBmeiHBS9F1mtAipKDWhuqpFcy9y2+zTeOdIRGSEDgNsbhNsrPuXc6/DghussI77OK3YX\n/s8vWyDoUtwwnQHlJTo01FZCp5X/b2xP66W0i4LUVBmhlrg1tWroOFE2cFqbKAeF9yKvWzYtJkM4\nnscXgM3uTDnzWtBpcM+aOpzvdMSsp6bD4xubNefeQR8On1B231KJdMmYigVMthpFf0aTreK/E6JM\nYHAmyiHhoiPRLRvF9jlH74NOJ+s6FArhavcgBl3SdbTHk5EUBfnKpxdIZmsTZQuDM1EOiC86ohc0\nMVPU8fuc4/dBp8rjC+Ibvzgy4uvOFyMpCiJotXj2vsUJ+5yJsolrzkQ5YOvb7TENFKIDc7TW9i4M\nOL0sIJKiTBQFMRULuGGahYGZRgVHzkRjKBAMYuuuU3j36CVF7+/pd+Nip0NyH3Suq7YUodPuyvq2\nK/W1XtNWcxEaZlSwKAjlHQZnojHi8QXwy50n8d7xK4o/oxc0qKkyJu3JnItunj8Ra
xdNRZFei1f/\nvxM4cd4OlzeYkWIo8UIAHr97PhbPm4yBPldmT040ChiciUZZeH35yImrsDt8KX46hDf2nsWgO9XP\njb332i7jT0cvJ7yejVF0eYke0yeXwSBoMZD50xNlHdeciUZZOJkr9cA8tHd5T+ulhP3LGjWwsnEi\nKkpztxNSIMmOK71O+X9HS26sgtmokzw+n40nKM8xOBMl4fEF0Gl3wuMLiH6d6rmykcwVCAJqlVqy\nslg+ULJf2iBo0NxUg/s/eSMWzpog+p4pVUZsbp6Z6csjGlWc1iaSEL+9yWwSUFIkwOn2Ke6xHC+b\n3aBaT9owt9aSN80o4pmKdBB0atmfT4lBiw0rZ0CjVsd0hOrpd6PMKKBxZiU2r6lT/PsgylUMzkQS\n4vcS9wx40TMwXLQjWY/leIFgEDsPXchKAhQA9A56sfeY8uSyXNNYXwlBq5Hdv20f8ESKiYSrqLEj\nFI1HIwrO3/72t3HkyBH4/X587nOfw8c//vHIsdWrV6O6uhoazdA/lhdeeAETJohPQxHlmlSmn5WW\nhty2+zT2tHSM6LoErQpef462f7rm5saJOHuxHxdtg4o/o9WocO/H6wEAgWAI77Z2iD7AiBUTYUco\nGo/SDs779+/HqVOnsG3bNtjtdnzqU5+KCc4A8OKLL6KkpGTEF0k02uR6KsdTUhoyU2vNC+qtOH6m\nBw63f8TnyhatSoWnP7sIW99uR+upLvQ5vBB0atk15dJiHfyBELQaFTRqFXRa8fdnopgIUT5IOzgv\nWrQIDQ0NAIDS0lK4XC4EAoHISJkon8n1VI6npDSkXLBXqQBTkYB+p3yda4OggVarzunADADvHb+C\njatm4t61s3DX6qFa4V5fAF97+ZDkZ+wOL/ocHuw6clF0WnsoG30yi4lQwUg7OGs0GhQXD40UduzY\ngZtvvjkhMD/99NPo6OjAwoUL8dhjj0Glkm4rZzYXQ6vNXmC3Wk1ZO3c+KOT7T/fel8+bjDf3nlXw\nvkmomVQu+x5TWRGs5qHqWAnXV16EuuvK8WeRPcDRqiuKse+D3F9T9niD8KtUqCwrwmC3EyUmA2pM\nBljLDbD1ukU/Yy0vQs2kcrT96pjo8UAQMOh1qJ5QlvL18O9+4crn+x9xQtiuXbuwY8cOvPzyyzGv\nf+ELX8CKFStQVlaGhx56CDt37sRtt90meR673TnSS5FktZpgsxVuKYJCvv+R3Pu6pVPhdHnR2t4F\n+4Ab5UY9Sop0cLp9sA94YDYZ0FhXiXVLp8JmG4h0lJJKTGqYUSE6Kuwf9CQNzBMtxfjr5fz5Hf7i\nDx/hg9Ndkf3YBkGDynKD5PsbZlTg4qVe0YeXsPfbLmPd0utSmtbm3/3CvHcgP+5f7uFhRMF57969\n+PGPf4yf/vSnMJliv8n69esjf7755pvR3t4uG5yJco1GrcaGlTNwc8NEQKWCtbwIep0mIQgP1cdu\nl2zvGHbnLdNx8nxvpPVgWHxBkXhmowCvP/U91WPp0EedMV+7vQFc7BxEtaUI9gFPZD3ZIGiwfG41\nNq2uhT8QQrlRQK9DfHq/d9CTdttHonyTdnAeGBjAt7/9bbzyyisoLy9POPbFL34RP/rRjyAIAg4d\nOoS1a9eO+GKJRkv8HufogBufHRy/5Sp+i1U4mO88eB4XOh0pX8sN0yx4P4X627nsSo8LZpMejTPL\nsPam61BtKY6MhDVqoHFmJfa0ijcBsYyg7SNRvkk7OP/hD3+A3W7HF7/4xchrN910E+rr67FmzRrc\nfPPN2LRpE/R6PW688UaOmimvSAXcQDAU2fIDAANOLw6f6BQ7BVrbbQgEgmg7042efg9kUi4kLZ9T\njXvWzMTJ8/a8a3QhxT7gwf6POqFRq7FlbX3Msc1r6nC6o1/0IYaZ2lRIVKFQKCc2TWZzbSAf1h6y\nqZDvP5179/gC+OqL+0WDoVoFLLphAjavmYk39
v4FR050ot+ZnSYUpmItHr2zAZOtppS7V+ULi0nA\ngvqqmCWAcBvNo+1d6B30wHJtbT+VSmxh/LtfmPcO5Mf9Z23NmSjfhaeci/RauDx+lBn1stuegiHg\nwEdXceCjq0nPrbrWUzhdA04/nvtFCwyCGotvqIJBUCddn843PQPehCprGrUad62qxarGyUAoBKu5\nmCNmKjgMzlSQwmvKLSc70TPgjZTUrCjVY870CpSVCOgdlN93nEym5qTc3iD+dOwKplQZ01qzzgfh\nKmtajUpyrZ/1sqmQMDhTQYpfUw5nT3f3e/DuUfGEpLE2MDg+1pzFhKusxRchSbV+OdF4wUdRKjjZ\natuYbb2D2VnbzgVmkwFFeq3k76W1vSutFp1E+YrBmcalcM9ltzex1GUqdbMzqdwoYGXjJOi16f2z\ny9VlV4Mw8v9GGusq4fL4JX8v4ZE1UaHgtDaNK/H7k63mIjTMqIhZs0ylbnaYkh7J1RVF6OxxiXZT\nUqmAL909H5ayIrSf68XlntQr4uXqwLGi1ICOLvn7MQgaeH0BmE16FBt0GHT50OsYrrIWLkIi9XtR\nUr+caDxhcKZxJX4tudPuSliz1Os0aKyzyvYNjmYx6THnegsOfHQVnmsBWqMeanPo8YWgAhAC4HL7\nJfs0h0LAsz8/BBVUst2Z8pHD5cOqBZPRdrob3f3uoZkBNeDzBSPBd/2K6XA4vZGqamKlTjVqSP5e\nuMeZCg2DM40bcmvJ8T2Xw92NWk7a0DPgiWRriykp0uFPbbG1rwNBYIKlCJe6nAh/rC/JmrDXFwKQ\nE2UFMqp/0Ie1i6bgrlW1kYALICH4FuuH/7uR6sEc/r2E65lHj6yJCgmDM40bcmvJ8T2XNWo1NjfX\nYcPKGZF9zg6XD7sOX0DbmZ5IYGiYYUHbmW7Rc15KMpVbKCylhkgQjg646dTAjv+9SDURIRrvGJxp\n3JBbS5Zas4wOKKZiAfeunRUz5WqzOyVrPcspLdZlrXJYrsnGlLPUyJqoUDBbm8aN8FqymPgAEs7m\njt+eEw7MxmIBv373DL63vS3l67CY9Nh068yUP5ePjEVaTjkTZQFHzjSuxK9ZVpYPZ2sD0t2m7rxl\nOnb88Sxa223o7vcoys6W4nB58dPffZSxe8plTrcfTrcfpmJhrC+FaFxhcKZxJX7Ncsa0Cgz0uSLH\npbpNnTzfG1MaM93APPTZ8Zf0JSUYAi52OnDDNMtYXwrRuMJpbRqXwmuWBmH4+VMum3u81qzONrUK\nqKkyjvVlEI07DM5UMMaqMthYaaqvhEFQlqhVbhSgVg01/qixlkCvG/6vwSBoYCwSn2SbbDVySpso\nCzitTQWjzKiH2SSgZ2Bk3abyhVarwavP3ob/OW2D1+fHd147KloAxSBo8Ox9iyMtM8NFQmx2J6BS\nwVpeBJUqhOd/0YIOmwPB0NCIebLViK98esEY3BnR+MfgTAVDr9Ng1nUW7Dt+ZawvZVS0n+8FANRY\njRhwehGSqrICQNBpYkbAep0GNVWxjeCfvW8xBpxeXOx0oKaKI2aibGJwpoKyec1MtLTb4PbmaKHq\nDOp1eNDV68L2XSfx52OX4Q2IB2ePNxBToEWOqVhg8hfRKOCaM41LUl2p9DoNKssMY3RVo8tsMuB3\ne89i95EO2ezzcIUvIsodHDmTLLEGBbkm+hq1GpVsV6ptu0/jom0w49dQXiKgpEiLrj53zjS2mD3d\njP0fJK9uJlbhKx9+70TjGYMziZIq1hHdenGsiV1jsUEXsy0quivVhpUzJLdSjVTvoBcurz9nArOx\nSIu2093odcgnvy2bUx1T4Ssffu9EhYDBmURJFesAhlsvjjWxa5Tq0dza3oWlN07I6laqXAnMAOBw\n+ZO/CYBOp4r5Oh9+70SFgI/ClCBZ68X4etRjQe4axXT3u/GDX7eNw4aNI/Nu62Vs230aQH783okK\nBYMzJVDSe
nGspVNQJFm/5ULVctIWWWPO9d87UaFgcKYE4daLYqRaL442uWuk1NgHPJHkr1z/vRMV\nCgZnSpBK68WxIneNwFDVK7UKqCiQbVMjYTbpI1nZuf57JyoUDM4katPqWjQ31aCi1HCt5rIBzU01\nOdW7d/2K6yVrR5cYtHjms4vw/X++BRUcYctaUG+NBN58+L0TFYK0s7W/+c1v4tixY1CpVHjqqafQ\n0NAQObZv3z5897vfhUajwc0334yHHnooIxdLoye+9WIu7nd1OH3wSFT6Cq+dlhn1mDXVjPcKpGQn\nANRPLcPJ831J32cQNFg2N3YrVT783okKQVrB+eDBgzh37hy2bduGM2fO4KmnnsK2bdsix5977jm8\n9NJLmDBhArZs2YK1a9eitpZP3vko3HpxrMgVwwivkYptnwoB+P6ONiw/1Y31N08vmOCsVgGf/cQN\n+MqL+xEQ2dmlF9T40j2NEDRqWM3FkoF3rH/vRIUureD8/vvvo7m5GQAwY8YM9PX1weFwwGg04sKF\nCygrK8PEiRMBACtXrsT777/P4EwpUVIMI7xGGr0vN1p3vwdv7j2LQx8WRmAGhjpFVZmLcUvjZLxz\npCPh+MfmTsT0iWVjcGVElIq0gnNXVxdmz54d+dpiscBms8FoNMJms8FiscQcu3DhQtJzms3F0Gqz\nN31mtZqSv2kcy7f7f/GND0SLYRQXCXhg/dzI6w/f1YjiIgHvf3AJtl636Lku9zizfr1jTa0GplWX\n4juPrIAgaPHIpgUoKdZj//HLsPW6YC0vwpI5E3HfutnQaAor1STf/u5nUiHfO5Df95+RCmGh0MhL\nO9jt2fsP1Go1wWYbyNr5c12u33/81LXT48NbB86Jvve9Y5fwicVTYqZjP7F4CqZXG/Fv29tG65Jz\nyv23z0JDbSVMxQL6+lyR19cvn4Z7b78BZ/7aHfnZ9vRkvq54Lsv1v/vZVMj3DuTH/cs9PKQVnKuq\nqtDV1RX5urOzE1arVfTY1atXUVVVlc63oXFOaura4fZJtnQMF8OoMhcnfL4QVZQa0HTDBMm1Y4Og\n5doxUR5Ka35r+fLl2LlzJwDgww8/RFVVFYxGIwCgpqYGDocDFy9ehN/vx549e7B8+fLMXTGNG+E6\nzt39HoQwPHXderJT8jPlJj28vgA8vkDC5wsR9x8TjU9pjZwXLFiA2bNn4+6774ZKpcLTTz+N3/zm\nNzCZTFizZg2eeeYZPPbYYwCA22+/Hddff31GL5ryn1wdZ49POtQ6nD48/fIhmE0CnJ7Cq/WsUSOS\nhW0Q1AiGQggEg+wYRTTOpL3m/Pjjj8d8PWvWrMifFy1aFLO1igqT3DaodGpjA4DXPxSZegbkWyGO\nFwZBA68vALPJgCK9JqYXtdsbxO4jHVCrVOwYRTTOsGUkZZySbVBye5QNgkZyzXm8U6uG9mhbTAY0\n1lVi/Yrp6HN4sPPQefz52GXRz7S2d2HDyhmc3iYaRxicKeOU9ASW26NcWW5AV687EqD1WjU8/uz3\nSv7nuxrQM+DBG386g95BZf2QM23l/ElYu3hqzGzDG3vP4k9HxQMzEJskR0TjA4MzZZTcWvKREzas\nWzYNpmIBACJlI1vbu2AfcMNsMqDYoMWFTkfsOf1BGAQ13N7EAG0QNCjSa2Af4TS3SgW88t8n0DPg\nRaa3AWvUgFajhscn/4BhEDTYcEstivXD/yyV9K1mxyii8YfBmTJKtieww4OnXz6IpllVkSnu6DrO\nRXotvv7KIYkzq0RftZYXYVJlMQ58JJ3hrUQoNLyOLVb2MhUq1dD5wl2xnti8AIJWjadfPoheh/RD\nhNcXgMPpjQnOStbmmbFNNP4wxZMyKlmf5V6HF7sOX8R/vH0y8lq4jrPL45cMRF5fANWWooTXL3Q6\ncPRUl8gnxk64Jk8wBNh63fiXn+7H7/b9FQvrpVtcAuIjYLmfp1oFrGqcxI5
RROMQgzNlVLI+y2F/\nbL2MV986iUBweJhqLNZBL9ECUqdV42qPS/RYsuniseb2BrHr8EWEADQ31Ui2uWyYYUGfwwOPbzgZ\nTu7nubJxMu5dO4vbqIjGIU5rU0Z5fAGsapyMQDCElnYb+mSmcfe0dECjHt4G9Js/nZXM0s71AKzE\nsVPdeO6Bm7B+xfXY+vYpnDhnR6/Dg3KjHiVFOrSd6cYfWy8lZLeLrc031lVyxEw0jjE4U0ZEb5/q\n7vdA0Krg9Sev2xXeBgQA+z6QzkiWky9br6Kzqv/+kzdG9oHvPHQBe1qGO0jFZ7ezxzJR4eF8GGVE\ndClNAIoCMzAcsGx2p2g2thJL50xAjbUkrc+Opvg1Zb1OgzKjHm2nxdfMW9u7Eqa4q2R6MBPR+MHg\nTCOmZLuPlEjAUolnYyvh8wfh8ozNvuRUiGVVy2a3X3twIaLCw+BMafH4Aui0OyNTs+l2hQoHLGt5\nETRq8QCdLGy/13ZFtNKYEjoNUG4U0vqsHL1ODUE7fOUGQYPQtTrY0eSysbl/mahwcc2ZUiJWmrOh\nthJmk6Co3nV8ecropCadVoWAN3E6XC+oMb/Wiv0fXRU950g6UvkCkN17nK4qc3FMMRW3N4B3jnRA\nFVcHW6tRodigE3244P5losLF4EwpESvNuaelA1OqjIqC88caqnHTDdWoqTJGKoUBQ9O7UmvObm8Q\nK+dPwoGPruZ0a0iVauiho6G2AsdOiU/zRyfA9Tk82HnwfEJFNACYUmVkNjZRAWNwJsXk1padbh9W\nNU7C+x9eFc2cVquBSRUl+PAvduw9diVhu5CxWCebdf3j3x5XFJjD1blGm8WkxxfvmoeyEgEXOx0x\n2dfRevrd+OXOkzhx3o6efo/kUrvT7Yc/EMp4KVEiyg8MzqSYfPKSB2sXT8WGW2rx2tvtQ8FnwIOy\nEgGzppZDr9fi3dZLkffHbxd6Y+9fZLdD9Q36FF3jWARmAJhfV4k/HbsUme5Xq4YqhMXT6dR47/iV\nyNdS18tmFkSFjcGZFJNr8xhOXtLrNLj/2h5eW68LCIVQZtRL1sxube/CumXT0HJyZLWxR5v62gjd\nUjq0dh4KhWKm+6WCrldhMRUmgxEVNgZnUkyuzWN08lIgGMSv3z0TGUWWGQXJpCv7gBsXOx2K1qtz\nycrGyVg1fxKgUqGsRJB8+JAaQSfDZDCiwsbgTEmFt0uVGfWKSknGJ43JZUObTQZUmYvSDmIjoVYD\nQZmBrFoFlJYMPViEr89i0mN+XSVUAL6/oy3pw0cIQGmxgH6n/MNH/EicyWBEhY3BmSSJbZsKJ3FJ\nlZJMtSBJY10lXB7/qAdmYCgw11SVoMM2KDoNfcuCydh4S22knaXL40eZUY9fv3tG8cOHqViH/sHk\nswIrGydj7aIpLM1JRAAYnEmG2Lap6CQusWSlPodHtiCI2ahH36AHZpMBc6ab4XT78b3tbZm/eIVc\n7gD+n4eW4Ve7T+PDv3RjwBWA2ShgYVTP6fB9moqFlB8++gd9srMCFpMeC+qHs9aJiAAGZ5IgF4TC\ne3XjR3iBYBA7D12QDEYVpQZ87TNNcLh82HXkIt4/fjntetqZYh9ww+UJwFgsQNBpoXIFoJaoVAbI\nZ6wDQw8f9riSm1KBefmcamxZW8+RMhEl4KM6iVJa8zm6jOe23aexp6VDMhg11lXCVCxgT2sH9rR0\njHlgBoCyEj12HjofadoRwvAMwbbdp2PeGwgGsfPgecm9yRWlBjx17wLJcqBq1VAp0opSA5qbavCZ\n22cxMBORKI6cCUBs0le4W5LctiljsYCtu9qHM7JLBLi80s0nJltLsGl17YiaZGSD3eHBn4+Jt6qM\nnyHYtvs09kTt1Y7XWFeJQDAk2cM6BODxu+dj+uQyBmUiksXgXODkkr6ktk0V6TX4j7dO4v0Ph2td\n9yZJehp0+eDxBbD17VNpN6nIFqmRfnQ
hELmHCrVqKKFr0+pa+AMhyYcai8nAwExEijA457H40W46\n5JK+Nq2uxcnzvQm1ny/aBnHRNpjS9+lzeLH17VPYF1UdK9eVG/WRQiBy0/yhELB20RRo1Gpo1FC0\nF5yISA6Dcx6SG+2mkvGbLOlr3bJpcLqVlc1Mxlyqx4lzPSM6h0EYCmweXwAqZH9fdEmRLhJM5ab5\nLaWx1byU7AUnIpLD4JyHkm1xUipZ0tfFTkfafZrjOZw+eP3KE8AMggZeXwDma12emhfWwFJqiFz3\nHw6cw5+Oiq8VZ4rTPTQVr9dpFFdHAwCNWo3NzXWSe8GJiJJJKzj7/X585Stfwfnz5xEIBPDEE0+g\nqakp5j2zZ8/GggULIl+/8sor0Gj4H9RIpbPFSUqypK+aKqPkcTla9VBWcnQZaaWBWaUCbmmcjA0r\nZ8Dh9IoGtipzMdYumpr14Gwf8MQ0n0h1RKzXadi4gojSklZw/u1vf4uioiK89tprOHXqFL785S9j\nx44dMe8xGo149dVXM3KRNEzJFielASHZaFDQaVA/1ZzyOnEKA+QEEyxFuPfj9QCAYr30X09LqQEG\nQZ32dixBq4I/EJKdGo9vPpFsRJyJHAAiIiDN4HzHHXfgk5/8JADAYrGgt7c3oxdF0pR0hkqF2Ghw\n/swKBEMhfPXF/ejp90TWeuVaOmaKvd+DAac3UipTPshJFwvR69TwyHSA8vmTL1hLJXDFj4gzlQNA\nRBSWVnDW6XSRP//85z+PBOpoXq8Xjz32GDo6OrB27Vp89rOfTf8qKSKVtU8lxEaDv373DN6JOn84\nKC+bUw29To22Mz2wD7gh6DIftD2+IJ5+6SD6Br2yQa7P4YFH4vuqVEBTfVVM3+R4ZpMeKhVEH3LU\nKmDl/EmKE7gylQNARBSWNDhv374d27dvj3ntkUcewYoVK/Af//Ef+PDDD/HjH/844XNPPPEE7rjj\nDqhUKmzZsgVNTU2YO3eu5Pcxm4uh1WZvKtBqNWXt3KPt4bsaUVwkYP/xy+jqdaGyvAhL5kzEfetm\nQ6MRH6kpuf8aAG6vH21nukWPn+7oww+fWA0AuNLtRCAYwH/vO4cDx6+g15G5vcvhPdPhIFdcJOCB\n9bF/d0xlRbCai9BpdyV83lpehEfubkTFzpN4++A5uDyJQfxj8ycDAN7cezbh2G1Lp+HzG+Ypula5\nn1fbmW58bkMRDMLY5l2Op7/76Sjk+y/kewfy+/6T/q+xceNGbNy4MeH17du3Y/fu3fj3f//3mJF0\n2D333BP585IlS9De3i4bnO12p9JrTpnVaoLNNpC18482jy+AZTdW4dbGSTHTvz094nuPU7n/TrsT\nNpGABwBdvS60n+3CntYOtLbb0i4mkmp7yPeOXcInFk9JmBWYO92Cd450JLx/7nQLnA4P1i+fhs1r\n6/H/vt6KE+ftsA94Iklc65ZOBQA4Xd6EBK9PfWxaxn5eZ/7aPaZJYePt736qCvn+C/negfy4f7mH\nh7Qe6S9cuIDXX38dv/zlL6HXJ65xnj17Fj/84Q/xwgsvIBAIoKWlBbfddls634qiiK1tNsyoQHPT\nFFhKDRlJQkq2pr3r8AXZEpZKTLYaEwqbyOnpF090k4rv/mAQnXYnyox6WIsE3P/JGyWTtUa65SnT\nOQBERECawXn79u3o7e3Fgw8+GHntpZdewiuvvIJFixahsbER1dXVuPPOO6FWq7F69Wo0NDRk7KIL\nldja5p7WS9jTegkVGUpC0us0aKitxJ4WkRHpDEtMyc50LJ9TjXtvq8P2PWfw3gdXIuvVekENny8o\nOqLWC5qEIOfxBXBUYkvZ3qOX8afWy7CU6rF83mSsWzpVdlvTSLY8ZToHgIgIAFShkFib+dGXzemH\nfJjeAOS34nh8AXz1xf1Jp5Kbm2oSkpCU3n94ZN5yshM9A97I9HNFqR6zpprhDwZx4KPOpOdRqYZK\nWsa
zmPR4/sElkXvz+AKw9boQCASx52iH5L5lg6DB9x75WORzgWAQL/3+I+xXcC2A+M8kk4ZnNBL3\nP491tna+/N3PlkK+/0K+dyA/7j/j09qUWUq24iTrIxyWaiGSaPEj8/Ao1uHy4r3jV2Q2LsWqNhfj\nck9iDsGCemvMdel1GtRYjdi6q122oIj32kNLlbkYgWAQX3/lcErT4iP5mSjBimBElGnchJkDwkFR\nrp9weG0zmehey6mQqzzm8Q1F6WRTLAZBA4OgwZUeZ+TP4f7FqxZMxqrGyfD4YjOnlbSQjF673fp2\ne0qBGUj/Z5Kq8PQ4AzMRjRSD8xhLVo4zHMzCa5vJpJuEpHRkLkavU+OmG6vg9gbg9gYQAiJ/Xjqn\nGg0zLGg73YWvvngAX31xP7buakcgGFT8fcNrtx5fAK2nulK+PiZmEVG+4bT2GEulHGd0Na/ufrfo\nZ9JNQpLLOk5m2Zxqyb2+Le22mCIl8QU65L5vdJ9kYOhn1euQ7xstholZRJRvOHIeY3LT1VK1nZ97\n4CY8/8BNWLVgMipKDVCrhqaOm5tq0m5LqHRkDgwFTVXU92xumiL5gCFVPSw8KyD3fVfOn4R7P14f\nWXcv0mtRbhQUXWNYkXhq+nIAABFXSURBVF6D9Sump/QZIqKxxpHzGEtnK45ep8HEihLc+/F6eFZl\nrtlCfJ1tQacRDa4r50/C2sVTI9/T4wukPOruiZoVSNbtKTphLtWRs8cbgMPplW2iQUSUa/g/Vg5I\ntRVhtEy2JYzOOu7pd+Otw+dx4MOrkc5PBkGDJbOr0Nw0JeFhYNZUs2wt63gqADsPnsfmNXVJs53j\ns8hTUVlexPVmIso7DM45YKRbcTLdqlCv02BPawfebY3d3uT2BrD/w068e63Ax/yZlQgBOHaqC939\nHhgENQAVvL6A5Kg7LBgC9rRegkajjuxBFnvQkEuY0+vUKDFo0evwSn6/JXMmcr2ZiPIOg3MOSXUU\nnK1WhXIBMRwAu/s9CXWtwyPsJbMnoP28XVG3qmR7kOUS5nz+IL5413wIWjWMxQLe2Hs2YfbhvnWz\nJWuOExHlKgbnPJatVoUj2VYFACfP9cKucG04PiM9nnztaj2s5UWRwC42+yDVpYuIKJfxf648pXR/\ntNznO+1O0fcpLXgixZ5CwY9ke5D1Og2KDYldzwCg2KBLGHGzEAgRjQccOecpudFtd78bPf1uTKwo\nSTgWPxVebtRjfl0lNjfPjEyFy2WQK5FKS8hke5A9vgAGXeKj8EGXL7Idi4hoPOHIOU8lG93uOnxB\n9PX4UqF2hwd7Wjrw9VcOR6p2AUMZ5M1NNZF91AZBeQBUEpjVKmDVgslJM9L7HB7YB8SDc6/DMypl\nOYmIRhtHzjkqWQa2XGtHAGg705MwqpSbCr/Q6cDWt9tx79pZAGIzyMOdo/7Udhltp7sjCVfzZ1Zc\ny9Yefq2htgLHTtnQIxFQw0IhYO2iKUkT19gvmYgKEYNzjkklA7t5YY1kcBZLtEqW6PXe8SvYcEtt\npGBHIBjEr989E3MtDTMq0Nw0BZZSQyTwb7wl9kFCo1YlnRK3lIoH1vBDSZFeC5fHjzKjnv2Siajg\nMDjnmFQysC2lBlSkMKosM+pRbtRLJmx5fUG89nY77v/kjZLXEr83GUjcApZODXC5XtLzZ1Zi9cLJ\nMSN0pUVaiIjyEYNzDpGbdj58ohPrlk2DqXi4tnSqpT/1Og3m10lPhQPAifP2SAa3XDa43N7k+Epj\nuw5fQNuZHtnAKtVLOryfurmpBs89cBP7JRNRQWBwziFy0869Di+eefkQFs6KneJOtfTn5uaZOPFX\nOy73OEWP2weGk6yUdsuSEqkBvnaW7Bq6kp7O4QeCTJUqJSLKZQzOOSRZ20a7I3GKO9XSnxq1Gl/5\nu4V47P++B48vmHA8PB0eCIagF9SRql9i70mlbKhc9TMlRU+UPhAQE
Y0HDM45ROn+4tb2LqxbNi2S\nMKXXaVIq/Vms12HFvEmS0+EAsPXtdtHADADzZlYkJIqNpGyokl7SzMwmokLC4JxjwtPRh090SrZH\n7O534+mXD6LP4U07MIpNh8+bWYFQKISvvrhfMlAaBA2CwRB2tw6vW4+0bKiShxJmZhNRIWFwzjHh\naep1y6bhmZcPSWZWhwN3KoExfho6fjr81++eSTpq93gDOHaqW/RYskQxOeGHhZaTNvQMeGKytcMP\nH0REhYLBOUeZigUsnKW8hGZ8YIwOxIFAEFt3tYtOQ4enw5UkZQFAmVFAr8QDw0jWhePXzqP3OXPE\nTESFhsE5h8VPPZeVSO9RDgfGijJDQhGTMqMeZy/1R94rNtpW2omqcWYl2s50Z61iV/TaefS2MSKi\nQsLgnMPERpNff+WQbGAUKxwitX4cPdpOlpRlMemxoP7a2rbmNCt2ERFlEYNzHogeTcoVHQGkC4eI\niZ6GlkvKWj6nGlvW1kcCb6p7q4mIKDUMznkkEAzCHwhCr1XD4x/a5mQQNFg2txqbVteiu8+taGo6\nLH4aWi7oRmeCp7q3moiIUsPgnMOik7q0GhW+/sphXOh0xLzH7Q3A6fLjctegov3C0eKnoVMNuqns\nrSYiIuXSCs6/+c1v8P3vfx9Tp04FACxbtgyf//znY97z5ptv4uc//znUajXuuusubNy4ceRXWyDE\nOlMZ9Fp02AZF37//o6vY/9FVGAQ1KsuKACQG5ylVRjjdfkXT0Ay6RERjK+2R8+23344nn3xS9JjT\n6cQPf/hD7NixAzqdDnfeeSfWrFmD8vLytC+0kIgldYkF3HhubxAXbYMJgXj5vElYt3Qq/IEQp6GJ\niPJAVqa1jx07hrlz58JkMgEAFixYgJaWFqxevTob3y4vKK1D7fEF0HKyc0Tfa9Dlw9OfXRTZJ1wz\nqRw22wA0anBETESUB9IOzgcPHsT9998Pv9+PJ598EjfeeGPkWFdXFywWS+Rri8UCm00+i9hsLoZW\nm73RnNVqysp53V4/7P0emEv1MAiJP85AIIiXf/ch9h+/DFuvC9byIiyZMxH3rZsNjUad8N4f/Ooo\negbEy3YqZR/woKjEgOnXlURey9b954NCvneA91/I91/I9w7k9/0nDc7bt2/H9u3bY177m7/5Gzzy\nyCO45ZZb0NraiieffBK/+93vJM8RCoWSXojdLt7CMBOsVhNstoGMnlNsXVisxvXWXe0xU9Sddhfe\n3HsWTpc3odzm1l3t2K2wIpgcs0mPgNcXueds3H++KOR7B3j/hXz/hXzvQH7cv9zDQ9LgvHHjRtlk\nrsbGRvT09CAQCECjGRr5VlVVoaurK/Kezs5OzJ8/P5Vrznli68LxVbfkSmKKldtMtkc5vJbc0++G\nTqeGV6TlIwAsqLdyTZmIKI+l3t8PwIsvvojf//73AID29nZYLJZIYAaAefPm4YMPPkB/fz8GBwfR\n0tKCpqamzFxxDkgWdD2+AAD5kpjhAiBhycpnLptTja99pgnPPXATvvW5Jfjuwx/DrQsnwyAM/9wN\nggarF05mMRAiojyX1przunXr8KUvfQmvv/46/H4/nn/+eQDAT37yEyxatAiNjY147LHHcP/990Ol\nUuGhhx6KJIeNB0qCbpW5WHbfcXwBELn3VpTqce/aemjU6pikrv+1ph533lILW68LCIVgvVbpi4iI\n8ltawbm6uhqvvvpqwusPPvhg5M+33XYbbrvttvSvLIcpDbpyJTHjC4DIv1d6mlqv06DGakz3VoiI\nKAexQlgaUgm6qdShZs1qIiICGJzTpjSQplISkzWriYgIYHBOWzbrULN8JhFRYUsrW5uGhQNpvoxw\nPb4AOu3OSEY5ERHlHo6cC4TSoilERDT2GJwLhJKiKURElBvG7ZCJ07fD3F6/oqIpRESUG8bdyFls\n+nb5vMlYt3Rqzk/fKu1clSp7v
7KiKURElBvGXXAWm76VajSRK7K9HmwuVV6pjIiIxl5uDyVTpLTm\nda4JP1B093sQwvB68LbdpzNyfoOgRWOdVfRYfNEUIiIae+MqOKfSaCJXjNYDxabVtWhuqkFFqQFq\nFVBRakBzUw2rjxER5aBxNa2dSqOJXKG0icZIsfoYEVH+GFcj53DNazG5On0bfqAQk40HinwrmkJE\nVIjGVXAGxKdv71gxPWenb/PxgYKIiLJrXE1rA+LTtzWTymGzDYz1pUliNyoiIoo27oJzWD41j8jm\nerDHF8DlrkEEfAGOwomI8sS4Dc75KJMPFDF7pwc8sJhYS5uIKF8wOI9TrKVNRJS/OIQah/K1GAsR\nEQ1hcB6H8rEYCxERDWNwHodGe+80ERFlFoPzOMS900RE+Y0JYRmWrbaPqeLeaSKi/MXgnCHZbvuY\nqui90xpBh4DXxxEzEVGe4LR2hmS77WO69DoNJlaWMDATEeURBucM4NYlIiLKJAbnDODWJSIiyqS0\n1px/9KMfYd++fQCAYDCIrq4u7Ny5M3L84sWLWLduHebMmQMAMJvN+MEPfpCBy81N+dhHmoiIclda\nwfnzn/88Pv/5zwMA/vM//xPd3d0J77n++uvx6quvjuzq8kR461J0ucwwbl0iIqJUjShb2+/347XX\nXsMvfvGLTF1P3uLWJSIiypQRBee33noLH/vYx2AwGBKOdXV14Qtf+AI6OzuxefNm3HHHHSP5Vjkv\nm20fiYiosKhCoVBI7g3bt2/H9u3bY1575JFHsGLFCtx///149tlnUVNTE3Pc4XBg586duOOOOzAw\nMICNGzfitddeQ1VVleT38fsD0GoZzIiIiJIGZylOpxMbN27Ef/3XfyV976OPPop77rkHS5YskXyP\nzTaQzmUoYrWasnr+XFfI91/I9w7w/gv5/gv53oH8uH+r1SR5LO2tVCdOnMD06dNFj+3fvx/f+ta3\nAAwF8RMnTuD6669P91sREREVlLSDs81mg8ViiXnt+eefx4ULF9DU1IS+vj5s2rQJn/70p/Hggw9i\nwoQJI75YIiKiQpD2tHamcVo7ewr5/gv53gHefyHffyHfO5Af95+VaW0iIiLKDgZnIiKiHMPgTERE\nlGMYnImIiHJMziSEERER0RCOnImIiHIMgzMREVGOYXAmIiLKMQzOREREOYbBmYiIKMcwOBMREeWY\nggjO3d3d+Pu//3vce++9uPvuu3Hs2LGxvqRR4/f78eSTT+Kee+7BXXfdhcOHD4/1JY26gwcPYunS\npdizZ89YX8qo+uY3v4lNmzbh7rvvRltb21hfzqhrb29Hc3MzfvnLX471pYy6b3/729i0aRM2bNiA\nt956a6wvZ1S5XC48+uij2LJlCzZu3Ji3/+61Y30Bo+HNN9/E3/7t32LdunU4ePAgvv/97+Pll18e\n68saFb/97W9RVFSE1157DadOncKXv/xl7NixY6wva9ScP38eP/vZz7BgwYKxvpRRdfDgQZw7dw7b\ntm3DmTNn8NRTT2Hbtm1jfVmjxul04hvf+AaWLl061pcy6vbv349Tp05h27ZtsNvt+NSnPoWPf/zj\nY31Zo2bPnj3/f3v3D5JaFIAB/BNvRtHfK9ewLVqKIlqaoqJoimgTWguChhqL4g7NRrQooZiDQ2Bo\nBEFDEVE0BOGoREtLiFEXScqSQHhDcHnCe5EP3j3q+X7TuWf6DlzOxz2IB/39/VhYWEA6ncb8/DzG\nx8dFxyqbFOU8NzdnjjOZjFTXV87MzGB6ehoAoKoqXl5eBCeylqZp8Pv90HVddBRLXV9fY3JyEgDQ\n3d2NXC6Ht7c3NDU1CU5mDYfDgVAohFAoJDqK5YaGhjAwMAAAaGlpwcfHB4rFIux2u+Bk1piamjLH\n1bzfS1HOwNf904uLi8jn84hEIqLjWKaurs4cRyIRs6hl0dDQIDqCEIZhoK+vz3xWVRXPz8/SlLO
i\nKFAUaba3Ena7HY2NjQCAeDyO0dFRaYr5d7Ozs3h8fEQgEBAd5Z/U3Nsbi8UQi8VK5paXlzEyMoKD\ngwNcXl5ifX29Jo+1v1v73t4eUqlU1b6oP/Hd+mXHf+mVz9nZGeLxeE3udT8RjUZxe3uLlZUVHB0d\nwWaziY5UlporZ4/HA4/HUzJ3c3ODXC6H1tZWjI2NYXV1VVC6/+tPawe+Suv8/Bw7OzslX9K15m/r\nl5HL5YJhGObz09MTNE0TmIisdHV1hUAggN3dXTQ3N4uOY6lkMgmn0wm3243e3l4Ui0Vks1k4nU7R\n0coixa+1T09PcXh4CAC4u7uD2+0WnMg6Dw8PiEaj8Pv9qK+vFx2HLDI8PIyTkxMAQCqVgsvlkuZI\nW3avr6/Y3NxEMBhEW1ub6DiWSyQS5mmBYRh4f39He3u74FTlk+JWqmw2i7W1NeTzeXx+fkLXdQwO\nDoqOZYnt7W0cHx+js7PTnAuHw3A4HAJTWefi4gLhcBj39/dQVRWapklzzLe1tYVEIgGbzYaNjQ30\n9PSIjmSZZDIJr9eLdDoNRVHQ0dEBn88nRVnt7+/D5/Ohq6vLnPN6vSV7QC0rFArQdR2ZTAaFQgFL\nS0uYmJgQHatsUpQzERFRNZHiWJuIiKiasJyJiIgqDMuZiIiowrCciYiIKgzLmYiIqMKwnImIiCoM\ny5mIiKjCsJyJiIgqzC8iivHPF8qqogAAAABJRU5ErkJggg==\n", "text/plain": [ - "\u003cmatplotlib.figure.Figure at 0xa813090\u003e" + "\u003cmatplotlib.figure.Figure at 0x7f7a18dfb8d0\u003e" ] }, "metadata": { @@ -155,7 +149,7 @@ "\n", "import matplotlib.pyplot as plt\n", "\n", - "plt.scatter(inputs.numpy(), labels.numpy())\n", + "plt.scatter(inputs, labels)\n", "plt.show()" ] }, @@ -168,14 +162,12 @@ "source": [ "## Step 2: Define our TensorFlow variables\n", "\n", - "We'll use Keras's object-oriented [`Dense`](https://www.tensorflow.org/api_docs/python/tf/contrib/keras/layers/Dense) layer to create our variables. In this case, we'll create a `Dense` layer with a single weight and bias.\n", - "\n", - "(**Note**: We're using the implementation of `Dense` found in `tf.layers.Dense` though the documentation link is for `tf.contrib.keras.layers.Dense`. When TensorFlow 1.4 is released, the documentation will also be in `tf.layers.Dense`) " + "We'll use Keras's object-oriented [`Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense) layer to create our variables. In this case, we'll create a `Dense` layer with a single weight and bias." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 0, "metadata": { "cellView": "code", "colab": { @@ -183,27 +175,23 @@ "startup": false, "wait_interval": 0 }, - "height": 34, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 34 }, "colab_type": "code", "executionInfo": { - "elapsed": 22, + "elapsed": 332, "status": "ok", - "timestamp": 1505502830753, + "timestamp": 1525154229931, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, "id": "z9r-ZeyrXu3A", - "outputId": "6230a7a3-29fe-4d08-f101-da80425bad82" + "outputId": "e19a698e-5892-4fcd-80d3-1394605ee72c" }, "outputs": [ { @@ -212,7 +200,7 @@ "[]" ] }, - "execution_count": 4, + "execution_count": 48, "metadata": { "tags": [] }, @@ -222,7 +210,7 @@ "source": [ "# Create TensorFlow Variables using Keras's Dense layer.\n", "\n", - "wb = tf.layers.Dense(units=1, use_bias=True)\n", + "wb = tf.keras.layers.Dense(units=1, use_bias=True)\n", "\n", "# We can access the underlying TensorFlow variables using wb.variables.\n", "# However, the variables won't exist until the dimensions of the input\n", @@ -240,7 +228,7 @@ "id": "docKLUaonYG_" }, "source": [ - "## Step 3: Define our loss function\n", + "## Step 3: *Define the loss function*\n", "\n", "Our loss function is the standard L2 loss (where we reduce the loss to its mean across its inputs)." 
] @@ -261,15 +249,14 @@ }, "outputs": [], "source": [ - "def loss_fn(inputs, labels, wb):\n", + "def loss_fn(predictions, labels):\n", " \"\"\"Calculates the mean L2 loss for our linear model.\"\"\"\n", - " predictions = wb(inputs)\n", " return tf.reduce_mean(tf.square(predictions - labels))" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 0, "metadata": { "cellView": "code", "colab": { @@ -277,36 +264,32 @@ "startup": false, "wait_interval": 0 }, - "height": 34, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 34 }, "colab_type": "code", "executionInfo": { - "elapsed": 24, + "elapsed": 348, "status": "ok", - "timestamp": 1505502830875, + "timestamp": 1525154234538, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, "id": "RkNbXoXkpjVH", - "outputId": "c36fc98d-3a57-4074-901d-c10ae017ae3f" + "outputId": "e4688f3c-e29f-416d-f541-6d81953b5660" }, "outputs": [ { "data": { "text/plain": [ - "\u003ctf.Tensor: id=40, shape=(), dtype=float32, numpy=7.3549819\u003e" + "\u003ctf.Tensor: id=1252, shape=(), dtype=float32, numpy=16.979801\u003e" ] }, - "execution_count": 6, + "execution_count": 50, "metadata": { "tags": [] }, @@ -316,47 +299,43 @@ "source": [ "# Test loss function (optional).\n", "\n", - "loss_fn(inputs, labels, wb)" + "loss_fn(wb(inputs), labels)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 0, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, - "height": 51, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 51 }, "colab_type": "code", "executionInfo": { - "elapsed": 57, + "elapsed": 418, "status": "ok", - "timestamp": 1505502830981, + "timestamp": 1525154260083, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, "id": "K_7beXoHOU7t", - "outputId": 
"1ad0856a-02ec-4117-a6c0-b41030981d87" + "outputId": "8f55c028-fe2b-4edb-ad68-a849afc60623" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "w: tf.Tensor([[ 1.56891453]], shape=(1, 1), dtype=float32)\n", - "b: tf.Tensor([ 0.], shape=(1,), dtype=float32)\n" + "w: -0.311619\n", + "b: 0.000000\n" ] } ], @@ -364,31 +343,20 @@ "# At this point, the variables exist, and can now be queried:\n", "\n", "w, b = wb.variables\n", - "print(\"w: \" + str(w.read_value()))\n", - "print(\"b: \" + str(b.read_value()))" + "print(\"w: %f\" % w.numpy())\n", + "print(\"b: %f\" % b.numpy())" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "YIlebeb_qYtC" + "id": "JVDWpL9VYWdP" }, "source": [ - "## Step 4: Create our gradients function using `implicit_value_and_gradients()`\n", - "\n", - "With a loss function defined, we can calculate gradients and apply them to our variables to update them.\n", + "## Step 4: Create an optimizer\n", "\n", - "To calculate the gradients, we wrap our loss function using the `implicit_value_and_gradients()` function.\n", - "\n", - "`implicit_value_and_gradients()` returns a function that accepts the same inputs as the function passed in, and returns a tuple consisting of:\n", - "\n", - "1. the value returned by the function passed in (in this case, the loss calculated by `loss_fn()`), and\n", - "1. a list of tuples consisting of:\n", - " 1. The value of the gradient (a `tf.Tensor`) with respect to a given variable\n", - " 1. The corresponding variable (`tf.Variable`)\n", - "\n", - "Test it out below to get a feel for what it does. Notice how the first value of the returned tuple (the loss) is the same as the value returned in the cell above that tests our loss function." + "We'll use a `GradientDescentOptimizer` to fit our model." ] }, { @@ -403,87 +371,29 @@ } }, "colab_type": "code", - "id": "v1spZQ4NwW1U" + "id": "DudNEebMKDWN" }, "outputs": [], "source": [ - "# Produce our gradients function. 
See description above for details about\n", - "# the returned function's signature.\n", - "\n", - "value_and_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "cellView": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "height": 153, - "output_extras": [ - { - "item_id": 1 - } - ] - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 46, - "status": "ok", - "timestamp": 1505502831114, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 240 - }, - "id": "21WMcpsmFFLd", - "outputId": "f51b3171-33f5-4f87-8bf7-0be2dc8edc8a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Outputs of value_and_gradients_fn:\n", - "Loss: tf.Tensor(7.35498, shape=(), dtype=float32)\n", - "\n", - "Gradient: tf.Tensor([[-3.00773573]], shape=(1, 1), dtype=float32)\n", - "Variable: \u003ctf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32\u003e\n", - "\n", - "Gradient: tf.Tensor([-4.06519032], shape=(1,), dtype=float32)\n", - "Variable: \u003ctf.Variable 'dense/bias:0' shape=(1,) dtype=float32\u003e\n" - ] - } - ], - "source": [ - "# Show outputs of value_and_gradients_fn.\n", - "\n", - "print(\"Outputs of value_and_gradients_fn:\")\n", - "\n", - "value, grads_and_vars = value_and_gradients_fn(inputs, labels, wb)\n", - "\n", - "print('Loss: {}'.format(value))\n", - "for (grad, var) in grads_and_vars:\n", - " print(\"\")\n", - " print('Gradient: {}\\nVariable: {}'.format(grad, var))" + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "JVDWpL9VYWdP" + "id": "YBeJYxY8YaiO" }, "source": [ - "## Step 5: Create an optimizer\n", + "### Step 5: Define a training step\n", "\n", - "We'll use a `GradientDescentOptimizer` to fit our model." + "To fit model variables to the data we'll need to:\n", + "\n", + "1. 
Calculate the gradients of the loss with respect to the model variables.\n", + "2. Use `optimizer` to compute updates to the variable values based on those gradients.\n", + "\n", + "To calculate the gradients, we use the [`tf.GradientTape`](https://www.tensorflow.org/api_docs/python/tf/GradientTape) context manager\n", + "and its `gradient` function to compute gradients through computation conducted within its context:\n" ] }, { @@ -498,94 +408,72 @@ } }, "colab_type": "code", - "id": "DudNEebMKDWN" + "id": "diDZfrMJM3OC" }, "outputs": [], "source": [ - "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)" + "def run_step(inputs, labels):\n", + " with tf.GradientTape() as g:\n", + " loss = loss_fn(wb(inputs), labels)\n", + " # Compute the partial derivatives of loss with respect to the variables\n", + " grads = g.gradient(loss, wb.variables)\n", + " optimizer.apply_gradients(zip(grads, wb.variables))\n", + " return loss" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "YBeJYxY8YaiO" + "id": "1WWepgmJQOzc" }, "source": [ - "### Step 5a: Test Our Optimizer\n", - "\n", - "Now we have everything needed to start fitting our variables to the data!\n", - "\n", - "In the next cell, we'll demo these capabilities. We'll:\n", - "\n", - "1. Print the current values of `w` and `b`\n", - "1. Calculate the loss and gradients\n", - "1. Apply the gradients\n", - "1. Print out the new values of `w` and `b`\n", - "\n", - "You can run the cell multiple times. Each time, you should see the values of `w` and `b` get closer to their true values of 3 and 2." 
+ "Repeatedly running the training step will nudge the variables towards the values that best fit the data (i.e., \"w\" will move closer to 3.0, while \"b\" will tend to 2.0):\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, - "height": 102, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 51 }, "colab_type": "code", "executionInfo": { - "elapsed": 103, + "elapsed": 380, "status": "ok", - "timestamp": 1505502831285, + "timestamp": 1525154412590, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, - "id": "diDZfrMJM3OC", - "outputId": "d585fff0-ecb3-4e98-9b33-bbae07a95d8c" + "id": "ya5Qxz5XQlhU", + "outputId": "8dd47155-a6c1-44c5-c279-617c803f1723" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Values of w, b, BEFORE applying gradients:\n", - "(array([[ 1.56891453]], dtype=float32), array([ 0.], dtype=float32))\n", - "()\n", - "Values of w, b, AFTER applying gradients:\n", - "(array([[ 1.86968815]], dtype=float32), array([ 0.40651903], dtype=float32))\n" + "Values of w, b BEFORE applying gradients: 2.725763, 1.894334\n", + "Values of w, b AFTER applying gradients: 2.774932, 1.922555\n" ] } ], "source": [ - "# Test the optimizer.\n", - "\n", - "print(\"Values of w, b, BEFORE applying gradients:\")\n", "w, b = wb.variables\n", - "print(w.read_value().numpy(), b.read_value().numpy())\n", - "print()\n", - "\n", - "# Calculate the gradients:\n", - "empirical_loss, gradients_and_variables = value_and_gradients_fn(\n", - " inputs, labels, wb)\n", - "optimizer.apply_gradients(gradients_and_variables)\n", - "\n", - "print(\"Values of w, b, AFTER applying gradients:\")\n", - "print(w.read_value().numpy(), b.read_value().numpy())" + "print(\"Values of w, b BEFORE applying gradients: %f, %f\" % (w.numpy(), 
b.numpy()))\n", + "run_step(inputs, labels)\n", + "print(\"Values of w, b AFTER applying gradients: %f, %f\" % (w.numpy(), b.numpy()))\n" ] }, { @@ -602,51 +490,44 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 0, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, - "height": 397, - "output_extras": [ - { - "item_id": 1 - }, - { - "item_id": 2 - } - ] + "base_uri": "https://localhost:8080/", + "height": 364 }, "colab_type": "code", "executionInfo": { - "elapsed": 225, + "elapsed": 580, "status": "ok", - "timestamp": 1505502831550, + "timestamp": 1525154278709, "user": { "displayName": "", "photoUrl": "", "userId": "" }, - "user_tz": 240 + "user_tz": 420 }, "id": "VukGe-huNaJ4", - "outputId": "f0a8d665-1910-477c-d8ab-c94ccdc4afcd" + "outputId": "c79c8e63-c781-451e-f74f-20815d8da49f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2.111051321029663, 2.3047544956207275, 2.4602210521698, 2.5850086212158203, 2.6851789951324463, 2.7655951976776123, 2.830157995223999, 2.8819968700408936, 2.9236228466033936, 2.9570505619049072]\n" + "[0.9409681558609009, 1.3733772039413452, 1.7128530740737915, 1.9793939590454102, 2.188689708709717, 2.3530514240264893, 2.4821391105651855, 2.583533763885498, 2.6631851196289062, 2.7257626056671143]\n" ] }, { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAd0AAAFXCAYAAADnFpTQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd4FFUbBfAzu+m9koSShBQCSC+igIAgRRGkChJEiggo\nHURAEBQBQeADRcWCha50ULFLk6IivYRQQwskhPS6O/P9sckmm4Rkk2x2difn9zz7bLuZvC8JHO7M\n7FxBkiQJREREVOlUchdARERUVTB0iYiIzIShS0REZCYMXSIiIjNh6BIREZkJQ5eIiMhMjArdlJQU\njB8/Hk8//TS6d++OkydPVnZdREREiiMY8znd6dOno2XLlujbty80Gg0yMzPh4uJijvqIiIgUo9TQ\nTU1NRa9evfDbb7+ZqyYiIiJFKnX38s2bN+Hp6YkZM2agd+/emD17NjIzM81RGxERkaKUGroajQbn\nzp3DoEGDsH37djg4OOCzzz4zR21ERESKUmro+vv7w9/fHw0bNgQAdO3aFefOnSvxa3g5ZyIioqJs\nShvg4+ODgIAAXL16FbVr18aRI0cQGhpa4tcIgoC4uBSTFSkHX19Xq+8BUEYfSugBYB+WRAk9AMro\nQwk9ALo+jFFq6ALArFmzMHXqVGg0GtSqVQsLFy6sUHFERERVkVGhW7duXWzdurWyayEiIlI0XpGK\niIjITBi6REREZsLQJSIiMhOGLhERkZkwdImIiMyEoUtERCbRuXM7uUuweAxdIiIyCUEQ5C7B4hn1\nOV0iIqKy+OijFTh69BAEQYUhQ4ajU6fOuH8/HnPmzER6ehq0Wi2mTJmOJ59sgwUL3kZU1HkAArp3\n74nnn39B7vIrDUOXiEhh5s6dhd27d5h0mz169MLcue8aNXbv3t9x+XI01qz5Fg8eJODll4egadNm\n+PXXn9Cq1eN48cVhkCQJmZmZOH/+POLi7uGbbzYBANLSUk1at6Xh7mUiIjKp06dP4qmnugIAPD29\n0LRpc5w/fw716j2CH37Yha+++hyXLkXD0dERtWrVwp07t7F8+RIcPXoYTk7OMldfuTjTJSJSmLlz\n3zV6VloZCq80l/e8ceOm+Oijz3H48EEsWDAXAwcOxuDBA/D11xtx9Ohh7Ny5DX/88StmzHhLjrLN\ngjNdIiIyifxwbYbff/8VoijiwYMHOHXqBOrXfwSxsbHw8PDEs8/2wrPP9sLFixeQmJgIUdSiffsn\n8fLLoxEdHSVzF5WLM10iIjKJvLOX27d/EmfPnsbQoS9AEFR49dXx8PT0wp4932PjxrWwsbGBk5Mz\nZs16G7GxsXj99TcgSSIEQcDo0eNk7qJyCVIlrThv7esjKmmNR2vvQwk9AOzDkiihB0AZfSihB8D4\n9XS5e5mIiMhMGLpERERmwtAlIiIyE4YuERGRmTB0iYiIzIShS0REZCYMXSIismjHjx/DmTOn9M93\n7NiKn3/+0STbXrv2K5Nsx1gMXSIismjHjx/D6dP5odurV1907fqMSba9Zo15Q5dXpCIiogrbsGEN\n7O3t0bfvAHzwwVJcvnwJK1Z8gmPH/sGPP+7C7NnzDMZHRV3Ahx8ug0aTDWdnN7z55hx4eXlj8+ZN\n2LlzG2xsbBAcXBujR4/Fzp1boVbb4Ndf92DixNfx779/w8nJCQMHDsa4caNQp04ETp48gczMTMya\nNRdr136FK1cuo2PHzhg5cgwAYMaMqYiLu4fs7Cz07/8CevTohVWrViI7OwvDh0eidu0QzJ49D7/8\nsgebN2+CVqtB/foNMGXKdJOuE8zQJSJSGOe5s2Bv4qX9snr0QloJiyg0btwM3367Hn37DkBU1AXk\n5ORAq9Xi1KkTaNy4mcFYjUaD5csX4733liEsrBY2bdqGTz/9CDNmvIX167/Bli27YWNjg7S0VDg7\nu+C55/rqQxYA/v33b4Pt2dra4Ysv1mDz5k2YPn0KvvpqPVxcX
DFgQC8MGBAJNzc3zJw5B66ursjK\nysLIkUPQvn1HjB49Ftu2bcaXX64HAFy/fg2///4LVq36Emq1GkuXLsIvv+wx2awaYOgSEZEJRETU\nRVTUeaSnp8PW1hYREXVx/vw5nDx5HJMmTTMYGxNzHVeuXMakSa9BrVYhO1sDHx9fAEBYWDjmzn0T\n7dp1wBNPdDDqe7dt2w4AEBoahpCQUHh6egEAqlevgXv37sLNzQ3ffbcBBw7sAwDcu3cPN2/GoH79\nBgYrIv3779+4eDEKI0cOgSRJyM7OhpeXV0X/aAwwdImIFCZt7rslzkorg42NDfz9A/Djj7vQsGFj\nhIWF4/jxf3H79i0EBQUXGi0hJCQUn3zyZZFrL7///gqcOPEfDh7cjzVrvsSaNd+W+r1tbe0A6BZc\nsLW11b8uCAK0Wi2OHz+G//77F5999jXs7OwwbtwoZGdnF7MlCd26dceoUa+V40/AODyRioiITKJx\n46bYuHEdmjRphkaNmmDHjq0ID69TZFxgYDAePEjEmTOnAeh2N1+9egUAcPduLJo2bY4xY8YhLS0N\nGRnpcHJyQlpaWrnrSktLhaurK+zs7HD9+jWcPXtG/56trS20Wi0AoHnzR7F37+948OABACA5ORmx\nsbHl/r7F4UyXiIhMonHjpli79is0aNAQ9vYOsLe3L3I8F9DNit99dxGWL38fy5cvQnZ2Dp5//gXU\nqhWId96ZnRuwEvr3HwhnZxe0adMOs2a9gb/+2o+JE183OLGppJOc8t5r1ao1duzYisGDn0dgYBAa\nNGioH9OzZ2+89NJARETUxezZ8/Dyy2MwefJrEEUJtra2mDx5Gvz9/U32Z8Sl/R5CSctNWXsfSugB\nYB+WRAk9AMroQwk9AFzaj4iIyOIwdImIiMyEoUtERGQmDF0iIiIzYegSERGZCUOXiIjITBi6RERk\ndt99txFZWVlyl2F2DF0iIjK7zZs3Iisrs9j3RFE0czXmw9AlIqIK27BhDbZu1V0n+YMPlmLCBN2S\neseO/YN582YbjN2yZRPi4+MwbtxovPTSSwCAzp3bYeXK5Rg2bBDOnDmF/v17Ijk5CQBw4cJ5jBs3\nCgCQmZmJhQvfwciRL2H48ME4eHC/uVo0CV4GkohIgbyaNyj29YRjZ4p9vazjCyvL0n79+g3Et99u\nxIcfforQ0BqIi0tBZmYGGjRoiLFjJ+aOMry8Y94lHb/5ZjWaN38UM2a8hdTUVIwcOQQtWz4Ke3sH\no+qUG0OXiIgqrCxL++lIuTcdtVqN9u07Fnq/qH/+OYpDhw5g48Y1AHSLJdy9G4vAwGCT9VKZGLpE\nRApk7Ay1vOMLK9vSfkXZ2dkbLF6gVqshirrgzc7OP+FKkiS8++5i1KoVWKF65cJjukREZBLGLu0H\nAE5OzgbL9RVeeycgoDqios4DAPbt+0P/+qOPPoYtWzbpn0dHR5myhUpn1Ey3Y8eOcHFxgUqlgo2N\nDbZs2VLZdRERkZUxdmk/AOjZsxemTh2PgAB/LFmyssgSfUOHjsR7770DFxcXNG3avMDrL+ODD5bi\npZcGAgD8/QOwaNH/Kq8pEzNqab9OnTph27ZtcHd3N2qjFy9ehKdnQIWLk5OSlpuy9j6U0APAPiyJ\nEnoAlNGHEnoATLy0nyRJZfrc1IABA5CTk2P0eCIioqrAqNAVBAEjRoxA37598d1335U6/sSJE/jw\nQ+uZ7hMREZmDUcd0N23aBF9fXyQkJGDYsGEICQlBixYtHjq+Ro0aWLp0Ebp164769R8xWbFERETW\nzKhjugWtXLkSzs7OGDZs2EPH/PDDD3j22WfRvHlzHDlyBDY2/GQSERFRqWmYkZEBURTh7OyM9PR0\nHDx4EGPHji3xa7p3747nn38B3323EXPnvosJE6aYrGBzUdLBfWvvQwk9AOzDkiihB0AZfSihB8D4\nE6lKDd34+HiMHTsWgiBAq
9WiR48eaNu2bakbfvfd97Bv3594//2F6NatOyIi6hpVEBERkVKVeiJV\nrVq1sHPnTuzYsQO7d+/GK6+8YtSGPTw88f77y5GdnY0JE8ZAo9FUuFgiIrJMsbF3MGTIAJNuMzr6\nIg4f/kv//ODB/Vi//huTbFuupQUr9YpU3bo9g759n8d//x3DqlUfVea3IiIimRW+wEVFXbp0EUeO\n5Idu27btEBn5kkm2XdLSgpWp0s9wmj9/Efbv34tFi95F165PP/SSYEREZN00Gg3eeWc2Ll68gNq1\nQzFr1tuwt7c3GHPr1k0sW7YYSUmJcHBwwHvvLYCLiw/++OM3fP3151Cr1XB2dsHy5R/jiy9WITs7\nG6dPn8TgwcOQlZWJCxfOYdKkaViw4G3Y2dkjOjoKiYkPMGPGW9iz53ucPXsa9es3wMyZcwAAS5a8\nh6ioc8jKykKHDp0wfPgrBksLenh4YMWKT/D330fw5ZefIScnBzVq1MTMmXPg4GD6lYsqPXS9vLyx\nePH/MGxYJCZMeBW7d/8MtVpd2d+WiKjKmjvXHrt3m/af9x49NJg7t+TdsTEx1zFjxhw0aNAQCxe+\ng+3bN2PgwMEGYxYvXoBp02aiRo2aOHfuDObOnYslS1bim2++wLJlH8HHxwdpaamwsbHByy+PRlTU\neUyc+DoAYM+e7w1m06mpKfj0069w8OA+vPHGJKxa9RVq1w7BiBEv4tKlaISFhWPUqNfg6uoKURQx\nYcIYXLlyyWBpQTc3NyQlJWLNmi+xYsXHsLd3wPr132DTpnUYOvRlk/4ZAmZaZah79x7o1asPduzY\nhs8//wSjR5d89jMREVkfPz9/NGjQEADQtesz2LLlW4PQzcjIwJkzJzF79hsFFjjQ3Tds2Bjz589B\nx46d0b79k0Z9vzZtngAAhISEwcvLG7VrhwAAatcOQWzsbYSFheP333/Grl07oNVqkZBwH1evXkVI\nSBgKLi149uwZXLt2BWPGjIAkSdBoNGjQoFHF/0CKYbYP0C5YsAQHD+7HggXvoEuXbrlNExGRqc2d\nm1XqrLQyFD6mW/gQrySJcHV1w5dfrte/lveRoalTZ+D8+bM4dOggRox4EatXryv1+9nZ2QEAVCqV\n/nHec61Wizt3bmPTpvVYvXotnJ1dsGDB2wbLBObXJaFly8cwZ867ZWm3XMy2tJ+Pjw/ee28pMjMz\nMWHCa2W6ljMREVm+2Ng7OHtWty7vr7/+jEaNmhi87+TkjICA6vjzz9/0r124cAGA7lhvvXqPYMSI\nUfDw8MS9e3fh5ORksPxfSYq7zlNaWhocHR3h5OSMhIT7OHLkkEEtedt+5JGGOH36JG7dugkAyMrK\nxI0bMWXo3HhmvVRUz5690aPHduzevQOrV3+KkSPHmPPbExFRJQoKCsa2bd9h4cK3ERwcgl69+hUZ\nM2fOu3j//YX45psvodVq0LNnD/Tv/yI+/ngFbt68AQBo3rwlwsLCUa2aH9at+xrDh0di8OCHXwUR\nKP7M6bCwcISHRyAysh+qVfNDo0aN9e/lLS3o4+OLFSs+wcyZczB37kxkZ+dAEASMHDkGtWoFVvBP\npJg6y3oZSGM97AojcXFxeOKJlsjMzMSffx7S74O3NEq6Soq196GEHgD2YUmU0AOgjD6U0ANg4qX9\nTMnX1xcLFy5Beno6Jk0ay93MRERUZZg9dAGgV6++ePrpZ3Ho0EF8/fVqOUogIiIyO1lCVxAELF78\nP3h4eOCdd97C9evX5CiDiIjIrGQJXQDw8/PD/PmLkZ6ehsmTxxV75hkREZGSyBa6ANCv3wB06dIN\nBw7sw5o1X8lZChERUaWTNXQFQcCSJSvg7u6BuXNnVdrnooiIiCyBrKELAP7+AZg3byHS0lK5m5mI\nyEoZu7Tfnj3f4/79eDNUZJlkD10AGDBgEDp16ox9+/7Ehg1r5S6HiIjKwZil/X78cTfi4uK
Kfa8q\nfITUIkJXEAQsXfoBXF3d8NZbM3H79i25SyIiojLKW9pv8OD+mD17epFF4vfu/R0XLpzHvHmzMXx4\nJLKystCxY0d88smHGDHiRfz5528YN24UoqJ0l4ZMSkpE//49AegC+eOPV2DkyJcwdOgg7Nq13ez9\nmYJFhC4AVK9eA++8swApKcmYMmU8dzMTEVVA8+bOxd5MNb44MTHX0afP81i3bjOcnJywfftmg/c7\ndOiEevXqY86cd/Hll+v1a+26u3tg9eq16NSpSzFb1c2ev/9+J1xcXPH559/g88+/wa5d2xEbe6dM\n9VkCiwldABg06EV06NARv//+K779doPc5RARURkUXtrv1KmTRcZIkoTCc6pOnTqXuu2//z6Cn376\nAcOGDcIrr7yE5OQkqzz51qwLHpRGEAQsW/Yh2rV7DLNnz0CHDh3h7x8gd1lERFbn2DHjVucp7/ji\nlLa038M4OjrqH6vVakiS7thudnZ2gVESJk16HS1bPlbRMmVlUTNdAKhZsxbmzJmHpKRETJ06gbuZ\niYisRGlL+wGAs7Mz0tJSH7qNgIAauHDhHAAYLAH46KOPY9u2LdBoNACAGzdikJWVacryzcLiQhcA\nhgwZhieeaI9ffvkJW7Z8K3c5RERkhLyl/QYP7o+UlORil/Z7+ulnsWTJQv2JVIVnxy+8EInt27di\n+PDBSE5O1r/eo0cvBAfXxogRgzFkyAAsWbIQWq220nsyNbMv7WesmJjraNfuMdjZ2eLAgX/g5+dn\nosqMo6Tlpqy9DyX0ALAPS6KEHgBl9KGEHgALXtrPWIGBQZg9+20kJiZi2rRJ3M1MRERWz2JDFwCG\nDXsZrVu3xZ4932PHjq1yl0NERFQhFh26KpUK//vfSjg5OWHGjKm4d++e3CURERGVm0WHLgDUrh2C\nN9+cg4SEBMyYMVXucoiIiMrN4kMXAEaMGIVWrR7H7t07rPbSX0RERFYRuiqVCitWfAQHBwdMnz4F\n8fFVd4UKIiKyXlYRugAQEhKGGTPeQnx8PGbO5G5mIiKyPlYTugDwyitj0KLFo9ixYxt++GG33OUQ\nERGViVWFrlqtxooVH8Pe3h7Tpk1CQsJ9uUsiIiIymlWFLgCEh9fBG2/MQlzcPbz55htyl0NERGQ0\nqwtdABgzZiyaNWuOrVu/w08//Sh3OUREREaxytDV7Wb+BHZ2dnj99YlITHwgd0lERESlssrQBYCI\niLp4/fUZuHs3FrNnz5C7HCIiolJZbegCwGuvTUDjxk3x7bcb8OuvP8ldDhERUYmsOnRtbGzwwQef\nwNbWFlOnTkRSUqLcJRERET2UVYcuANSrVx+TJ0/DnTu3MWfOm3KXQ0RE9FBWH7oAMH78ZDRo0Agb\nNqzFH3/8Jnc5RERExVJE6Nra2uKDDz6BjY0NJk8eh5SUZLlLIiIiKkIRoQsADRo0xMSJU3H79i3M\nnTtb7nKIiIiKUEzoAsDEiVNRv34DrF37Ffbt+1PucoiIiAwYHbqiKKJ3794YPXp0ZdZTIXZ2dvjg\ng4+hVqsxefI4pKamyF0SERGRntGhu2bNGoSGhlZmLSbRqFETjB8/CTduxGDevDlyl0NERKRnVOjG\nxsZi37596N+/f2XXYxKTJ7+BunXr4auvvsDBg/vlLoeIiAiAkaG7YMECTJs2DYIgVHY9JmFvb48V\nKz6GSqXCxIljkZaWJndJREREsCltwN69e+Hj44N69erh6NGjRm/Y19e1QoVVVJcuHTBt2jS89957\nWLZsAT744IMyb0PuHkxFCX0ooQeAfVgSJfQAKKMPJfRgLEGSJKmkAcuWLcOuXbugVquRlZWFtLQ0\ndO7cGYsXLy5xw3Fx8p/ElJmZiaeeegIXL0Zh5849ePzxNkZ/ra+vq0X0UFFK6EMJPQDsw5IooQdA\nGX0ooQfA+P84lLp7efLkydi7dy9+//13LFu2DK1atSo
1cC2Fg4MDli//CCqVChMmvIr09HS5SyIi\noipMUZ/TLU6LFo9i9OixuHbtKhYunCd3OUREVIWVKXQfffRRrFq1qrJqqTRvvPEmQkPD8NlnH+Po\n0SNyl0NERFWU4me6AODo6Ijlyz8GAEyc+CoyMjJkroiIiKqiKhG6ANCq1WN45ZUxuHz5EhYtmi93\nOUREVAVVmdAFgBkz3kJwcG2sWrUS//77t9zlEBFRFVOlQtfJyQkrVnwMURQxYcKryMzMlLskIiKq\nQqpU6ALA44+3wcsvj0J09EUsWfKe3OUQEVEVUuVCFwDefHMuAgODsXLlchw/fkzucoiIqIqokqHr\n7OyM5ctX6nczZ2VlyV0SERFVAVUydAGgbdt2GDp0BC5cOI///c86rrBFRETWrcqGLgC89dY7qFUr\nECtWLMOpUyfkLoeIiBSuSoeui4srli37EFqtFuPHv4rs7Gy5SyIiIgWr0qELAO3bP4kXXxyGc+fO\nYPnyJXKXQ0REClblQxcA5s6dhxo1amL58iU4c+a03OUQEZFCMXQBuLq6YenSD6DRaDB+/Bjk5OTI\nXRIRESkQQzdXx45PYdCgF3HmzCl8+OH/5C6HiIgUiKFbwNtvz4e/fwCWLl2E06e5m5mIiEyLoVuA\nu7sHli5dgZycHAwdOhSpqalyl0RERArC0C2kc+duiIwcgv/++w8DBvRGcnKS3CUREZFCMHSL8f77\nyzFo0CD8889R9O3bEwkJ9+UuiYiIFIChWwwbGxusWbMGgwa9iJMnj6N372cRFxcnd1lERGTlGLoP\noVarsWzZhxg+fCTOnz+LXr2exp07t+Uui4iIrBhDtwQqlQoLFy7Bq6+OR3T0RfTs2Q03bsTIXRYR\nEVkphm4pBEHAnDnzMGXKG7h+/Rp69uyGK1cuy10WERFZIYauEQRBwBtvvIlZs+bi1q2beO65pxEV\ndUHusoiIyMowdMtg/PjJmD9/Ee7ejUWvXk/j9OlTcpdERERWhKFbRiNHjsGSJSuQkJCAPn2exfHj\nx+QuiYiIrARDtxyGDBmGDz9chZSUZPTt2xNHjhyWuyQiIrICDN1yev75F/DZZ18hMzMDAwf2xoED\n++QuiYiILBxDtwJ69uyNr75aD41Gg0GD+uG3336WuyQiIrJgDN0K6tr1aaxb9x1UKhVeemkQfvhh\nt9wlERGRhWLomkCHDh2xceNW2NnZ4+WXh2Dbts1yl0RERBaIoWsirVu3xebNO+Ds7IIxY17Ghg1r\n5S6JiIgsDEPXhFq0eBTbtu2Gp6cnJk58DatXfyZ3SUREZEEYuibWqFETbN/+I3x9q2HGjKn4+OMP\n5S6JiIgsBEO3EtSrVx87d+5BQEB1zJ37JpYuXQRJkuQui4iIZMbQrSRhYeHYuXMPAgODsGjRfCxY\n8A6Dl4ioimPoVqLg4NrYuXMPQkJCsWLFUsyePZ3BS0RUhTF0K1mNGjWxc+dPqFu3Hj777BNMnToR\noijKXRYREcmAoWsGfn5+2L79RzRo0Ahr136FceNGQ6PRyF0WERGZGUPXTLy9vbFt2240b94Cmzdv\nwujRI5CTkyN3WUREZEYMXTPy8PDE5s078fjjbbBr13YMHz4YmZmZcpdFRERmwtA1MxcXV2zcuBXt\n2z+Jn3/egyFDBiI9PV3usoiIyAwYujJwcnLC2rXfokuXbti79w8MGtQPqakpcpdFRESVrNTQzc7O\nRv/+/dGrVy/06NEDK1euNEddiufg4IAvv1yHHj164dChg+jfvxeSkhLlLouIiCqRTWkD7OzssGbN\nGjg6OkKr1eKFF15Au3bt0KhRI3PUp2h2dnb49NMvYW9vjy1bvkWfPj3w3Xc74O3tLXdpRERUCYza\nvezo6AhAN+vlR11My8bGBitXfooXXxyK06dPok+f7rh7967cZRERUSUodaYLAKIook+fPoiJiUFk\nZGTps9zgYHiJRa+
8lHDsTLHDvZo3KPZ1WcerhCI9VGY9XwFwGDkan3++Cr16PY2tW3ejevUaFd9+\ngT6s6s+/oNweLKaeco5HzHWLqofjOd4SxisiL4CH/v0uzKjQValU2LFjB1JTU/Hqq6/i0qVLCAsL\nK/Fr1CqhyGu+vq4P+QZFx1rC+MI9VHY9n376Mby83LFo0SL07v0M/vjjDwQHB1d4+3l9yP3nWZHx\napVgUfWUZ/xDv8ZK6i843uBrLaCe8ozXP7eQeso7vrh/a+Wsp8zjoYy8MJYglfFiwCtXroSzszOG\nDRtW4ri4OOs+G9fX11WWHiRJwtKli7B48QJUr14D27btRkhIyf/BKYlcfZiSEnoA2IclUUIPgDL6\nsPgeRBHIzISQmQEh9x4ZmRCyMiFkZgKZGRAyMuE+dJBRmyt1ppuQkABbW1u4uroiMzMThw8fxiuv\nvFLhPqh4giBg6tTpcHBwxDvvzEbPnk9jy5ZdqFu3ntylERHJy8gAzHtfN7bg89z3szLzt5M7Hpm5\n28nIHZf3fna2cbWZKnTj4uIwffp0iKIIURTxzDPPoH379sYVQeU2duwEODo6YMaM19G79zP47rsd\naNiwsdxlEREVJUlAdjaE9DQI6em5tzT9PdLTIaQ95D1JA9fEFMMAzMrSPy9XAJa1fEEAHB0hOThA\ncnCE5OICyccXkoM9JAdHIO91BwdIjo6Avb3hcwcHuBj5vUoN3YiICGzfvr2CLVF5jBgxCg4Ojpg8\neRz69OmBTZu2onnzlnKXRUTWSJKAjIwioWd4nw4UfC2taEgWHZf7ulZb7tIcCpZZXAB6+0BydCga\ngA4ORQPRwQGSfe57jo4FxjoCDvaGz/O2aWsLCGU7NluYyUKX5BUZOQQODg4YO3YU+vV7Dhs2bMbj\nj7eRuywiqkyiqAuylBQIqakQUpINH6emQJWaCkg5cI5/UCQQ9Y/T8h8jIx2CCdbzllQqSE7OkJyc\nACcniN4+kJyc9K9JTk6QnAs8dnIGDN43fM+rpi/i00VdANo7AHZ2FQ5AS8bQtQJ9+z4POzt7jB49\nHAMH9sGaNZvQvv2TcpdFRAVJku64YEpKbiimFB+aqbrHKv17KbrX8h6npEBISzU6IJ2KK8XWVh9u\nors7pIDqucH38PArGJYlhSTs7U0bir6ukCz5RCoTY+haiR49noODw3oMH/4iBg9+HqtXr0GXLk/L\nXRaR9cvJyZ095oeeKi0lPwALhmZaam5gFgzRlPyvL+fFgyQ7O0iurpBcXCEGBUN0dc197gLJxS3/\nsasrJFfc0+i4AAAgAElEQVQ3iC4ukFxc4FHTDwlZAJxzw9HRUReMtram/TMik2HoWpHOnbth/frN\nGDJkIIYOjcSnn36JHj16yV0WkbxEEUJyEoTERKiSEiEkJkJISoQqMbH415ISgdRkeCcl6YKynMtr\nSmo1JBddOIoB1SE560JRdHXLD0gXV/2Y/OB0g+iS/1hycdHNHsvD1xXaKjRLVAKGrpVp164DNm3a\nhkGD+mPkyKH48MNV6N9/oNxlEVWMkcGpSnxQJECF5KQyHauUnJwAd3eInl6QagUWmUnqQzMvLAuG\npqsrRGfdPRwdFX3skSoHQ9cKPfZYa2zZshMDBvTB2LGjkJWVhcGDX5K7LKrqSgzOB/qQzAvSigan\n6O4BsXp1iPXqQ/LwgOTuAbHQveThAdHDE5KHJ0R3D0ju7oC9PXx9XfGAM0SSAUPXSjVr1gLbtn2P\n559/DpMnj0NmZgZefnm03GWRUmRkQBUfB9X9eKjux0OIj4fq/n2o7scDmalwi40zTXB6eEKsXgNi\n/UfyQ1Iflh6FXvPUvwc7u0psnqjyMHStWMOGjbBjxx707dsDM2dOQ0ZGJsaNmyh3WWSJ0tL0AaoP\n0fgCz+/H54bsfaji43UXLShB3hFIyckZoocHg5PISAxdKxcRURe7du1B3749MW/eW
8jISMfrr8+A\nwGNNyiVJhiEaHwchNyzzn+cFqm52KqSnl75Ze3uI3j7QhIVD8vaG6O2ju/n4QPLxzX3uDc/QWojX\n2up21TI4icqEoasAISFh2LlTN+NdsuQ9ZGZmYvbstxm81kKSdB9FiYszDMr4eMNdvPfv658bc8at\n5OCgC9HwiEIh6gvJx0cfonnPJWcX404MqmKfqyQyJYauQgQGBmHXrp/Qt28PrFy5HBkZ6Zg/f7Hc\nZVVdkqQ7eSg2Fqo7t6G6GwukJcL5+q1Cx0lzH2dllb5JR0eIPr7Q1K2nuwpQboDqZ6O5AZoXrnB2\n5tm1RBaGoasgAQHVsWPHHvTv/xxWr/4MWVlZ+Prr1XKXpTzp6VDF3oH6bm6g6oP1DtR37kAVeweq\nu7HFzkYLXj1IcnKG6OMDTf1HdCFaIDAfGqJEZNUYugpTrVo1bN/+PQYM6IN1677BvXt3MG/eYtSu\nHSJ3aZZPo4Hq3l1daBYIT/Wd27rHsXd0AZuU+NBNSCoVRN9qutmof4D+pg2oDrewIDywdc4PUafi\nLuBHRErG0FUgLy9vbN26C6+8Mgy//PIL9u/fjwkTpmDs2ImwL++Vb6yZJEF4kKAL0oKz0dhYqGIL\nzFTj7pX4kRfRwwNiQAA0TZvlBmkARL8AiAHVIfr76+59fAGbh/y18nWFhsdCiao0hq5Cubm5Y+PG\nrfjzzz2YMGEiFi2ajy1bvsWiRcvQrl0HucsznbQ0qO8WmJnmBqsqNm+GGgvV3TslHjOVHBwg+gcg\np9XjEIsJUq2fP0T/AN0ViIiIKoChq2CCIGDAgAFo0aINFi2aj9WrP0O/fj3Rp08/vP32Qvj5+cld\n4sPlnoikvhEDJMXB4eKV/BlqgWBVJSc9fBMqFUQ/f90xU78AXaDm7uoV/fz1wSq5e/CEIyIyC4Zu\nFeDm5o758xdjwIBBmDZtErZt24Jff/0FM2fOxtChL0OtVstTWFoa1DdioI65BlXMdaivX4c6RndT\nxVyHKiVZP9S10JeKnp4Qa9SEpnkLaP0Dip2hij6+gFy9EREVg6FbhTRq1AQ//PAb1q79GvPnv40Z\nM17Hpk0b8P77/0OTJs1M/w2zs6G6dVMfpLowvaZ7fP06VPFxxX6Z5OQEbWAQcgJbQxsYBKd6dZDs\n6gWtf26g+gcADg6mr5eIqJIxdKsYtVqNoUNH4JlneuDtt2dh8+ZN6Nr1SQwdOgIzZ74Fd3cP4zcm\nirqPzsRch+r6NYNZqjrmOlR3bkMQxSJfJtnaQluzFjSPNIA2MAjawCCIuffawGBIPj4Gu3udfF2R\nxROQiEgBGLpVVLVq1fDRR59h0KAXMW3aJHz11Rf4/vtdePvt+ejb93nd1awkCUJCAtS5s1OVfvdv\n7u7gmzcgZGcX2bYkCBADqiPn0ccKhGkQxKBg3b1/AHf7ElGVxNCt4to2boIDH32OXz/7GCd3bkPW\nqyNxcdZ0NPX0hGNsLFRpqcV+nejjkztTDS4UrEHQ1qhV/kW5iYgUjKGrdFlZUF+OLjBLzdv9mzt7\nTUgAAAzOvQEAEu4jOeE+Yn184dGmLVA7JDdYdTNVba1AwMVFro6IiKwWQ1cJJAlCfDxsoqOgvhgF\ndXQUbC5GQX0pGrh9C17FXPBBsreHtlYgNI2b5odpkC5Qf4m+iCnz38btO7cReOEC3hs6Ak891VWG\nxoiIlIWha01EEapbN2Fz8QLUFy/mh2t0FFQPHhQZrq1eA2jfHhkBNQ1OVBKDgiBW8wNUqmK/Taem\nzXHwmR5YunQRPv30Iwwa1B/du/fEu+++hxo1alZ2l0REisXQtUQ5OVBfvQL1xagCs9eLsLl0sci6\nqJJKBW1wbeS0ehza8Aho6kRAWycC2vA6kFxc4evritRynPnr4uKCOXPm4fnnX8C0aZPwww+78Oef\nv2PatJkYOXI0bG1tTdUtEVGVwdCVU1oabC5dz
A/V3Fmr+uoVCBqNwVDJwQHa0HBo6tTJD9fwCGhD\nQiv1pKV69epj5849+PbbDXj77VmYO/dNfPvtBixe/D+0avVYpX1fIiIlYuiagXD/ftHjrdEXob55\no8hY0d0DmibN8kO1Th1owiMg1gqU7WM2KpUKL7wwGF27Po13352Ldeu+QY8eXRAZOQSzZ78NLy9v\nWeoiIrI2DF1TkSSobt8qsEv4ItQXL8AmOgqq+/eLDNf6+SP7iQ76UNXWiYAmPAJStWoWex1gLy9v\nLFv2IQYMiMS0aZOwfv0a7NnzPd56ax4GDoyE6iHHiImISIehW1YaDdTXrhaatUZBHR1d5DOtkkoF\nMTAIWc1bFtglXAfaOhGQ3NxlaqDiWrV6DL/9th9ffPEpFi2aj4kTX8OGDWuxePH/UL/+I3KXR0Rk\nsRi6D5OeDpvTJwuEq+5sYfWVyxBycgyGSnZ20IaGI7tAqGrCI6ANDVPsNYJtbW0xZsxYPPdcb8ya\nNR3ff78TnTq1xahRr2Hq1Olw4ed4iYiKYOgCEFKSYXPqJGxOnoDNqeOwOXkCuHIZnoU+3yq6uELT\nsBG0deoW2CVcB2JQcJW9rGH16jXw5Zdr8dtvP2P69Nfx8ccfYMeOrZg/fzGeeeZZ3eUkiYgIQBUM\nXSE5CTanT+kC9uR/uvsrlw3GiG7uQLt2yKgdVuCEpgjdNYMZIsV66qmuOHCgHVasWIIPP1yOYcMi\n0blzVyxY8D6CgoLlLo+IyCIoOnSF5KQiM9giAevugewn2kPTqAk0TZoip1ETiMG14VvNrVyfb63K\nHB0dMX36bPTtOwBvvDEZv/76Mw4e3I9Jk17Hq6+Oh52dndwlEhHJSjGhW6aAbdwUmsZN9AHL2atp\nhYfXwdatu7F163eYM+dNLFjwDjZv3oRFi5ahbdt2cpdHRCQbqwzdIgF74jhsrl4xGKML2A7QNG7C\ngJWBIAjo128AOnfuioUL5+Grr75Anz7Pol+/AZg7dz6qVasmd4lERGZn8aGrD9gTx/NnsKUFbOOm\nupObGLCyc3f3wHvvLcWAAYMwbdpkbNnyLX755Se8+eYcDBkyDOoqegIaEVVNFhW6QlJi0V3EJQRs\nTpOm0DRqwoC1Ak2bNsdPP/2Br79ejQUL3sEbb0zGpk3r8P77y9GoURO5yyMiMgvZQteogPXwQHa7\nJ3Nnr00YsFZOrVZjxIhX8OyzPTFnzkxs27YFXbp0wPDhIzF9+iy4WfEFQ4iIjGGW0DUI2JPHYXvy\nONTXrhqMYcBWHX5+/li16ku88MKLmD59Cr744lPs2rUD8+YtRK9effnZXiJSrMoJ3T/+gOPev2Bz\n6oRxAdu4KcTAIAZsFdO+/ZPYu/cwVq5cjuXLl2DUqOFYv34tFi1agtDQcLnLIyIyucoJ3U6dkHcR\nQIOAzTsGy4ClXPb29pgy5Q306dMfM2ZMxR9//Ib27R/HuHGTMGHCFDgo9DKaRFQ1VU7oTp+OpPD6\nDFgyWu3aIdi4cSu+/34XZs16A0uXLsLWrd/lnvncW+7yiIhMotS12GJjYzFkyBA888wz6NGjB9as\nWVP6VhcuRHaPXjwmS2UiCAJ69HgOf/31D0aNeg03bsRg4MA+6NevH44d+wdSoWthExFZm1JDV61W\nY8aMGfjxxx+xadMmrF+/HpcvXy7ty4jKzcXFFfPmLcSvv+5HixaPYuvWrXj66U7o0OFxfPbZx0hI\nKLo+MRGRNSg1dH19fVGvXj0AgLOzM0JDQ3Hv3r1KL4yoQYOG+P77X/DTTz+hZ8/euHQpGrNmTUej\nRhEYNWoY9u/fC1EU5S6TiMhoZTqme/PmTVy4cAGNGjWqrHqIDKhUKnTt2hXNmrVGfHw8Nm/ehHXr\nvsb27VuxfftWBAUFIzJyCAYOjIS/f4Dc5RIRlUiQjDxQlpaWhhdffBGvvvoqnnrqqRLHBgej2BnI\nsWNpxY5v3
ty52NflHK9SqYr0YE315ynYhyXUU57xeT3kjZckCX//fRTr13+DXbu2Iz39LADAwcER\nLi4ucHBwgCAIFlN/npgYFeKKWbnK0v/8C4/39XU16EPuesozvmAPllBPecf7+roiMLD4vT3WUD8A\ntGzpavV5Aej+fhvDqJmuRqPB+PHj8dxzz5UauHlUqqIF+Pq6PmRs8duQe3zhHuSup7zj8/qwlHrK\nM16lUhmMf/bZznj22c5ISkpCSIgKqakpyMzMQGZmBtRqNVxcXJCUdB9hYWEWUX9JX2MNf/6Fxxd8\nbAn1lGd83nNLqaf844v/AmupX/c11p8XxjJqpjtt2jR4enpixowZRm+4uP/RW5PC/5u3Vkrow9ge\nTp8+hQ0b1mDLlu+QlJQIAGjbth0iI4ege/eesn/mVwk/C0AZfSihB0AZfSihB6Dk/1QUVGpGHzt2\nDLt378aRI0fQq1cv9O7dG/v3769wgUSm1rBhIyxcuASnTkXh448/R5s2T+Dgwf0YM+ZlNGpUBzNn\nvo6zZ8/IXSYRVWFGH9MtK2v/n4uS/vdl7X1UpIcrVy5hw4Z12LhxHeLidGfdN2vWHJGRL6F3775w\ncTHuf6emoISfBaCMPpTQA6CMPpTQA2DCmS6RNQsJCcOsWXNx4sR5fPPNRnTp0g0nThzHlCnj0aBB\nHUyc+Br++ecoL7xBRGbB0KUqwdbWFk8/3R3r1n2H48fPYcaM2fDx8cWGDWvRvXtntGvXCqtWrcT9\n+7zwBhFVHoYuVTkBAdUxadLr+PvvE9i8eSd69eqDq1ev4K23ZqJRozoYOXIo9u79gxfeICKTk20R\neyK5qVQqtG//JNq3fxL379/Hli2bsH79GuzcuQ07d25DYGAQXnhhMF54YTCqV68hd7lEVMmys4H0\ndCA9XShwr3uclpb/WkZG0TEbNxr3PXgi1UMo6eC+tfdhzh4kScKxY/9g/fo12L59K9LT06BSqdCx\n41OIjHwJXbp0g62tbbm2rYSfBaCMPpTQA6CMPsrSgyiimMDLv8/IKDksC79XOEA1mvIv0GNsknKm\nS1SAIAho0eJRtGjxKObNW4gdO7Zh/fpv8Ntvv+C3336Br281DBgwCIMHD0FISNELbxCRjiQBaWlA\naqqAlBQBKSmGj9PSdI+1WiA+3v6hQVg4LE1BpZLg5AQ4OenuvbzEAs91rzk7S3B0zB9jeF/0NehX\nkS8ZZ7oPoYT/QQLK6MMSejh37iw2bFiDzZs34cGDBwCA1q3bIjJyCJ599jk4OjqWug1L6MMUlNCH\nEnoATN+HJAFZWXnhmB+SqanIDUvd4/zwzH+vuK+RpPKHpL19ySFX8N7R0XCMs/PDw9LREbC3N/2q\ns8Z+ZIih+xD8S2k5LKmHzMxM7NnzPdatW4MDB/YCANzc3NGv3/OIjHwJDRs+fDEQS+qjIpTQhxJ6\nAPL70GhQKAx1j4ubZRYOybzHea/n5JQvjezsJLi6SnBxAVxcdI9dXQFXVwnOzvmPdWN0z11cJNSs\n6YTs7LQiM0y12sR/WJWMoVtBSvtLac0stYdr165i48a12LhxPWJj7wAAGjduisjIIejTpx/c3NwN\nxltqH2WlhD4srQdJ0h2rTEwUkJgoICkp7x548CD/eeH309JUSE6Wyr3bVRAMw9DZuWAwokBA5j/P\nC1NdkOaHp719+Xq3tJ9FeTF0K0hJvwjW3oel96DRaPD7779i/fpv8OuvP0Or1cLR0RE9e/ZGZORL\naNXqMQiCYPF9GEsJfVRWD5mZKBSQMAjJwqFZ8P3sbOOD09ZWgru7BC8vFRwdtUVmjwXDMO/1ggGa\n956Tk+l3s5aVEn6fAIZuhSnpF8Ha+7CmHmJj7+Dbbzdg/fo1uHbtKgAgLCwckZEvYeTIobCzc5O5\nwoqzpp/Hw5TUQ04ODELz4YFZ9P3MTOMTTK2W4OEhwd0d8PCQ9Dd3d6nQ86L
v54Wl0n8W1oShW0FK\n+kWw9j6ssQdRFHHo0EGsW/cNfvhhF7KysgAAoaFhaN26rf4WEFBd5krLzlp+Hnlnz8bHC7h/X0B8\nvID4eBXu3xeQnm6PO3dy9KFZcBduWXbVCoIuFN3dJXh65gem4fOi73t46HbXVnSWaS0/i5IooQeA\noVthSvpFsPY+rL2HBw8SsG3bZhw48Cf27z+A1NT8XkJCQvUB3KbNE1YRwnL+PDIzDUM0Li7vsapQ\nuOoeZ2QYl2pubg+bZepC82GzUFfXsq+nakrW/ncDUEYPAEO3wpT0i2DtfSihB0DXx507D3DmzCn8\n9ddBHDp0AEeOHEZKSrJ+TO3aIWjT5gk8/ngbtGnzhEVeCcuUP4+cHCAhQReexYWmLlhV+sepqaWH\nqL29BB8fw5u3twQfH1H/PDTUCZKUCg8PCW5ugI2VXrFACX83lNADwNCtMCX9Ilh7H0roASi+D61W\naxDChw8fMgjh4ODaBiFco0ZNc5ddREk/D61Wd7Zt4QDNn5EWfE+FxMTSQ9TGJi808wPU17domOa9\n7uxc+m5bJf9OWRsl9AAwdCtMSb8I1t6HEnoAjOtDq9Xi7NnTBiGcnJykfz8oKNgghGvWrFXZZUOj\nAe7dE3DnjoDYWBUyMx1x7VpWkRlpfLyAhAQBolhy4qlUEry8Cs9Ciz7OC1N398q5kEFV+Z2ydEro\nAWDoVpiSfhGsvQ8l9ACUrw+tVotz587gr78O4NChgzh8+BCSkhL17wcGBqNNm/wTs2rVCizT9tPS\ngNhYAbdvq/SheueOgNu38x/fu1d6kHp46EKy5Bmp7ubpKcl+4YOq/DtlaZTQA8DQrTAl/SJYex9K\n6AEwTR+6ED6LQ4cO4K+/DuLw4b8KhXAQWrdui8cea4v69dtDrQ7MDVEVYmMF3LmjC1LdTYXk5IeH\nqZ2dBH9/CQEBIgICJAQESPD3FxEW5gBb23T4+OhC1ctLQjnXgJANf6cshxJ6AIwPXSs9fYCoalKr\n1ahTpxHc3BqjcePx6NVLwokT93DqVDyuXMnErVu22LTJD5s21QBg99DtuLtLqFFDRPPmulD195dQ\nvXr+44AA3ey0uN26vr4OiIvTVl6TRArG0CWyEJIEJCejwK5e3Wy04K7e2FjdCUiGaufedMdLfXyy\n4eAQj5yca0hMPI2srCsAbgG4CT8/EW3ahKB9+1Zo3botAgODIMh9SSKiKoShS2QGWi1w6xZw5oyq\nwK7egrt7da+VdGEGJyfdDLRuXU3uzFTM3eWrm6FWr67b3as7XuoKoCFE8RGcP38Ohw8fxF9/peDw\n4YPYtu0Atm37BgBQo0ZN/WeEW7dui6CgYIYwUSXiMd2HUNJxBmvvw1p6SEkBrl9X4do1Fa5fF3D9\nukr//ObNkldv8fExPG4aEKAL1bxdvQEBItzcKn4WryiKuHDhfG4IH8Thwwdx//59/fs1atTUnxnd\nunVbBAfXLhLC1vLzKIkSegCU0YcSegB4IlWFKekXwdr7sJQetFrdmb7Fher16wLu3y/+0kQ+PiKC\ngiSEhqrh5ZVtcGJSQIAIP7/yr9BSUaIoIirqAg4dOoBDh/7CoUMHDEK4evUaBiFcu3YIqlVzs4if\nR0VYyu9URSmhDyX0ADB0K0xJvwjW3oc5e0hNhT5M84JVF6oq3LhR/EowtrYSAgMlBAWJ+ltwcP5z\nFxfz91FekiQhKuoC/vrrAA4f1oVwfHy8/n1//wA0bdoEgYEhqFMnAuHhEQgPrwNvb28Zqy47a/hZ\nGEMJfSihB4BnLxMVSxR1s9W8UL12LT9Ur18v7iQlHW9vEQ0aFAxV3ew1KEg3a5X7c6emIggC6tat\nh7p162HEiFcgSRIuXozSh/CRI4ewZ8+eIl/n7e2tD+D8WwRq1qwFlZwXJyayMAxdUpz0dBjMVAvu\nAo6JUSErq+hs1cZGQq1aEho00BQJ1aA
g3fHUqkgQBERE1EVERF0MHz4SAGBjo8GRI/8hOvoiLl6M\nwqVLuvu//z6CI0cOGXy9o6MjQkPDUadOnQKhHIGQkFDYy7VPnUhGDF2yOpIE3L1reGy14Gz13r3i\nZ1aenhLq1Ss6Uw0K0p35a60XvTc3T09PtGjxKFq0eNTg9czMTFy9egXR0VEFwvgiLl+OxpkzpwzG\nqlQqBAUFo06dCISF1cndVa2bIbu7e5izHSKz4j8zZJEkSbcb+MIFFWJjgTNn7PWhGhOjKnbJNrVa\nQs2aEtq10+hDVXevu7m7y9BIFeLg4IB69eqjXr36Bq+LooibN2/khvFF/cw4OjoKP/+8Bz//bLi7\nulo1v9wwDjc4bhwQUJ0fZyKrx9AlWUmS7mL6Fy6oEBWlu124oMbFiyokJRX8B1Z3dSU3Nwnh4WKB\nMJX0M9caNThbtUQqlQqBgUEIDAxCp05dDN67f/8+oqOj9Luqo6OjcOlSNA4e3I+DB/cbjHV2dkF4\neDjCwyMMZsjBwbVha23XoaQqi/9EkdnExeWHa37Iqoss76ZWSwgJEfHEEyIiIkS0bGkPb+80BAWJ\n8OCeR0Xx9vaGt3drPPZYa4PX09PTcflydIEw1s2Qz507ixMnjhuMtbGxQe3aIQXCOFx/7+Ji3Bml\nRObC0CWTu39fKBSsulvhz7GqVBJq15bQurUGdevqAjYiQkRoqGjwuVVfX3vExYlm7oLk5OTkhIYN\nG6Nhw8YGr2s0GsTEXEN0dLTBSVzR0RcRHX0RP/6422B89eo1DM6mzpsh+/i4mLMdIj2GLpXbgwdA\nVJS60K5hVZGP3QiChKAgCS1b5hiEa1iYCAcHmYonq2RjY4OQkDCEhISha9en9a9LkoR79+4VOYkr\nOjoK+/b9iX37/jTYjouLC/z9A+DvHwA/P//cez+D1/z8/OHk5GTuFknhGLpUqqQk4MIFtUGwRkWp\nij1LODBQRJcuGkREaBERIaJuXV248t8uqkyCIMDPzw9+fn5o27adwXupqSkFPt6kmyHfvHkdt2/f\nxqVL0SVu193dA/7+/vDzC4C/v39uKPvnhnL+Y378iYzF0CW9lBTkBqraIFxjY4uGa61aIp56SpM7\na9Wibl0R4eEinJ1lKJyoBC4urmjatDmaNm2ufy3vKkhZWVm4d+8u7t6NRWxsLO7evYPY2FjExt5B\nbOyd3NfvICrqQonfw8vLq5hgDjAI6WrV/HjCFzF0q6LUVODixfwzhfPC9fbtouFao4aIjh01ubNW\n3ey1Tp38SxsSWTN7e3vUqhWIWrUCSxyXkZGBe/fuFgjm/HDOC+Zbt27i/PmzD92GIAjw9vbRB3HB\nXdsFw9nHxxc2PA1fsfiTVbD0dODff4HDh230s9eoKBVu3CgargEBIjp00Oh3CeftHnblyZ9EcHR0\nRFBQMIKCgkscl5aWhrt3Y/VBnB/M+Y+vXLlc5GIhBalUKvj6Vis0Yy46g7a2612TDkNXITQa3a7h\n48fVOH5chf/+081gRREAHPXjqlUT8cQT+WcL581eeeEIoopzdnZGSEgoQkJCSxyXmppisBu74K7t\n/F3a53Hy5PGHbsPGxgZeXl5wc3OHu7s73N09Ctx76J97eHjAza3ovVopFwy3MgxdKyRJQEyMgOPH\n1fjvP13InjqlNrhKk5OThJYttWjZ0gaBgZn62aunp4yFExEA3XHmsDBXhIWFP3SMJElITk4q9hjz\n3bt3ERt7B8nJiUhIeICYmOvIzs4uUw2urm7FhLW7QVjnv+ZpEOCOjo68Olg5MXStQEICcOJEXsDq\nQrbgx3JUKgl164po1kyLZs1ENG2qm73a2OSdMJIjY/VEVB6CIOhnrBERdYsdk3dCmCRJyMjIQHJy\nEhITE5GUlISkpAe597rniYmJ+vcL3sfEXEdKSnKZarOzs9PPmjnLLhuGroXJyABOn87bTawL2mvX\nDI/
BBgaKeO65HDRtqgvZhg21PGuYqAoTBAFOTk5wcnKCv39Amb9eq9UiOTnJIKSTkhILBHhigZth\nkF+/fg05OWX7j72rq5s+gH18vGBraw9HR139jo6OcHJy1t87ORV87lRgXP69s7Pu3hrCnKErI60W\niI5W4b//VPpZ7PnzKmg0+bttPD0ldOyoyQ1YLZo0EeHrK8lYNREpjVqthqenFzw9vcr8tWWdZRcM\n7piY6zh79rTJ+rC3ty8S2oXDOu/2sJA3DHvDcLezs6vwbnWGrplIEnD7tqA/Bnv8uBonTqiRlpb/\nA7S3l9CkiW43cdOmulvt2hJ46ISILFVFZ9ne3s6IibmH9PR0ZGSkF3ufd8vIyEB6elqh+4LjdK+l\npaUjOTkZsbGxyMhIhyia5jKyarW6SFjnzcT3799r1DZKDd2ZM2di79698Pb2xu7du0sbTrmSkqDf\nRZx3NnHBKzgJgoSICBFNm4r6WWzduiLs7GQsmojIzFQqFZydneFcScfIJElCVlZWgSDXBXZ6enEB\nXtmry4cAAAsRSURBVFyQFwx9w/vExESkp6eVafd6qaHbp08fvPjii5g2bVqFGleyrCzg7FmVwdnE\nly4ZHluoXl1E9+45aNpUN5Nt3FjLz8ASEVUyQRDg4OAABweHcu0+N4ZJQ7dFixa4detWhQpSElEE\nLl/WHYfNm8meOaNCTk7+PmBXV91C6rrdxLqZrL8/j8MSESlRWS7vyWO6pbh7V3ccNu9kpxMn1EhJ\nyQ9YW1sJDRqI+mOwzZrplqZTFb3oExERVXEM3QIkCTh/XoUDB9Q4fhw4csS5yPWIw8K06NYt/2Sn\nRx4xXPuViIjoYSotdH19reOA5Y0bwG+/6W6//w7cvZv/np+fCj17Ao8+CrRqBbRoAXh4qAGoAVjP\naiHW8rMoiRJ6ANiHJVFCD4Ay+lBCD8YyKnQlqezHI+PiUsr8NeaQlAQcPGiD/fvV2L/fBpcv589k\nq1UT0a+fFu3aadCzpyMcHVMMPq6TkwPExclQdAXkXbHGmimhB4B9WBIl9AAoow8l9AAY/x+HUkN3\nypQpOHr0KBITE9GhQweMGzcOffv2rXCB5pKVBfzzj1ofsidOqCCKuiR1dpbQpYsG7dpp0K6d7tKJ\neSHr62t9AUtERJat1NBdunSpOeowGVHUfXxn3z5dyB49mr8QgI2NbhGAdu10t2bNtOCa0kREZC6K\nOJHq+nUB+/frdhkfOKBGQkL+LuN69fJCVoPHH9dy8XUiIpKNVYZuQoLuuGzebPb69fyQrV5dxMCB\nOWjXToMnntDCz4+fjyUiIstgFaGbkQEcPZp/XPb0aRUkSbfL2M1NwjPP5Ohns6GhvFYxERFZJosM\nXa0WOHVKpd9l/PffamRl6ZLUzk5Cmzb5u4wbNdKtG0tERGTpLCKuJAm4elXAvn26kD140AZJSfnT\n1YYN80O2VSstnJxkLJaIiKicZAvde/cEHDyYv8v45s3847KBgSJ69tTtMm7TRgsfHx6XJSIi62e2\n0E1NBY4cUetns+fP56/C4+kp6UO2XTsNgoMZskREpDyVFro5OcDx4/nHZf/9Vw2NRrfL2MFBQvv2\nugtStG+vQYMGXCCAiIiUr1JCt2dP4M8/XZCaqgtZQZDQpImov/JTy5ZaODhUxncmIiKyXJUSurt3\nAyEhEvr10+0ybttWAw+PyvhORERE1qNSQvfaNcDJKa0yNk1ERGS1KuVIalBQZWyViIjIuvH0JSIi\nIjNh6BIREZkJQ5eIiMhMGLpERERmwtAlIiIyE4YuERGRmTB0iYiIzIShS0REZCYMXSIiIjNh6BIR\nEZkJQ5eIiMhMGLpERERmwtAlIiIyE4YuERGRmTB0iYiIzIShS0REZCYMXSIiIjNh6BIREZkJQ5eI\niMhMGLpERERmwtAlIiIyE4YuERGRmTB0iYiIzIShS0REZCYMXSIiI
jNh6BIREZkJQ5eIiMhMGLpE\nRERmwtAlIiIyE4YuERGRmRgVuvv370e3bt3QtWtXfPbZZ5VdExERkSKVGrqiKGLevHlYvXo1vv/+\ne/zwww+4fPmyOWojIiJSlFJD99SpUwgKCkKNGjVga2uL7t274/fffzdHbURERIpSaujevXsXAQEB\n+ud+fn64d+9epRZFRESkRKWGriRJ5qiDiIhI8WxKG+Dv74/bt2/rn9+9exfVqlUrdcO+vq4Vq8wC\nKKEHQBl9KKEHgH1YEiX0ACijDyX0YKxSZ7oNGzZETEwMbt26hezsbPzwww/o1KmTOWojIiJSlFJn\numq1GrNnz8bw4cMhSRL69euH0NBQc9RGRESkKILEg7ZERERmwStSERERmQlDl4iIyEwYukRERGZS\n6olUZbF//34sWLAAkiShb9++eOWVV0y5ebOYOXMm9u7dC29vb+zevVvucsolNjYW06ZNQ3x8PNRq\nNfr3748hQ4bIXVaZZWdnIzIyEjk5OdBqtejatSvGjh0rd1nlIooi+vbtCz8/P6xatUrucsqlY8eO\ncHFxgUqlgo2NDbZs2SJ3SeWSkpKCN998E9HR0VCpVFiwYAEaN24sd1lGu3r1KiZNmgRBECBJEm7c\nuIEJEyZY5d/xr7/+Glu2bIEgCKhTpw4W/r+9u3mJag8DOP6dHKRQexElCyzIjCySFr1AEyamSTXV\nxGCLNiVRbdIow14oghYJLfoHWkREEBEaRG1EszGmQiuGYIgwIhhMKkRT5yXPnOcu4l64G+89x7nz\na7rPZz1n+A6HmYcznHmmo4P8/HzTWY7cunXrr/fCv/qslQxJp9NSX18vsVhMfvz4IXv37pWhoaFM\nPX3WDAwMSDQaFb/fbzrFtS9fvkg0GhURkcnJSdmxY0dOngsRkXg8LiIilmVJU1OTRCIRw0Xu3Lx5\nU9ra2uT48eOmU1yrq6uTsbEx0xmzdvbsWbl//76IiExPT8vExIThIvfS6bT4fD4ZHh42neLYyMiI\n1NXVSSqVEhGRkydPSldXl+EqZ96/fy9+v19SqZRYliWHDx+WT58+zXhMxr5e/l12NG/YsIH58+eb\nzpiV0tJSqqqqACgoKKCioiJnV3fOmzcP+HnVa1mW4Rp3RkZGePr0KU1NTaZTZkVEsG3bdMasTE5O\nMjg4SDAYBMDr9VJYWGi4yr1wOMyyZcv+tqo3l9i2TSKRwLIsksnkv1q89Cv58OED69evJz8/n7y8\nPDZu3Eh3d/eMx2Rs6OqO5l9TLBbj3bt3VFdXm05xxbZtAoEAPp8Pn8+Xk6/j6tWrtLe34/F4TKfM\nisfj4ciRIwSDQe7du2c6x5VYLMaiRYs4f/48+/fv59KlSySTSdNZrj1+/Jjdu3ebznBl8eLFNDc3\nU1tbS01NDUVFRWzZssV0liOVlZUMDAwwPj5OIpEgFArx+fPnGY/J2NAV/bnvL2dqaorW1lYuXLhA\nQUGB6RxX5syZw4MHDwiFQkQiEYaGhkwnOdLX10dJSQlVVVU5/x65e/cunZ2d3Lhxgzt37jA4OGg6\nyTHLsohGoxw8eJCuri7mzp2bs/8RPj09TW9vLzt37jSd4sr379/p6enhyZMn9Pf3E4/Hc+4+moqK\nCo4ePUpzczPHjh1j9erVeL0z3yqVsaHrdkez+m9YlkVrayv79u2jvr7edM6sFRYWsmnTJvr7+02n\nOPL69Wt6e3vZvn07bW1tvHz5kvb2dtNZrpSWlgJQXFxMQ0MDb9++NVzkXFlZGWVlZaxbtw6AxsZG\notGo4Sp3QqEQa9eupbi42HSKK+FwmPLychYuXEheXh4NDQ28efPGdJZjwWCQzs5Obt++zYIFC1i+\nfPmMj8/Y0P2ddjTn+hUJ/LwLe+XKlRw6dMh0imujo6NMTEwAkEwmef78OStWrDBc5czp06fp6+uj\np6eH69evs3nzZq5du2Y6y7FEI
sHU1BQA8XicZ8+eUVlZabjKuZKSEpYsWcLHjx8BePHiRc6utX30\n6BF+v990hmtLly4lEomQSqUQkZw9F6OjowAMDw/T3d39j+ckYz8Z+l12NP95NTI2NkZtbS0tLS1/\n3XSRK169esXDhw9ZtWoVgUAAj8fDqVOnqKmpMZ3myNevXzl37hy2bWPbNrt27WLbtm2ms/6Xvn37\nxokTJ/B4PKTTafbs2cPWrVtNZ7ly8eJFzpw5g2VZlJeX09HRYTrJsWQySTgc5sqVK6ZTXKuurqax\nsZFAIIDX62XNmjUcOHDAdJZjLS0tjI+P4/V6uXz5MkVFM/9jku5eVkoppbJEN1IppZRSWaJDVyml\nlMoSHbpKKaVUlujQVUoppbJEh65SSimVJTp0lVJKqSzRoauUUkpliQ5dpZRSKkv+AO2e4yf8wTuC\nAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd4U2X/BvD7ZLRpumlLS6EDgbKh\niIggU7aAgPhDRKsIUoYgiK++ioAguBARXmZBEARFUBGhiChIEQcqe+/RMlpGd9KRcX5/nDZtaFra\nkuY07f25rlw5zXmSfPMk5OY5Oec8giiKIoiIiMhhFHIXQEREVN0wfImIiByM4UtERORgDF8iIiIH\nY/gSERE5GMOXiIjIwVSOeJJbtzLs/pi+vlqkpOjt/rhkjf3sGOxnx2A/Owb7WRIQ4FnsOqcd+apU\nSrlLqBbYz47BfnYM9rNjsJ/vzWnDl4iIyFkxfImIiByM4UtERORgDF8iIiIHY/gSERE5GMOXiIjI\nwRi+REREDsbwJSIih/vxx61YtGi+3GXIhuFLRETkYA45vSQREZEtGzeux65dPwMAOnbsjOeeG45/\n/tmHFSuWwNVVA1/fGnjnndk4eHB/kdtUKueNMKesPDZ2C7p37wSNxkfuUoiInN6MGVOxdetmuz2e\nQiGgb98BmDFjdontbty4hgMH/sGKFV8AAKKjX0DXrt3x3XcbMH78q2jZshX27PkVaWmpNm/z8/O3\nW82O5nSbnTMy0jFixHN48cUX5S6FiIjuw9mzZ9G0aXOoVCqoVCo0b94S58+fRdeu3fHxxx/giy9W\noUGDhvDz87d5mzNzupGvp6cXOnTohF27duHkyRNo0qSp3CURETm1GTNm33OUWhYBAZ6lms1OEABR\nFC1/GwwGCIICvXv3Rdu27fDbb3H4739fxezZc2zeFhYWbreaHc3pRr4AEB09DgCwYsVSmSshIqLy\niohoiOPHj8FoNMJoNOLkyROIiGiI1as/g1KpwoABT6Jbt564fPmizducmdONfAGgR49eqFevHr79\ndgPefnsG/P2de/MDEVF1FBQUjFatHsKECdEwm0X07z8AQUG1EBgYhEmTxsHT0wuenp4YOvQ56PX6\nIrc5M0EsPOavIKXZ/FBW69d/jokTJ+LNN6di8uQ37P74JCnt5iO6P+xnx2A/Owb7WRIQ4FnsOqfc\n7AwAL774Ijw9vbBq1Qrk5ubKXQ4REVGpOW34enp6YtiwKNy8mYQfftgkdzlERESl5rThCwAvvTQa\nCoUCMTFL4ICt50RERHbh1OEbFhaO3r374ujRw/j7731yl0NERFQqTh2+ADB6tHTY0fLlS2SuhIiI\nqHScPnwfeaQ9mjdviR9/3Ir4+Ctyl0NERHRPTh++giAgOnoszGYzVq5cLnc5REQkk/Pnz1kGYe+8\n8xZycrLL/ViHDx9ESkqyvUorwunDFwAGDhyMgI
Ca+PLLL5CZmSl3OUREJIM9e35FQkI8AGDmzA/g\n6qop92Nt27alQsPXKc9wdTdXV1e8+OJLmDPnfWzY8BVGjoyWuyQiIrqHYcMGY+3ajRBFEX36PIaF\nC5ehUaMmmDx5PN54420EBdWCyWTCnDnv4fr1azAajXjppTFo3boNtm+PxaZNG6FSqVG/fgQGDhyM\nH37YhD17foWvry+mT38LX3yxAZ9+Oge+vr44c+Y0UlNT8OyzL2Dbtq1IS0vFokXLIQjAzJlTkZWV\nhezsbLz66uvQ6TKxd28cLl26iNmz5+DMmZP4+ut1UCpVaNiwMSZMePW+X3uVCF8AeOGFkZg/fy5W\nrFiKF198CQpFlRjUExFVOPcZU+FqxykFoRDg3ncAdPeYrKFhw8a4ePECjEYDGjVqjOPHjyIiohGS\nk5MRFFQLAPDLLz/Bz88fb701HampqZg4cQzWrPkaX3+9DnPmzEdgYBC2bduCOnXqoG3bdujSpRua\nNGlm9TxKpQoLFizFzJlTcezYUSxYsASzZk3DwYP7ER5eF/36DUSnTl1w4MC/+PLLNXjvvY9Rv34E\nJk9+A15eXlizZiWWLfscLi4umDbtTRw9ehgtWkTeVxdVmfANCAjA4MFDsH79Ouza9TN69Ogtd0lE\nRFSCyMgHceLEMeTm5uCpp57Gnj270bLleURENLS0OX78KI4cOYSjRw8DAHJycmAwGNC9ey9MmfI6\nevXqg+7de5W4iblxY2n2Oz8/f8tMSL6+ftDpMlGjhh/WrPkM69evhcFggEZj/TiXLl1EUlIiJk8e\nDwDQ6TKRmJiIFi3u77VXmfAFgFGjxmL9+nWIiVnK8CUiKiXdjNn3HKWWRUCAJ3SlOLdzq1atsW7d\nauTkZKNfvwHYtm0rjh07ggcffMjSRqVS4/nnRxT5To+KehE9evRBXNxOvPLKWCxeXPwOt0ql0uay\nKIrYuPEr+PvXxLRps3D69EksWjTf6r5qtbSped68Rfd8PWVRpbbNNmvWHB06dMJvv+3GqVMn5S6H\niIhKEBoahqSkJGRm6qDVusPPzw9798ZZhW+TJs3w++97AAApKcmIiVkMs9mMmJjF8Pf3x9Chz6FZ\ns+ZITEyEIAgwmUxlqiEtLRW1a9cBAOzZsxtGoxEAoFAoYDKZEBoajsuXL1l2vlq5Mga3bt2879de\nqvA9e/YsunfvjnXr1gEAbty4gaioKAwbNgwTJ06sVBMbcK5fIiLn4evri6CgIABS0N64cQM1awZa\n1j/2WHe4uWkxZswIvPHGq2jRIhIKhQJarTtGj34REyeOhSAIaNAgAi1btsL8+R9j//5/Sv38vXv3\nxYYNX+LVV19G06bNcOfOHWzbtgWRkQ9i6tT/4vr1a5g48TX85z8TMXbsCKSlpcLfP+C+X/c9pxTU\n6/UYPXo0wsPD0bBhQzz33HN466230KlTJ/Tp0wfz5s1DUFAQhg0bVuxjVMTUUsVNWWUymdCu3YO4\nceM6Dh06xbl+7xOnBnMM9rNjsJ8dg/0sua8pBV1cXLBixQrUrFnTctvff/+Nbt26AQC6du2Kv/76\nyw5l2odSqcSoUWOQk5ODtWs/l7scIiKiIu4ZviqVqsjeX1lZWXBxcQEA+Pn54datWxVTXTk988xz\nnOuXiIgqrfve27k0U/n5+mqhUinv2a6sihvSBwR44qWXRuLTTz9FXNxPePbZZ+3+3NVJSZtOyH7Y\nz47BfnYM9nPJyhW+Wq0W2dnZ0Gg0SEpKstokbUtKir5cxZXkXr8pDBv2IhYsWIC5cz9Bjx79IQiC\n3WuoDvjbjWOwnx2D/ewY7GfJff3ma0v79u2xY8cOAMDPP/+Mjh07lq+yChQWFo5evR7H4cOH8M8/\nf8tdDhERkcU9w/f48eOIiorC999/jy+++AJRUVEYP348Nm/ejGHDhiE1NRUDBw50RK1lxrl+iYio\nMrrnoUb24M
hDjQoTRRHdunXEyZPH8e+/RxESEmr3Oqo6bj5yDPazY7CfHcPe/RwXtwtdunSz2+M5\nit03OzsLzvVLROTcbty4jp07d8hdht1V6fAFgEGDnoK/fwDWrVvDuX6JiCqRYcMGw2QywWg0okeP\nTjh9Wjot8OTJ45GYeAMAMG/eRzh8+CA+/3wFVq6MwaxZ0zFu3EvYv/8fTJ36huWx+vaVRsaXLl3E\nK6+MwcSJY/HWW68hI6Nybumo8uGbP9dvenoaNmz4Su5yiIgqpRqtm9m8aAptNfQcN8pmG8/o4ZY2\nmrWrgfDwUj1n/pSC586dsUwpaDabraYUfOaZKERGPogXXxwFADAaDViy5LNip42dP/9jvP76FCxY\nsBRt2jyCTZs2lqs/KlqVD19AmutXOlPXUpjNZrnLISIiFEwpeOzYETz11NM4efIELlywnlLwbvnT\nAxbn5MkT+Oij2Rg/Pho7dvxomRChsqlSUwoWp2bNmnjyyf/D119/ybl+iYhsSD5w/J5tMpasuGeb\n7Kjh8Jw8AbDTlIJ3U6vVAFDk3A35sxFpNBosXBhT6c/tUC1GvoA01y8AxMRwtiMiosqgNFMK5k/t\ndzd3d3fcuXMbAHD+/Dno9dLJnOrXb4B9+/4EAOzcuaNMMxw5UrUJ3+bNW+DRRztyrl8iokrkXlMK\nhoXVxZkzp/G//31idb/69SOg0bhhzJgR2LHjRwQFBQMAJk78D9au/Rzjx0fjxx9jS9yELacqfZzv\n3bZv34YXXngGzz33AubNW2j3mqoiHhfpGOxnx2A/Owb7WVJtj/O9W8+evREWFo5vvvkat2/flrsc\nIiKqpqpV+HKuXyIiqgyqVfgC0ly/Hh6enOuXiIhkU+3C19PTC88+G4WkpERs2fK93OUQEVE1VO3C\nFwBGjhwNQRCwfPkSOGB/MyIiIivVMnzDw+uid+++OHz4EP79t3IeA0ZERFVXtQxfgHP9EhHJ6ccf\nt2LRovl2eSydLhP//LMPALB27WocP3603I+VmJiIkyfvfbav+1Vtw7ddu0fRrFkLxMb+gISEeLnL\nISKicjpz5rQlfKOihqNZsxblfqyDB//FqVMn7FVasarFuZ1tyZ/r95VXxmLVqhV4551ZcpdERFSt\n3LhxDf/5zyu4eTMJQ4YMQ79+A6zWf/fdRuzc+RMEQYGOHbvgmWeew9mzp/HJJx9BrVbDxcUFM2d+\ngHnz5kCv1yEkJBTHjx9Fly7dkJaWisOHDyI1NRWXLl1EdPRY7Ny5A5cvX8L06bPRtGkzLFw4DydP\nnkBubi4GDhyMDh06Y9Wq5VCpVAgMDELt2iH49NM5EAQBWq0WU6bMgKdn8SfOKItqG76ANNfvu+9O\nx7p1a/Daa/+Fh4eH3CURETncjBmu2LrVfnGgUAB9+7pixoycEtslJMRj1aovodNlYvjwYejb9wnL\nhAjXr19DXNwuLFmyEgAwduxIdO3aHT/+uBWDBj2F3r374sCBf5GcfAfDhkXh4sULGDDgSatNzgkJ\n8Viy5DNs3boZ69atxqpVX2L79q3YuXMH6tdvgKCgYEyYMBk5OdkYMmQg+vcfiD59+sHHxwcdOnTG\nxIlj8frrUxASEopNm77Bpk0b8cILI+3SR9U6fPPn+v344w+wceN6jBgxSu6SiIiqjRYtIqFSqeDt\n7QN3d3ekpaXBx8cHAHDq1AlcvZqACRNGAwD0eh0SE6+jQ4fOmDv3QyQkxKNbtx4ICwvHiRPHbD5+\no0ZNIAgC/Pz8Ua9eAyiVSvj6+kGnOwJXV1ekp6dhzJgRUKlUSE1NKXL//OkJAcBgMKBx4yZ2e+3V\nOnwBaa7fBQs+wYoVSzF8+MhiJ2gmIqqqZszIuecotSykczuX5vGsp/0rPAugSqVGu3aP4o033i5y\nr88++wJ//rkXs2fPwPjxk4p9dKVSaXNZFEUcOnQABw/ux6JF0mbmHj06Frl/
RU5PWO2TJn+u3wsX\nzuPXX3+RuxwiomrjxImjMJlMSElJQVZWFry8vC3rGjZsjIMHDyA7OxuiKGL+/LnIycnGd99tQHp6\nGnr27IOnnx6Gs2dPQxAEm9MOliQtLRU1awZCpVLh99/3wGQyw2AwWE1hWJHTE1b7kS8gzfX79ddf\nIiZmCbp37yV3OURE1UJoaDimTXsT164lIDp6nNUIMygoCEOGPIOXXx4FhUKBTp26wNVVg9q1QzBt\n2pvw8PCAWq3GlCnvIDU1BcuWLURAQM1SP/dDD7XFl1+uwfjx0ejYsTPat++AuXM/QPfuPTF79gz4\n+Phi4sT/YM6c9/Dll2vg4uKKGTNm2+21V6spBUsyaFBf/PHHXvz2299o1Kix3R7X2XFqMMdgPzsG\n+9kx2M8STilYCtHR0kk3VqxYKnMlRERU1TF88xSe6/fOnTtyl0NERFUYwzdP/ly/2dnZnOuXiIgq\nFMO3EM71S0REjsDwLSR/rt/ExBvYunWz3OUQEVEVxfC9S/5cvzExiznXLxERVQiG71041y8RUcUr\nzZSCu3fvdFA1jsfwtYFz/RIRyW/dujVyl1BhGL42cK5fIqKKlz+l4PPPP43Y2B+s1n311Rc4f/4s\npkx5HQcP7scbb0zC+PHROH36FPr27WZpN3XqGzh4cD/0eh2mTn0DEyeOxfjx0Th//pyjX06ZMHxt\nyJ/r12w2Y9WqFXKXQ0RU4Vq3drd5WblSbWkzbpzGZpvoaI2lzdq1aoSHl+45ExLi8eGH87BwYQxW\nroyx2s9m2LDn4eHhgfff/xgAcOHCecybt6jYMxBu3Lgebdu2x4IFS/Haa29i0aJPy94JDsTwLcbA\ngYPh7x+AdevWIDMzU+5yiIiqHFtTChanfv0GcHFxKXb9sWNHsXnzdxg/PhqffPIhdLrK/b3NiRWK\nodFoMHz4SMyd+yHn+iWiKu/AAd092yxZkn3PNlFRBkyerMGtW6V51uKnFLybWq22ebvRaMxbr8Kr\nr76OZs1alOaJZceRbwleeGEkXFxcsGLFUpjNZrnLISKqUkqaUhAAzGbbh3sKgoDs7GxkZ2fj7Nkz\nAIAmTZrht9/iAACXLl3E11+vq9Da7xfDtwSBgYEYNOgpzvVLRFQB8qcUnDRpbJEpBQEgIqIhRo16\nvsj9Bg58CtHRL+D992eiYUPpN+Cnnnoa164lYNy4l/DRR7MRGfmgQ15DeXFKwXs4duwIunXriM6d\nu+Kbb3649x2qGE4N5hjsZ8dgPzsG+1nCKQXvQ/PmLdG+fQfs2bMbp0+fkrscIiKqAhi+pcC5fomI\nyJ4YvqXQq1cfhIZyrl8iIrIPhm8pSHP9jkZ2djbWrVstdzlEROTkGL6lNGxYFDw8PLFy5XIYDAa5\nyyEiIifG8C0lT08vDBv2HOf6JSKi+8bwLQPO9UtERPbA8C2DunUfQK9ej+PQoYPYv59z/RIRUfmU\nK3x1Oh3Gjx+PqKgoDB06FHv37rV3XZVWwVy/POyIiIjKp1zh+/3336Nu3bpYu3YtFixYgPfee8/e\ndVVa7dt3QNOmzREb+wOuXk2QuxwiInJC5QpfX19fpKamAgDS09Ph6+tr16IqM0EQMHr0OJhMJs71\nS0RE5VLuczuPHDkS8fHxSE9PR0xMDCIjI4ttazSaoFIpy11kZZOdnY2wsDDk5ubi6tWrcHd3l7sk\nIiJyIuWaz/eHH35AcHAwVq5cidOnT2PKlCnYtGlTse1TUvTlLrA4cp+4+/nnR2Du3A+xePFyvPji\nS7LVUdHk7ufqgv3sGOxnx2A/S+w+scLBgwfRoUMHAECjRo1w8+ZNmEym8lXnpDjXLxERlVe5wjcs\nLAxHjhwBAFy7dg3u7u5QKqvOZuXSyJ/r9/z5c9i9e6fc5RARkRMpV/g+/fTTuHbtGp577jm89tpr\nmDFjhp3Lcg7R0WMBADExS2SuhIiInEm5
fvN1d3fHggUL7F2L08mf6zcu7lecPn0KjRo1lrskIiJy\nAjzD1X0qmOt3mcyVEBGRs2D43qeCuX7XIzmZc/0SEdG9MXzvU+G5fteuXS13OURE5AQYvnbAuX6J\niKgsGL52wLl+iYioLBi+dsK5fomIqLQYvnbCuX6JiKi0GL52xLl+iYioNBi+dsS5fomIqDQYvnbE\nuX6JiKg0GL52NnDgYPj7B2Dt2tXQ6XRyl0NERJUQw9fONBoNhg8fibS0VGzcuF7ucoiIqBJi+FYA\nzvVLREQlYfhWgMDAQAwcOBjnz59DXNwuucshIqJKhuFbQTjXLxERFYfhW0FatIhEu3aPYvfuXThz\n5rTc5RARUSXC8K1AnOuXiIhsYfhWoN69H0doaBjn+iUiIisM3wqkVCrx0kujkZWVhXXr1shdDhER\nVRIM3wo2bFgU3N09ONcvERFZMHwrmJeXN4YNew43blxHbOwPcpdDRESVAMPXAV56aQwEQcAHH8xC\nZmam3OUQEZHMGL4OULfuA3j55Ym4fPkSpk79r9zlEBGRzBi+DvLmm1PRokUkvvpqLbZu3Sx3OURE\nJCOGr4O4uLhg2bKVcHNzw+TJr+Datatyl0RERDJh+DpQ/foNMGvWh0hLS8X48aNhMpnkLomIiGTA\n8HWwqKjh6NOnH/74Yy8WL/6f3OUQEZEMGL4OJggC5s1biMDAIHz44SwcPnxQ7pKIiMjBGL4y8PPz\nw6JFMTAajRgzZiR0Op3cJRERkQMxfGXSuXNXjBv3Ci5evIBp096UuxwiInIghq+M3nprGpo1a4F1\n69Zg61ae/YqIqLpg+MrI1dXVcvjRa69NwPXr1+QuiYiIHIDhK7OIiIaYOfN9pKamYsKEMTCbzXKX\nREREFYzhWwm88MII9O79OPbu3YMlSxbKXQ4REVUwhm8lIB1+tAg1awbigw/exdGjh+UuiYiIKhDD\nt5Lw9/fHwoXLYDAYePgREVEVx/CtRLp27YbRo1/G+fPnMH36FLnLISKiCsLwrWSmTp2Bpk2bY+3a\nz/Hjj7Fyl0NERBWA4VvJ5B9+pNFoMHnyeCQm3pC7JCIisjOGbyXUsGEjzJjxHpKTk/Hyy6N5+BER\nURXD8K2kXnzxJfTs2Rt798Zh2bLFcpdDRER2xPCtpARBwKefLkZAQE28994MHDt2RO6SiIjIThi+\nlVhAQAAWLlxqOfxIr9fLXRIREdkBw7eSe+yxHoiOHotz587inXfelrscIiKyA4avE5g6dSYaN26K\nNWtW4qeffpS7HCIiuk/lDt8tW7bgiSeewJNPPom4uDg7lkR302g0WLZsJVxdXfHqqy8jKSlR7pKI\niOg+lCt8U1JSsHjxYnz11VdYtmwZdu3aZe+66C6NGzfBjBmzcefOHc5+RETk5MoVvn/99RfatWsH\nDw8P1KxZE7NmzbJ3XWTDiBHR6N69J+LifsXy5UvkLoeIiMqpXOF79epVZGdnY8yYMRg2bBj++usv\ne9dFNgiCgAULlsLfPwCzZ8/AsWNH5S6JiIjKQRBFUSzrnZYvX46DBw9i0aJFuH79Op5//nns3r0b\ngiDYbG80mqBSKe+7WJJs374djz/+OBo3boz9+/dDq9XKXRIREZWBqjx38vPzQ6tWraBSqRAaGgp3\nd3ckJyfDz8/PZvuUFPsfnxoQ4IlbtzLs/rjO4KGHOuCll0bjs89iMH78RHz00bwKe67q3M+OxH52\nDPazY7CfJQEBnsWuK9dm5w4dOmDfvn0wm81ISUmBXq+Hr69vuQuksps+fRYaN26Czz//DD//vF3u\ncoiIqAzKFb6BgYHo1asXhgwZglGjRmHq1KlQKHjIsCNpNBosXSodfjRx4jgkJSXJXRIREZVSuX7z\nLauK2PzAzRqSFSuW4u23/4uuXbth/frv7P6fIPazY7CfHYP97BjsZ4ndNztT5fHSS2Pw2GPdsXv3\nLnz2
2TK5yyEiolJg+Dq5gsOP/PHuu9Nx4sRxuUsiIqJ7YPhWAYGBgZg/fzFyc3MxduxIZGVlyV0S\nERGVgOFbRfTs2QcjRozC6dOn8O670+Quh4iISsDwrULeeWc2GjZshJUrl2Pnzh1yl0NERMVg+FYh\nbm5uWLZsFVxcXPDKK+Nw8+ZNuUsiIiIbGL5VTNOmzTBt2kzcvn0LEyeOhQOOJCMiojJi+FZBo0aN\nRZcuj2HXrl+wcmWM3OUQEdFdGL5VkEKhwMKFy+Dn54eZM6fh1KmTcpdERESFMHyrqMDAIHz66WLk\n5ORgzJgRyM7OlrskIiLKw/Ctwnr3fhzDh4/EqVMnMWvWdLnLISKiPAzfKm7GjPcQEdEQK1Ysw65d\nP8tdDhERgeFb5Wm1WixdutJy+NGtW7fkLomIqNpj+FYDzZu3wNtvz8CtWzcxadI4Hn5ERCQzhm81\nMXr0OHTu3BW//LIDq1atkLscIqJqjeFbTeQfflSjRg3MnDkVp0+fkrskIqJqi+FbjQQF1cKnny5G\ndnY2xowZycOPiIhkwvCtZvr06Yvnnx+BkyeP4733ZspdDhFRtcTwrYZmznwP9es3QEzMYuzevUvu\ncoiIqh2GbzXk7u6OZctWQq1WY8KEMbh9+7bcJRERVSsM32qqRYtIvPXWdNy8mYRXX32Zhx8RETkQ\nw7caGzduAjp27IIdO7Zj9eqVcpdDRFRtMHyrMYVCgUWLlsHX1xfvvDMFZ8+ekbskIqJqgeFbzdWq\nFYx58xYhOzsbo0ePQE5OjtwlERFVeQxfQt++/REVNRwnThzD+++/K3c5RERVHsOXAADvvvsB6tWr\nj6VLFyIu7le5yyEiqtIYvgSg4PAjlUqFCRPG4M6dO3KXRERUZTF8yaJly1Z4881pSEpKxKuvjufh\nR0REFYThS1bGj5+IDh064aeftuGLLz6XuxwioiqJ4UtWpMOPYuDj44Pp09/C6dOn5S6JiKjKYfhS\nEcHBtfHJJwuRlZWF/v374+LFC3KXRERUpTB8yab+/Qdg8uTXcf78eTz+eDf8/fc+uUsiIqoyGL5U\nrDffnIbly5cjLS0Ngwf3w/fffyt3SUREVQLDl0o0atQorF//HVxdNRg9egQ+/fRj7gVNRHSfGL50\nT126PIbY2J9Rp04IPvhgFiZNehm5ublyl0VE5LQYvlQqjRs3wfbtuxAZ2Qrr16/DM88MRlpaqtxl\nERE5JYYvlVpgYBC+//5H9O7dF3v37kHfvj1w5cplucsiInI6DF8qE3d3d3z++TqMGTMeZ8+eQZ8+\nj+HAgX/lLouIyKkwfKnMlEol3n33fXz44SdITk7GoEF9sXXrD3KXRUTkNBi+VG4jRozCunUboFSq\nMHJkFBYtWsA9oYmISoHhS/ele/de2LLlJ9SqFYx3352G//xnEgwGg9xlERFVagxfum/Nm7fATz/9\nimbNWmDt2s/x7LP/h/T0NLnLIiKqtBi+ZBe1agVjy5af0KNHL8TF/Yr+/Xvh6tUEucsiIqqUGL5k\nNx4eHlizZj1GjozGqVMn0bv3Yzhy5JDcZRERVToMX7IrlUqFDz6Yi9mzP8StWzcxYEAfbN++Te6y\niIgqFYYvVYjo6HFYvforAMDw4cMQE7OYe0ITEeW5r/DNzs5G9+7dsWnTJnvVQ1VInz598cMP2xEQ\nUBPTpr2FKVNeh9FolLssIiLZ3Vf4Ll26FN7e3vaqhaqgli1b4aeffkXjxk2xcuVyvPDCM8jMzJS7\nLCIiWZU7fC9cuIDz58+jS5cudiyHqqI6dUIQG7sDXbo8hl9+2YEnnuiNGzeuy10WEZFsBLGcP8RF\nR0dj2rRp2Lx5M2rXro0nn3xblRT0AAAgAElEQVSy2LZGowkqlbLcRVLVYDAYMH78eCxfvhy1a9dG\nbGwsIiMj5S6LiMjhVOW50+bNmxEZGYmQkJBStU9J0ZfnaUoUEOCJW7
cy7P64ZM3e/Txr1seoVSsU\nM2dOxaOPdsBnn61G9+697Pb4zoqfZ8dgPzsG+1kSEOBZ7LpyhW9cXBwSEhIQFxeHxMREuLi4ICgo\nCO3bty93kVQ9CIKAl19+BaGhYXj55VF47rmn8f77H2PEiFFyl0ZE5DDlCt/58+dblhcuXIjatWsz\neKlM+vcfgODgYERFDcWbb76GS5cuYsaM2VAq+fMEEVV9PM6XZNO6dRts374LERENEROzGC+++Bx0\nOp3cZRERVbj7Dt8JEyaUuLMVUUnCwsKxbdsv6NixM376aRsGDnwcSUmJcpdFRFShOPIl2Xl7+2D9\n+u/wzDPP4ciRQ+jTpxtOnTopd1lERBWG4UuVgouLC+bPX4wpU6bj6tUE9OvXE7t375K7LCKiCsHw\npUpDEARMmvQfxMSsQm5uDoYNewpr166WuywiIrtj+FKlM2jQU/j2263w9vbGa6+9glmz3oHZbJa7\nLCIiu2H4UqXUtu0j+PHHXahXrz4WLvwUo0YNR1ZWltxlERHZBcOXKq0HHqiHH3/ciXbtHsXWrZvx\n5JN9cevWLbnLIiK6bwxfqtR8fWtg48bNeOqpp3HgwH706dMNZ8+ekbssIqL7wvClSs/V1RWLFy/H\n66+/hfj4y+jbtwd+//03ucsiIio3hi85BUEQ8Prrb2HRohjo9ToMGTIQX3/9pdxlERGVC8OXnMqQ\nIc/gm29+gIeHB155ZSw+/HAWyjkrJhGRbBi+5HTat++AH3/chbCwcMyb9zHGjh2J7OxsucsiIio1\nhi85pfr1G2D79l/Rpk1bbNr0Lf7v/wbgzp07cpdFRFQqDF9yWv7+/vjuu60YOPBJ/P33X3j88W64\nePG83GUREd0Tw5ecmkajwbJlqzBp0n9w6dJF9OnTjXtCE1Glx/Alp6dQKDBlynTMn78YGRkZePLJ\nfnjhhWE4ffqU3KUREdnE8KUqY9iwKGzZ8hPatGmL7dtj0aVLO0yYMAbx8VfkLo2IyArDl6qUhx56\nGLGxP2Pdug1o1KgJNmz4Cu3aPYgpU17HzZs35S6PiAgAw5eqIEEQ0LNnH/z66+9YuvQzBAfXxmef\nxeDhh1vigw/eRVpaqtwlElE1x/ClKkuhUGDw4CH4888DmDPnU3h6euLTT+eiTZsWWLhwPvR6vdwl\nElE1xfClKk+tVmP48JH4++/DmDp1JgBg1qzpaNs2EqtXr4TBYJC5QiKqbgTRAefmu3Urw+6PGdCm\nOUzmoqXrx72C7JHRAADPcaOg/vuvIm0MrR9CxvLVAADN2tXQzp9r8zmS/zoIuLhAee4svIc+abNN\nxryFMHTuCgDw6dUFitu3i7TJHvIM9P99GwDg/s7bcI39oUgbU2gY0r7fBgBw2b4NHlP/a/P5Urfu\ngDm4NoTUFPh262izjW7KdOQMHgIA8Hr2/6CysddvbtfuyJw7HwDgtnA+3FZ/VqSNqNVCdfoUbt3K\ngGr/P/AaPcLm86WvWgtjy1YAAN+2kRCMxiJtsqLHImv0ywAAj0kvw2XvniJtjM1bIn21dL5m16+/\nhPvHH9h8vuQ9+wAPDyguX4LP4P4222TOmYfcbj0BAD79ekJx47plndlsRkZ6OlZl6fG60Yjw8Lr4\nrmEjtDxxHBAEq8cx1wpGauzPAACXXT/D443JNp8v9butMIfXBTIzUaPzIzbb6F5/CzlDnwUAeA1/\nFqpjRyzrlAoBJrOI3I6dkTl/MQDALWYx3JYvLfI4okqFlL8PAwBURw7Ba0SUzedLj1kF40MPAwB8\nOz4MwcZIP2v4S8iaMAkA4PGfSXDZvbNIG2Ojxkj/8hsAgOt3G+H+/rs2ny9l116IPr5QXL8Gn/69\nbLbJnP0Rcvv0BQB4D+oLpY2d4XL6DYBu5nsAAO1H70GzcX2RNmZ/f6TuiAMAqPfshufkCTafL+3r\nTTA1iAByc1Gj3YOWfi5MP+k/yI
4aDgDwjB4O9YH9RR7H0LYdMpasAABoVi6Hdsn/bD5f8oHjAADl\nyRPwjnraZpuMRTEwtHsUAODb9VEI6WlF2mQ/+zz0k98AALhPeR2uO7YXaWOqVx9pGzcDAFy2bobH\njKk2ny9l+68Qa9aEcPMmfPs8ZrNN5ozZyO0/EADgPWQglBeKHi+f06sPdO9/DADQzpsDzZdfFGkj\nenkjZfcfCAjwROqWn+A5frTN50tbuwGmJk0BADVaN7PZRs7vcnsJCPAsdp3Krs9E5AQUCgW8fXzw\nwpBncBoivvjic+y4fAmBajW8vX3g5uYmd4lEVMU578g3wLNCHpesVYd+vnLlMj7++AN8883XEEUR\nDz/8CKZOnYFHHmnvsBqqQz9XBuxnx2A/S0oa+fI3X6r2wsLCsWhRDPbs2Yc+ffrhn3/24YknemPo\n0CdxrNCmYSIie2H4EuVp1Kgx1qz5Ctu370KHDp3w66870a1bR0RHD+c5o4nIrhi+RHdp3boNvvtu\nKzZu3IzIyFbYvHkTHn20DV577RVcv35N7vKIqApg+BLZIAgCunR5DDt2xGHlyrV44IF6WLt2Ndq2\njcQ777yN5GROX0hE5cfwJSqBIAjo338A9uzZhwULliAgoCaWLl2Ihx5qgblzP0RmJncqIXJqZjOg\n0wGZmQ59Wu7tTCViP1vLycnBmjUrMX/+XNy+fRv+/v6YOPE1vPDCSGg0mnI/LvvZMdjPjnHf/SyK\ngMEAITsLQlYWoNdDyM6GkKWHkJUFITsL0GdJfxe6HdlZEPRZBW2yCrXRF2pT+PbsbOkpFQqkffUt\nDI91t1MvlLy3M8OXSsR+ti0zMwMxMUuwZMlCZGSko3btOnj99bcwZMgzUKnKfvg8+9kx2M92IoqA\nTgdBp4Ogy4Sg00Ghy4SgywR0OngrTMhISraEoJCVBdwVggXhWNBG0OuB/DA1mexbsloN0U0L0c0N\n0GggarUQNRrLbaKPL3RvvwNznRC7PSfDl8qN/Vyy5OQ7+N//PsWqVcuRnZ2NBg0i8Oab09Cv3xMQ\n7jpbVknYz45RLftZFKWQKxSUQmZmwbLuruXM/GUdhMyMguXC6/Q6CHaIDlEQADc3KfzcCsIQbm4Q\nNW4QtXnrNG557Qq3KRScGqkd7g5UjRugzbsux3+K7xfDl8qN/Vw6169fwyeffISvvloLk8mEyMhW\nmDLlHXTu3LVUIcx+dgyn6Oe8sFSkp0FIS4OQnlZ8GBYXmlZ/Z0Iwm8tfjiBAdPeA6O4uXTw8Cy17\nWK9zl9Z5BvkhzaQoCNG84LQEZn6AuroWOaVrVcLwpXJjP5fNhQvn8NFH72Hz5k0AgA4dOuHtt99B\n69ZtSrwf+9kxHNLPZjOEjPS84Ey3CtGC5XTp70LLVuttnB+9tEStuyUMzR6eQKHl/Nvh7gGzR35o\neuSFqDtErUeRUIVWW+aA5OdZwvClcmM/l8+xY0fw/vvvYteuXwAAvXv3xVtvTUPjxk1stmc/O0ap\n+jk3F0JaGhQZxQVlqnVopqVBUWhZyMwo8yZZUauF2csbopcXRC9vmL3zl32k68JBefeIs/Cy1h1Q\nyH8QCz/PEoYvlRv7+f7s2/cnZs+egX/+2QdBEPDUU0/jjTemICws3Kod+9nOjEYIKSlQJN+BIvkO\nhDvStWeuDvrrN4sP0Yx0aeegMhAFAaKnF0Rvbyk4ve5a9vKG6C2FqNnLO29d/rJ0O9TqCuoIefDz\nLGH4Urmxn++fKIrYuXMH3nvvXZw8eRxqtRpRUcPx6qtvIDAwEAD7uURmM4TUFCiSky0hWjhQFcl3\nICTfgeJO3nVyMhRpqaV+eFGthujtDbMlQPOCMi9ArZel8LSMUr29IXp4VorRZmXCz7OE4Uvlxn62\nH7PZjM2bv8OHH87G5cuXoNVqER09Di+//Arq1w+pHv0sitIo0yo4k6G4Yx2iluWUZAgpKaXaYUhU
\nqWCu4QfRz0+6riFdm/1qWJa9wmsjRVRLI868wIWbW5Xe6UcO/N6QMHyp3NjP9mcwGPDVV2sxd+6H\nSEpKhI+PDyZNmoQePfqhQYMIucsrPVGU9qS1MRq1CtSUZGn9HWm5NDsTiQoFxBo1pPDMD1K/QoFa\no4YUsr41LIErenrdM0T5eXYM9rOE4Uvlxn6uOHq9HitXLsfChfOQmiptJm3YsBH69n0C/foNQNOm\nzcp0rLDdmM0Qbt+GMukGFEmJUCQlQZFYaDnphnR96yaE3NzSPaSvr1WIWo9M85Z9a0D0ywtTb58K\n2ZTLz7NjsJ8lDF8qN/ZzxcvISMcff/yKr77agLi4XcjOO91deHhd9Os3AP36PYFWrVrffxAbjVDc\nvpUXoolQJCbeFah5t926WeLZhUQXF5gDg2AOCIDZz79oiNbwsx61+vjIcoIDW/h5dgz2s4ThS+XG\nfnaM/H7OzMzErl0/IzZ2C375ZQf0eh0AIDi4Nvr27Y9+/Qbg4YcfgVKpLLizwQDFrZt5o9OkvBC9\nAcXNJOuQvX2rxN9ORY0G5ppBMAcGwhxUC6bAQClk8y9BtWAODIToW8NpfyPl59kx2M8Shi+VG/vZ\nMWz1c1ZqKvbH/oCD27bg8l9/wFuvRy0AdTUaNK/hh1C1Gp46HRR3bpd4XKmo1cJcMxCmoFp5IRpk\nFbJSuAZKm3qdNFRLi59nx2A/S0oK38qxLYiousnKgjIhHsqEK1DExwOpt+B58Yr1ZuDkZIQCePLu\n+2ZnA9evIQPARYUCBv8AuNVvAP9mzSEE15HC1TJaDZIOhanioUrkbBi+RBUhOxvKawlQXLmSF7Lx\nUMRflpbj46G4dbPIXfInJDR7ecMcGAhj0+Yw1wwsGK3mBaohoCb+jr+CH3b9jG3btuLGjevArZvw\nOHYUPXr0RL/QAXisVWu4u7s79jUTUalxszOViP1cjNxcKK4m5IXpFSjyrqWQvQJlUqLNu4lqNcy1\n68AUGg5TaCjMIaEwhYbBq2kE7rh6wRwYJJ1Lt5TMZjMOHtyP2NgtiI3dgvj4ywAANzc3dO3aHX37\n9kevXn3g5eVtj1ft9Ph5dgz2s6RCfvOdM2cODhw4AKPRiNGjR6Nnz57FtmX4Oq9q288GAxTXrlqP\nWuPzlhPiobhx3ebvrKJSCXPtEJhCpVA1h4TCFBIKU2g4zKGhUrgW3lkqjz36WRRFHD9+DNu2/YDY\n2C04e/YMAECtVqNTpy7o128AevfuCz8/v/t6HmdWbT/PDsZ+ltg9fPft24eVK1dixYoVSElJwaBB\ngxAXF1dse4av86qy/Ww0QnHjuvWoNX85IR6K69ds7hksKhTSyDUktFCwhsEcGibdViu4XIfVVEQ/\nnz17BrGxUhAfP34UAKBUKtG+fQf07fsEHn+8H4KCatn1OSu7Kvt5rmTYzxK7h6/JZEJOTg60Wi1M\nJhPat2+PP//80/rwh0IYvs7LafvZZIIi8YYUpFcuW0asls3E167aPJZVFASYawVbNgebQkKlYM1f\nDq5dISfBr+h+vnz5ErZt24rY2B9w4MC/AABBEPDQQw+jX78B6Nu3P0JDwyrs+SsLp/08Oxn2s6RC\nDzXasGED9u/fj48//rjYNhXxJrRp4wmzjZHJuHG5GDnSkLeswd9/F/0PQevWJixfLp3IYO1aNebP\nd7H5HH/9pYOLC3DunAJDh7rZbDNvXjY6d5a+xHv10uL27aJ7lQ4ZYsB//yudCeidd1wRG1t0ZBQa\nasb330uzqWzfrsLUqa42n2/rVj2Cg0WkpgLdutneoWbKlBwMHiydwu/ZZ91w+nTRMwV17WrE3Lk5\nAICFC12wenXRQNFqRZw+rcStWxnYv1+B0aNt98GqVVlo2VJ6L9q2dYetswdGR+di9GjpfZk0yRV7\n9xbtg+bNTVi9Wnpfvv5ahY8/tt0He/bo4OEBXL4sYPBADWA0
QDAYAEPetdGIJRiLx02xAIAO2Iur\nqFPwAAoloFLh/8L3YUbfP2EOCcP033rhu31hgEpptWdwrVpmxMZK78uuXUq88YYGtnz3nR7h4SIy\nM4HOnW2/L6+/noOhQ6XOGT5cg2PHCj6bCoUCZrMZHTsaMX++9L7ExKixfHnRz6ZKBfz9t3T875Ej\nCowYYft9iYnJwkMPSe9Lx45a6PXS6zIaTcjK0iMrS4/c3PkQxTkAAD+/b2A0doebmxvUhf6D0aiR\nGV9+mZX3OlV4/33b78uuXTr4+ADXrwvo39/279azZ+egTx+pDwYNckN8fNHPZr9+RsycKfXBRx+5\nYOPGop9Nf38RO3boAQB79igxebLt9+Xrr7PQoIEZublAu3buln4ubNKkXERFSZ/N6GgNDhwo+p3R\ntq0JS5ZIn82VK9VYssT2d8aBA9L7cvKkAlFRtt+XRYuy0a6d9J3RtasW6elFvzOefdaAyZOl74wp\nU1yxY0fRfy/16pmxcaP0vmzdqsKMGbbfl+3b9ahZU8TNmwL69LH9vsyYkYP+/aX3ZcgQN1y4UPR9\n6dXLiPffl96XefNc8OWXRd8XLy8Ru3frERDgiS1b9Bg/3vb7snZtFpo0kd6H1q1t/3uR87vcXirs\nUKOdO3fi22+/xapVq0ps5+urhUple1R8PxQ2Tj/n6alBQID0hms0ts9Q5+qqQECAOq998WexCwjw\nhIsLcOdO8W18fLQICJCWVSrb7dzdXREQIP3D0Gptt1GrFZY3ytu7+Ofz8/NAQEDxzwUAXl5ulppc\nXGy3c3NzQUCA9EH18LDdJn9DRkCAJ3x9i38+X193y/MplYCt8zh4eNzP+yICJhOQKwVswKyp8Dh/\nGBnH9VCkfFv0gRQKCA3qA62GAuHhwDf1gMy8sywpVZZwVT05CO4fDJJqugkoDhV9qLK+L25uxbfx\n9Cx4X1xdi7ZTKBTQaEr3vuTXVJb3Jb+di4sCLi7e8Pb2xvPPT0OdOvWwadMm/PxzMkQxFWlpqVCr\n1dBqtXB3d4eLi9ryfF5exT+fv7/0OcnJKb6Nt3dBH6jVtttptQV94F7M9LQqVUEf+JRwJsoaNaQ+\nyM0taHP390bh7wxb7wsAaDSl/86Qntd+3xnFfaZcXBSlfF+kz6bZbL/vjNK9L9p7vi9ASf9e5Psu\nd4Ryj3z37t2LBQsW4LPPPoOPj0+JbbnZ2Xk5tJ+NRigvX4Ly7Bkoz52BKu9aee4cFLpMq6aiQgFz\naBiMEQ1himgkXTeIgKlBBEQn3LO3MnyeU1NTsGPHdmzbtgW7d+9CTo40yqlb9wHLaS4jIx+U53zT\ndlIZ+rk6qMh+FkXAaATyNnbBYBBgMEj/wTIagdxcoci6/PWF/zYYhEL3kf7TMXSoAV5e9qvV7pud\nMzIyMGzYMKxevbpUe04yfJ1XhfRzVhaU589Bde4MlGfPQHXurBSyFy8UOVG/6OICU70GBeEa0RDG\nBg1hqldf+u9wFVHZPs+ZmRnYuVM6zeXOnT9bTnNZu3Ydy2ku27RpW+x+HpVVZevnyshkks7jkpsr\nBVlODpCTI11b3ybdLt0GZGdLyzk5AtRqV6Sm5tgIwMLhd3cAFl1XEKrW7SrKypVZls3v9mD38N2w\nYQMWLlyIunXrWm776KOPEBwcbLM9w9d53U8/C2mpBeFaaDSriL9S5DAds4cnTBERMDXIC9eIhtLf\noeGV5qT8Fakyf56zsrIQF/crYmN/wI4d25GengYA8PX1RYsWkWjZshVatoxEixaRCA0Nq9Qj48rc\nz2YzkJUlhVh2trSclSXkBaFgFXZSEBaEoK1l27cVF6QFj2syyfP+qdUi1GppBKpSiXnXgIuLdHvB\npaBd/nL+xcVFzLtP0XVqtfVjFrST1nl5AW3amGwdCVhuPLczlds9+1kUobiZJIXr2TPSaDY/bG2c\naMLsH5A3ipXC1digIUwN
G8EcVKtanwLRWT7Pubm5+P33PYiN3YK9e/fgypXLVut9fHzQvHkkWraM\ntARyeHjdShPIZelnUcwf0UmBqNcXBGN2tmAVjkWvC9rq9db3KQhX679zciq+jwRBhKurFDouLiI0\nGmnZ1VXMuw3QaETL+vy2rq62lotfX7OmFnq9/q4ALByu1gGoUlXNf/4MXyo3Sz+bzVDEX8nbVHy2\n0G+yZ6FISy1yP1NIKEwNCsLVmBe2om8NGV5F5eesn+fU1BQcPXoER44cxtGj0uXSpYtWbby9fdCi\nRUs0b97SEsrh4Q/Y3GGyLERRGqnpdAJ0uruvbd8mii5ISTFYQq9wgNoKUbPZ/ong5iaFnkYjXbu5\niXBzk/7Ovy58e36wFQ486/C71/qCx3BUyDnr59neGL5UJkJKMlRHDkN15BA8Lp6F4dgJqM6fhZA3\nz2w+UaWCqe4DeTs8ReSNZhvCWK+BtDsklVpV+jynpaXi2LGjeYF8CEePHsGFCxcAuAPwAOABrTYI\nDzzQEqGhTREcHIGaNetBqw1EVpaiSGjq9cWHqz02kapURcPPVgjmL+cHp5tb6f8uuL1qjvDuVpU+\nz/eDsxpRsYSMdKiOHoHq8CGojhyE+tBBKO/alKjSamGMaGS9w1NEQ5jqPlAhJ5ygysFgANLTBaSn\nAxkZAtLSBMvf6ekCMjKKjjCloNRCpwuGTtfHchtgnTh6PXD8uHQpLa1WhLu7CHd3oEYNs2XZ+rrk\n22rXdkdWViY0GunxNJpqsUsBVUL82FUnej1Ux49BfeQgVIcOQnXkEJTnz1nt/GSuUQO5XbvB0OpB\nGFs+CO9Oj+C2WwkHk1KlZDYDmZn54WkdmmlpUnCmp8OynB+sGRkFt+WflKOslEoRHh5S2Pn6iqhT\n5+5QlJbV6hxkZNzAnTuXkZh4Htevn8aNG+cgiukAMgFkws1NRNOmddGqVSO0bNkSLVu2Qv36Dcq9\nl3VAAHDrVoVv7CO6J4ZvVZWbC9XJ49KI9vBBqA8fgvLMKatTKpo9vWB4tCOMkQ/CENkKxsgHYQ4J\ntd4uFuAJcPORQ4mi9Ptj4dC0DkncFZgC0tJQaFkKUVEsW3hKe3yK8PQEgoLM8PIS8y4otFxwm6en\nCA8PEVqtdai6uJRl02qtvEs7AIBOp8Px48dw9Oghy+/IBw/GYf/+Xy330Gq1aNq0uWWHrpYtW6FB\ngwioOIQlJ8LffKsCoxHKM6ehPnIob0R7EKqTJ6yOmRXd3GBs3jJvRCsFremBevcc0bKf7092NpCc\nLFhd7tyRrlNSCv7OzFQhOdlsGZ0aDGULTkGQQlMKTxHe3sWHZuG/vb0L7uPmVjl/j9Tr9Thx4hiO\nHj2MI0eky9mzp2Eq9B9JNzc3NGnSLG+HrlZo0SISDRs2KhLI/Dw7BvtZwh2uqhKzGcoL56E6fNAy\nolUdPwohK8vSRHRxgbFps7wRrRS2poiGlWa2HWeVk1NykOYvF/67tJtu3dwALy9zMSPNoiHq7Y1C\nISsWeyrKqiorKwsnTx63jI6PHDmMM2dOwVjoxOIajQZNmzbL28taCuSOHR9Gamp2CY9M9sDvDQnD\n11mJIhRXLhca0R6C6shhKDILXreoVMLUqIlls7ExshWMjZtK2/7soKr2c04OLCPPwkFaeDR6d5Dq\ndKULUq1WRI0a0sXXV4SfX/F/+/lJt4WEVM1+dqTs7GycOnXCKpBPnz4Jg8FgaaNUKlG7dh3UqRNi\nuYSEhFqua9euA1dX2xMUUOlV1e+NsuLezs5AFKG4cd0SsurD0rUiJaWgiSDA1CACuS1bFWw+btZC\nGjZVczodkJQkIClJgdu3bQdp4dFpZmbpglSjkQLygQfMRYLz7kt+kPLtkIdGo0GrVq3RqlVry205\nOTk4ffqkZXP1hQtncOnSZfz11x8obtwRGBiUF8YhCAkJsyzXqSOFtIeHh6NeElVhHPnKRL
h1C+rD\nB6x2iFLcumnVxhReN29E21oa0bZoCdGj+P9JVQS5+zkzsyBUExMFJCUJSExU5N0mWNZlZNw7TF1d\nC8KzpCAt3EZrewY2u5O7n6uL/H7Ozc3FtWtXcfVqAq5eTUBCQrxlOT4+HtevX7XahF2Yr6/vXaEs\nBbMU1qHw8fGtNGf0kgs/zxKOfOWWlQX1/n+gOrgf6vxDfK5dtWpiql0HOY/3LxjRtoyssmeDEkUp\nVPNDND9Uk5IKQjV/3b029fr7mxESYkZgoIigIBGBgWYEBBQfpNX8O5HyuLi4oG7dB1C37gM215tM\nJiQlJSIhIQFXr8YjISHesnz1agLOnTuDo0cP27yvu7tHoVCWRs/5f4eEhCIgoOZ9n92LnB/DtyIY\njVAdPgiXvXug3rsH6n//hpA3PRsgnd84p0cvy2+0hpYPQqxZU8aC7UMUgfR0WI1MExMVuHlTsBq1\n3rxZ8o5IgiCFZt26+aEqXdesWRCwQUEiAgJEe/20TWRFqVQiOLg2goNro23bR4qsF0URd+7cQULC\nlbyRc0Ewx8dL16dPn7L52K6urggOrm0VyvnBHBISilq1gnnYVDXAd9geRBHKUyfhsjdOCts//7Da\nKcrYtDlyO3aG4eFHYGz1IMzBtZ1qCCaKQGoqrDb9Wo9SC/7Ozi7+dSkUIvz9RdSrZ7aEaGCgaBWw\ngYFSqPLEWVSZCYIAf39/+Pv7W/3GXFh6elpeKCcgIeGKZVkaSSfgt99227yfUqlErVrBVjuF1ahR\nA76+NQpd+6FGjRrw8vLmKNpJ8TffclJcvpQ3so2Dy++/QXH7tmWdse4DMHTsgtxOnWFo3xGiv79s\ndZaG0Qhcvy4gPl6BhAQBV64oLMtJSSrcuCGWOOOKQiGNSvM3/dasab0ZWLqWgpf/obdN7s9zdVGZ\n+lmv1+Patat3/d58xbKcmHgDZrO5xMdQKBTw9fWFr68Uyn5+fpbl/KDOX65RI3+dL1wqeJNRZepn\nOfE3XzsQkpLg8ru0Gdnl99+gjL9iWWcKDEL2U08jt1MXGDp0grlOiIyVFmU2SzstXbkiBWp+sMbH\nSyF77Zpg8wT1CoWIWolNSOEAAAkTSURBVLWAJk3Md41SrUet/v6iXefAJKoOtFotGjSIQIMGETbX\nGwwGXL9+DdevX0NycjJSUpILXd+x+jslJRmXLl20OvFISTw8PG2MpouGduEwd3d3r/Y7ktkTw7cY\nQloq1H/+IY1s9+6B6sxpyzqztw9yHu8vbUru1AWm+g1k3YwsisDt24JVoMbHFyxfvSogN9d2fUFB\nZjz4oBmhoWaEhZkREiIiNFT6OzhYRHCwJ27d0jv4FRGRWq1GWFg4wsLCS9XebDYjPT3NKpALL9+5\nU/T2s2dPI6vQCXpK4uLiYmMUbTu869ULQW6uAHd3d2i17uU+F3dVxvDNl5UF9T/7LJuSVUcOQ8jb\n5CO6uSG3y2PI7dgFhk6dpWNrHfxhSk0FEhIUVqPXwiPY4nZg8vc3o2lTsyVQ88M1LMyM2rWlWV2I\nyPkpFAr4+PjCx8cXQL1S30+v1xcJ6sIj7Ltvv379Ok6dOlmm2jQaDdzd3eHu7gGtVpsXyh5511q4\nuxddzg/u/PvZauvMv3dX3/A1GqE6dMB6j+S8cyGLKhWMDz1sGdkaHnxImp26AmVmSuEaHy9YQjZ/\nOT5egfR02+Hq5SWdACI/WMPCCpZDQszg+QCIqCRarRZarRa1a9cp9X2MRiNSUlKKDe2srAwkJ6dC\np9PlXTKh1+uh0+mQlJQInU6H3ELnnr+/2u8O6sIhbyvIrf8TkL/s6+sLb2+f+66ptKpP+JrN1nsk\n//WnZY9kURBgbNYChg6dYOjUGblt28PeqZWTgyKbhfODNT5ewJ07tv8Hp9VKI9VHHpHCVBrBFmwa\n9va2a5lERPekUqkQEBCAgIAAm+tLs8OVwWCAXq+zBH
TBcmbe33rLsvV66zDPb5OamorMzIxS/+59\nN4VCga+++haPPda9XPcvq6obvqJYsEfy73uK7pFcrz5yBg+R9kh+tCPEGn52eVq9Hjh/XoEzZxQ4\nezb/WonLlwWYzUVHry4uIkJCRDRvbiwSrKGh0vGu3MeBiKoatVoNb28fu442RVFEbm6uzXC2DvOi\n6wEgIqKh3Wq5lyoVvoqkRGlUm79HckK8ZZ2pVjCyhzyD3A6dYOjYGeYybGKxJTMTOHs2P2CVlqBN\nSBCKzKNao4YZbdqYUK+eFKjSCFbaRFyzplitZqMhIqoogiDA1dUVrq6uqGGnAVVFcerwFdJSof7j\nd2lT8u+/We+R7OuLnH4DpLDt1AWmevXLtUdyaipw5owS584VjGbPnlXg2rWiiVmzphkdOpgQEWFG\nRIQZDRtK1/7+FX4oNRERORHnC19RhNvC+cCOWPgdOFCwR7JWi9zHukt7JHfsJO2RXIYh5e3bQqHN\nxAWbjG/eLPoYwcFmdOlitISrdDHB19dur5KIiKow5wtfnQ7un3wIGI0wPPwIDB07S5cHH7rnHLai\nCNy8Kdz1e6x0sbXDU2ioGd27G/NGsdKItkEDM7y8KurFERFRdeB84evhgTv7j8M/LBBpetunXhNF\n6XSJ1qNY6XfZtDTrTc+CICI8XESbNgarzcX165vh7u6IF0RERNWN84UvADEgAHB3hzkzAwkJgtVe\nxfnLd09Fp1RKx8N26GC22lxcr56Zk58TEZFDOV34iiIwfbor/v0XOHXKA1lZ1iGrVouoX99cZKen\nBx4wc/o5IiKqFJwufPV6YMMGNbKzYQnZ/IBt2NCE8HDOnENERJWb08WUuztw4kQmAgM9kZzME/4T\nEZHzccrTO6jVDp/XgIiIyG6cMnyJiIicGcOXiIjIwRi+REREDsbwJSIicjCGLxERkYMxfImIiByM\n4UtERORgDF8iIiIHY/gSERE5GMOXiIjIwRi+REREDiaIoijKXQQREVF1wpEvERGRgzF8iYiIHIzh\nS0RE5GAMXyIiIgdj+BIRETkYw5eIiMjBnC5833//fTz99NMYOnQojh49Knc5VdqcOXPw9NNPY/Dg\nwfj555/lLqdKy87ORvfu3bFp0ya5S6mytmzZgieeeAJPPvkk4uLi5C6nStLpdBg/fjyioqIwdOhQ\n7N27V+6SKi2V3AWUxT///IMrV65gw4YNuHDhAqZMmYINGzbIXVaVtG/fPpw7dw4bNmxASkoKBg0a\nhJ49e8pdVpW1dOlSeHv/f3v398r6H8Bx/LkzubBxzDJaIblRSigXWHJBLlz7kRa3cqVc0FKUq7lS\nKAp/gLZwI0pZuZgr5UJRXGExy8evxgU6d6fOt9x8a3vbp9fjbrt61i5ee38+n7bfpjNsy7IslpaW\niEajpNNpFhYW6OjoMJ1lO5ubm1RXVzM+Ps7d3R3Dw8Ps7u6azvqRcmp84/E4nZ2dANTU1PD09MTr\n6ytut9twmf00NzdTX18PQFFREW9vb3x+fuJ0Og2X2c/l5SUXFxcagwyKx+O0tLTgdrtxu93Mzs6a\nTrIlj8fD+fk5AM/Pz3g8HsNFP1dOXXZOpVL/fJglJSXc398bLLIvp9NJQUEBAJFIhPb2dg1vhoTD\nYSYnJ01n2Nr19TXv7++MjIwwODhIPB43nWRLPT09JBIJurq6CAaDTExMmE76sXLq5Ptf+mXMzNvf\n3ycSibC+vm46xZa2trZoaGigoqLCdIrtPT4+sri4SCKRYGhoiIODAxwOh+ksW9ne3sbv97O2tsbZ\n2RmhUEjPMXwjp8bX5/ORSqX+vk4mk5SWlhossrfDw0OWl5dZXV2lsLDQdI4txWIxrq6uiMVi3N7e\nkp+fT3l5Oa2trabTbMXr9dLY2EheXh6VlZW4XC4eHh7wer2m02zl+PiYQCAAQG1tLclkUrervpFT\nl53b2trY29sD4P
T0FJ/Pp/u9GfLy8sLc3BwrKysUFxebzrGt+fl5otEoGxsb9Pb2Mjo6quHNgEAg\nwNHREV9fX1iWRTqd1v3IDKiqquLk5ASAm5sbXC6XhvcbOXXybWpqoq6ujoGBARwOB9PT06aTbGtn\nZwfLshgbG/v7Xjgcxu/3G6wS+X/Kysro7u6mr68PgKmpKX79yqmzR07o7+8nFAoRDAb5+PhgZmbG\ndNKPpb8UFBERyTJ99RMREckyja+IiEiWaXxFRESyTOMrIiKSZRpfERGRLNP4ioiIZJnGV0REJMs0\nviIiIln2BzQKNGAGnBgwAAAAAElFTkSuQmCC\n", "text/plain": [ - "\u003cmatplotlib.figure.Figure at 0xc1dc310\u003e" + "\u003cmatplotlib.figure.Figure at 0x7f7a18df6b50\u003e" ] }, "metadata": { @@ -668,13 +549,10 @@ " w_at_step = []\n", " b_at_step = []\n", " for step_num in range(num_training_steps):\n", - " loss, gradients_and_variables = value_and_gradients_fn(inputs, labels, wb)\n", - " loss_at_step.append(np.asscalar(loss.numpy()))\n", - " \n", - " optimizer.apply_gradients(gradients_and_variables)\n", + " loss_at_step.append(run_step(inputs, labels))\n", " w, b = wb.variables\n", - " w_at_step.append(np.asscalar(w.read_value().numpy()))\n", - " b_at_step.append(np.asscalar(b.read_value().numpy()))\n", + " w_at_step.append(np.asscalar(w.numpy()))\n", + " b_at_step.append(np.asscalar(b.numpy()))\n", "\n", " print(w_at_step)\n", " t = range(0, num_training_steps)\n", @@ -688,171 +566,12 @@ "\n", "train_model(inputs, labels, wb, optimizer, num_training_steps)" ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "UNurY9VJ-hpH" - }, - "source": [ - "## Other Ways to Compute Gradients\n", - "\n", - "Using our loss function as an example (`loss_fn()`), there are several other ways we could compute gradients:\n", - "\n", - "1. `tfe.implicit_gradients()`\n", - "1. `tfe.gradients_function()`\n", - "1. `tfe.implicit_value_and_gradients()`\n", - "1. 
`tfe.value_and_gradients_function()`\n", - "\n", - "Each of these functions does the following:\n", - "* Wraps a function.\n", - "* Returns a function with the same input signature as the wrapped function.\n", - "\n", - "They differ only in what information they return.\n", - "\n", - "### Gradients-only functions\n", - "\n", - "The following two functions return a function that returns only the variables' gradients:\n", - "\n", - "1. `tfe.gradients_function()`: Returns the partial derivatives of the function `f()` with respect to the parameters of `f()`.\n", - "1. `tfe.implicit_gradients()`: Returns the partial derivatives of the function `f()` with respect to the trainable parameters (`tf.Variable`) used by `f()`.\n", - "\n", - "In our example above, the `tf.layers.Dense` object encapsulates the trainable parameters.\n", - "\n", - "### Value and gradients functions\n", - "\n", - "The following two functions are identical to their counterparts above, except that they also return the value of the wrapped function.\n", - "\n", - "1. `tfe.implicit_value_and_gradients()`\n", - "1. `tfe.value_and_gradients_function()`\n", - "\n", - "### Gradient demos\n", - "\n", - "In the demos below, we show examples for the `implicit_*` functions, since our existing loss function works seamlessly with these versions. 
(The other versions require that your parameters are tensors and tensors only; in our example, we're using a `Dense` layer.)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "height": 85, - "output_extras": [ - { - "item_id": 1 - } - ] - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 100, - "status": "ok", - "timestamp": 1505502831671, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 240 - }, - "id": "aEoCftnfAIH5", - "outputId": "72f1c1dc-a574-463f-f860-c4e5f48fcdaa" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[(\u003ctf.Tensor: id=673, shape=(1, 1), dtype=float32, numpy=array([[-0.26846504]], dtype=float32)\u003e,\n", - " \u003ctf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32\u003e),\n", - " (\u003ctf.Tensor: id=671, shape=(1,), dtype=float32, numpy=array([-0.32890949], dtype=float32)\u003e,\n", - " \u003ctf.Variable 'dense/bias:0' shape=(1,) dtype=float32\u003e)]" - ] - }, - "execution_count": 13, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "# tfe.implicit_gradients() demo\n", - "gradients_fn = tfe.implicit_gradients(loss_fn)\n", - "\n", - "# Returns only gradients and variables:\n", - "gradients_fn(inputs, labels, wb)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "height": 102, - "output_extras": [ - { - "item_id": 1 - } - ] - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 88, - "status": "ok", - "timestamp": 1505502831785, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 240 - }, - "id": "bbgCUdCzAVhH", - "outputId": "152aa9b6-9e42-4b7e-848a-9423c0b1929c" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(\u003ctf.Tensor: id=688, shape=(), dtype=float32, 
numpy=1.0623235\u003e,\n", - " [(\u003ctf.Tensor: id=720, shape=(1, 1), dtype=float32, numpy=array([[-0.26846504]], dtype=float32)\u003e,\n", - " \u003ctf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32\u003e),\n", - " (\u003ctf.Tensor: id=718, shape=(1,), dtype=float32, numpy=array([-0.32890949], dtype=float32)\u003e,\n", - " \u003ctf.Variable 'dense/bias:0' shape=(1,) dtype=float32\u003e)])" - ] - }, - "execution_count": 14, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "# tfe.implicit_value_and_gradients() demo\n", - "value_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)\n", - "\n", - "# Returns the value returned by the function passed in, gradients, and variables:\n", - "value_gradients_fn(inputs, labels, wb)" - ] } ], "metadata": { "colab": { + "collapsed_sections": [], "default_view": {}, - "last_runtime": { - "build_target": "", - "kind": "local" - }, "name": "Eager Execution Tutorial: Working with Gradients", "provenance": [], "version": "0.3.2", diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb index 0088da5c4b583dd13251de5839235de666fe8b78..bfcc7feb075c403d024772e0d715339d58877a51 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_datasets.ipynb @@ -16,7 +16,9 @@ "\n", "We recommend using the `Dataset`s API for building performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops.\n", "\n", - "If you're familiar with TensorFlow graphs, the API for constructing the `Dataset` object remains exactly the same when eager execution is enabled, but the process of iterating over elements of the dataset is slightly different. You will use a Pythonic `Iterator()` class instead of using `make_one_shot_iterator()` and `get_next()`. 
As a result, the discussion on iterators in the [Programmer's Guide](https://www.tensorflow.org/programmers_guide/datasets) is not relevant when eager execution is enabled." + "If you're familiar with TensorFlow graphs, the API for constructing the `Dataset` object remains exactly the same when eager execution is enabled, but the process of iterating over elements of the dataset is slightly simpler.\n", + "You can use Python iteration over the `tf.data.Dataset` object and do not need to explicitly create a `tf.data.Iterator` object.\n", + "As a result, the discussion on iterators in the [Programmer's Guide](https://www.tensorflow.org/programmers_guide/datasets) is not relevant when eager execution is enabled." ] }, { @@ -48,11 +50,8 @@ "# Import TensorFlow.\n", "import tensorflow as tf\n", "\n", - "# Import TensorFlow eager execution support (subject to future changes).\n", - "import tensorflow.contrib.eager as tfe\n", - "\n", "# Enable eager execution\n", - "tfe.enable_eager_execution()" + "tf.enable_eager_execution()" ] }, { @@ -137,32 +136,27 @@ "source": [ "# Step 3: Iterate\n", "\n", - "Use `tfe.Iterator` on the `Dataset` object to get a Python iterator over the contents of the dataset.\n", - "\n", - "If you're familiar with the use of `Dataset`s in TensorFlow graphs, note that this process of iteration is different. Here there are no calls to `Dataset.make_one_shot_iterator()` and no `get_next()` calls." + "When eager execution is enabled, `Dataset` objects support iteration.\n", + "If you're familiar with the use of `Dataset`s in TensorFlow graphs, note that there is no need for calls to `Dataset.make_one_shot_iterator()` or `get_next()`." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 0, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, - "height": 153, - "output_extras": [ - { - "item_id": 1 - } - ] + "base_uri": "https://localhost:8080/", + "height": 153 }, "colab_type": "code", "executionInfo": { - "elapsed": 201, + "elapsed": 388, "status": "ok", - "timestamp": 1505952405928, + "timestamp": 1525154629129, "user": { "displayName": "", "photoUrl": "", @@ -171,7 +165,7 @@ "user_tz": 420 }, "id": "lCUWzso6mbqR", - "outputId": "ec027d30-96c6-4ea4-9ee1-ef74ec1ae29a" + "outputId": "8e4b0298-d27d-4ac7-e26a-ef94af0594ec" }, "outputs": [ { @@ -179,9 +173,9 @@ "output_type": "stream", "text": [ "Elements of ds_tensors:\n", - "tf.Tensor([4 9], shape=(2,), dtype=int32)\n", + "tf.Tensor([1 9], shape=(2,), dtype=int32)\n", "tf.Tensor([16 25], shape=(2,), dtype=int32)\n", - "tf.Tensor([36 1], shape=(2,), dtype=int32)\n", + "tf.Tensor([ 4 36], shape=(2,), dtype=int32)\n", "\n", "Elements in ds_file:\n", "tf.Tensor(['Line 1' 'Line 2'], shape=(2,), dtype=string)\n", @@ -191,22 +185,19 @@ ], "source": [ "print('Elements of ds_tensors:')\n", - "for x in tfe.Iterator(ds_tensors):\n", + "for x in ds_tensors:\n", " print(x)\n", "\n", "print('\\nElements in ds_file:')\n", - "for x in tfe.Iterator(ds_file):\n", + "for x in ds_file:\n", " print(x)" ] } ], "metadata": { "colab": { + "collapsed_sections": [], "default_view": {}, - "last_runtime": { - "build_target": "", - "kind": "local" - }, "name": "Eager Execution Tutorial: Importing Data", "provenance": [], "version": "0.3.2", diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 8517a3bf7b6aebf4ecd2f148d2160cfea1b1b9c0..b8f352d5f5b72ffb8ae81a2bb72974c7fd65bd5a 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ 
-36,9 +36,7 @@ def device_and_data_format(): 'channels_last') -def random_batch(batch_size, device_and_format=None): - _, data_format = device_and_format or device_and_data_format() - +def random_batch(batch_size, data_format): shape = (3, 224, 224) if data_format == 'channels_first' else (224, 224, 3) shape = (batch_size,) + shape @@ -70,7 +68,7 @@ class ResNet50Test(tf.test.TestCase): if defun: model.call = tfe.defun(model.call) with tf.device(device), tfe.execution_mode(execution_mode): - images, _ = random_batch(2) + images, _ = random_batch(2, data_format) output = model(images, training=False) tfe.async_wait() self.assertEqual((2, 1000), output.shape) @@ -91,7 +89,7 @@ class ResNet50Test(tf.test.TestCase): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format, include_top=False) with tf.device(device): - images, _ = random_batch(2) + images, _ = random_batch(2, data_format) output = model(images, training=False) output_shape = ((2, 2048, 1, 1) if data_format == 'channels_first' else (2, 1, 1, 2048)) @@ -101,7 +99,7 @@ class ResNet50Test(tf.test.TestCase): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format, include_top=False, pooling='avg') with tf.device(device): - images, _ = random_batch(2) + images, _ = random_batch(2, data_format) output = model(images, training=False) self.assertEqual((2, 2048), output.shape) @@ -115,7 +113,7 @@ class ResNet50Test(tf.test.TestCase): name='t0').as_default(), tf.contrib.summary.always_record_summaries(): with tf.device(device), tfe.execution_mode(execution_mode): optimizer = tf.train.GradientDescentOptimizer(0.1) - images, labels = random_batch(2) + images, labels = random_batch(2, data_format) train_one_step(model, images, labels, optimizer) self.assertEqual(320, len(model.variables)) tfe.async_wait() @@ -134,7 +132,7 @@ class ResNet50Test(tf.test.TestCase): model = resnet50.ResNet50(data_format) optimizer = tf.train.GradientDescentOptimizer(0.1) with 
tf.device(device): - images, labels = random_batch(2) + images, labels = random_batch(2, data_format) gc.disable() # Warm up. Note that this first run does create significant amounts of # garbage to be collected. The hope is that this is a build-only effect, @@ -202,18 +200,18 @@ class ResNet50Benchmarks(tf.test.Benchmark): # which forces a sync. This is a roundabout way, yes. tf.constant(1.).cpu() - def _benchmark_eager_apply(self, label, defun=False, execution_mode=None, - device_and_format=None): + def _benchmark_eager_apply(self, label, device_and_format, defun=False, + execution_mode=None, compiled=False): with tfe.execution_mode(execution_mode): - device, data_format = device_and_format or device_and_data_format() + device, data_format = device_and_format model = resnet50.ResNet50(data_format) if defun: - model.call = tfe.defun(model.call) + model.call = tfe.defun(model.call, compiled=compiled) batch_size = 64 num_burn = 5 num_iters = 30 with tf.device(device): - images, _ = random_batch(batch_size, device_and_format) + images, _ = random_batch(batch_size, data_format) for _ in xrange(num_burn): model(images, training=False).cpu() if execution_mode: @@ -227,30 +225,34 @@ class ResNet50Benchmarks(tf.test.Benchmark): self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_apply_sync(self): - self._benchmark_eager_apply('eager_apply', defun=False) + self._benchmark_eager_apply('eager_apply', device_and_data_format(), + defun=False) def benchmark_eager_apply_async(self): self._benchmark_eager_apply( - 'eager_apply_async', defun=False, execution_mode=tfe.ASYNC) + 'eager_apply_async', device_and_data_format(), defun=False, + execution_mode=tfe.ASYNC) def benchmark_eager_apply_with_defun(self): - self._benchmark_eager_apply('eager_apply_with_defun', defun=True) + self._benchmark_eager_apply('eager_apply_with_defun', + device_and_data_format(), defun=True) def _benchmark_eager_train(self, label, make_iterator, + device_and_format, 
defun=False, execution_mode=None, - device_and_format=None): + compiled=False): with tfe.execution_mode(execution_mode): - device, data_format = device_and_format or device_and_data_format() + device, data_format = device_and_format for batch_size in self._train_batch_sizes(): - (images, labels) = random_batch(batch_size, device_and_format) + (images, labels) = random_batch(batch_size, data_format) num_burn = 3 num_iters = 10 model = resnet50.ResNet50(data_format) if defun: - model.call = tfe.defun(model.call) + model.call = tfe.defun(model.call, compiled=compiled) optimizer = tf.train.GradientDescentOptimizer(0.1) with tf.device(device): @@ -273,18 +275,21 @@ class ResNet50Benchmarks(tf.test.Benchmark): self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_train_sync(self): - self._benchmark_eager_train('eager_train', MockIterator, defun=False) + self._benchmark_eager_train('eager_train', MockIterator, + device_and_data_format(), defun=False) def benchmark_eager_train_async(self): self._benchmark_eager_train( 'eager_train_async', MockIterator, + device_and_data_format(), defun=False, execution_mode=tfe.ASYNC) def benchmark_eager_train_with_defun(self): self._benchmark_eager_train( - 'eager_train_with_defun', MockIterator, defun=True) + 'eager_train_with_defun', MockIterator, + device_and_data_format(), defun=True) def benchmark_eager_train_datasets(self): @@ -294,7 +299,8 @@ class ResNet50Benchmarks(tf.test.Benchmark): return tfe.Iterator(ds) self._benchmark_eager_train( - 'eager_train_dataset', make_iterator, defun=False) + 'eager_train_dataset', make_iterator, + device_and_data_format(), defun=False) def benchmark_eager_train_datasets_with_defun(self): @@ -304,7 +310,8 @@ class ResNet50Benchmarks(tf.test.Benchmark): return tfe.Iterator(ds) self._benchmark_eager_train( - 'eager_train_dataset_with_defun', make_iterator, defun=True) + 'eager_train_dataset_with_defun', make_iterator, + device_and_data_format(), defun=True) if 
__name__ == '__main__': diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index f825a2a7363fbe144162eca96398920ead0c4e50..1e4746d01ca1a8d13162844bc064c479c7184237 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -34,10 +34,10 @@ import tensorflow.contrib.eager as tfe from tensorflow.contrib.eager.python.examples.spinn import data from third_party.examples.eager.spinn import spinn from tensorflow.contrib.summary import summary_test_util -from tensorflow.core.protobuf import checkpointable_object_graph_pb2 from tensorflow.python.eager import test from tensorflow.python.framework import test_util -from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import checkpointable_utils +from tensorflow.python.training import saver # pylint: enable=g-bad-import-order @@ -421,10 +421,8 @@ class SpinnTest(test_util.TensorFlowTestCase): # 5. Verify that checkpoints exist and contains all the expected variables. 
self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) - object_graph_string = checkpoint_utils.load_variable( - config.logdir, name="_CHECKPOINTABLE_OBJECT_GRAPH") - object_graph = checkpointable_object_graph_pb2.CheckpointableObjectGraph() - object_graph.ParseFromString(object_graph_string) + object_graph = checkpointable_utils.object_metadata( + saver.latest_checkpoint(config.logdir)) ckpt_variable_names = set() for node in object_graph.nodes: for attribute in node.attributes: diff --git a/tensorflow/contrib/eager/python/tfe_test.py b/tensorflow/contrib/eager/python/tfe_test.py index e80ccbb74d8623e977a98cb7fa5eb41f3c9bf250..db50b33af2e4f1cc6575d4b0d416d6d2669b5c35 100644 --- a/tensorflow/contrib/eager/python/tfe_test.py +++ b/tensorflow/contrib/eager/python/tfe_test.py @@ -57,7 +57,7 @@ class TFETest(test_util.TensorFlowTestCase): return math_ops.multiply(x, x) grad = tfe.gradients_function(square) - self.assertEquals([6], [x.numpy() for x in grad(3)]) + self.assertEquals([6], [x.numpy() for x in grad(3.)]) def testGradOfGrad(self): @@ -66,7 +66,7 @@ class TFETest(test_util.TensorFlowTestCase): grad = tfe.gradients_function(square) gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) - self.assertEquals([2], [x.numpy() for x in gradgrad(3)]) + self.assertEquals([2], [x.numpy() for x in gradgrad(3.)]) def testCustomGrad(self): @@ -80,7 +80,7 @@ class TFETest(test_util.TensorFlowTestCase): return y, grad_fn grad = tfe.gradients_function(f) - self.assertEquals([12], [x.numpy() for x in grad(3)]) + self.assertEquals([12], [x.numpy() for x in grad(3.)]) def testGPU(self): if tfe.num_gpus() <= 0: diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index b473de86ee8be92e8111ee5098b2536d4b957a8c..e9a68801efccc1a74450359eb672caaa51ad73e8 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -17,6 +17,7 @@ py_library( ":boosted_trees", ":dnn", ":dnn_linear_combined", + ":export", 
":extenders", ":head", ":linear", @@ -77,6 +78,7 @@ py_test( tags = [ "no_pip", "notsan", + "optonly", # times out http://b/79220679 ], deps = [ ":dnn", @@ -179,6 +181,43 @@ py_test( ], ) +py_library( + name = "export", + srcs = [ + "python/estimator/export.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python/estimator:model_fn", + ], +) + +py_test( + name = "export_test", + size = "medium", + srcs = ["python/estimator/export_test.py"], + srcs_version = "PY2AND3", + tags = ["notsan"], # b/62863147 + deps = [ + ":export", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:metrics", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:session", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python:variables", + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:export_export", + "//tensorflow/python/estimator:export_output", + "//tensorflow/python/estimator:model_fn", + "//tensorflow/python/saved_model:loader", + "//tensorflow/python/saved_model:tag_constants", + ], +) + py_library( name = "head", srcs = [ @@ -450,20 +489,25 @@ py_test( "no_pip", "noasan", # times out "notsan", + "optonly", # times out http://b/79220679 ], deps = [ + ":head", ":rnn", + "//tensorflow/contrib/data", "//tensorflow/core:protos_all_py", "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:lib", "//tensorflow/python:math_ops", "//tensorflow/python:state_ops", "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:variables", "//tensorflow/python/estimator:numpy_io", + "//tensorflow/python/estimator:parsing_utils", "//tensorflow/python/feature_column", "//third_party/py/numpy", "@six_archive//:six", diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index 
be20d1b7770d3f3df21ac9c0f811d924bf4152ee..ec502f86ddb724c403e00bd21da4f7e970849d4e 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.contrib.estimator.python.estimator.boosted_trees import * from tensorflow.contrib.estimator.python.estimator.dnn import * from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import * +from tensorflow.contrib.estimator.python.estimator.export import * from tensorflow.contrib.estimator.python.estimator.extenders import * from tensorflow.contrib.estimator.python.estimator.head import * from tensorflow.contrib.estimator.python.estimator.linear import * @@ -38,6 +39,7 @@ _allowed_symbols = [ 'binary_classification_head', 'clip_gradients_by_norm', 'forward_features', + 'logistic_regression_head', 'multi_class_head', 'multi_head', 'multi_label_head', @@ -54,6 +56,9 @@ _allowed_symbols = [ 'replicate_model_fn', 'TowerOptimizer', 'RNNClassifier', + 'RNNEstimator', + 'export_saved_model_for_mode', + 'export_all_saved_models', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/estimator/python/estimator/export.py b/tensorflow/contrib/estimator/python/estimator/export.py new file mode 100644 index 0000000000000000000000000000000000000000..e7e366a3f26fa60ea7867c128799fe358b027bdf --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/export.py @@ -0,0 +1,216 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Wrapper for methods to export train/eval graphs from Estimator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.estimator import model_fn as model_fn_lib + + +def export_saved_model_for_mode( + estimator, export_dir_base, input_receiver_fn, + assets_extra=None, + as_text=False, + checkpoint_path=None, + strip_default_attrs=False, + mode=model_fn_lib.ModeKeys.PREDICT): + # pylint: disable=line-too-long + """Exports a single train/eval/predict graph as a SavedModel. + + For a detailed guide, see + @{$saved_model#using_savedmodel_with_estimators$Using SavedModel with Estimators}. + + Sample usage: + ```python + classifier = tf.estimator.LinearClassifier( + feature_columns=[age, language]) + classifier.train(input_fn=input_fn, steps=1000) + + feature_spec = { + 'age': tf.placeholder(dtype=tf.int64), + 'language': array_ops.placeholder(dtype=tf.string) + } + label_spec = tf.placeholder(dtype=dtypes.int64) + + train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn( + feature_spec, label_spec) + + export_dir = tf.contrib.estimator.export_saved_model_for_mode( + classifier, + export_dir_base='my_model/', + input_receiver_fn=train_rcvr_fn, + mode=model_fn_lib.ModeKeys.TRAIN) + + # export_dir is a timestamped directory with the SavedModel, which + # can be used for serving, analysis with TFMA, or directly loaded in. 
+ with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + ... + ``` + + This method takes an input_receiver_fn and mode. For the mode passed in, + this method builds a new graph by calling the input_receiver_fn to obtain + feature and label `Tensor`s. Next, this method calls the `Estimator`'s + model_fn in the passed mode to generate the model graph based on + those features and labels, and restores the given checkpoint + (or, lacking that, the most recent checkpoint) into the graph. + Finally, it creates a timestamped export directory below the + export_dir_base, and writes a `SavedModel` into it containing + the `MetaGraphDef` for the given mode and its associated signatures. + + For prediction, the exported `MetaGraphDef` will provide one `SignatureDef` + for each element of the export_outputs dict returned from the model_fn, + named using the same keys. One of these keys is always + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, indicating which + signature will be served when a serving request does not specify one. + For each signature, the outputs are provided by the corresponding + `ExportOutput`s, and the inputs are always the input receivers provided by + the serving_input_receiver_fn. + + For training and evaluation, the train_op is stored in an extra collection, + and loss, metrics, and predictions are included in a SignatureDef for the + mode in question. + + Extra assets may be written into the SavedModel via the assets_extra + argument. This should be a dict, where each key gives a destination path + (including the filename) relative to the assets.extra directory. The + corresponding value gives the full path of the source file to be copied. + For example, the simple case of copying a single file without renaming it + is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. 
+ + Args: + estimator: an instance of tf.estimator.Estimator + export_dir_base: A string containing a directory in which to create + timestamped subdirectories containing exported SavedModels. + input_receiver_fn: a function that takes no argument and + returns the appropriate subclass of `InputReceiver`. + assets_extra: A dict specifying how to populate the assets.extra directory + within the exported SavedModel, or `None` if no extra assets are needed. + as_text: whether to write the SavedModel proto in text format. + checkpoint_path: The checkpoint path to export. If `None` (the default), + the most recent checkpoint found within the model directory is chosen. + strip_default_attrs: Boolean. If `True`, default-valued attributes will be + removed from the NodeDefs. For a detailed guide, see + [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes). + mode: tf.estimator.ModeKeys value indicating which mode will be exported. + + Returns: + The string path to the exported directory. + + Raises: + ValueError: if input_receiver_fn is None, no export_outputs + are provided, or no checkpoint can be found. + """ + # pylint: enable=line-too-long + + # pylint: disable=protected-access + return estimator._export_saved_model_for_mode( + export_dir_base, input_receiver_fn, + assets_extra=assets_extra, + as_text=as_text, + checkpoint_path=checkpoint_path, + strip_default_attrs=strip_default_attrs, + mode=mode) + # pylint: enable=protected-access + + +def export_all_saved_models( + estimator, export_dir_base, input_receiver_fn_map, + assets_extra=None, + as_text=False, + checkpoint_path=None, + strip_default_attrs=False): + # pylint: disable=line-too-long + """Exports requested train/eval/predict graphs as separate SavedModels.
+ + This is a wrapper around export_saved_model_for_mode that accepts + multiple modes simultaneously and creates directories for each under + export_dir_base. See `Estimator.export_saved_model_for_mode` for + further details as to how the export works for each mode. + + Sample usage: + ```python + classifier = tf.estimator.LinearClassifier( + feature_columns=[age, language]) + classifier.train(input_fn=input_fn) + + feature_spec = { + 'age': tf.placeholder(dtype=tf.int64), + 'language': array_ops.placeholder(dtype=tf.string) + } + label_spec = tf.placeholder(dtype=dtypes.int64) + + train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn( + feature_spec, label_spec) + + serve_rcvr_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn( + feature_spec) + + rcvr_fn_map = { + model_fn_lib.ModeKeys.TRAIN: train_rcvr_fn, + model_fn_lib.ModeKeys.PREDICT: serve_rcvr_fn, + } + + export_dirs = tf.contrib.estimator.export_all_saved_models( + classifier, + export_dir_base='my_model/', + input_receiver_fn_map=rcvr_fn_map) + + # export_dirs is a dict of directories with SavedModels, which + # can be used for serving, analysis with TFMA, or directly loaded in. + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], + export_dirs[tf.estimator.ModeKeys.TRAIN]) + ... + ``` + + Args: + estimator: an instance of tf.estimator.Estimator + export_dir_base: A string containing a directory in which to create + timestamped subdirectories containing exported SavedModels. + input_receiver_fn_map: dict of tf.estimator.ModeKeys to input_receiver_fn + mappings, where the input_receiver_fn is a function that takes no + argument and returns the appropriate subclass of `InputReceiver`. + assets_extra: A dict specifying how to populate the assets.extra directory + within the exported SavedModel, or `None` if no extra assets are needed. 
+ as_text: whether to write the SavedModel proto in text format. + checkpoint_path: The checkpoint path to export. If `None` (the default), + the most recent checkpoint found within the model directory is chosen. + strip_default_attrs: Boolean. If `True`, default-valued attributes will be + removed from the NodeDefs. For a detailed guide, see + [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes). + + Returns: + A dict of tf.estimator.ModeKeys value to string path for each exported + directory. + + Raises: + ValueError: if any input_receiver_fn is None, no export_outputs + are provided, or no checkpoint can be found. + """ + # pylint: enable=line-too-long + + # pylint: disable=protected-access + return estimator._export_all_saved_models( + export_dir_base, input_receiver_fn_map, + assets_extra=assets_extra, + as_text=as_text, + checkpoint_path=checkpoint_path, + strip_default_attrs=strip_default_attrs) + # pylint: enable=protected-access diff --git a/tensorflow/contrib/estimator/python/estimator/export_test.py b/tensorflow/contrib/estimator/python/estimator/export_test.py new file mode 100644 index 0000000000000000000000000000000000000000..89d02582e18e39ee35730e7674691ed9638a3e50 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/export_test.py @@ -0,0 +1,391 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for contrib wrapping of export_saved_model_for_mode functionality. + +These are direct copies of the tests included in core, with import locations +changed. These should be removed when the functionality in core is part of the +public API. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tempfile + +from tensorflow.contrib.estimator.python.estimator import export as contrib_export +from tensorflow.python.client import session +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.export import export_output +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics as metrics_lib +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.saved_model import loader +from tensorflow.python.saved_model import tag_constants +from tensorflow.python.training import training +from tensorflow.python.util import compat + + +def _model_fn_for_export_tests(features, labels, mode): + _, _ = features, labels + variables.Variable(1., name='weight') + scores = constant_op.constant([3.]) + classes = constant_op.constant(['wumpus']) + update_global_step = state_ops.assign_add(training.get_global_step(), 1) + with 
ops.control_dependencies([update_global_step]): + train_op = constant_op.constant(2.) + return model_fn_lib.EstimatorSpec( + mode, + predictions=constant_op.constant(10.), + loss=constant_op.constant(1.), + train_op=train_op, + export_outputs={ + 'test': export_output.ClassificationOutput(scores, classes)}) + + +def _x_y_input_fn(): + return ({'x': constant_op.constant([[1], [1]]), + 'y': constant_op.constant([[2], [2]])}, + constant_op.constant([[1], [1]])) + + +def _model_fn_with_x_y(features, labels, mode): + _ = labels + variables.Variable(1., name='weight') + scores = constant_op.constant([3.]) + classes = constant_op.constant(['wumpus']) + if mode == model_fn_lib.ModeKeys.PREDICT: + variables.Variable(36., name='name_collision') + return model_fn_lib.EstimatorSpec( + mode, + predictions=constant_op.constant(10.), + export_outputs={ + 'test': export_output.ClassificationOutput(scores, classes)}) + else: + prefix = 'eval_' if mode == model_fn_lib.ModeKeys.EVAL else '' + + multiplied = math_ops.multiply( + features['x'], features['y'], name='{}multiplied'.format(prefix)) + metrics = {'mean': metrics_lib.mean(features['x'] - features['y'], + name='{}mean'.format(prefix))} + variables.Variable(1., name='later_var') + variables.Variable(3., name='name_collision') + return model_fn_lib.EstimatorSpec( + mode, + predictions=multiplied, + loss=constant_op.constant(1.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + eval_metric_ops=metrics) + + +def _get_serving_input_receiver_fn(): + feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64), + 'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)} + return export.build_parsing_serving_input_receiver_fn(feature_spec) + + +def _get_supervised_input_receiver_fn(): + feature_spec = { + 'x': array_ops.placeholder( + dtype=dtypes.int64, shape=(2, 1), name='feature_x'), + 'y': array_ops.placeholder( + dtype=dtypes.int64, shape=(2, 1), name='feature_y') + } + label_spec = array_ops.placeholder( + 
dtype=dtypes.float32, shape=[1], name='truth') + + return export.build_raw_supervised_input_receiver_fn( + feature_spec, label_spec) + + +class EstimatorExportTest(test.TestCase): + + def test_export_saved_model_train(self): + self._test_export_saved_model_for_mode( + _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.TRAIN) + + def test_export_saved_model_eval(self): + self._test_export_saved_model_for_mode( + _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.EVAL) + + def test_export_saved_model_predict(self): + self._test_export_saved_model_for_mode( + _get_serving_input_receiver_fn(), model_fn_lib.ModeKeys.PREDICT) + + def _test_export_saved_model_for_mode(self, input_receiver_fn, mode): + tmpdir = tempfile.mkdtemp() + est = estimator.Estimator(model_fn=_model_fn_for_export_tests) + est.train(input_fn=_x_y_input_fn, steps=1) + + # Perform the export. + export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + export_dir = contrib_export.export_saved_model_for_mode( + est, export_dir_base, input_receiver_fn, mode=mode) + + # Check that all the files are in the right places. + self.assertTrue(gfile.Exists(export_dir_base)) + self._validate_exported_files(export_dir) + + # Restore, to validate that the export was well-formed. + tag_set = model_fn_lib.EXPORT_TAG_MAP[mode] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, tag_set, export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertFalse('name_collision_1' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. 
+ gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_receiver_map(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + self.assertEqual(len(export_dirs), 1) + # Restore, to validate that the export was well-formed. + export_dir = export_dirs[model_fn_lib.ModeKeys.PREDICT] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('input_example_tensor' in graph_ops) + self.assertTrue('ParseExample/ParseExample' in graph_ops) + self.assertFalse('feature_x' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_train_only(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(), + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + self.assertEqual(len(export_dirs), 1) + # Restore, to validate that the export was well-formed. + export_dir = export_dirs[model_fn_lib.ModeKeys.TRAIN] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('multiplied' in graph_ops) + self.assertTrue('mean/update_op' in graph_ops) + self.assertFalse('eval_multiplied' in graph_ops) + self.assertTrue('feature_x' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. 
+ gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_eval_only(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + self.assertEqual(len(export_dirs), 1) + # Restore, to validate that the export was well-formed. + export_dir = export_dirs[model_fn_lib.ModeKeys.EVAL] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.EVAL], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('eval_multiplied' in graph_ops) + self.assertTrue('eval_mean/value' in graph_ops) + self.assertFalse('multiplied' in graph_ops) + self.assertTrue('feature_x' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_no_serving(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(), + model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + self.assertEqual(len(export_dirs), 2) + # Restore, to validate that the export was well-formed. 
+ export_dir = export_dirs[model_fn_lib.ModeKeys.TRAIN] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('multiplied' in graph_ops) + self.assertFalse('eval_multiplied' in graph_ops) + self.assertTrue('feature_x' in graph_ops) + self.assertTrue('weight' in graph_ops) + export_dir = export_dirs[model_fn_lib.ModeKeys.EVAL] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.EVAL], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('eval_multiplied' in graph_ops) + self.assertFalse('multiplied' in graph_ops) + # TODO(karmel): is this the desired behavior when names are shared? + self.assertTrue('feature_x_1' in graph_ops) + self.assertTrue('feature_y_1' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_three_defs(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(), + model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(), + model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + # Restore, to validate that the export was well-formed. + for mode, tag_set in model_fn_lib.EXPORT_TAG_MAP.items(): + export_dir = export_dirs[mode] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, tag_set, export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('global_step/Assign' in graph_ops) + self.assertTrue('global_step/Initializer/zeros' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. 
+ gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_proto_roundtrip_all_vars(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(), + model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + export_dir = export_dirs[model_fn_lib.ModeKeys.TRAIN] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('later_var' in graph_ops) + self.assertTrue('weight' in graph_ops) + + export_dir = export_dirs[model_fn_lib.ModeKeys.PREDICT] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertFalse('later_var' in graph_ops) + self.assertTrue('weight' in graph_ops) + + # Clean up. 
+ gfile.DeleteRecursively(tmpdir) + + def test_export_all_saved_models_name_collision(self): + input_receiver_fn_map = { + model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(), + model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn() + } + export_dirs, tmpdir = self._test_export_all_saved_models( + input_receiver_fn_map) + + export_dir = export_dirs[model_fn_lib.ModeKeys.TRAIN] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('name_collision' in graph_ops) + self.assertFalse('name_collision_1' in graph_ops) + collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertEqual(3, collection_vars[-1].eval()) + + export_dir = export_dirs[model_fn_lib.ModeKeys.PREDICT] + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + graph_ops = [x.name for x in graph.get_operations()] + self.assertTrue('name_collision' in graph_ops) + self.assertFalse('name_collision_1' in graph_ops) + collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + # This is a non-obvious detail: when we load the estimator spec + # for predict, name_collision gets set to 36. However, we then restore + # from checkpoint, which should overwrite that var and make it the 3 + # from training. In practice, this would not be a good way to write + # a model_fn, but leaving this check in for now to ensure consistency + # with what would happen given our current order of spec, then + # checkpoint. + self.assertEqual(3, collection_vars[-1].eval()) + + # Clean up. 
+ gfile.DeleteRecursively(tmpdir) + + def _test_export_all_saved_models(self, input_receiver_fn_map): + tmpdir = tempfile.mkdtemp() + est = estimator.Estimator(model_fn=_model_fn_with_x_y) + est.train(input_fn=_x_y_input_fn, steps=1) + + # Perform the export. + export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + export_dirs = contrib_export.export_all_saved_models( + est, export_dir_base, input_receiver_fn_map) + + # Check that all the files are in the right places. + self.assertTrue(gfile.Exists(export_dir_base)) + + for _, export_dir in export_dirs.items(): + self._validate_exported_files(export_dir) + + return export_dirs, tmpdir + + def _validate_exported_files(self, export_dir): + self.assertTrue(gfile.Exists(export_dir)) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('saved_model.pb')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.index')))) + self.assertTrue(gfile.Exists(os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes('variables/variables.data-00000-of-00001')))) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index 3dcf0374c8a12b5907fbaf20d1ad72211a45ab5c..109fdd3883427ab93fd289b9621141f5281bd7d0 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -205,8 +205,9 @@ def regression_head(weight_column=None, shape `[D0, D1, ... DN, label_dimension]`. Also supports custom `inverse_link_fn`, also known as 'mean function'. - `inverse_link_fn` takes `logits` as argument and returns predicted values. 
- This function is the inverse of the link function defined in + `inverse_link_fn` is only used in `PREDICT` mode. It takes `logits` as + argument and returns predicted values. This function is the inverse of the + link function defined in https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function Namely, for poisson regression, set `inverse_link_fn=tf.exp`. @@ -234,7 +235,7 @@ def regression_head(weight_column=None, Raises: ValueError: If `label_dimension` or `loss_reduction` is invalid. """ - return head_lib._regression_head_with_mean_squared_error_loss( # pylint:disable=protected-access + return head_lib._regression_head( # pylint:disable=protected-access weight_column=weight_column, label_dimension=label_dimension, loss_reduction=loss_reduction, @@ -296,7 +297,7 @@ def poisson_regression_head( def _poisson_loss(labels, logits): return nn.log_poisson_loss( targets=labels, log_input=logits, compute_full_loss=compute_full_loss) - return head_lib._regression_head_with_mean_squared_error_loss( # pylint:disable=protected-access + return head_lib._regression_head( # pylint:disable=protected-access weight_column=weight_column, label_dimension=label_dimension, loss_reduction=loss_reduction, @@ -305,6 +306,69 @@ def poisson_regression_head( name=name) +def logistic_regression_head( + weight_column=None, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE, + name=None): + """Creates a `_Head` for logistic regression. + + Uses `sigmoid_cross_entropy_with_logits` loss, which is the same as + `binary_classification_head`. The differences compared to + `binary_classification_head` are: + + * Does not support `label_vocabulary`. Instead, labels must be float in the + range [0, 1]. + * Does not calculate some metrics that do not make sense, such as AUC. + * In `PREDICT` mode, only returns logits and predictions + (`=tf.sigmoid(logits)`), whereas `binary_classification_head` also returns + probabilities, classes, and class_ids. 
+ * Export output defaults to `RegressionOutput`, whereas + `binary_classification_head` defaults to `PredictOutput`. + + The head expects `logits` with shape `[D0, D1, ... DN, 1]`. + In many applications, the shape is `[batch_size, 1]`. + + The `labels` shape must match `logits`, namely + `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`. + + This is implemented as a generalized linear model, see + https://en.wikipedia.org/wiki/Generalized_linear_model. + + Args: + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to + reduce training loss over batch and label dimension. Defaults to + `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by + `batch size * label_dimension`. See `tf.losses.Reduction`. + name: name of the head. If provided, summary and metrics keys will be + suffixed by `"/" + name`. Also used as `name_scope` when creating ops. + + Returns: + An instance of `_Head` for logistic regression. + + Raises: + ValueError: If `loss_reduction` is invalid. 
+ """ + def _logistic_loss(labels, logits): + labels = head_lib._assert_range( # pylint:disable=protected-access + labels, n_classes=2, message='Labels must be in range [0, 1]') + return nn.sigmoid_cross_entropy_with_logits( + labels=labels, logits=logits) + return head_lib._regression_head( # pylint:disable=protected-access + weight_column=weight_column, + label_dimension=1, + loss_reduction=loss_reduction, + loss_fn=_logistic_loss, + inverse_link_fn=math_ops.sigmoid, + name=name) + + def multi_label_head(n_classes, weight_column=None, thresholds=None, @@ -496,10 +560,10 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access weights=weights, processed_labels=processed_labels) - def create_estimator_spec( + def _create_tpu_estimator_spec( self, features, mode, logits, labels=None, optimizer=None, train_op_fn=None, regularization_losses=None): - """Returns an `EstimatorSpec`. + """Returns a `model_fn._TPUEstimatorSpec`. Args: features: Input `dict` of `Tensor` or `SparseTensor` objects. @@ -522,7 +586,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to avoid scaling errors. Returns: - `EstimatorSpec`. + `model_fn._TPUEstimatorSpec`. Raises: ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN mode, or if both are set. @@ -542,7 +606,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access classifier_output = head_lib._classification_output( # pylint:disable=protected-access scores=probabilities, n_classes=self._n_classes, label_vocabulary=self._label_vocabulary) - return model_fn.EstimatorSpec( + return model_fn._TPUEstimatorSpec( # pylint:disable=protected-access mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ @@ -565,16 +629,18 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access # Eval.
if mode == model_fn.ModeKeys.EVAL: - return model_fn.EstimatorSpec( + return model_fn._TPUEstimatorSpec( # pylint:disable=protected-access mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=regularized_training_loss, - eval_metric_ops=self._eval_metric_ops( - labels=processed_labels, - probabilities=probabilities, - weights=weights, - unreduced_loss=unreduced_loss, - regularization_loss=regularization_loss)) + eval_metrics=head_lib._create_eval_metrics_tuple( # pylint:disable=protected-access + self._eval_metric_ops, { + 'labels': processed_labels, + 'probabilities': probabilities, + 'weights': weights, + 'unreduced_loss': unreduced_loss, + 'regularization_loss': regularization_loss, + })) # Train. if optimizer is not None: @@ -608,7 +674,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access summary.scalar( head_lib._summary_key(self._name, keys.LOSS_REGULARIZATION), # pylint:disable=protected-access regularization_loss) - return model_fn.EstimatorSpec( + return model_fn._TPUEstimatorSpec( # pylint:disable=protected-access mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 98962ca4277a3e8fbbdb3fb2d26df9acc45168b5..19b86df5565a85168bdbc37076a0af69248a8010 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -1211,5 +1211,124 @@ class PoissonRegressionHead(test.TestCase): self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval()) +class LogisticRegressionHead(test.TestCase): + + def setUp(self): + ops.reset_default_graph() + + def test_train(self): + head = head_lib.logistic_regression_head() + + # Create estimator spec. 
+ logits = np.array([[0], [-1], [1]], dtype=np.float32) + labels = np.array([[.4], [.6], [.8]], dtype=np.float32) + # Following the documentation in + # tf.nn.sigmoid_cross_entropy_with_logits: + # With x = logits, z = labels. + # loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + # loss = [0 - 0 * 0.4 + ln(1 + exp(-0)), + # 0 + 1 * 0.6 + ln(1 + exp(-1)), + # 1 - 1 * 0.8 + ln(1 + exp(-1))] + # = [0.6931, 0.9133, 0.5133] + # training_loss = (0.6931 + 0.9133 + 0.5133) / 3 + expected_loss = 0.7066 + atol = 0.001 + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + with ops.control_dependencies((check_ops.assert_near( + math_ops.to_float(expected_loss), math_ops.to_float(loss), + atol=atol, name='assert_loss'),)): + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run([spec.loss, spec.train_op]) + self.assertAlmostEqual(expected_loss, loss, delta=atol) + self.assertEqual(expected_train_result, train_result) + + def test_train_labels_too_large(self): + head = head_lib.logistic_regression_head() + + # Create estimator spec. 
+ logits = np.array([[0], [-1], [1]], dtype=np.float32) + labels = np.array([[.4], [1.2], [.8]], dtype=np.float32) + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[1.2\]\[0.8\]\]'): + _ = sess.run(spec.loss) + + def test_train_labels_negative(self): + head = head_lib.logistic_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + labels = np.array([[.4], [-0.2], [.8]], dtype=np.float32) + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[-0.2\]\[0.8\]\]' + ): + _ = sess.run(spec.loss) + + def test_predict(self): + head = head_lib.logistic_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + expected_predictions = 1. / (1. + np.exp(-logits)) + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + # Assert spec contains expected tensors. 
+ keys = prediction_keys.PredictionKeys + self.assertItemsEqual( + (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys()) + self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype) + self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype) + + # Assert predictions. + with self.test_session(): + _initialize_variables(self, spec.scaffold) + self.assertAllClose( + expected_predictions, spec.predictions[keys.PREDICTIONS].eval()) + self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py index b475c12f5af3aedc766a0880a98c5c1e29bddbb7..7f385fd76e88aba46f45d16198d707bf1d1e0d8a 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn.py @@ -328,6 +328,19 @@ def _rnn_model_fn(features, logits=logits) +def _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type): + """Assert arguments are valid and return rnn_cell_fn.""" + if rnn_cell_fn and (num_units or cell_type != USE_DEFAULT): + raise ValueError( + 'num_units and cell_type must not be specified when using rnn_cell_fn' + ) + if not rnn_cell_fn: + if cell_type == USE_DEFAULT: + cell_type = 'basic_rnn' + rnn_cell_fn = _make_rnn_cell_fn(num_units, cell_type) + return rnn_cell_fn + + class RNNClassifier(estimator.Estimator): """A classifier for TensorFlow RNN models. @@ -341,8 +354,8 @@ class RNNClassifier(estimator.Estimator): token_emb = embedding_column(categorical_column=token_sequence, ...) 
estimator = RNNClassifier( - num_units=[32, 16], cell_type='lstm', - sequence_feature_columns=[token_emb]) + sequence_feature_columns=[token_emb], + num_units=[32, 16], cell_type='lstm') # Input builders def input_fn_train: # returns x, y @@ -438,8 +451,8 @@ class RNNClassifier(estimator.Estimator): encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there will be errors if vocabulary is not provided and labels are string. - optimizer: An instance of `tf.Optimizer` used to train the model. Defaults - to Adagrad optimizer. + optimizer: An instance of `tf.Optimizer` or string specifying optimizer + type. Defaults to Adagrad optimizer. input_layer_partitioner: Optional. Partitioner for input layer. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. config: `RunConfig` object to configure the runtime settings. @@ -448,14 +461,7 @@ class RNNClassifier(estimator.Estimator): ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not compatible. """ - if rnn_cell_fn and (num_units or cell_type != USE_DEFAULT): - raise ValueError( - 'num_units and cell_type must not be specified when using rnn_cell_fn' - ) - if not rnn_cell_fn: - if cell_type == USE_DEFAULT: - cell_type = 'basic_rnn' - rnn_cell_fn = _make_rnn_cell_fn(num_units, cell_type) + rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type) if n_classes == 2: head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access @@ -479,3 +485,137 @@ class RNNClassifier(estimator.Estimator): config=config) super(RNNClassifier, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) + + +class RNNEstimator(estimator.Estimator): + """An Estimator for TensorFlow RNN models with user-specified head. + + Example: + + ```python + token_sequence = sequence_categorical_column_with_hash_bucket(...) + token_emb = embedding_column(categorical_column=token_sequence, ...) 
+ + estimator = RNNEstimator( + head=tf.contrib.estimator.regression_head(), + sequence_feature_columns=[token_emb], + num_units=[32, 16], cell_type='lstm') + + # Or with custom RNN cell: + def rnn_cell_fn(mode): + cells = [ tf.contrib.rnn.LSTMCell(size) for size in [32, 16] ] + if mode == tf.estimator.ModeKeys.TRAIN: + cells = [ tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=0.5) + for cell in cells ] + return tf.contrib.rnn.MultiRNNCell(cells) + + estimator = RNNEstimator( + head=tf.contrib.estimator.regression_head(), + sequence_feature_columns=[token_emb], + rnn_cell_fn=rnn_cell_fn) + + # Input builders + def input_fn_train: # returns x, y + pass + estimator.train(input_fn=input_fn_train, steps=100) + + def input_fn_eval: # returns x, y + pass + metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) + def input_fn_predict: # returns x, None + pass + predictions = estimator.predict(input_fn=input_fn_predict) + ``` + + Input of `train` and `evaluate` should have following features, + otherwise there will be a `KeyError`: + + * if the head's `weight_column` is not `None`, a feature with + `key=weight_column` whose value is a `Tensor`. + * for each `column` in `sequence_feature_columns`: + - a feature with `key=column.name` whose `value` is a `SparseTensor`. + * for each `column` in `context_feature_columns`: + - if `column` is a `_CategoricalColumn`, a feature with `key=column.name` + whose `value` is a `SparseTensor`. + - if `column` is a `_WeightedCategoricalColumn`, two features: the first + with `key` the id column name, the second with `key` the weight column + name. Both features' `value` must be a `SparseTensor`. + - if `column` is a `_DenseColumn`, a feature with `key=column.name` + whose `value` is a `Tensor`. + + Loss and predicted output are determined by the specified head. + + @compatibility(eager) + Estimators are not compatible with eager execution. 
+ @end_compatibility + """ + + def __init__(self, + head, + sequence_feature_columns, + context_feature_columns=None, + num_units=None, + cell_type=USE_DEFAULT, + rnn_cell_fn=None, + model_dir=None, + optimizer='Adagrad', + input_layer_partitioner=None, + config=None): + """Initializes an `RNNEstimator` instance. + + Args: + head: A `_Head` instance constructed with a method such as + `tf.contrib.estimator.multi_label_head`. This specifies the model's + output and loss function to be optimized. + sequence_feature_columns: An iterable containing the `FeatureColumn`s + that represent sequential input. All items in the set should either be + sequence columns (e.g. `sequence_numeric_column`) or constructed from + one (e.g. `embedding_column` with `sequence_categorical_column_*` as + input). + context_feature_columns: An iterable containing the `FeatureColumn`s + for contextual input. The data represented by these columns will be + replicated and given to the RNN at each timestep. These columns must be + instances of classes derived from `_DenseColumn` such as + `numeric_column`, not the sequential variants. + num_units: Iterable of integer number of hidden units per RNN layer. If + set, `cell_type` must also be specified and `rnn_cell_fn` must be + `None`. + cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying + the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and + `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn` + must be `None`. + rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and + returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to + construct the RNN. If set, `num_units` and `cell_type` cannot be set. + This is for advanced users who need additional customization beyond + `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is + needed for stacked RNNs. + model_dir: Directory to save model parameters, graph and etc.
This can + also be used to load checkpoints from the directory into an estimator to + continue training a previously saved model. + optimizer: An instance of `tf.Optimizer` or string specifying optimizer + type. Defaults to Adagrad optimizer. + input_layer_partitioner: Optional. Partitioner for input layer. Defaults + to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. + config: `RunConfig` object to configure the runtime settings. + + Raises: + ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not + compatible. + """ + rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type) + + def _model_fn(features, labels, mode, config): + return _rnn_model_fn( + features=features, + labels=labels, + mode=mode, + head=head, + rnn_cell_fn=rnn_cell_fn, + sequence_feature_columns=tuple(sequence_feature_columns or []), + context_feature_columns=tuple(context_feature_columns or []), + optimizer=optimizer, + input_layer_partitioner=input_layer_partitioner, + config=config) + super(RNNEstimator, self).__init__( + model_fn=_model_fn, model_dir=model_dir, config=config) diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py index 393f94f5c7de02c56d93993bbeb8aaec4ea8234c..959b40371aa5fa83a40af999cffade18e5b502e5 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py @@ -25,12 +25,15 @@ import tempfile import numpy as np import six +from tensorflow.contrib.data.python.ops import readers +from tensorflow.contrib.estimator.python.estimator import head as head_lib from tensorflow.contrib.estimator.python.estimator import rnn from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 from tensorflow.python.estimator import model_fn from
tensorflow.python.estimator.canned import metric_keys +from tensorflow.python.estimator.canned import parsing_utils from tensorflow.python.estimator.canned import prediction_keys from tensorflow.python.estimator.export import export from tensorflow.python.estimator.inputs import numpy_io @@ -38,9 +41,9 @@ from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.lib.io import python_io from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import rnn_cell from tensorflow.python.ops import state_ops @@ -50,7 +53,6 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache from tensorflow.python.training import checkpoint_utils -from tensorflow.python.training import input as input_lib from tensorflow.python.training import monitored_session from tensorflow.python.training import optimizer from tensorflow.python.training import training_util @@ -984,7 +986,10 @@ class RNNClassifierPredictionTest(test.TestCase): predictions[prediction_keys.PredictionKeys.CLASSES]) -class RNNClassifierIntegrationTest(test.TestCase): +class BaseRNNClassificationIntegrationTest(object): + + def __init__(self, _create_estimator_fn): + self._create_estimator_fn = _create_estimator_fn def setUp(self): self._model_dir = tempfile.mkdtemp() @@ -994,20 +999,11 @@ class RNNClassifierIntegrationTest(test.TestCase): writer_cache.FileWriterCache.clear() shutil.rmtree(self._model_dir) - def _test_complete_flow( - self, train_input_fn, eval_input_fn, predict_input_fn, n_classes, - batch_size): - col = seq_fc.sequence_categorical_column_with_hash_bucket( - 'tokens', 
hash_bucket_size=10) - embed = fc.embedding_column(col, dimension=2) - feature_columns = [embed] - + def _test_complete_flow(self, feature_columns, train_input_fn, eval_input_fn, + predict_input_fn, n_classes, batch_size): cell_units = [4, 2] - est = rnn.RNNClassifier( - num_units=cell_units, - sequence_feature_columns=feature_columns, - n_classes=n_classes, - model_dir=self._model_dir) + est = self._create_estimator_fn(feature_columns, n_classes, cell_units, + self._model_dir) # TRAIN num_steps = 10 @@ -1026,10 +1022,10 @@ class RNNClassifierIntegrationTest(test.TestCase): self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) # EXPORT - feature_spec = { - 'tokens': parsing_ops.VarLenFeature(dtypes.string), - 'label': parsing_ops.FixedLenFeature([1], dtypes.int64), - } + feature_spec = parsing_utils.classifier_parse_example_spec( + feature_columns, + label_key='label', + label_dtype=dtypes.int64) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = est.export_savedmodel(tempfile.mkdtemp(), @@ -1069,7 +1065,13 @@ class RNNClassifierIntegrationTest(test.TestCase): batch_size=batch_size, shuffle=False) + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + feature_columns = [embed] + self._test_complete_flow( + feature_columns=feature_columns, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, predict_input_fn=predict_input_fn, @@ -1082,7 +1084,8 @@ class RNNClassifierIntegrationTest(test.TestCase): batch_size = 10 words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'] - serialized_examples = [] + _, examples_file = tempfile.mkstemp() + writer = python_io.TFRecordWriter(examples_file) for _ in range(batch_size): sequence_length = random.randint(1, len(words)) sentence = random.sample(words, sequence_length) @@ -1096,30 +1099,36 @@ class RNNClassifierIntegrationTest(test.TestCase): 
feature_pb2.Feature(int64_list=feature_pb2.Int64List( value=[label])), })) - serialized_examples.append(example.SerializeToString()) + writer.write(example.SerializeToString()) + writer.close() + + col = seq_fc.sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=10) + embed = fc.embedding_column(col, dimension=2) + feature_columns = [embed] + feature_spec = parsing_utils.classifier_parse_example_spec( + feature_columns, + label_key='label', + label_dtype=dtypes.int64) - feature_spec = { - 'tokens': parsing_ops.VarLenFeature(dtypes.string), - 'label': parsing_ops.FixedLenFeature([1], dtypes.int64), - } def _train_input_fn(): - features = parsing_ops.parse_example(serialized_examples, feature_spec) - labels = features.pop('label') - return features, labels + dataset = readers.make_batched_features_dataset( + examples_file, batch_size, feature_spec) + return dataset.map(lambda features: (features, features.pop('label'))) def _eval_input_fn(): - features = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - labels = features.pop('label') - return features, labels + dataset = readers.make_batched_features_dataset( + examples_file, batch_size, feature_spec, num_epochs=1) + return dataset.map(lambda features: (features, features.pop('label'))) def _predict_input_fn(): - features = parsing_ops.parse_example( - input_lib.limit_epochs(serialized_examples, num_epochs=1), - feature_spec) - features.pop('label') - return features, None + dataset = readers.make_batched_features_dataset( + examples_file, batch_size, feature_spec, num_epochs=1) + def features_fn(features): + features.pop('label') + return features + return dataset.map(features_fn) self._test_complete_flow( + feature_columns=feature_columns, train_input_fn=_train_input_fn, eval_input_fn=_eval_input_fn, predict_input_fn=_predict_input_fn, @@ -1127,5 +1136,37 @@ class RNNClassifierIntegrationTest(test.TestCase): batch_size=batch_size) +def 
_rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir): + return rnn.RNNClassifier( + num_units=cell_units, + sequence_feature_columns=feature_columns, + n_classes=n_classes, + model_dir=model_dir) + + +class RNNClassifierIntegrationTest(BaseRNNClassificationIntegrationTest, + test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + BaseRNNClassificationIntegrationTest.__init__(self, _rnn_classifier_fn) + + +def _rnn_estimator_fn(feature_columns, n_classes, cell_units, model_dir): + return rnn.RNNEstimator( + head=head_lib.multi_class_head(n_classes=n_classes), + num_units=cell_units, + sequence_feature_columns=feature_columns, + model_dir=model_dir) + + +class RNNEstimatorIntegrationTest(BaseRNNClassificationIntegrationTest, + test.TestCase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + test.TestCase.__init__(self, methodName) + BaseRNNClassificationIntegrationTest.__init__(self, _rnn_estimator_fn) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 811fa89bc38c61b16710a441b99d9e5dfac67668..5cef4068ed119d5dbccd585c5b4e5e28840d2cc7 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -107,7 +107,7 @@ class WALSModel(object): # the prep_gramian_op for row(column) can be run. worker_init_op = model.worker_init - # To be run once per integration sweep before the row(column) update + # To be run once per iteration sweep before the row(column) update # initialize ops can be run. Note that in the distributed training # situations, this should only be run by the chief trainer. All other # trainers need to block until this is done. 
@@ -436,7 +436,7 @@ class WALSModel(object): gramian: Variable storing the gramian calculated from the factors. Returns: - A op that updates the gramian with the calculated value from the factors. + An op that updates the gramian with the calculated value from the factors. """ partial_gramians = [] for f in factors: diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h index a8d5a0dd83fb504b5e6671c3e82dc7d2dd3e6a9b..bf2aa75545813f7da88ed503798572474c7c2eb8 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h +++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h @@ -53,7 +53,7 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, int32 samples_per_second, int32 channel_count, const std::vector& samples, string* output_data); -// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8 +// Reads an video file using ffmpeg and converts it into a RGB24 in uint8 // [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg. Status ReadVideoFile(const string& filename, std::vector* output_data, uint32* width, uint32* height, uint32* frames); diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index bd764ed57a6da0a4d356235108e998a80ac34362..72835c3ad86e6321eb30324c7dd0751034759ce4 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -202,7 +202,7 @@ class CriticalSection(object): or lazy way that may cause a deadlock. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same - resources as in `*args`, `**kwargs`, and any additionaly captured + resources as in `*args`, `**kwargs`, and any additionally captured inputs in `fn`. 
Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index e3fc6bf0f034051fc33ff5966e2f4ea85aa538db..4092b320042162e4eb4c5f4879c2c3ea5dc14fc9 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -112,6 +112,7 @@ class GANEstimator(estimator.Estimator): generator_optimizer=None, discriminator_optimizer=None, get_hooks_fn=None, + get_eval_metric_ops_fn=None, add_summaries=None, use_loss_summaries=True, config=None): @@ -146,6 +147,9 @@ class GANEstimator(estimator.Estimator): list of hooks. These hooks are run on the generator and discriminator train ops, and can be used to implement the GAN training scheme. Defaults to `train.get_sequential_train_hooks()`. + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. 
@@ -160,7 +164,8 @@ class GANEstimator(estimator.Estimator): else discriminator_optimizer) gan_head = head_lib.gan_head( generator_loss_fn, discriminator_loss_fn, gopt, dopt, - use_loss_summaries, get_hooks_fn=get_hooks_fn) + use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn) return _gan_model_fn( features, labels, mode, generator_fn, discriminator_fn, gan_head, add_summaries) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py index 387a62bd741bd42c03dc1bf70592060c29ccd7a8..955482599b372be3f0d0cbc81451c514958d0eb1 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py @@ -38,6 +38,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -194,6 +195,12 @@ class GANEstimatorIntegrationTest(test.TestCase): lr = learning_rate_decay.exponential_decay(1.0, gstep, 10, 0.9) return training.GradientDescentOptimizer(lr) + def get_metrics(gan_model): + return { + 'mse_custom_metric': metrics_lib.mean_squared_error( + gan_model.real_data, gan_model.generated_data) + } + gopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) dopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) est = estimator.GANEstimator( @@ -203,6 +210,7 @@ class GANEstimatorIntegrationTest(test.TestCase): discriminator_loss_fn=losses.wasserstein_discriminator_loss, generator_optimizer=gopt, discriminator_optimizer=dopt, + get_eval_metric_ops_fn=get_metrics, model_dir=self._model_dir) # TRAIN 
@@ -213,6 +221,9 @@ class GANEstimatorIntegrationTest(test.TestCase): scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) + self.assertEqual(scores['discriminator_loss'] + scores['generator_loss'], + scores['loss']) + self.assertIn('mse_custom_metric', six.iterkeys(scores)) # PREDICT predictions = np.array([x for x in est.predict(predict_input_fn)]) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index a21358c50bbdb4a1a929b0c5bc322cec4c9923b5..ff903a78cc36c1965b7655aa902501b1943637a8 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -25,17 +25,21 @@ from tensorflow.contrib.gan.python import train as tfgan_train from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.canned import head from tensorflow.python.framework import ops +from tensorflow.python.ops import metrics as metrics_lib __all__ = [ 'GANHead', 'gan_head', ] +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=tfgan_train.get_sequential_train_hooks(), - name=None): + get_eval_metric_ops_fn=None, name=None): """Creates a `GANHead`. Args: @@ -47,9 +51,12 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer: Same as `generator_optimizer`, but for the discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. - If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. + If `None`, uses defaults. 
+ get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. @@ -62,6 +69,7 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer=discriminator_optimizer, use_loss_summaries=use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn, name=name) @@ -72,6 +80,7 @@ class GANHead(head._Head): # pylint: disable=protected-access generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=None, + get_eval_metric_ops_fn=None, name=None): """`Head` for GAN training. @@ -85,8 +94,11 @@ class GANHead(head._Head): # pylint: disable=protected-access discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. Defaults to `train.get_sequential_train_hooks()` + get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. Defaults to `train.get_sequential_train_hooks()` + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. 
""" @@ -104,6 +116,8 @@ class GANHead(head._Head): # pylint: disable=protected-access self._generator_optimizer = generator_optimizer self._discriminator_optimizer = discriminator_optimizer self._get_hooks_fn = get_hooks_fn + self._get_eval_metric_ops_fn = get_eval_metric_ops_fn + self._name = name @property def name(self): @@ -173,13 +187,26 @@ class GANHead(head._Head): # pylint: disable=protected-access gan_loss = self.create_loss( features=None, mode=mode, logits=gan_model, labels=None) scalar_loss = gan_loss.generator_loss + gan_loss.discriminator_loss + with ops.name_scope(None, 'metrics', + [gan_loss.generator_loss, + gan_loss.discriminator_loss]): + eval_metric_ops = { + _summary_key(self._name, 'generator_loss'): + metrics_lib.mean(gan_loss.generator_loss), + _summary_key(self._name, 'discriminator_loss'): + metrics_lib.mean(gan_loss.discriminator_loss) + } + if self._get_eval_metric_ops_fn is not None: + custom_eval_metric_ops = self._get_eval_metric_ops_fn(gan_model) + if not isinstance(custom_eval_metric_ops, dict): + raise TypeError('get_eval_metric_ops_fn must return a dict, ' + 'received: {}'.format(custom_eval_metric_ops)) + eval_metric_ops.update(custom_eval_metric_ops) return model_fn_lib.EstimatorSpec( mode=model_fn_lib.ModeKeys.EVAL, predictions=gan_model.generated_data, loss=scalar_loss, - # TODO(joelshor): Add metrics. If head name provided, append it to - # metric keys. 
- eval_metric_ops={}) + eval_metric_ops=eval_metric_ops) elif mode == model_fn_lib.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None.') diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 8168f005cd1105886390a2384a936663c83fa5f5..6587f1fc600b94d27f7c12b44ca2136d0be5a8c5 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -62,9 +62,14 @@ class GANHeadTest(test.TestCase): generator_loss_fn=dummy_loss, discriminator_loss_fn=dummy_loss, generator_optimizer=training.GradientDescentOptimizer(1.0), - discriminator_optimizer=training.GradientDescentOptimizer(1.0)) + discriminator_optimizer=training.GradientDescentOptimizer(1.0), + get_eval_metric_ops_fn=self.get_metrics) self.assertTrue(isinstance(self.gan_head, head.GANHead)) + def get_metrics(self, gan_model): + self.assertTrue(isinstance(gan_model, tfgan_tuples.GANModel)) + return {} + def _test_modes_helper(self, mode): self.gan_head.create_estimator_spec( features=None, diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py index df71187fbd98c8ce1372bb89c83656dd666ce677..a9b8faa7126253126a3bc3c30e831b26b8326996 100644 --- a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py +++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Miscellanous utilities for TFGAN code and examples.""" +"""Miscellaneous utilities for TFGAN code and examples.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index a320a3f232fc1dc8c9ccfd1d0f2a9a40225db5cb..592d37b432ee605d74162e0b8ec6ccdf426c45d1 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -677,7 +677,7 @@ def copy_with_input_replacements(sgv, replacement_ts, def _add_control_flow_ops(ops, control_ios): - """Complete `ops` so that the tranformed graph is valid. + """Complete `ops` so that the transformed graph is valid. Partially copying a graph can lead to a malformed graph. For instance, copying half of a while construct is likely to result in an invalid graph. diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index 8f406ace1d5dcc13a018e56cc98c621a511da29b..f230d93da4a9c01e8dee47aa258d9c28499469f1 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -17,7 +17,7 @@ ### API This module provides functions for image manipulation; currently, chrominance -transformas (including changing saturation and hue) in YIQ space and +transforms (including changing saturation and hue) in YIQ space and projective transforms (including rotation) are supported. 
## Image Transformation `Ops` diff --git a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py index 91929184a2e6f3cccae92cb819501a7c6ef81673..2ff4d41d75fe59fb765a83e1b6a5b3eaad9d9163 100644 --- a/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py +++ b/tensorflow/contrib/kernel_methods/python/mappers/random_fourier_features_test.py @@ -31,7 +31,7 @@ from tensorflow.python.platform import googletest def _inner_product(x, y): - """Inner product between tensors x and y. + r"""Inner product between tensors x and y. The input tensors are assumed to be in ROW representation, that is, the method returns \\(x * y^T\\). @@ -131,10 +131,6 @@ class RandomFourierFeatureMapperTest(TensorFlowTestCase): mapped_dim = 5000 stddev = 5.0 - # TODO(sibyl-vie3Poto): Reduce test's running time before moving to third_party. One - # possible way to speed the test up is to compute both the approximate and - # the exact kernel matrix directly using matrix operations instead of - # computing the values for each pair of points separately. points_shape = [1, input_dim] points = [ random_ops.random_uniform(shape=points_shape, maxval=1.0) diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py index b261f41bf97db188f38bc057d83dc78cc5aafcbf..d6b1a61b716ab7412f6b09ba2cfbc4325f790637 100644 --- a/tensorflow/contrib/kfac/examples/convnet.py +++ b/tensorflow/contrib/kfac/examples/convnet.py @@ -325,7 +325,7 @@ def distributed_grads_only_and_ops_chief_worker( All workers perform gradient computation. Chief worker applies gradient after averaging the gradients obtained from all the workers. All workers block - execution untill the update is applied. Chief worker runs covariance and + execution until the update is applied. Chief worker runs covariance and inverse update ops. 
Covariance and inverse matrices are placed on parameter servers in a round robin manner. For further details on synchronous distributed optimization check `tf.train.SyncReplicasOptimizer`. diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 32c776cb381f1b55e7e8eb979377f7fd0cb4c6f7..3a5c8eb5f9630fbcc121e4c502f771af32a96bcb 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -673,9 +673,6 @@ class KroneckerProductFB(FisherBlock): output factors. """ - def __init__(self, layer_collection): - super(KroneckerProductFB, self).__init__(layer_collection) - def _setup_damping(self, damping, normalization=None): """Makes functions that compute the damping values for both factors.""" def compute_damping(): @@ -1309,6 +1306,8 @@ class InputOutputMultiTowerMultiUse(InputOutputMultiTower): else: raise ValueError("Global config variable TOWER_STRATEGY must be one of " "'concat' or 'separate'.") + else: + inputs = tuple(inputs) # Now we perform the analogous processing for grads_list if isinstance(grads_list[0][0], (list, tuple)): @@ -1351,6 +1350,8 @@ class InputOutputMultiTowerMultiUse(InputOutputMultiTower): else: raise ValueError("Global config variable TOWER_STRATEGY must be one of " "'concat' or 'separate'.") + else: + grads_list = tuple(tuple(grads) for grads in grads_list) if self._num_uses is None: raise ValueError("You must supply a value for the num_uses argument if " diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 45a760c9f1013da828a3bff105c0205b6a24243d..b7f63d8d94a7a427eb57afefeda3939f0c530f8e 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -66,7 +66,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): the local approximation with the Fisher information matrix, and to 
regularize the update direction by making it closer to the gradient. If damping is adapted during training then this value is used for - initializing damping varaible. + initializing damping variable. (Higher damping means the update looks more like a standard gradient update - see Tikhonov regularization.) layer_collection: The layer collection object, which holds the fisher @@ -114,7 +114,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): self._estimation_mode = estimation_mode self._colocate_gradients_with_ops = colocate_gradients_with_ops - # The below paramaters are required only if damping needs to be adapated. + # The below parameters are required only if damping needs to be adapated. # These parameters can be set by calling # set_damping_adaptation_params() explicitly. self._damping_adaptation_decay = 0.95 @@ -195,7 +195,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): min_damping: `float`(Optional), Minimum value the damping parameter can take. Default value 1e-5. damping_adaptation_decay: `float`(Optional), The `damping` parameter is - multipled by the `damping_adaptation_decay` every + multiplied by the `damping_adaptation_decay` every `damping_adaptation_interval` number of iterations. Default value 0.99. damping_adaptation_interval: `int`(Optional), Number of steps in between updating the `damping` parameter. Default value 5. diff --git a/tensorflow/contrib/kfac/python/ops/placement.py b/tensorflow/contrib/kfac/python/ops/placement.py index 8a20ebe19844e62bf112dbafce1f816413ea7878..c4454325aebe131058282ff15c2734bf10d1cc49 100644 --- a/tensorflow/contrib/kfac/python/ops/placement.py +++ b/tensorflow/contrib/kfac/python/ops/placement.py @@ -51,7 +51,7 @@ class RoundRobinPlacementMixin(object): self._inv_devices = inv_devices def make_vars_and_create_op_thunks(self, scope=None): - """Make vars and create op thunks w/ a round-robin device placement strat. 
+ """Make vars and create op thunks w/ a round-robin device placement start. For each factor, all of that factor's cov variables and their associated update ops will be placed on a particular device. A new device is chosen diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index d5b3b279a1b7327602790c0260349cb0c758aa86..7355a403aeef78cc7e76d58adfe114e4729f6595 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -381,7 +381,7 @@ py_test( py_test( name = "rev_block_lib_test", - size = "small", + size = "medium", srcs = ["python/layers/rev_block_lib_test.py"], srcs_version = "PY2AND3", deps = [ diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index bf2514498202e9227c2d74c036c7eecba5ccdf2c..dd2395f8c9748dadbecfe47df5511874d5f848ea 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import init_ops @@ -691,11 +692,12 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase): index += num_val return grouped_vals + @test_util.enable_c_shapes def testEmbeddingLookupSparse(self): vocab_size = 13 batch_size = 10 param_shape = [2, 5] - expected_lookup_result_shape = [None] + param_shape + expected_lookup_result_shape = param_shape sp_ids, sp_weights, ids, weights, vals_per_batch_entry = ( self._RandomIdsAndWeights(batch_size, vocab_size)) @@ -719,7 +721,7 @@ class 
EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase): None if ignore_weights else sp_weights, combiner=combiner) - self.assertEqual(embedding_sum.get_shape().as_list(), + self.assertEqual(embedding_sum.get_shape().as_list()[1:], expected_lookup_result_shape) tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 3b053cd4c66952cf6c494186b16c17f38801bcaf..4a360711f834354ce77b7a9579c05780a72c2661 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -485,6 +485,7 @@ py_test( name = "state_saving_rnn_estimator_test", size = "medium", srcs = ["python/learn/estimators/state_saving_rnn_estimator_test.py"], + shard_count = 4, srcs_version = "PY2AND3", tags = ["noasan"], deps = [ diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py index d3bb0fda5765d88ec064047f523de853d3de6a3f..0a863f0e20c05d3372ffd8f7677cd518390ecc9d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py @@ -863,6 +863,38 @@ class LinearClassifierTest(test.TestCase): scores = classifier.evaluate(input_fn=input_fn, steps=1) self.assertGreater(scores['accuracy'], 0.9) + def testSdcaOptimizerWeightedSparseFeaturesOOVWithNoOOVBuckets(self): + """LinearClassifier with SDCAOptimizer with OOV features (-1 IDs).""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + sparse_tensor.SparseTensor( + values=[2., 3., 1.], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 5]), + 'country': + sparse_tensor.SparseTensor( + # 'GB' is out of the vocabulary. 
+ values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 0], [2, 0]], + dense_shape=[3, 5]) + }, constant_op.constant([[1], [0], [1]]) + + country = feature_column_lib.sparse_column_with_keys( + 'country', keys=['US', 'CA', 'MK', 'IT', 'CN']) + country_weighted_by_price = feature_column_lib.weighted_sparse_column( + country, 'price') + sdca_optimizer = sdca_optimizer_lib.SDCAOptimizer( + example_id_column='example_id') + classifier = linear.LinearClassifier( + feature_columns=[country_weighted_by_price], optimizer=sdca_optimizer) + classifier.fit(input_fn=input_fn, steps=50) + scores = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(scores['accuracy'], 0.9) + def testSdcaOptimizerCrossedFeatures(self): """Tests LinearClassifier with SDCAOptimizer and crossed features.""" diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 3744abd860e7f460133873eb534fd75887182f78..dfc6a393d069fccb0fa93dc265f744e199db0dcf 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -468,10 +468,15 @@ class Experiment(object): on which that evaluation was based. At the beginning of evaluation, the passed `eval_results` will be None so it's expected that the predicate function handles that gracefully. - When `predicate_fn` is not specified, continuous eval will run in an - infinite loop (if `train_steps` is None). or exit once global step - reaches `train_steps`. - + Continuous eval behavior under different conditions: + * When `predicate_fn` is specified: + + if `train_steps` is None, run until `predicate_fn` returns False. + + if `train_steps` is specified, run until either global step + reaches `train_steps` or `predicate_fn` returns False. + * When `predicate_fn` is not specified: + + if `train_steps` is None, run in an infinite loop. + + if `train_steps` is specified, run until global step reaches + `train_steps`. 
export: Whether to export from this step. Default is 'True'. Raises: diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index c7cdb4131215c388412407a008113de13bdd0934..f8106d1e4a7e79f1cd651c40995be480721a8129 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -343,7 +343,8 @@ def get_temp_export_dir(timestamped_export_dir): """ (dirname, basename) = os.path.split(timestamped_export_dir) temp_export_dir = os.path.join( - compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename))) + compat.as_bytes(dirname), + compat.as_bytes('temp-{}'.format(compat.as_text(basename)))) return temp_export_dir diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD index 2e92ad6eb39d8aa8876a34572f50d5b6aff0511a..78b7970069fec2d67f816b39d8fa4c58021cef85 100644 --- a/tensorflow/contrib/linalg/BUILD +++ b/tensorflow/contrib/linalg/BUILD @@ -42,47 +42,3 @@ cuda_py_test( "//tensorflow/python:platform_test", ], ) - -cuda_py_test( - name = "linear_operator_block_diag_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_block_diag_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], - shard_count = 5, - tags = [ - "noasan", - "optonly", - ], -) - -cuda_py_test( - name = "linear_operator_kronecker_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_kronecker_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - 
"//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], - shard_count = 8, - tags = [ - "noasan", - "optonly", - ], -) diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py index 554854da84715ee8c8d00ec7f8e3156642b43d80..a262a099cf8f843a4d228ce5d53664cb85fd046f 100644 --- a/tensorflow/contrib/linalg/__init__.py +++ b/tensorflow/contrib/linalg/__init__.py @@ -39,14 +39,14 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member from tensorflow.contrib.linalg.python.ops.linear_operator_addition import * -from tensorflow.contrib.linalg.python.ops.linear_operator_block_diag import * -from tensorflow.contrib.linalg.python.ops.linear_operator_kronecker import * from tensorflow.python.ops.linalg.linear_operator import * +from tensorflow.python.ops.linalg.linear_operator_block_diag import * from tensorflow.python.ops.linalg.linear_operator_circulant import * from tensorflow.python.ops.linalg.linear_operator_composition import * from tensorflow.python.ops.linalg.linear_operator_diag import * from tensorflow.python.ops.linalg.linear_operator_full_matrix import * from tensorflow.python.ops.linalg.linear_operator_identity import * +from tensorflow.python.ops.linalg.linear_operator_kronecker import * from tensorflow.python.ops.linalg.linear_operator_low_rank_update import * from tensorflow.python.ops.linalg.linear_operator_lower_triangular import * diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 213c2eced5c7f99bad4a8ad41c71837c8186c18b..12039ecc6f357af07e0c2a08e17d46396f3ad386 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ 
b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -198,6 +198,14 @@ class SDCAOptimizer(object): example_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1]) flat_ids = array_ops.reshape(id_tensor.values, [-1]) + # Prune invalid IDs (< 0) from the flat_ids, example_ids, and + # weight_tensor. These can come from looking up an OOV entry in the + # vocabulary (default value being -1). + is_id_valid = math_ops.greater_equal(flat_ids, 0) + flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid) + example_ids = array_ops.boolean_mask(example_ids, is_id_valid) + weight_tensor = array_ops.boolean_mask(weight_tensor, is_id_valid) + projection_length = math_ops.reduce_max(flat_ids) + 1 # project ids based on example ids so that we can dedup ids that # occur multiple times for a single example. diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 1534f97d7600151e78c7fa7e8509d9e871240421..10065e894c48d48b8b7136895c55599c8854e03b 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -92,6 +92,8 @@ cc_library( deps = [":context"], ) +exports_files(["builtin_ops.h"]) + cc_library( name = "string", hdrs = [ diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 65fba52d461461f4594e2222ef6df3849b741f99..e4f86e258afe3df9ba149c82066b6d145f332488 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -1,4 +1,3 @@ - # Find where we're running from, so we can store generated files here. ifeq ($(origin MAKEFILE_DIR), undefined) MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) @@ -69,12 +68,12 @@ LIB_NAME := libtensorflow-lite.a LIB_PATH := $(LIBDIR)$(LIB_NAME) # A small example program that shows how to link against the library. 
-BENCHMARK_PATH := $(BINDIR)benchmark_model +MINIMAL_PATH := $(BINDIR)minimal -BENCHMARK_SRCS := \ -tensorflow/contrib/lite/tools/benchmark_model.cc -BENCHMARK_OBJS := $(addprefix $(OBJDIR), \ -$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS)))) +MINIMAL_SRCS := \ +tensorflow/contrib/lite/examples/minimal/minimal.cc +MINIMAL_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MINIMAL_SRCS)))) # What sources we want to compile, must be kept in sync with the main Bazel # build files. @@ -100,7 +99,7 @@ $(wildcard tensorflow/contrib/lite/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \ -$(BENCHMARK_SRCS) +$(MINIMAL_SRCS) # Filter out all the excluded files. TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # File names of the intermediate files target compilation generates. @@ -119,17 +118,17 @@ $(OBJDIR)%.o: %.c $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ # The target that's compiled if there's no command-line arguments. -all: $(LIB_PATH) $(BENCHMARK_PATH) +all: $(LIB_PATH) $(MINIMAL_PATH) # Gathers together all the objects we've compiled into a single '.a' archive. $(LIB_PATH): $(LIB_OBJS) @mkdir -p $(dir $@) $(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS) -$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH) +$(MINIMAL_PATH): $(MINIMAL_OBJS) $(LIB_PATH) @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) \ - -o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \ + -o $(MINIMAL_PATH) $(MINIMAL_OBJS) \ $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) # Gets rid of all generated files. 
diff --git a/tensorflow/contrib/lite/RELEASE.md b/tensorflow/contrib/lite/RELEASE.md new file mode 100644 index 0000000000000000000000000000000000000000..8fd63d5cee7db38fadf63ab8530bef7a3d99dd0d --- /dev/null +++ b/tensorflow/contrib/lite/RELEASE.md @@ -0,0 +1,8 @@ +# Release 0.1.7 + +* TensorFlow Lite 0.1.7 is based on tag `tflite-v0.1.7` (git commit + fa1db5eb0da85b5baccc2a46d534fdeb3bb473d0). +* To reproduce the iOS library, it's required to cherry pick git commit + f1f1d5172fe5bfeaeb2cf657ffc43ba744187bee to fix a dependency issue. +* The code is based on TensorFlow 1.8.0 release candidate and it's very close + to TensorFlow 1.8.0 release. diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h index 4910c89eaebabb7bd9a4e003b75fa6de4d5af69d..35cf43dd32b484f64e0db7d24a06691bdd0c830a 100644 --- a/tensorflow/contrib/lite/builtin_op_data.h +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -161,6 +161,9 @@ typedef struct { typedef struct { } TfLitePadParams; +typedef struct { +} TfLitePadV2Params; + typedef struct { // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. // For now we will fix the maximum possible number of dimensions. 
diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 21e0e04ef6bc5b1e467ef5e27035e866f21049a0..a038acf2848b21a225cbe9933cc8ae1f09739cee 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -84,6 +84,12 @@ typedef enum { kTfLiteBuiltinArgMax = 56, kTfLiteBuiltinMinimum = 57, kTfLiteBuiltinLess = 58, + kTfLiteBuiltinNeg = 59, + kTfLiteBuiltinPadv2 = 60, + kTfLiteBuiltinGreater = 61, + kTfLiteBuiltinGreaterEqual = 62, + kTfLiteBuiltinLessEqual = 63, + kTfLiteBuiltinSelect = 64, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/examples/android/BUILD b/tensorflow/contrib/lite/examples/android/BUILD index 49280129971e38247c2216d9422bc5de9176e13d..57000072561303e8457f61b1ebe95d382fc01f10 100644 --- a/tensorflow/contrib/lite/examples/android/BUILD +++ b/tensorflow/contrib/lite/examples/android/BUILD @@ -42,7 +42,6 @@ android_binary( custom_package = "org.tensorflow.lite.demo", inline_constants = 1, manifest = "AndroidManifest.xml", - manifest_merger = "android", nocompress_extensions = [ ".tflite", ], diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc new file mode 100644 index 0000000000000000000000000000000000000000..106e3b027055b67092f653c6bcdc4827b56bdbaa --- /dev/null +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -0,0 +1,71 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include <cstdio> + +// This is an example that is minimal to read a model +// from disk and perform inference. There is no data being loaded +// that is up to you to add as a user. +// +// NOTE: Do not add any dependencies to this that cannot be built with +// the minimal makefile. This example must remain trivial to build with +// the minimal build tool. +// +// Usage: minimal <tflite model> + +using namespace tflite; + +#define TFLITE_MINIMAL_CHECK(x) \ + if(!(x)) { \ + fprintf(stderr, "Error at %s:%d\n", __FILE__, __LINE__); \ + exit(1); \ + } + + +int main(int argc, char *argv[]) { + if(argc != 2) { + fprintf(stderr, "Usage: %s <tflite model>\n", argv[0]); + return 1; + } + const char* filename = argv[1]; + + // Load model + std::unique_ptr<tflite::FlatBufferModel> model + = tflite::FlatBufferModel::BuildFromFile(filename); + TFLITE_MINIMAL_CHECK(model != nullptr); + + // Build the interpreter + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model.get(), resolver); + std::unique_ptr<Interpreter> interpreter; + builder(&interpreter); + TFLITE_MINIMAL_CHECK(interpreter != nullptr); + + // Allocate tensor buffers. + TFLITE_MINIMAL_CHECK(interpreter->AllocateTensors() == kTfLiteOk); + + // Fill input buffers + // TODO(user): Insert code to fill input tensors + + // Run inference + TFLITE_MINIMAL_CHECK(interpreter->Invoke() == kTfLiteOk); + + // Read output buffers + // TODO(user): Insert getting data out code. 
+ + return 0; +} diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md index d8134d5a00097b3eef24d5583d7f114c34e3bef2..c1c8ef049f693dae038e5e0ca242b9219329cc50 100644 --- a/tensorflow/contrib/lite/g3doc/models.md +++ b/tensorflow/contrib/lite/g3doc/models.md @@ -1,28 +1,63 @@ # List of Hosted Models -* [NASNet large](https://storage.googleapis.com/download.tensorflow.org/models/tflite/nasnet_large_2018_03_27.zip) -* [NASNet mobile](https://storage.googleapis.com/download.tensorflow.org/models/tflite/nasnet_mobile_2018_03_27.zip) -* [ResNet v2 101](https://storage.googleapis.com/download.tensorflow.org/models/tflite/resnet_v2_101_2018_03_27.zip) -* [ResNet v2 50](https://storage.googleapis.com/download.tensorflow.org/models/tflite/resnet_v2_50_2018_03_27.zip) -* [Inception ResNet v2](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_resnet_v2_2018_03_27.zip) -* [Inception v4](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v4_2018_03_27.zip) -* [Inception v3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) -* [Inception v3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) -* [Mobilenet 0.25 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_128_float_2017_11_08.zip) -* [Mobilenet 0.25 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_160_float_2017_11_08.zip) -* [Mobilenet 0.25 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_192_float_2017_11_08.zip) -* [Mobilenet 0.25 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_224_float_2017_11_08.zip) -* [Mobilenet 0.50 128 
Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_128_float_2017_11_08.zip) -* [Mobilenet 0.50 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_160_float_2017_11_08.zip) -* [Mobilenet 0.50 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_192_float_2017_11_08.zip) -* [Mobilenet 0.50 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.50_224_float_2017_11_08.zip) -* [Mobilenet 0.75 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_128_float_2017_11_08.zip) -* [Mobilenet 0.75 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_160_float_2017_11_08.zip) -* [Mobilenet 0.75 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_192_float_2017_11_08.zip) -* [Mobilenet 0.75 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.75_224_float_2017_11_08.zip) -* [Mobilenet 1.0 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_128_float_2017_11_08.zip) -* [Mobilenet 1.0 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_160_float_2017_11_08.zip) -* [Mobilenet 1.0 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_192_float_2017_11_08.zip) -* [Mobilenet 1.0 224 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_1.0_224_float_2017_11_08.zip) -* [Mobilenet 1.0 224 Quant](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) -* [Smart Reply 1.0 Android ](https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip) +## Image classification (Float Models) 
+ +Model Name | Paper_Model_Files^ | Model_Size | Top-1 Accuracy | Top-5 Accuracy | TF Lite Performance^^ | Tensorflow Performance +------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ---------: | -------------: | -------------: | --------------------: | ---------------------: +DenseNet | [paper](https://arxiv.org/abs/1608.06993), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/densenet_2018_04_27.tgz) | 43.6 Mb | 64.2% | 85.6% | 894 ms | 1262 ms +SqueezeNet | [paper](https://arxiv.org/abs/1602.07360), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz) | 5.0 Mb | 49.0% | 72.9% | 224 ms | 255 ms +NASNet mobile | [paper](https://arxiv.org/abs/1707.07012), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz) | 21.4 Mb | 72.2% | 90.6% | 261 ms | 389 ms +NASNet large | [paper](https://arxiv.org/abs/1707.07012), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_large_2018_04_27.tgz) | 355.3 Mb | 82.1% | 95.8% | 6697 ms | 7940 ms +ResNet_V2_50 | [paper](https://arxiv.org/abs/1603.05027), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/resnet_v2_50_2018_04_27.tgz) | 102.3 Mb | 68.1% | 88.4% | 942 ms | 1008 ms +ResNet_V2_101 | [paper](https://arxiv.org/abs/1603.05027), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/resnet_v2_101_2018_04_27.tgz) | 178.3 Mb | 70.4% | 89.6% | 1880 ms | 1970 ms +Inception_V3 | [paper](http://arxiv.org/abs/1512.00567), 
[tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz) | 95.3 Mb | 76.9% | 93.5% | 1433 ms | 1522 ms +Inception_V4 | [paper](http://arxiv.org/abs/1602.07261), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz) | 170.7 Mb | 79.6% | 94.6% | 2986 ms | 3139 ms +Inception_ResNet_V2 | [paper](https://arxiv.org/abs/1602.07261), [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz) | 121.0 Mb | 76.8% | 93.5% | 2731 ms | 2926 ms +Mobilenet_0.25_128 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_128.tgz) | 1.9 Mb | 41.5% | 66.3% | 6.2 ms | 13.0 ms +Mobilenet_0.25_160 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_160.tgz) | 1.9 Mb | 45.5% | 70.3% | 8.6 ms | 19.5 ms +Mobilenet_0.25_192 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_192.tgz) | 1.9 Mb | 47.7% | 72.3% | 12.1 ms | 27.8 ms +Mobilenet_0.25_224 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_224.tgz) | 1.9 Mb | 49.8% | 74.2% | 16.2 ms | 37.3 ms +Mobilenet_0.50_128 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_128.tgz) | 5.3 Mb | 56.3% | 79.4% | 18.1 ms | 29.9 ms +Mobilenet_0.50_160 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_160.tgz) | 5.3 Mb | 59.1% | 81.9% | 26.8 ms | 45.9 ms +Mobilenet_0.50_192 | 
[paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_192.tgz) | 5.3 Mb | 61.7% | 83.6% | 35.6 ms | 65.3 ms +Mobilenet_0.50_224 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_224.tgz) | 5.3 Mb | 63.3% | 84.9% | 47.6 ms | 164.2 ms +Mobilenet_0.75_128 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_128.tgz) | 10.3 Mb | 62.1% | 83.9% | 34.6 ms | 48.7 ms +Mobilenet_0.75_160 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_160.tgz) | 10.3 Mb | 65.3% | 86.0% | 51.3 ms | 75.2 ms +Mobilenet_0.75_192 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_192.tgz) | 10.3 Mb | 67.2% | 87.3% | 71.7 ms | 107.0 ms +Mobilenet_0.75_224 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_224.tgz) | 10.3 Mb | 68.4% | 88.2% | 95.7 ms | 143.4 ms +Mobilenet_1.0_128 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_128.tgz) | 16.9 Mb | 65.2% | 85.8% | 57.4 ms | 76.8 ms +Mobilenet_1.0_160 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_160.tgz) | 16.9 Mb | 68.0% | 87.7% | 86.0 ms | 117.7 ms +Mobilenet_1.0_192 | [paper](https://arxiv.org/pdf/1704.04861.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_192.tgz) | 16.9 Mb | 70.0% | 89.2% | 118.6 ms | 167.3 ms +Mobilenet_1.0_224 | [paper](https://arxiv.org/pdf/1704.04861.pdf), 
[tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz) | 16.9 Mb | 70.9% | 89.9% | 160.1 ms | 224.3 ms + +^ The model files include both TF Lite FlatBuffer and Tensorflow frozen Graph. + +^^ The performance numbers are generated in the benchmark on Pixel-2 using +single thread large core. + +## Image classification (Quantized Models) + +Model Name | Paper_Model_Files | Model_Size | Top-1 Accuracy | Top-5 Accuracy | TF Lite Performance +------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | ---------: | -------------: | -------------: | ------------------: +Mobilenet_0.25_128_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_128_quant.tgz) | 0.5 Mb | 39.9% | 65.8% | 3.7 ms +Mobilenet_0.25_160_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_160_quant.tgz) | 0.5 Mb | 43.5% | 69.1% | 5.5 ms +Mobilenet_0.25_192_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_192_quant.tgz) | 0.5 Mb | 45.8% | 71.9% | 7.9 ms +Mobilenet_0.25_224_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_224_quant.tgz) | 0.5 Mb | 48.2% | 73.8% | 10.4 ms +Mobilenet_0.50_128_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_128_quant.tgz) | 1.4 Mb | 54.9% | 78.9% | 8.8 ms +Mobilenet_0.50_160_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_160_quant.tgz) | 
1.4 Mb | 57.7% | 81.3% | 13.0 ms +Mobilenet_0.50_192_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_192_quant.tgz) | 1.4 Mb | 60.4% | 83.2% | 18.3 ms +Mobilenet_0.50_224_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_224_quant.tgz) | 1.4 Mb | 62.2% | 84.5% | 24.7 ms +Mobilenet_0.75_128_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_128_quant.tgz) | 2.6 Mb | 59.8% | 82.8% | 16.2 ms +Mobilenet_0.75_160_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_160_quant.tgz) | 2.6 Mb | 63.9% | 85.5% | 24.3 ms +Mobilenet_0.75_192_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_192_quant.tgz) | 2.6 Mb | 66.2% | 87.1% | 33.8 ms +Mobilenet_0.75_224_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.75_224_quant.tgz) | 2.6 Mb | 67.9% | 88.1% | 45.4 ms +Mobilenet_1.0_128_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_128_quant.tgz) | 4.3 Mb | 64.0% | 85.5% | 24.9 ms +Mobilenet_1.0_160_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_160_quant.tgz) | 4.3 Mb | 67.3% | 87.7% | 37.4 ms +Mobilenet_1.0_192_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_192_quant.tgz) | 4.3 Mb | 69.0% | 88.9% | 51.9 ms 
+Mobilenet_1.0_224_quant | [paper](https://arxiv.org/pdf/1712.05877.pdf), [tflite&pb](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz) | 4.3 Mb | 69.7% | 89.5% | 70.2 ms + +## Other models + +Model | TF Lite FlatBuffer +----------------------- | :----------------: +Smart Reply 1.0 Android | [reference](https://research.googleblog.com/2017/11/on-device-conversational-modeling-with.html), [tflite](https://storage.googleapis.com/download.tensorflow.org/models/smartreply_1.0_2017_11_01.zip) diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md index 7a3a231626d0e1c71e474ff4ff16789ebe2901db..ab50789307414255bccd84d4cfcb6ddecc25ba08 100644 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -32,7 +32,7 @@ This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc v Log in to you RPI, install the toolchain. ```bash -sudo apt-get instal build-essential +sudo apt-get install build-essential ``` First, clone this TensorFlow repository. Run this at the root of the repository: diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index aa28f8d050944e3b4ad8be91871388b32f593e2d..f45fcceb2e615222ea9c14bf6da9fd0f7dc8c487 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -281,6 +281,32 @@ Options { } ``` +**GREATER** + +``` +Inputs { + 0: a tensor + 1: a tensor +} +Outputs { + 0: a tensor of type bool, true whenever an element of the first tensor is + greater than the corresponding element of the second tensor. +} +``` + +**GREATER_EQUAL** + +``` +Inputs { + 0: a tensor + 1: a tensor +} +Outputs { + 0: a tensor of type bool, true whenever an element of the first tensor is + greater than or equal to the corresponding element of the second tensor. 
+} +``` + **L2_NORMALIZATION** ``` @@ -325,6 +351,19 @@ Outputs { } ``` +**LESS_EQUAL** + +``` +Inputs { + 0: a tensor + 1: a tensor +} +Outputs { + 0: a tensor of type bool, true whenever an element of the first tensor is less + than or equal to the corresponding element of the second tensor. +} +``` + **LOCAL_RESPONSE_NORMALIZATION** ``` @@ -397,6 +436,17 @@ Options { } ``` +**NEG** + +``` +Inputs { + 0: a tensor +} +Outputs { + 0: elementwise negation of the input tensor +} +``` + **PAD** ``` @@ -589,6 +639,20 @@ Outputs { } ``` +**SELECT** + +``` +Inputs { + 0: tensor + 1: tensor + 2: tensor +} +Outputs { + 0: tensor that contains the elementwise values of 'tensor 1' if the + corresponding value of 'tensor 0' is true or the value of 'tensor 2' if false. +} +``` + And these are TensorFlow Lite operations that are present but not ready for custom models yet: diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 9d8ea55fd1edc0dacc821536cc2b564c59f65b71..ebb0aedc2001a86b7fcff67ef8703b5e4a845818 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -125,7 +125,8 @@ Interpreter::~Interpreter() { for (int i = 0; i < context_.tensors_size; i++) { TfLiteTensor* tensor = &context_.tensors[i]; - if (tensor->buffer_handle != kTfLiteNullBufferHandle) { + if (tensor->buffer_handle != kTfLiteNullBufferHandle && + tensor->delegate->FreeBufferHandle != nullptr) { tensor->delegate->FreeBufferHandle(tensor->delegate, &tensor->buffer_handle); } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 6f3433abcf71b6090b434d47e925775a2e517064..0450e86ae7f84e4aa6c70235eb825ca3b4f7aebc 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -201,7 +201,7 @@ class Interpreter { // Overrides execution plan. This bounds checks indices sent in. 
TfLiteStatus SetExecutionPlan(const std::vector& new_plan); - // Get a tensor data structure. + // Get a mutable tensor data structure. // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this // read/write access to structure TfLiteTensor* tensor(int tensor_index) { @@ -210,9 +210,14 @@ class Interpreter { return &context_.tensors[tensor_index]; } + // Get an immutable tensor data structure. + const TfLiteTensor* tensor(int tensor_index) const { + if (tensor_index >= context_.tensors_size || tensor_index < 0) + return nullptr; + return &context_.tensors[tensor_index]; + } + // Get a pointer to an operation and registration data structure if in bounds. - // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this - // read/write access to structure const std::pair* node_and_registration( int node_index) const { if (node_index >= nodes_and_registration_.size() || node_index < 0) @@ -220,7 +225,8 @@ class Interpreter { return &nodes_and_registration_[node_index]; } - // Perform a checked cast to the appropriate tensor type. + // Perform a checked cast to the appropriate tensor type (mutable pointer + // version). template T* typed_tensor(int tensor_index) { if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) { @@ -231,6 +237,18 @@ class Interpreter { return nullptr; } + // Perform a checked cast to the appropriate tensor type (immutable pointer + // version). + template + const T* typed_tensor(int tensor_index) const { + if (const TfLiteTensor* tensor_ptr = tensor(tensor_index)) { + if (tensor_ptr->type == typeToTfLiteType()) { + return reinterpret_cast(tensor_ptr->data.raw); + } + } + return nullptr; + } + // Return a pointer into the data of a given input tensor. The given index // must be between 0 and inputs().size(). template @@ -238,13 +256,20 @@ class Interpreter { return typed_tensor(inputs_[index]); } - // Return a pointer into the data of a given output tensor. The given index - // must be between 0 and outputs().size(). 
+ // Return a mutable pointer into the data of a given output tensor. The given + // index must be between 0 and outputs().size(). template T* typed_output_tensor(int index) { return typed_tensor(outputs_[index]); } + // Return an immutable pointer into the data of a given output tensor. The + // given index must be between 0 and outputs().size(). + template + const T* typed_output_tensor(int index) const { + return typed_tensor(outputs_[index]); + } + // Change the dimensionality of a given tensor. Note, this is only acceptable // for tensor indices that are inputs. // Returns status of failure or success. @@ -325,9 +350,7 @@ class Interpreter { void SetProfiler(profiling::Profiler* profiler) { profiler_ = profiler; } - profiling::Profiler* GetProfiler(profiling::Profiler* profiler) { - return profiler_; - } + profiling::Profiler* GetProfiler() { return profiler_; } // The default capacity of `tensors_` vector. static constexpr int kTensorsReservedCapacity = 128; diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 1dda55b8edf8f85293c473b51b8a19066bac5f73..1e579226037fa360e4d5dad25077b8966e1126bc 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -46,12 +46,27 @@ android_library( ], ) -java_library( +android_library( name = "ovicbenchmarkerlib", srcs = [ "ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java", "ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", ], + manifest = "AndroidManifest.xml", + visibility = ["//visibility:public"], + deps = [ + ":tensorflowlite", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", + "@org_checkerframework_qual", + ], +) + +java_library( + name = "ovicbenchmarkerlib_java", + srcs = [ + "ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java", + "ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", + ], javacopts = JAVACOPTS, visibility = ["//visibility:public"], deps = [ 
@@ -170,18 +185,14 @@ java_test( size = "medium", srcs = ["ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java"], data = [ - "ovic/src/testdata/float_model.lite", - "ovic/src/testdata/labels.txt", - "ovic/src/testdata/low_res_model.lite", - "ovic/src/testdata/quantized_model.lite", - "ovic/src/testdata/test_image_128.jpg", - "ovic/src/testdata/test_image_224.jpg", + "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt", + "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata", ], javacopts = JAVACOPTS, test_class = "org.tensorflow.ovic.OvicClassifierTest", visibility = ["//visibility:public"], deps = [ - ":ovicbenchmarkerlib", + ":ovicbenchmarkerlib_java", "@com_google_truth", "@junit", ], diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml index 20f520814d7154764932638c5e9dddc32639b677..ef8a9e08450d72e392815756606f5ef8301cdd58 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml @@ -13,51 +13,55 @@ See the License for the specific language governing permissions and limitations under the License. 
--> - + android:layout_height="match_parent" + android:background="#bb7700" + android:orientation="horizontal"> + + + + + + - + + + - - - - - - - - - - - - - - + android:paddingTop="20dp" + android:textColor="#FFF" + android:textSize="20sp"/> + + + + diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml index d12435d5abda45917b8a4f12c4b3179997eae689..72a229ecdb19f5309994e994d82e0b5b5ed617a2 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml +++ b/tensorflow/contrib/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml @@ -15,45 +15,47 @@ --> + android:layout_height="match_parent" + android:background="#bb7700"> - - - - - + android:layout_weight="1" /> - + android:layout_alignParentTop="false" + android:background="#bb7700" + android:orientation="vertical" + android:weightSum="100"> + + + - - - - - - - - diff --git a/tensorflow/contrib/lite/java/ovic/README.md b/tensorflow/contrib/lite/java/ovic/README.md index 76c33838bfe5b8596d78cae7d022c51d2a379e76..77799b35691813868fb65a2c8b068f41751717db 100644 --- a/tensorflow/contrib/lite/java/ovic/README.md +++ b/tensorflow/contrib/lite/java/ovic/README.md @@ -6,7 +6,7 @@ This folder contains building code for track one of the [Low Power ImageNet Reco Follow the steps [here](https://www.tensorflow.org/mobile/tflite/demo_android) to install Tensorflow, Bazel, and the Android NDK and SDK. -## To test the benchmarker: +## Test the benchmarker: The testing utilities helps the developers (you) to make sure that your submissions in TfLite format will be processed as expected in the competition's benchmarking system. @@ -37,7 +37,7 @@ unzip -j /tmp/ovic.zip -d tensorflow/contrib/lite/java/ovic/src/testdata/ You can run test with Bazel as below. This helps to ensure that the installation is correct. 
```sh -bazel test --cxxopt=--std=c++11 //tensorflow/contrib/lite/java:OvicClassifierTest --test_output=all +bazel test --cxxopt=--std=c++11 //tensorflow/contrib/lite/java:OvicClassifierTest --cxxopt=-Wno-all --test_output=all ``` ### Test your submissions @@ -56,28 +56,83 @@ cp /tmp/my_model.lite tensorflow/contrib/lite/java/ovic/src/testdata/ The test images can be found at `tensorflow/contrib/lite/java/ovic/src/testdata/test_image_*.jpg`. You may reuse these images if your image resolutions are 128x128 or 224x224. -* Add your model and test image to the BUILD rule: +* Add your model and test image to the BUILD rule at `tensorflow/contrib/lite/java/ovic/src/testdata/BUILD`: ```JSON -java_test( - name = "OvicClassifierTest", - size = "medium", - srcs = ["ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java"], - data = [ - "ovic/src/testdata/float_model.lite", - "ovic/src/testdata/labels.txt", - "ovic/src/testdata/low_res_model.lite", - "ovic/src/testdata/quantized_model.lite", - "ovic/src/testdata/test_image_128.jpg", - "ovic/src/testdata/test_image_224.jpg", - "ovic/src/testdata/my_model.lite", # <--- Your submission. - "ovic/src/testdata/my_test_image.jpg", # <--- Your test image. - ], - ... +filegroup( + name = "ovic_testdata", + srcs = [ + "@tflite_ovic_testdata//:float_model.lite", + "@tflite_ovic_testdata//:low_res_model.lite", + "@tflite_ovic_testdata//:quantized_model.lite", + "@tflite_ovic_testdata//:test_image_128.jpg", + "@tflite_ovic_testdata//:test_image_224.jpg", + "my_model.lite", # <--- Your submission. + "my_test_image.jpg", # <--- Your test image. + ], + ... ``` * Modify `OvicClassifierTest.java` to test your model. -Change `TEST_IMAGE_PATH` to `testdata/my_test_image.jpg`. If your model runs inference in floating point, change `FLOAT_MODEL_PATH` to `testdata/my_model.lite`. If your model runs [quantized inference](https://www.tensorflow.org/performance/quantization), change `QUANTIZED_MODEL_PATH` to `testdata/my_model.lite`. 
+Change `TEST_IMAGE_PATH` to `my_test_image.jpg`. Change either `FLOAT_MODEL_PATH` or `QUANTIZED_MODEL_PATH` to `my_model.lite` depending on whether your model runs inference in float or [8-bit](https://www.tensorflow.org/performance/quantization). Now you can run the bazel tests to catch any runtime issues with the submission. + +Note: Please make sure that your submission passes the test. If a submission fails to pass the test it will not be processed by the submission server. + +## Measure on-device latency + +We provide two ways to measure the on-device latency of your submission. The first is through our competition server, which is reliable and repeatable, but is limited to a few trials per day. The second is through the benchmarker apk, which requires a device and may not be as accurate as the server, but has a fast turn-around and no access limitations. We recommend that the participants use the benchmarker apk for early development, and reserve the competition server for evaluating promising submissions. + +### Running the benchmarker app + +Make sure that you have followed instructions in [Test your submissions](#test-your-submissions) to add your model to the testdata folder and to the corresponding build rules. + +Modify `tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java`: + +* Add your model to the benchmarker apk by changing `MODEL_PATH` and `TEST_IMAGE_PATH` below to your submission and test image. + +``` + private static final String TEST_IMAGE_PATH = "my_test_image.jpg"; + private static final String MODEL_PATH = "my_model.lite"; +``` + +* Adjust the benchmark parameters when needed: + +You can change the length of each experiment, and the processor affinity below. `BIG_CORE_MASK` is an integer whose binary encoding represents the set of used cores. This number is phone-specific. 
For example, Pixel 2 has 8 cores: the 4 little cores are represented by the 4 less significant bits, and the 4 big cores by the 4 more significant bits. Therefore a mask value of 16, or in binary `00010000`, represents using only the first big core. The mask 32, or in binary `00100000` uses the second big core and should deliver identical results as the mask 16 because the big cores are interchangeable. + +``` + /** Wall time for each benchmarking experiment. */ + private static final double WALL_TIME = 3000; + /** Maximum number of iterations in each benchmarking experiment. */ + private static final int MAX_ITERATIONS = 100; + /** Mask for binding to a single big core. Pixel 1 (4), Pixel 2 (16). */ + private static final int BIG_CORE_MASK = 16; +``` + +Note: You'll need ROOT access to the phone to change processor affinity. + +* Build and install the app. + +``` +bazel build -c opt --cxxopt=--std=c++11 --cxxopt=-Wno-all //tensorflow/contrib/lite/java/ovic/demo/app:ovic_benchmarker_binary +adb install -r bazel-bin/tensorflow/contrib/lite/java/ovic/demo/app/ovic_benchmarker_binary.apk +``` + +Start the app and click the `Start` button in dark green. The button should turn bright green, signaling that the experiment is running. The benchmarking results will be displayed after about the `WALL_TIME` you specified above. For example: + +``` +my_model.lite: Average latency=158.6ms after 20 runs. +``` + +### Sample latencies + +Note: the benchmarking results can be quite different depending on the background processes running on the phone. A few things that help stabilize the app's readings are placing the phone on a cooling plate, restarting the phone, and shutting down internet access. 
+ +| Model | Pixel 1 latency (ms) | Pixel 2 latency (ms) | +| -------------------- |:---------------------:| --------------------:| +| float_model.lite | 120 | 155 | +| quantized_model.lite | 85 | 74 | +| low_res_model.lite | 4.2 | 4.0 | + diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/AndroidManifest.xml b/tensorflow/contrib/lite/java/ovic/demo/app/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..55f2961fd717bdeebf5f3f1e66bb537f53cbe4e0 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/demo/app/AndroidManifest.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..47101ff574a797a81c5d993b0863c024885f03a0 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD @@ -0,0 +1,29 @@ +# Sample app for OVIC benchmarking. +licenses(["notice"]) # Apache 2.0 + +android_binary( + name = "ovic_benchmarker_binary", + srcs = [ + "OvicBenchmarker.java", + "OvicBenchmarkerActivity.java", + ], + assets = [ + "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata", + "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt", + ], + assets_dir = "", + custom_package = "ovic.demo.app", + manifest = "AndroidManifest.xml", + nocompress_extensions = [ + ".lite", + ".tflite", + ], + resource_files = glob(["res/**"]), + tags = ["manual"], + deps = [ + "//tensorflow/contrib/lite/java:ovicbenchmarkerlib", + "//tensorflow/contrib/lite/java:tensorflowlite", + "@androidsdk//com.android.support:support-v13-25.2.0", + "@androidsdk//com.android.support:support-v4-25.2.0", + ], +) diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java similarity index 97% rename from 
tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java rename to tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java index d0102883e6b41f5c33a0061c5fd53b5f69b8ab54..113ab74a20dabc7e283804348509702b7f412917 100644 --- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java +++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java @@ -1,4 +1,4 @@ -/*Copyright 2018 Google LLC +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -package org.tensorflow.ovic; +package ovic.demo.app; import android.graphics.Bitmap; import android.os.SystemClock; @@ -22,6 +22,8 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.MappedByteBuffer; +import org.tensorflow.ovic.OvicClassifier; +import org.tensorflow.ovic.OvicSingleImageResult; /** * Class that benchmarks image classifier models. diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java new file mode 100644 index 0000000000000000000000000000000000000000..59457c308ad7caa17c52563f6a70df79e8a17914 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java @@ -0,0 +1,247 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package ovic.demo.app; + +import android.app.Activity; +import android.content.res.AssetFileDescriptor; +import android.content.res.AssetManager; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.os.Bundle; +import android.os.Process; +import android.os.SystemClock; +import android.util.Log; +import android.view.View; +import android.widget.TextView; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.text.DecimalFormat; +import org.tensorflow.ovic.OvicSingleImageResult; + +/** Activity that benchmarks an image classifier model and displays the average latency. */ +public class OvicBenchmarkerActivity extends Activity { + /** Tag for the {@link Log}. */ + private static final String TAG = "OvicBenchmarkerActivity"; + + /** Name of the label file stored in Assets. */ + private static final String LABEL_PATH = "labels.txt"; + + private static final String TEST_IMAGE_PATH = "test_image_224.jpg"; + private static final String MODEL_PATH = "float_model.lite"; + /** + * Each button press will launch a benchmarking experiment. The experiment stops when either the + * total native latency reaches WALL_TIME or the number of iterations reaches MAX_ITERATIONS, + * whichever comes first. + */ + /** Wall time for each benchmarking experiment. 
*/ + private static final double WALL_TIME = 3000; + /** Maximum number of iterations in each benchmarking experiment. */ + private static final int MAX_ITERATIONS = 100; + /** Mask for binding to a single big core. Pixel 1 (4), Pixel 2 (16). */ + private static final int BIG_CORE_MASK = 16; + /** Amount of time in milliseconds to wait for affinity to set. */ + private static final int WAIT_TIME_FOR_AFFINITY = 1000; + + /* The model to be benchmarked. */ + private MappedByteBuffer model = null; + private InputStream labelInputStream = null; + private OvicBenchmarker benchmarker; + /** Inference result of each iteration. */ + OvicSingleImageResult iterResult = null; + + private TextView textView = null; + // private Button startButton = null; + private static final DecimalFormat df2 = new DecimalFormat(".##"); + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + + // TextView used to display the progress, for information purposes only. 
+ textView = (TextView) findViewById(R.id.textView); + } + + private Bitmap loadTestBitmap() throws IOException { + InputStream imageStream = getAssets().open(TEST_IMAGE_PATH); + return BitmapFactory.decodeStream(imageStream); + } + + public void initializeTest() throws IOException { + Log.i(TAG, "Initializing benchmarker."); + benchmarker = new OvicBenchmarker(WALL_TIME); + AssetManager am = getAssets(); + AssetFileDescriptor fileDescriptor = am.openFd(MODEL_PATH); + FileInputStream modelInputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); + FileChannel fileChannel = modelInputStream.getChannel(); + long startOffset = fileDescriptor.getStartOffset(); + long declaredLength = fileDescriptor.getDeclaredLength(); + model = fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + labelInputStream = am.open(LABEL_PATH); + } + + public Boolean doTestIteration() throws IOException, InterruptedException { + if (benchmarker == null) { + throw new RuntimeException("Benchmarker has not been initialized."); + } + if (benchmarker.shouldStop()) { + return false; + } + if (!benchmarker.readyToTest()) { + Log.i(TAG, "getting ready to test."); + benchmarker.getReadyToTest(labelInputStream, model); + if (!benchmarker.readyToTest()) { + throw new RuntimeException("Failed to get the benchmarker ready."); + } + } + Log.i(TAG, "Going to do test iter."); + // Start testing. 
+ Bitmap testImageBitmap = loadTestBitmap(); + iterResult = benchmarker.doTestIteration(testImageBitmap); + testImageBitmap.recycle(); + if (iterResult == null) { + throw new RuntimeException("Inference failed to produce a result."); + } + Log.i(TAG, iterResult.toString()); + return true; + } + + public void startPressed(View view) throws IOException { + Log.i(TAG, "Start pressed"); + try { + initializeTest(); + } catch (IOException e) { + Log.e(TAG, "Can't initialize benchmarker.", e); + throw e; + } + String displayText = ""; + try { + setProcessorAffinity(BIG_CORE_MASK); + } catch (IOException e) { + Log.e(TAG, e.getMessage()); + displayText = e.getMessage() + "\n"; + } + Log.i(TAG, "Successfully initialized benchmarker."); + int testIter = 0; + Boolean iterSuccess = false; + double totalLatency = 0.0f; + while (testIter < MAX_ITERATIONS) { + try { + iterSuccess = doTestIteration(); + } catch (IOException e) { + Log.e(TAG, "Error during iteration " + testIter); + throw e; + } catch (InterruptedException e) { + Log.e(TAG, "Interrupted at iteration " + testIter); + } + if (!iterSuccess) { + break; + } + testIter++; + totalLatency += (double) iterResult.latency; + } + ; + Log.i(TAG, "Benchmarking finished"); + + if (textView != null) { + if (testIter > 0) { + textView.setText( + displayText + + MODEL_PATH + + ": Average latency=" + + df2.format(totalLatency / testIter) + + "ms after " + + testIter + + " runs."); + } else { + textView.setText("Benchmarker failed to run on more than one images."); + } + } + } + + private static void setProcessorAffinity(int mask) throws IOException { + int myPid = Process.myPid(); + Log.i(TAG, String.format("Setting processor affinity to 0x%02x", mask)); + + String command = String.format("taskset -a -p %x %d", mask, myPid); + try { + Runtime.getRuntime().exec(command).waitFor(); + } catch (InterruptedException e) { + throw new IOException("Interrupted: " + e); + } + + // Make sure set took effect - try for a second to confirm the 
change took. If not then fail. + long startTimeMs = SystemClock.elapsedRealtime(); + while (true) { + int readBackMask = readCpusAllowedMask(); + if (readBackMask == mask) { + Log.i(TAG, String.format("Successfully set affinity to 0x%02x", mask)); + return; + } + if (SystemClock.elapsedRealtime() > startTimeMs + WAIT_TIME_FOR_AFFINITY) { + throw new IOException( + String.format( + "Core-binding failed: affinity set to 0x%02x but read back as 0x%02x\n" + + "please root device.", + mask, readBackMask)); + } + + try { + Thread.sleep(50); + } catch (InterruptedException e) { + // Ignore the interruption: the loop sleeps again, and the read-back comparison is the final cross-check. + } + } + } + + public static int readCpusAllowedMask() throws IOException { + // Read the hexadecimal Cpus_allowed mask from /proc/self/status. + final String pathname = "/proc/self/status"; + final String resultPrefix = "Cpus_allowed:"; + File file = new File(pathname); + String line = ""; + String allowedCPU = ""; + Integer allowedMask = null; + BufferedReader bufReader = null; + try { + bufReader = new BufferedReader(new FileReader(file)); + while ((line = bufReader.readLine()) != null) { + if (line.startsWith(resultPrefix)) { + allowedMask = Integer.valueOf(line.substring(resultPrefix.length()).trim(), 16); + allowedCPU = bufReader.readLine(); + break; + } + } + } catch (RuntimeException e) { + throw new IOException( + "Invalid number in " + pathname + " line: \"" + line + "\": " + e.getMessage()); + } finally { + if (bufReader != null) { + bufReader.close(); + } + } + if (allowedMask == null) { + throw new IOException(pathname + " missing " + resultPrefix + " line"); + } + Log.i(TAG, allowedCPU); + return allowedMask; + } +} diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/build.gradle b/tensorflow/contrib/lite/java/ovic/demo/app/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..c5d19bad89a93988a6830a17fe2fb4a60e2fb00f --- /dev/null +++ 
b/tensorflow/contrib/lite/java/ovic/demo/app/build.gradle @@ 
-0,0 +1,58 @@ +apply plugin: 'com.android.application' + +android { + compileSdkVersion 26 + buildToolsVersion "26.0.1" + defaultConfig { + applicationId "android.example.com.ovicbenchmarker" + minSdkVersion 15 + targetSdkVersion 26 + versionCode 1 + versionName "1.0" + testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" + + // Remove this block. + jackOptions { + enabled true + } + } + lintOptions { + abortOnError false + } + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + } + } + aaptOptions { + noCompress "lite", "tflite" + } + + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +repositories { + maven { + url 'https://google.bintray.com/tensorflow' + } +} + +dependencies { + compile fileTree(dir: 'libs', include: ['*.jar']) + androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2', { + exclude group: 'com.android.support', module: 'support-annotations' + }) + compile 'com.android.support:appcompat-v7:25.2.0' + compile 'com.android.support.constraint:constraint-layout:1.0.2' + compile 'com.android.support:design:25.2.0' + compile 'com.android.support:support-annotations:25.3.1' + compile 'com.android.support:support-v13:25.2.0' + + compile 'org.tensorflow:tensorflow-lite:+' + + testCompile 'junit:junit:4.12' +} diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-mdpi/ic_launcher.png b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-mdpi/ic_launcher.png new file mode 100644 index 0000000000000000000000000000000000000000..715d1b6d69c0f4dc4d1ae58c8262c22856b20f43 Binary files /dev/null and b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-mdpi/ic_launcher.png differ diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-xhdpi/ic_launcher.png b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-xhdpi/ic_launcher.png new 
file mode 100644 index 0000000000000000000000000000000000000000..9beff0885fd4c8c65ea30c99c838370dcd745f3c Binary files /dev/null and b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable-xhdpi/ic_launcher.png differ diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable/start_button_color.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable/start_button_color.xml new file mode 100644 index 0000000000000000000000000000000000000000..93f5c6a016b499f1bd7bacde9b4b94a4ee9fdb6b --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/drawable/start_button_color.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml new file mode 100644 index 0000000000000000000000000000000000000000..e9d83bae543ae62ba8749c4c91b36b20bf09a176 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml @@ -0,0 +1,54 @@ + + + + + + +