From 5e6b20e53720e8d00619d851ce983f8da77c5cf4 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 14:54:53 -0700 Subject: [PATCH 001/365] Deploy TensorFlow ecosystem jars --- tensorflow/java/maven/pom.xml | 10 +- tensorflow/java/maven/release.sh | 1 + tensorflow/java/maven/run_inside_container.sh | 42 ++++- .../pom-spark.xml.template | 19 +++ .../spark-tensorflow-connector/update.py | 152 ++++++++++++++++++ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 +++ .../java/maven/tensorflow-hadoop/update.py | 114 +++++++++++++ 7 files changed, 352 insertions(+), 4 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template create mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a09a5ea7c..21fed5a419 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0 + 1.8.0-SNAPSHOT pom https://www.tensorflow.org @@ -32,6 +32,8 @@ libtensorflow_jni_gpu tensorflow proto + tensorflow-hadoop + spark-tensorflow-connector ossrh - https://oss.sonatype.org/content/repositories/snapshots + https://tap.jfrog.io/tap/public-snapshots + ossrh @@ -74,6 +77,7 @@ + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 9012ea14ea..6c51029198 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,6 +48,7 @@ fi set -ex docker run \ + $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 6136ccfdfb..73f7ee94a0 100644 --- 
a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -32,11 +32,15 @@ if [[ "${TF_VERSION}" == *"-SNAPSHOT" ]]; then DEPLOY_BINTRAY="false" fi PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" +TF_ECOSYSTEM_URL="https://github.com/tensorflow/ecosystem.git" + if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then echo "Must deploy to at least one of Bintray or OSSRH" >&2 exit 2 fi +IS_SNAPSHOT="true" + set -ex clean() { @@ -183,6 +187,41 @@ generate_java_protos() { rm -rf "${DIR}/proto/tmp" } + +download_tf_ecosystem() { + ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" + HADOOP_DIR="${DIR}/tensorflow-hadoop" + SPARK_DIR="${DIR}/spark-tensorflow-connector" + + # Clean any previous attempts + rm -rf "${ECOSYSTEM_DIR}" + + # Clone the TensorFlow ecosystem project + mkdir -p "${ECOSYSTEM_DIR}" + cd "${ECOSYSTEM_DIR}" + git clone "${TF_ECOSYSTEM_URL}" + + # Copy the TensorFlow Hadoop source + cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" + python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ + --output_pom ${HADOOP_DIR}/pom.xml \ + --version ${TF_VERSION} + + # Copy the TensorFlow Spark connector source + cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" + python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ + --output_pom ${SPARK_DIR}/pom.xml \ + --version ${TF_VERSION} \ + --scala_version 2.11 + + # Cleanup + rm -rf "${ECOSYSTEM_DIR}" + + cd "${DIR}" +} + # Deploy artifacts using a specific profile. # Arguments: # profile - name of selected profile. @@ -240,7 +279,7 @@ cd "${DIR}" # Comment lines out appropriately if debugging/tinkering with the release # process. 
# gnupg2 is required for signing -apt-get -qq update && apt-get -qqq install -y gnupg2 +apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git clean update_version_in_pom download_libtensorflow @@ -248,6 +287,7 @@ download_libtensorflow_jni download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos +download_tf_ecosystem # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template new file mode 100644 index 0000000000..d8a3d559be --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template @@ -0,0 +1,19 @@ + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector_${scala_version} + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + + diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py new file mode 100644 index 0000000000..6185ccbb00 --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/update.py @@ -0,0 +1,152 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Spark connector pom from with deployment template. 
+ +The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import string +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" +SCALA_VERSION_TAG = "scala.binary.version" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--scala_version', + required=True, + choices=['2.10', '2.11'], + help='Scala version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_scala_version(tree, version, is_template=False): + """ Updates scala version in XML tree""" + + if is_template: + tag = "{%s}artifactId" % POM_NAMESPACE + nodes = tree.findall(tag) + + if nodes is None: + raise ValueError("Missing artifactId in template pom") + + for node in nodes: + template = string.Template(node.text) + + text = template.substitute({"scala_version": version}) + node.text = text + else: + # Update scala version property in pom + tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) + nodes = nodes = list(tree.iter(tag)) + + if len(nodes) == 0: + raise ValueError("Missing %s property in Spark connector pom") + + for node in nodes: + node.text = version + + return tree + + +def update_version(tree, version): + """ Updates 
version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Spark connector with deployment template. + + Modify the TensorFlow Spark connector pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Spark connector pom file. + + Return: + template_root: Root XML element with merged tree. + """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) + pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template new file mode 100644 index 0000000000..6a82c56cc7 --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template @@ -0,0 +1,18 @@ + + 4.0.0 + TensorFlow 
TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py new file mode 100644 index 0000000000..503062608d --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/update.py @@ -0,0 +1,114 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Hadoop pom from with deployment template. 
+ +The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_version(tree, version): + """ Updates version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Hadoop with deployment template. + + Modify the TensorFlow Hadoop pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Hadoop pom file. + + Return: + template_root: Root XML element with merged tree. 
+ """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) -- GitLab From f957cfbc4d27a57bf08d128b41042a16f1155ab0 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:40:20 -0700 Subject: [PATCH 002/365] Add TensorFlow ecosystem Spark and Hadoop jars to Maven deployment --- tensorflow/java/maven/README.md | 6 +++++ tensorflow/java/maven/pom.xml | 8 +++--- tensorflow/java/maven/release.sh | 1 - tensorflow/java/maven/run_inside_container.sh | 26 ++++++++++--------- .../maven/spark-tensorflow-connector/pom.xml | 24 +++++++++++++++++ .../java/maven/tensorflow-hadoop/pom.xml | 24 +++++++++++++++++ 6 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom.xml create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom.xml diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index c7e8f03806..fa756815a9 100644 --- a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -53,6 +53,12 @@ There are seven artifacts and thus `pom.xml`s involved in this release: 7. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. +8. `tensorflow-hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. 
+ The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/hadoop) + +9. `spark-tensorflow-connector`: A Scala library for loading and storing TensorFlow TFRecord + using Apache Spark DataFrames. The source code for this package is available + in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector) ## Updating the release diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 21fed5a419..7a95fb2556 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0-SNAPSHOT + 1.8.0 pom https://www.tensorflow.org @@ -46,8 +46,7 @@ ossrh - https://tap.jfrog.io/tap/public-snapshots - + https://oss.sonatype.org/content/repositories/snapshots ossrh @@ -77,7 +76,6 @@ - + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 6c51029198..9012ea14ea 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,7 +48,6 @@ fi set -ex docker run \ - $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 73f7ee94a0..3808104bc1 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -39,8 +39,6 @@ if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then exit 2 fi -IS_SNAPSHOT="true" - set -ex clean() { @@ -48,7 +46,9 @@ clean() { # (though if run inside a clean docker container, there won't be any dirty # artifacts lying around) mvn -q clean - rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target libtensorflow/src libtensorflow/target tensorflow-android/target + rm -rf 
libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target \ + libtensorflow/src libtensorflow/target tensorflow-android/target \ + tensorflow-hadoop/src spark-tensorflow-connector/src } update_version_in_pom() { @@ -188,6 +188,9 @@ generate_java_protos() { } +# Download the TensorFlow ecosystem source from git. +# The pom files from this repo do not inherit from the parent pom so the maven version +# is updated for each module. download_tf_ecosystem() { ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" HADOOP_DIR="${DIR}/tensorflow-hadoop" @@ -203,18 +206,15 @@ download_tf_ecosystem() { # Copy the TensorFlow Hadoop source cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" - python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ - --output_pom ${HADOOP_DIR}/pom.xml \ - --version ${TF_VERSION} + cp "${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml" "${HADOOP_DIR}" + cd "${HADOOP_DIR}" + update_version_in_pom # Copy the TensorFlow Spark connector source cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" - python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ - --output_pom ${SPARK_DIR}/pom.xml \ - --version ${TF_VERSION} \ - --scala_version 2.11 + cp "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml" "${SPARK_DIR}" + cd "${SPARK_DIR}" + update_version_in_pom # Cleanup rm -rf "${ECOSYSTEM_DIR}" @@ -280,6 +280,7 @@ cd "${DIR}" # process. 
# gnupg2 is required for signing apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git + clean update_version_in_pom download_libtensorflow @@ -288,6 +289,7 @@ download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos download_tf_ecosystem + # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml new file mode 100644 index 0000000000..8c962d111f --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml new file mode 100644 index 0000000000..ee90d8c92b --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + -- GitLab From 90b01f238d83d833ce9a843845dd96bb816a6c76 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:46:35 -0700 Subject: [PATCH 003/365] Delete templating approach for deploying TensorFlow ecosystem jars --- .../pom-spark.xml.template | 19 --- .../spark-tensorflow-connector/update.py | 152 ------------------ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 --- 
.../java/maven/tensorflow-hadoop/update.py | 114 ------------- 4 files changed, 303 deletions(-) delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template deleted file mode 100644 index d8a3d559be..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template +++ /dev/null @@ -1,19 +0,0 @@ - - 4.0.0 - TensorFlow TFRecord connector for Apache Spark DataFrames - spark-tensorflow-connector_${scala_version} - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - - diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py deleted file mode 100644 index 6185ccbb00..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/update.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Spark connector pom from with deployment template. 
- -The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import string -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" -SCALA_VERSION_TAG = "scala.binary.version" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--scala_version', - required=True, - choices=['2.10', '2.11'], - help='Scala version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_scala_version(tree, version, is_template=False): - """ Updates scala version in XML tree""" - - if is_template: - tag = "{%s}artifactId" % POM_NAMESPACE - nodes = tree.findall(tag) - - if nodes is None: - raise ValueError("Missing artifactId in template pom") - - for node in nodes: - template = string.Template(node.text) - - text = template.substitute({"scala_version": version}) - node.text = text - else: - # Update scala version property in pom - tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) - nodes = nodes = list(tree.iter(tag)) - - if len(nodes) == 0: - raise ValueError("Missing %s property in Spark connector pom") - - for node in nodes: - node.text = version - - return tree - - -def update_version(tree, version): - """ Updates 
version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Spark connector with deployment template. - - Modify the TensorFlow Spark connector pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Spark connector pom file. - - Return: - template_root: Root XML element with merged tree. - """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) - pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template deleted file mode 100644 index 6a82c56cc7..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template +++ /dev/null @@ -1,18 +0,0 @@ - - 4.0.0 - 
TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop - tensorflow-hadoop - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py deleted file mode 100644 index 503062608d..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/update.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Hadoop pom from with deployment template. 
- -The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_version(tree, version): - """ Updates version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Hadoop with deployment template. - - Modify the TensorFlow Hadoop pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Hadoop pom file. - - Return: - template_root: Root XML element with merged tree. 
- """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) -- GitLab From b0ec8d2c467173ce5a43c13631bc51fd89f072e5 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Wed, 30 May 2018 19:23:08 -0700 Subject: [PATCH 004/365] Update artifactId for TensorFlow Hadoop and spark-connector jars --- tensorflow/java/maven/README.md | 4 ++-- .../java/maven/{tensorflow-hadoop => hadoop}/pom.xml | 4 ++-- tensorflow/java/maven/pom.xml | 4 ++-- tensorflow/java/maven/run_inside_container.sh | 10 ++++++---- .../pom.xml | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) rename tensorflow/java/maven/{tensorflow-hadoop => hadoop}/pom.xml (94%) rename tensorflow/java/maven/{spark-tensorflow-connector => spark-connector}/pom.xml (93%) diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index fa756815a9..3e030dcd09 100644 --- a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -53,10 +53,10 @@ There are seven artifacts and thus `pom.xml`s involved in this release: 7. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. -8. `tensorflow-hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. +8. `hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. 
The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/hadoop) -9. `spark-tensorflow-connector`: A Scala library for loading and storing TensorFlow TFRecord +9. `spark-connector`: A Scala library for loading and storing TensorFlow TFRecord using Apache Spark DataFrames. The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/hadoop/pom.xml similarity index 94% rename from tensorflow/java/maven/tensorflow-hadoop/pom.xml rename to tensorflow/java/maven/hadoop/pom.xml index ee90d8c92b..a872c20d3b 100644 --- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml +++ b/tensorflow/java/maven/hadoop/pom.xml @@ -5,7 +5,7 @@ 4.0.0 TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop - tensorflow-hadoop + hadoop jar @@ -21,4 +21,4 @@ 1.8.0 ../ - + \ No newline at end of file diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 7a95fb2556..19287f8245 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -32,8 +32,8 @@ libtensorflow_jni_gpu tensorflow proto - tensorflow-hadoop - spark-tensorflow-connector + hadoop + spark-connector 4.0.0 TensorFlow TFRecord connector for Apache Spark DataFrames - spark-tensorflow-connector + spark-connector jar @@ -21,4 +21,4 @@ 1.8.0 ../ - + \ No newline at end of file -- GitLab From cd25a9544915654022e2cfff4923c31822166112 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 7 Jun 2018 10:38:50 -0700 Subject: [PATCH 005/365] Updated SavedModels in Python TOCO API. 
PiperOrigin-RevId: 199658431 --- tensorflow/contrib/lite/python/BUILD | 3 +- .../lite/python/convert_saved_model.py | 31 ++++++------------- tensorflow/contrib/lite/python/lite.py | 2 +- tensorflow/contrib/lite/python/lite_test.py | 2 +- .../contrib/lite/python/tflite_convert.py | 2 +- 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 7e6ff6c0a8..27909a9458 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -57,8 +57,9 @@ py_library( ":interpreter", ":lite_constants", ":op_hint", - "//tensorflow/contrib/saved_model:saved_model_py", "//tensorflow/python:graph_util", + "//tensorflow/python/saved_model:constants", + "//tensorflow/python/saved_model:loader", "//tensorflow/python/tools:freeze_graph_lib", ], ) diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py index 5dad49f1ed..1553464b9f 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -19,13 +19,12 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.lite.python.convert import tensor_name -from tensorflow.contrib.saved_model.python.saved_model import reader -from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.framework import graph_util as tf_graph_util from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.saved_model import constants from tensorflow.python.saved_model import loader @@ -58,21 +57,8 @@ def _get_meta_graph_def(saved_model_dir, tag_set): Raises: ValueError: No valid MetaGraphDef for given tag_set. 
""" - saved_model = reader.read_saved_model(saved_model_dir) - tag_sets = [] - result_meta_graph_def = None - for meta_graph_def in saved_model.meta_graphs: - meta_graph_tag_set = set(meta_graph_def.meta_info_def.tags) - tag_sets.append(meta_graph_tag_set) - if meta_graph_tag_set == tag_set: - result_meta_graph_def = meta_graph_def - logging.info("The given saved_model contains the following tags: %s", - tag_sets) - if result_meta_graph_def is not None: - return result_meta_graph_def - else: - raise ValueError("No valid MetaGraphDef for this tag_set '{}'. Possible " - "values are '{}'. ".format(tag_set, tag_sets)) + with session.Session(graph=ops.Graph()) as sess: + return loader.load(sess, tag_set, saved_model_dir) def _get_signature_def(meta_graph, signature_key): @@ -97,9 +83,7 @@ def _get_signature_def(meta_graph, signature_key): raise ValueError("No '{}' in the SavedModel\'s SignatureDefs. Possible " "values are '{}'.".format(signature_key, ",".join(signature_def_keys))) - signature_def = signature_def_utils.get_signature_def_by_key( - meta_graph, signature_key) - return signature_def + return signature_def_map[signature_key] def _get_inputs_outputs(signature_def): @@ -247,6 +231,7 @@ def freeze_saved_model(saved_model_dir, input_arrays, input_shapes, ValueError: SavedModel doesn't contain a MetaGraphDef identified by tag_set. signature_key is not in the MetaGraphDef. + assets/ directory is in the MetaGraphDef. input_shapes does not match the length of input_arrays. input_arrays or output_arrays are not valid. """ @@ -255,9 +240,13 @@ def freeze_saved_model(saved_model_dir, input_arrays, input_shapes, signature_def = _get_signature_def(meta_graph, signature_key) inputs, outputs = _get_inputs_outputs(signature_def) + # Check SavedModel for assets directory. 
+ collection_def = meta_graph.collection_def + if constants.ASSETS_KEY in collection_def: + raise ValueError("SavedModels with assets/ directory are not supported.") + graph = ops.Graph() with session.Session(graph=graph) as sess: - # TODO(nupurgarg): Throw ValueError if SavedModel has assets/ directory. loader.load(sess, meta_graph.meta_info_def.tags, saved_model_dir) # Gets input and output tensors. diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 253e3f72b1..e3a2d19e05 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -207,7 +207,7 @@ class TocoConverter(object): # Check if graph is frozen. if not _is_frozen_graph(sess): - raise ValueError("Please freeze the graph using freeze_graph.py") + raise ValueError("Please freeze the graph using freeze_graph.py.") # Create TocoConverter class. return cls(sess.graph_def, input_tensors, output_tensors) diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index bbb00021f9..b04caaf263 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -401,7 +401,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase): with self.assertRaises(ValueError) as error: lite.TocoConverter.from_frozen_graph(graph_def_file, ['Placeholder'], ['add']) - self.assertEqual('Please freeze the graph using freeze_graph.py', + self.assertEqual('Please freeze the graph using freeze_graph.py.', str(error.exception)) def testPbtxt(self): diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py index 2b7ad29a27..4c215b62b2 100644 --- a/tensorflow/contrib/lite/python/tflite_convert.py +++ b/tensorflow/contrib/lite/python/tflite_convert.py @@ -114,7 +114,7 @@ def _convert_model(flags): "--input_arrays must be present when specifying " "--std_dev_values and --mean_values with multiple input " "tensors 
in order to map between names and " - "values".format(",".join(input_arrays))) + "values.".format(",".join(input_arrays))) converter.quantized_input_stats = dict(zip(input_arrays, quant_stats)) if flags.default_ranges_min and flags.default_ranges_max: converter.default_ranges_stats = (flags.default_ranges_min, -- GitLab From 1da05443167eebcfd31b8d00b2bb84dfceb84812 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 7 Jun 2018 10:55:29 -0700 Subject: [PATCH 006/365] Handle tensor array grad only accessed in one branch. Previously recompiling due to tensor array grad in branches weren't correctly handled. PiperOrigin-RevId: 199661353 --- tensorflow/compiler/tf2xla/kernels/if_op.cc | 30 ++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.cc b/tensorflow/compiler/tf2xla/kernels/if_op.cc index 8b9b026643..d48c6eea75 100644 --- a/tensorflow/compiler/tf2xla/kernels/if_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/if_op.cc @@ -48,11 +48,11 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { VLOG(1) << "Building If: " << input_types_.size() << " inputs"; - std::vector inputs(input_types_.size()); std::vector arguments(input_types_.size()); for (int i = 0; i < input_types_.size(); ++i) { XlaCompiler::Argument& arg = arguments[i]; DataType type = ctx->input_type(i + 1); + if (type == DT_RESOURCE) { XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(i + 1, &resource)); @@ -60,7 +60,6 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { arg.initialized = resource->initialized(); arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = resource->kind(); - OP_REQUIRES_OK(ctx, resource->Pack(&inputs[i], b)); arg.type = resource->type(); arg.shape = resource->shape(); @@ -79,7 +78,6 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { arg.kind = XlaCompiler::Argument::kParameter; arg.type = input_types_[i]; arg.shape = ctx->InputShape(i + 1); - inputs[i] = ctx->Input(i + 1); VLOG(2) 
<< "Arg type: " << DataTypeString(arg.type) << " shape: " << arg.shape.DebugString(); } @@ -100,6 +98,7 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, else_branch_, arguments, &else_result)); + bool has_tensor_array_gradients = false; for (XlaCompiler::CompilationResult* result : {&then_result, &else_result}) { for (const XlaCompiler::ResourceUpdate& update : result->resource_updates) { XlaResource* resource; @@ -121,9 +120,21 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { for (const auto& gradient : resource->tensor_array_gradients()) { arg.tensor_array_gradients.insert(gradient.first); } + if (!resource->tensor_array_gradients().empty()) + has_tensor_array_gradients = true; } } + // Recompile the functions to update the argument shapes for tensor arrays. + if (has_tensor_array_gradients) { + then_result = {}; + OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, then_branch_, + arguments, &then_result)); + else_result = {}; + OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, else_branch_, + arguments, &else_result)); + } + // Check that both branches have identical input shapes. 
OP_REQUIRES(ctx, then_result.xla_input_shapes.size() == 1, errors::FailedPrecondition("Expected one input shape")); @@ -175,6 +186,19 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { "Mismatch in resource of then and else branch for resource ", i)); } + int num_inputs = then_result.input_mapping.size(); + std::vector inputs(num_inputs); + for (int i = 0; i < num_inputs; ++i) { + int input_num = then_result.input_mapping[i] + 1; + if (ctx->input_type(input_num) == DT_RESOURCE) { + XlaResource* resource; + OP_REQUIRES_OK(ctx, ctx->GetResourceInput(input_num, &resource)); + OP_REQUIRES_OK(ctx, resource->Pack(&inputs[i], b)); + } else { + inputs[i] = ctx->Input(i + 1); + } + } + xla::XlaOp outputs = b->Conditional(ctx->Input(0), b->Tuple(inputs), *then_result.computation, b->Tuple(inputs), *else_result.computation); -- GitLab From 0ea841d4bb79b0322dccad73728e428854d1aed2 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 7 Jun 2018 11:00:50 -0700 Subject: [PATCH 007/365] [TF:XLA] Bump open source llvm revision to r334038 PiperOrigin-RevId: 199662287 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e66af3c8bc..b007d3f597 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -452,11 +452,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/7488dbc1218de926f3de0e9bb3d465f3bbe5b80e.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/7488dbc1218de926f3de0e9bb3d465f3bbe5b80e.tar.gz", ], - sha256 = "6f782a0d2e9d7946bdf20807e0fcd8f5eaed8afd93bdd610cdefbe9435ca551f", - strip_prefix = 
"llvm-40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f", + sha256 = "dd4a2e2a4f21ab69cf99534bcb2739c04fc12d12b63e5e3d8f2b85a2eb55d5d1", + strip_prefix = "llvm-7488dbc1218de926f3de0e9bb3d465f3bbe5b80e", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From 1485d75eb98d40d3770f0d3a850bc349e274b099 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 7 Jun 2018 11:09:08 -0700 Subject: [PATCH 008/365] Iterate over the K dimension in the innermost loop nest in the LLVM IR GEMM This itself does not improve performance in the current tile sizes, shows improvement with larger tiles (CL upcoming). PiperOrigin-RevId: 199663960 --- tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../xla/service/cpu/dot_op_emitter.cc | 62 +++++++++++-------- .../xla/service/cpu/vector_support_library.cc | 22 +++++++ .../xla/service/cpu/vector_support_library.h | 16 +++++ 4 files changed, 74 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index f10d71fdba..d82922a359 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -882,6 +882,7 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + "//tensorflow/core:lib", "@llvm//:core", "@llvm//:support", ], diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index cda623f8e8..fe4ba2a070 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -740,7 +740,7 @@ class MatrixMatrixBlockPanelEmitter { private: // The HandleResiduesOnX helpers split the iteration space for dimension X // into a multiple of the tile size on dimension X and an epilogue. 
These - // helpers ultimately call into `EmitTiledReductionLoop` for emitting the + // helpers ultimately call into `EmitTiledGemm` for emitting the // tiled GEMM kernel. void HandleResiduesOnN(); @@ -750,15 +750,13 @@ class MatrixMatrixBlockPanelEmitter { llvm::Value* k_start, llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end); - // This emits the inner reduction loop. This inner reduction loop multiplies - // a tile from the LHS of size [tile_size_m,tile_size_k] and a tile from the - // RHS of size [`tile_size_k`, vls->vector_width()] to update a tile of size - // [`tile_size_m`, vls->vector_width()] in the result. - void EmitTiledReductionLoop(VectorSupportLibrary* vsl, int64 tile_size_k, - llvm::Value* k_start, llvm::Value* k_end, - llvm::Value* n_start, llvm::Value* n_end, - int64 tile_size_m, llvm::Value* m_start, - llvm::Value* m_end); + // This emits a tiled GEMM kernel. For a detailed description see the comment + // on the implementation. + void EmitTiledGemm(VectorSupportLibrary* vsl, int64 tile_size_k, + llvm::Value* k_start, llvm::Value* k_end, + llvm::Value* n_start, llvm::Value* n_end, + int64 tile_size_m, llvm::Value* m_start, + llvm::Value* m_end); llvm::Value* GetInt64(int64 value) { return ir_builder_->getInt64(value); } @@ -848,16 +846,24 @@ void MatrixMatrixBlockPanelEmitter::HandleResiduesOnM( VectorSupportLibrary* vsl, int64 tile_size_k, llvm::Value* k_start, llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end) { const int64 m_end = dims().m() - dims().m() % tile_size_m(); - EmitTiledReductionLoop(vsl, tile_size_k, k_start, k_end, n_start, n_end, - tile_size_m(), GetInt64(0), GetInt64(m_end)); + EmitTiledGemm(vsl, tile_size_k, k_start, k_end, n_start, n_end, tile_size_m(), + GetInt64(0), GetInt64(m_end)); if (m_end != dims().m()) { - EmitTiledReductionLoop(vsl, tile_size_k, k_start, k_end, n_start, n_end, - dims().m() - m_end, GetInt64(m_end), - GetInt64(dims().m())); + EmitTiledGemm(vsl, tile_size_k, k_start, k_end, 
n_start, n_end, + dims().m() - m_end, GetInt64(m_end), GetInt64(dims().m())); } } +// The loop structure is: +// +// Iterate over dimension M as m: +// Iterate over dimension N as n: +// Iterate over dimension K as k: +// OutputTile[m,n] += Dot(LhsTile[m,k], RhsTile[k,n]) +// +// I.e. a just a tiled version of a "naive" GEMM. +// // The tiling scheme is as follows: // // Let the LHS be: @@ -919,7 +925,7 @@ void MatrixMatrixBlockPanelEmitter::HandleResiduesOnM( // +-------------------+-------------------+-------------------+--------- // | a0*p0+b0*q0+c0*r0 | a0*p1+b0*q1+c0*r1 | a0*p2+b0*q2+c0*r2 | ... // +-------------------+-------------------+-------------------+--------- -void MatrixMatrixBlockPanelEmitter::EmitTiledReductionLoop( +void MatrixMatrixBlockPanelEmitter::EmitTiledGemm( VectorSupportLibrary* vsl, int64 tile_size_k, llvm::Value* k_start, llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end, int64 tile_size_m, llvm::Value* m_start, llvm::Value* m_end) { @@ -933,16 +939,16 @@ void MatrixMatrixBlockPanelEmitter::EmitTiledReductionLoop( /*major_dim_offset=*/m_i, /*tile_size_along_major_dim=*/tile_size_m); - ksl_.For("dot.k", k_start, k_end, tile_size_k, [&](llvm::Value* k_i) { - MemoryTile rhs_memory_tile(vsl, ir_builder_, rhs_, dims().n(), k_i, - tile_size_k); - std::vector> lhs_tile = - lhs_memory_tile.LoadBroadcastTile(k_i, tile_size_k); - ksl_.For( - "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { + ksl_.For( + "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { + TileVariable result_tile_var(vsl, result_memory_tile.LoadTile(n_i)); + ksl_.For("dot.k", k_start, k_end, tile_size_k, [&](llvm::Value* k_i) { + MemoryTile rhs_memory_tile(vsl, ir_builder_, rhs_, dims().n(), k_i, + tile_size_k); + std::vector> lhs_tile = + lhs_memory_tile.LoadBroadcastTile(k_i, tile_size_k); std::vector rhs_tile = rhs_memory_tile.LoadTile(n_i); - std::vector result_tile = - result_memory_tile.LoadTile(n_i); + std::vector 
result_tile = result_tile_var.Get(); for (int64 r_m_i = 0; r_m_i < tile_size_m; r_m_i++) { for (int64 r_k_i = 0; r_k_i < tile_size_k; r_k_i++) { result_tile[r_m_i] = @@ -950,9 +956,11 @@ void MatrixMatrixBlockPanelEmitter::EmitTiledReductionLoop( result_tile[r_m_i]); } } - result_memory_tile.StoreTile(result_tile, n_i); + result_tile_var.Set(result_tile); }); - }); + + result_memory_tile.StoreTile(result_tile_var.Get(), n_i); + }); }); } diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc index cd1165e238..c444d15185 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc @@ -427,5 +427,27 @@ llvm::Value* LlvmVariable::Get() const { void LlvmVariable::Set(llvm::Value* new_value) { ir_builder_->CreateStore(new_value, alloca_); } + +TileVariable::TileVariable(VectorSupportLibrary* vector_support, + std::vector initial_value) { + for (llvm::Value* initial_vector_value : initial_value) { + storage_.emplace_back(vector_support, initial_vector_value); + } +} + +std::vector TileVariable::Get() const { + std::vector result; + c_transform(storage_, std::back_inserter(result), + [&](VectorVariable vect_var) { return vect_var.Get(); }); + return result; +} + +void TileVariable::Set(tensorflow::gtl::ArraySlice value) { + CHECK_EQ(value.size(), storage_.size()); + for (int64 i = 0, e = value.size(); i < e; i++) { + storage_[i].Set(value[i]); + } +} + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.h b/tensorflow/compiler/xla/service/cpu/vector_support_library.h index edcaec5849..49c2a4e2f4 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.h +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.h @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" namespace xla { namespace cpu { @@ -317,6 +318,21 @@ class ScalarVariable : public LlvmVariable { Set(initial_value); } }; + +// This wraps a set of alloca-backed stack variables that can, as a whole, store +// a tile. A "tile" is a sequence of vectors that is typically used as a 2D +// grid of scalar values (e.g. for tiled GEMMs). +class TileVariable { + public: + TileVariable(VectorSupportLibrary* vector_support, + std::vector initial_value); + + std::vector Get() const; + void Set(tensorflow::gtl::ArraySlice value); + + private: + std::vector storage_; +}; } // namespace cpu } // namespace xla -- GitLab From 4d47e9bc927ed29918a5524bfebe6075a4dccfb9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 7 Jun 2018 11:34:34 -0700 Subject: [PATCH 009/365] Tune the GEMM tile size for broadwell PiperOrigin-RevId: 199668758 --- tensorflow/compiler/xla/service/cpu/dot_op_emitter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h index 2effb7fc36..ed2a18976a 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.h @@ -144,8 +144,12 @@ class DotOpEmitter { } std::tuple GetGemmTileSize() const { + // Tuned for broadwell - Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz + // + // TODO(b/80093688): Tune for other architectures and centralize this + // information in one place. 
const std::tuple kDefaultTileSize = - std::tuple(3, 5, 1); + std::tuple(11, 9, 1); return options::LlvmIrGemmTileSize(hlo_module_config_) .value_or(kDefaultTileSize); } -- GitLab From e343b8072833765c85a5685b0f56b1b3d6add275 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 7 Jun 2018 11:36:47 -0700 Subject: [PATCH 010/365] Don't use `std::move()` on `const ...&` arguments. PiperOrigin-RevId: 199669177 --- tensorflow/core/kernels/data/iterator_ops.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 9d9e74adba..d71cac4ebc 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -782,7 +782,7 @@ class OneShotIteratorOp : public AsyncOpKernel { return; } } - ProduceOutput(ctx, std::move(done)); + ProduceOutput(ctx, done); } private: @@ -803,9 +803,9 @@ class OneShotIteratorOp : public AsyncOpKernel { } for (auto&& ctx_done : callbacks_to_run) { - ProduceOutput(ctx_done.first, std::move(ctx_done.second)); + ProduceOutput(ctx_done.first, ctx_done.second); } - ProduceOutput(ctx, std::move(done)); + ProduceOutput(ctx, done); } Status TryInit(OpKernelContext* ctx, IteratorResource** iterator, -- GitLab From 642dc96bd4627a4f6305cf61b8553324054d9122 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 11:45:01 -0700 Subject: [PATCH 011/365] Add FillTriangular Bijector to create triangular matrices. 
PiperOrigin-RevId: 199670547 --- tensorflow/contrib/distributions/BUILD | 19 +++ .../bijectors/fill_triangular_test.py | 98 ++++++++++++ .../python/ops/bijectors/__init__.py | 2 + .../python/ops/bijectors/fill_triangular.py | 148 ++++++++++++++++++ 4 files changed, 267 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/fill_triangular_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/fill_triangular.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 23d9dbcd91..d8baf49e81 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -940,6 +940,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "fill_triangular_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/fill_triangular_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "gumbel_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/fill_triangular_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/fill_triangular_test.py new file mode 100644 index 0000000000..caeaf2a0c6 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/fill_triangular_test.py @@ -0,0 +1,98 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for FillTriangular bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import bijectors +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class FillTriangularBijectorTest(test.TestCase): + """Tests the correctness of the FillTriangular bijector.""" + + @test_util.run_in_graph_and_eager_modes() + def testBijector(self): + x = np.float32(np.array([1., 2., 3.])) + y = np.float32(np.array([[3., 0.], + [2., 1.]])) + + b = bijectors.FillTriangular() + + y_ = self.evaluate(b.forward(x)) + self.assertAllClose(y, y_) + + x_ = self.evaluate(b.inverse(y)) + self.assertAllClose(x, x_) + + fldj = self.evaluate(b.forward_log_det_jacobian(x, event_ndims=1)) + self.assertAllClose(fldj, 0.) + + ildj = self.evaluate(b.inverse_log_det_jacobian(y, event_ndims=2)) + self.assertAllClose(ildj, 0.) 
+ + @test_util.run_in_graph_and_eager_modes() + def testShape(self): + x_shape = tensor_shape.TensorShape([5, 4, 6]) + y_shape = tensor_shape.TensorShape([5, 4, 3, 3]) + + b = bijectors.FillTriangular(validate_args=True) + + x = array_ops.ones(shape=x_shape, dtype=dtypes.float32) + y_ = b.forward(x) + self.assertAllEqual(y_.shape.as_list(), y_shape.as_list()) + x_ = b.inverse(y_) + self.assertAllEqual(x_.shape.as_list(), x_shape.as_list()) + + y_shape_ = b.forward_event_shape(x_shape) + self.assertAllEqual(y_shape_.as_list(), y_shape.as_list()) + x_shape_ = b.inverse_event_shape(y_shape) + self.assertAllEqual(x_shape_.as_list(), x_shape.as_list()) + + y_shape_tensor = self.evaluate( + b.forward_event_shape_tensor(x_shape.as_list())) + self.assertAllEqual(y_shape_tensor, y_shape.as_list()) + x_shape_tensor = self.evaluate( + b.inverse_event_shape_tensor(y_shape.as_list())) + self.assertAllEqual(x_shape_tensor, x_shape.as_list()) + + @test_util.run_in_graph_and_eager_modes() + def testShapeError(self): + + b = bijectors.FillTriangular(validate_args=True) + + x_shape_bad = tensor_shape.TensorShape([5, 4, 7]) + with self.assertRaisesRegexp(ValueError, "is not a triangular number"): + b.forward_event_shape(x_shape_bad) + with self.assertRaisesOpError("is not a triangular number"): + self.evaluate(b.forward_event_shape_tensor(x_shape_bad.as_list())) + + y_shape_bad = tensor_shape.TensorShape([5, 4, 3, 2]) + with self.assertRaisesRegexp(ValueError, "Matrix must be square"): + b.inverse_event_shape(y_shape_bad) + with self.assertRaisesOpError("Matrix must be square"): + self.evaluate(b.inverse_event_shape_tensor(y_shape_bad.as_list())) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 4965381ef3..59b8cf1bb2 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ 
b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -24,6 +24,7 @@ @@CholeskyOuterProduct @@ConditionalBijector @@Exp +@@FillTriangular @@Gumbel @@Identity @@Inline @@ -64,6 +65,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.chain import * from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product import * from tensorflow.contrib.distributions.python.ops.bijectors.conditional_bijector import * from tensorflow.contrib.distributions.python.ops.bijectors.exp import * +from tensorflow.contrib.distributions.python.ops.bijectors.fill_triangular import * from tensorflow.contrib.distributions.python.ops.bijectors.gumbel import * from tensorflow.contrib.distributions.python.ops.bijectors.inline import * from tensorflow.contrib.distributions.python.ops.bijectors.invert import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/fill_triangular.py b/tensorflow/contrib/distributions/python/ops/bijectors/fill_triangular.py new file mode 100644 index 0000000000..7b06325ead --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/fill_triangular.py @@ -0,0 +1,148 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""FillTriangular bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import bijector +from tensorflow.python.ops.distributions import util as dist_util + + +__all__ = [ + "FillTriangular", +] + + +class FillTriangular(bijector.Bijector): + """Transforms vectors to triangular. + + Triangular matrix elements are filled in a clockwise spiral. + + Given input with shape `batch_shape + [d]`, produces output with + shape `batch_shape + [n, n]`, where + `n = (-1 + sqrt(1 + 8 * d))/2`. + This follows by solving the quadratic equation + `d = 1 + 2 + ... + n = n * (n + 1)/2`. + + #### Example + + ```python + b = tfb.FillTriangular(upper=False) + b.forward([1, 2, 3, 4, 5, 6]) + # ==> [[4, 0, 0], + # [6, 5, 0], + # [3, 2, 1]] + + b = tfb.FillTriangular(upper=True) + b.forward([1, 2, 3, 4, 5, 6]) + # ==> [[1, 2, 3], + # [0, 5, 6], + # [0, 0, 4]] + + ``` + """ + + def __init__(self, + upper=False, + validate_args=False, + name="fill_triangular"): + """Instantiates the `FillTriangular` bijector. + + Args: + upper: Python `bool` representing whether output matrix should be upper + triangular (`True`) or lower triangular (`False`, default). + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. 
+ """ + self._upper = upper + super(FillTriangular, self).__init__( + forward_min_event_ndims=1, + inverse_min_event_ndims=2, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return dist_util.fill_triangular(x, upper=self._upper) + + def _inverse(self, y): + return dist_util.fill_triangular_inverse(y, upper=self._upper) + + def _forward_log_det_jacobian(self, x): + return array_ops.zeros_like(x[..., 0]) + + def _inverse_log_det_jacobian(self, y): + return array_ops.zeros_like(y[..., 0, 0]) + + def _forward_event_shape(self, input_shape): + batch_shape, d = input_shape[:-1], input_shape[-1].value + if d is None: + n = None + else: + n = vector_size_to_square_matrix_size(d, self.validate_args) + return batch_shape.concatenate([n, n]) + + def _inverse_event_shape(self, output_shape): + batch_shape, n1, n2 = (output_shape[:-2], + output_shape[-2].value, + output_shape[-1].value) + if n1 is None or n2 is None: + m = None + elif n1 != n2: + raise ValueError("Matrix must be square. 
(saw [{}, {}])".format(n1, n2)) + else: + m = n1 * (n1 + 1) / 2 + return batch_shape.concatenate([m]) + + def _forward_event_shape_tensor(self, input_shape_tensor): + batch_shape, d = input_shape_tensor[:-1], input_shape_tensor[-1] + n = vector_size_to_square_matrix_size(d, self.validate_args) + return array_ops.concat([batch_shape, [n, n]], axis=0) + + def _inverse_event_shape_tensor(self, output_shape_tensor): + batch_shape, n = output_shape_tensor[:-2], output_shape_tensor[-1] + if self.validate_args: + is_square_matrix = check_ops.assert_equal( + n, output_shape_tensor[-2], message="Matrix must be square.") + with ops.control_dependencies([is_square_matrix]): + n = array_ops.identity(n) + d = math_ops.cast(n * (n + 1) / 2, output_shape_tensor.dtype) + return array_ops.concat([batch_shape, [d]], axis=0) + + +def vector_size_to_square_matrix_size(d, validate_args, name=None): + """Convert a vector size to a matrix size.""" + if isinstance(d, (float, int, np.generic, np.ndarray)): + n = (-1 + np.sqrt(1 + 8 * d)) / 2. + if float(int(n)) != n: + raise ValueError("Vector length is not a triangular number.") + return int(n) + else: + with ops.name_scope(name, "vector_size_to_square_matrix_size", [d]) as name: + n = (-1. + math_ops.sqrt(1 + 8. * math_ops.to_float(d))) / 2. + if validate_args: + with ops.control_dependencies([check_ops.assert_equal( + math_ops.to_float(math_ops.to_int32(n)), n, + message="Vector length is not a triangular number")]): + n = array_ops.identity(n) + return math_ops.cast(n, d.dtype) -- GitLab From f9acd2548a508fc90357e93ad2b5efb2611ccb98 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 12:03:44 -0700 Subject: [PATCH 012/365] [XLA] Redesign: delete versioned_computation_handle and compilation_cache. 
PiperOrigin-RevId: 199673573 --- tensorflow/compiler/xla/service/BUILD | 32 -------- .../compiler/xla/service/channel_tracker.h | 1 - .../compiler/xla/service/compilation_cache.cc | 78 ------------------- .../compiler/xla/service/compilation_cache.h | 78 ------------------- .../xla/service/copy_insertion_test.cc | 9 +-- tensorflow/compiler/xla/service/executable.h | 7 -- .../xla/service/gpu/hlo_schedule_test.cc | 3 +- .../xla/service/gpu/stream_assignment_test.cc | 3 +- .../xla/service/hlo_evaluator_test.cc | 2 +- tensorflow/compiler/xla/service/hlo_module.cc | 17 +--- tensorflow/compiler/xla/service/hlo_module.h | 17 +--- .../compiler/xla/service/local_service.cc | 1 - tensorflow/compiler/xla/service/service.h | 5 -- .../service/versioned_computation_handle.cc | 32 -------- .../service/versioned_computation_handle.h | 55 ------------- .../compiler/xla/tests/hlo_test_base.cc | 3 +- .../compiler/xla/tests/llvm_compiler_test.cc | 3 +- 17 files changed, 11 insertions(+), 335 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/compilation_cache.cc delete mode 100644 tensorflow/compiler/xla/service/compilation_cache.h delete mode 100644 tensorflow/compiler/xla/service/versioned_computation_handle.cc delete mode 100644 tensorflow/compiler/xla/service/versioned_computation_handle.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 20cc671ba3..89de302f4d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -292,7 +292,6 @@ cc_library( ":hlo_proto", ":hlo_reachability", ":name_uniquer", - ":versioned_computation_handle", "//tensorflow/compiler/xla:array", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:protobuf_util", @@ -401,17 +400,6 @@ tf_cc_test( ], ) -cc_library( - name = "versioned_computation_handle", - srcs = ["versioned_computation_handle.cc"], - hdrs = ["versioned_computation_handle.h"], - deps = [ - "//tensorflow/compiler/xla:types", - 
"//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:lib", - ], -) - tf_cc_test( name = "hlo_instruction_test", srcs = ["hlo_instruction_test.cc"], @@ -591,7 +579,6 @@ cc_library( ":allocation_tracker", ":backend", ":channel_tracker", - ":compilation_cache", ":compiler", ":computation_layout", ":device_memory_allocator", @@ -606,7 +593,6 @@ cc_library( ":platform_util", ":source_map_util", ":transfer_manager", - ":versioned_computation_handle", "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:service_interface", @@ -641,7 +627,6 @@ cc_library( ":platform_util", ":service", ":shaped_buffer", - ":versioned_computation_handle", "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:shape_layout", "//tensorflow/compiler/xla:shape_util", @@ -762,7 +747,6 @@ cc_library( ":hlo_proto", ":pool", ":shaped_buffer", - ":versioned_computation_handle", "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:status_macros", @@ -864,7 +848,6 @@ cc_library( hdrs = ["channel_tracker.h"], deps = [ ":hlo", - ":versioned_computation_handle", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1646,7 +1629,6 @@ tf_cc_test( ":hlo_cost_analysis", ":local_service", ":service", - ":versioned_computation_handle", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:test_helpers", @@ -1987,20 +1969,6 @@ tf_cc_test( ], ) -cc_library( - name = "compilation_cache", - srcs = ["compilation_cache.cc"], - hdrs = ["compilation_cache.h"], - deps = [ - ":executable", - ":hlo_module_config", - ":versioned_computation_handle", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:lib", - ], -) - cc_library( name = "layout_assignment", srcs = [ diff --git 
a/tensorflow/compiler/xla/service/channel_tracker.h b/tensorflow/compiler/xla/service/channel_tracker.h index 52f33a1318..fac0afd672 100644 --- a/tensorflow/compiler/xla/service/channel_tracker.h +++ b/tensorflow/compiler/xla/service/channel_tracker.h @@ -19,7 +19,6 @@ limitations under the License. #include #include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" diff --git a/tensorflow/compiler/xla/service/compilation_cache.cc b/tensorflow/compiler/xla/service/compilation_cache.cc deleted file mode 100644 index b16907da9e..0000000000 --- a/tensorflow/compiler/xla/service/compilation_cache.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/xla/service/compilation_cache.h" - -#include - -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/logging.h" - -namespace xla { - -std::shared_ptr CompilationCache::Insert( - std::unique_ptr executable, - const HloModuleConfig& module_config) { - tensorflow::mutex_lock lock(mutex_); - - CacheKey key = - BuildKey(executable->entry_computation_handle(), module_config); - VLOG(2) << "inserting cache key: " << key; - if (cache_.count(key) == 0) { - cache_.emplace(key, std::move(executable)); - } else { - // Executable already exists in the cache. This can happen if two Execute - // calls for a new computation are received simultaneously by the - // service. In this case, we discard the Executable given as a parameter and - // return what is in the cache. This is necessary because the service relies - // on the cache to keep ownership of the Executable. We only want to store - // one Executable for a given computation version and we can't discard the - // executable which is in the cache because it may be in use. 
- executable.reset(); - } - return cache_.at(key); -} - -std::shared_ptr CompilationCache::LookUp( - const VersionedComputationHandle& versioned_handle, - const HloModuleConfig& module_config) const { - tensorflow::mutex_lock lock(mutex_); - - CacheKey key = BuildKey(versioned_handle, module_config); - VLOG(2) << "looking up cache key: " << key; - if (cache_.count(key) == 0) { - VLOG(2) << "cache key not found: " << key; - return nullptr; - } else { - std::shared_ptr result = cache_.at(key); - VLOG(2) << "hit executable with module config: " - << result->module_config().compilation_cache_key(); - return result; - } -} - -CompilationCache::CacheKey CompilationCache::BuildKey( - const VersionedComputationHandle& versioned_handle, - const HloModuleConfig& module_config) const { - // The computation shape is represented entirely by its ProgramShape member, - // so just serialize the proto as part of the key. - return tensorflow::strings::StrCat(versioned_handle.handle.handle(), "::", - versioned_handle.version, "::", - module_config.compilation_cache_key()); -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/compilation_cache.h b/tensorflow/compiler/xla/service/compilation_cache.h deleted file mode 100644 index 09989726ae..0000000000 --- a/tensorflow/compiler/xla/service/compilation_cache.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_COMPILATION_CACHE_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_COMPILATION_CACHE_H_ - -#include -#include -#include - -#include "tensorflow/compiler/xla/service/executable.h" -#include "tensorflow/compiler/xla/service/hlo_module_config.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/thread_annotations.h" - -namespace xla { - -// A cache which stores Executables indexed by computation handle and version. -class CompilationCache { - public: - CompilationCache() {} - - // Insert the given Executable into the cache. Return a bare Executable - // pointer for the caller to use. Note: the returned pointer will *not* be the - // same as the given unique pointer if the computation already exists in the - // cache. See comments in the .cc implementation for details of this case. - // - // module_config is provided by the caller, instead of being taken from the - // executable, so that we can insert keys into the compilation cache that are - // devoid of layout (where XLA gets to choose what layout to compile). - // - // A shared_ptr is returned so the caller can keep the Executable from being - // destructed in the event that the Executable is evicted from the - // computation cache (and the cache's shared_ptr to the Executable is - // destructed). - std::shared_ptr Insert(std::unique_ptr executable, - const HloModuleConfig& module_config); - - // Lookup the Executable for the specified versioned computation in the cache. - // Return a shared_ptr to the Executable if it exists in the cache. Return - // nullptr otherwise. 
- std::shared_ptr LookUp( - const VersionedComputationHandle& versioned_handle, - const HloModuleConfig& module_config) const; - - protected: - mutable tensorflow::mutex mutex_; - - // Map from versioned handle with program layout to Executable built - // for that computation version and program layout. - using CacheKey = string; - - CacheKey BuildKey(const VersionedComputationHandle& versioned_handle, - const HloModuleConfig& module_config) const; - std::map> cache_ GUARDED_BY(mutex_); - - private: - TF_DISALLOW_COPY_AND_ASSIGN(CompilationCache); -}; - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_COMPILATION_CACHE_H_ diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 153f062d01..684fff8a6f 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1636,8 +1636,7 @@ void BM_SequentialWhiles(int num_iters, int num_whiles) { for (int i = 0; i < num_iters; ++i) { HloModuleConfig config; config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - HloModule module("BM_SequentialWhiles", VersionedComputationHandle(), - config); + HloModule module("BM_SequentialWhiles", config); auto builder = HloComputation::Builder("BM_SequentialWhiles"); HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( @@ -1677,8 +1676,7 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) { for (int i = 0; i < num_iters; ++i) { HloModuleConfig config; config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - HloModule module("BM_SequentialWhiles", VersionedComputationHandle(), - config); + HloModule module("BM_SequentialWhiles", config); auto builder = HloComputation::Builder("BM_ParallelWhiles"); HloInstruction* x = builder.AddInstruction(HloInstruction::CreateParameter( @@ -1750,8 +1748,7 @@ void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) { std::vector 
tuple_params(num_tuple_inputs); for (int i = 0; i < num_iters; ++i) { auto builder = HloComputation::Builder("BM_ParallelWhiles"); - HloModule module("BM_ManyElementTuple", VersionedComputationHandle(), - config); + HloModule module("BM_ManyElementTuple", config); for (int j = 0; j < num_tuple_inputs; ++j) { tuple_params[j] = builder.AddInstruction( HloInstruction::CreateParameter(j, element_shape, "")); diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 087bd14329..dc1f26ea65 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/service_executable_run_options.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -131,12 +130,6 @@ class Executable { const HloModuleConfig& module_config() const { return hlo_module_->config(); } - // Returns the versioned computation handle of the computation computed by - // this executable. - const VersionedComputationHandle& entry_computation_handle() const { - return hlo_module_->entry_computation_handle(); - } - // The shape (including layout) that results from this execution. This is the // shape of the DeviceMemoryBase result value in ExecuteOnStream above. 
const Shape& host_result_shape() const { diff --git a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc index e230d538cc..45f0a1c645 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_schedule_test.cc @@ -47,8 +47,7 @@ class HloScheduleTest : public HloTestBase { auto debug_options = GetDebugOptionsForTest(); debug_options.set_xla_gpu_disable_multi_streaming(false); config.set_debug_options(debug_options); - return MakeUnique("test_module", VersionedComputationHandle(), - config); + return MakeUnique("test_module", config); } HloVec RemoveHlo(const HloVec& input, diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc index 696fa7e019..6f4bb0580e 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/stream_assignment_test.cc @@ -33,8 +33,7 @@ class StreamAssignmentTest : public HloTestBase { auto debug_options = GetDebugOptionsForTest(); debug_options.set_xla_gpu_disable_multi_streaming(false); config.set_debug_options(debug_options); - return MakeUnique("test_module", VersionedComputationHandle(), - config); + return MakeUnique("test_module", config); } // Pre-canned shapes. 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 84b4ead2dd..72eb9930e9 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1248,7 +1248,7 @@ void BM_ReducePrecisely(int num_iters) { HloComputation::Builder b("BM_ReducePrecisely"); HloModuleConfig config; config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - HloModule module("BM_ReducePrecisely", VersionedComputationHandle(), config); + HloModule module("BM_ReducePrecisely", config); constexpr int kNumElements = 1 << 25; // float += 1 saturates at 1<<24 std::vector v(kNumElements, 1.0f); diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index e63424c2df..ab60258677 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -32,15 +32,6 @@ limitations under the License. namespace xla { -HloModule::HloModule(const string& name, - const VersionedComputationHandle& entry_computation_handle, - const HloModuleConfig& config) - : name_(NameUniquer::GetSanitizedName(name)), - config_(config), - has_entry_computation_handle_(true), - entry_computation_handle_(entry_computation_handle), - unique_id_(next_unique_module_id_++) {} - HloModule::HloModule(const string& name, const HloModuleConfig& config) : name_(NameUniquer::GetSanitizedName(name)), config_(config), @@ -234,8 +225,7 @@ HloModuleProto HloModule::ToProto() const { /* static */ StatusOr> HloModule::CreateFromProto( - const HloModuleProto& proto, const HloModuleConfig& module_config, - const VersionedComputationHandle& entry_computation_handle) { + const HloModuleProto& proto, const HloModuleConfig& module_config) { // The ProgramShape in the passed in module config must match the shapes of // the entry parameters and root. 
TF_RET_CHECK(proto.has_program_shape()) @@ -287,8 +277,7 @@ StatusOr> HloModule::CreateFromProto( } TF_RET_CHECK(entry != nullptr); - auto module = MakeUnique(proto.name(), entry_computation_handle, - module_config); + auto module = MakeUnique(proto.name(), module_config); // Sort the computations in the proto id's order. std::sort(computations.begin(), computations.end(), @@ -525,8 +514,6 @@ std::vector HloModule::MakeNonfusionComputations() const { std::unique_ptr HloModule::Clone(const string& suffix) const { VLOG(1) << "Cloning module :" << name_ << " --> " << suffix << "\n"; auto module = MakeUnique(name_ + "-" + suffix, config_); - module->entry_computation_handle_ = entry_computation_handle_; - module->has_entry_computation_handle_ = has_entry_computation_handle_; HloCloneContext context(module.get(), suffix); auto cloned_computation = entry_computation_->Clone(suffix, &context); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index c93c74d34a..757e65bda2 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -31,7 +31,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -57,10 +56,6 @@ namespace xla { // attached to. class HloModule { public: - HloModule(const string& name, - const VersionedComputationHandle& entry_computation_handle, - const HloModuleConfig& config); - // Constructor without a versioned computation handle. This constructor should // only be used for HloModules used outside of the XLA service (eg // tests). 
The versioned handle is used by the service in the compilation @@ -126,10 +121,6 @@ class HloModule { return config_.device_entry_computation_layout(); } - const VersionedComputationHandle& entry_computation_handle() const { - return entry_computation_handle_; - } - // Gets the computations in this module. // // Returns a view of HloComputation*s, so you can iterate over this in the @@ -188,9 +179,7 @@ class HloModule { // Convert an HloModule to or from a proto. HloModuleProto ToProto() const; static StatusOr> CreateFromProto( - const HloModuleProto& proto, const HloModuleConfig& module_config, - const VersionedComputationHandle& entry_computation_handle = - VersionedComputationHandle()); + const HloModuleProto& proto, const HloModuleConfig& module_config); // Creates and returns an HloModuleConfig with an appropriate program shape // for the HLO module in the given proto. @@ -264,10 +253,6 @@ class HloModule { mutable std::mt19937_64 rng_{42}; mutable tensorflow::mutex rng_mutex_; - // Versioned handle of the entry computation of the module. - bool has_entry_computation_handle_ = false; - VersionedComputationHandle entry_computation_handle_; - // Unique name generator for computation and instruction names, which are // unique per module. NameUniquer computation_name_uniquer_{/*separator=*/"."}; diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 1d9c9e0678..296d04d436 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -30,7 +30,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/platform_util.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/shape_layout.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index d64b2b4d0a..8748a4c144 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -26,14 +26,12 @@ limitations under the License. #include "tensorflow/compiler/xla/service/allocation_tracker.h" #include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/service/channel_tracker.h" -#include "tensorflow/compiler/xla/service/compilation_cache.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" #include "tensorflow/compiler/xla/service/executable.h" #include "tensorflow/compiler/xla/service/execution_tracker.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" #include "tensorflow/compiler/xla/service_interface.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -297,9 +295,6 @@ class Service : public ServiceInterface { // Tracks asynchronously launched executions via the API. ExecutionTracker execution_tracker_; - // Cache containing previously built Executables. - CompilationCache compilation_cache_; - // Backend to compile and execute computations on. 
std::unique_ptr execute_backend_; diff --git a/tensorflow/compiler/xla/service/versioned_computation_handle.cc b/tensorflow/compiler/xla/service/versioned_computation_handle.cc deleted file mode 100644 index a693c4695f..0000000000 --- a/tensorflow/compiler/xla/service/versioned_computation_handle.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/versioned_computation_handle.h" - -#include "tensorflow/core/lib/strings/strcat.h" - -namespace xla { - -string VersionedComputationHandle::ToString() const { - return tensorflow::strings::StrCat(handle.handle(), ":v", version); -} - -std::ostream& operator<<(std::ostream& out, - const VersionedComputationHandle& versioned_handle) { - out << versioned_handle.ToString(); - return out; -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/versioned_computation_handle.h b/tensorflow/compiler/xla/service/versioned_computation_handle.h deleted file mode 100644 index 5732a56caf..0000000000 --- a/tensorflow/compiler/xla/service/versioned_computation_handle.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_VERSIONED_COMPUTATION_HANDLE_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_VERSIONED_COMPUTATION_HANDLE_H_ - -#include - -#include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" - -namespace xla { - -// A data structure encapsulating a ComputationHandle and version value of that -// computation. This object is used to unambiguously refer to a particular -// computation in the service. -struct VersionedComputationHandle { - // A version value unambiguously specifying the state of the computation at a - // particular point in time as it is being built. This value is the - // ComputationDataHandle of the current root instruction. 
- using Version = int64; - - ComputationHandle handle; - Version version; - - string ToString() const; - bool operator==(const VersionedComputationHandle& other) const { - return (handle.handle() == other.handle.handle()) && - (version == other.version); - } - bool operator<(const VersionedComputationHandle& other) const { - return ((handle.handle() < other.handle.handle()) || - ((handle.handle() == other.handle.handle()) && - (version < other.version))); - } -}; - -std::ostream& operator<<(std::ostream& out, - const VersionedComputationHandle& versioned_handle); - -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_VERSIONED_COMPUTATION_HANDLE_H_ diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 08ed826c80..242cc5db11 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -94,8 +94,7 @@ HloTestBase::HloTestBase(se::Platform* test_platform, /* static */ std::unique_ptr HloTestBase::CreateNewModule(const string& name) { - return MakeUnique(name, VersionedComputationHandle(), - GetModuleConfigForTest()); + return MakeUnique(name, GetModuleConfigForTest()); } /*static*/ DebugOptions HloTestBase::GetDebugOptionsForTest() { diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 2f46ee0be2..082bc34136 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -124,8 +124,7 @@ class LLVMCompilerTest : public ::testing::Test { static std::unique_ptr CreateNewModule() { HloModuleConfig config; config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - return MakeUnique(TestName(), VersionedComputationHandle(), - config); + return MakeUnique(TestName(), config); } }; -- GitLab From 4d0d60a82c52c6c71650db33bf826f03559d91fc Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Thu, 7 Jun 2018 
12:03:52 -0700 Subject: [PATCH 013/365] Expand DistributionStrategy.group to address single variable case properly as well, in addition to a single Tensor case. PiperOrigin-RevId: 199673590 --- tensorflow/python/training/distribute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index ab8b37bb65..7cd175f25b 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -946,7 +946,7 @@ class DistributionStrategy(object): return control_flow_ops.group(value, name=name) # Special handling for the common case of one op. v, = value - if isinstance(v, ops.Tensor): + if hasattr(v, "op"): v = v.op return v -- GitLab From 501cf726cbee2ee13efef43884a6552ca211979d Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 7 Jun 2018 12:05:24 -0700 Subject: [PATCH 014/365] Internal Change. PiperOrigin-RevId: 199673803 --- tensorflow/BUILD | 7 ++- tensorflow/api_template.__init__.py | 17 +++++- tensorflow/contrib/cmake/tf_python.cmake | 45 ++++++++++++++ tensorflow/python/estimator/BUILD | 4 ++ tensorflow/python/estimator/api/BUILD | 17 ++++++ .../python/estimator/canned/baseline.py | 6 +- .../python/estimator/canned/boosted_trees.py | 6 +- tensorflow/python/estimator/canned/dnn.py | 6 +- .../estimator/canned/dnn_linear_combined.py | 6 +- tensorflow/python/estimator/canned/linear.py | 6 +- .../python/estimator/canned/parsing_utils.py | 6 +- tensorflow/python/estimator/estimator.py | 12 ++-- tensorflow/python/estimator/export/export.py | 10 ++-- .../python/estimator/export/export_output.py | 10 ++-- tensorflow/python/estimator/exporter.py | 10 ++-- .../python/estimator/inputs/numpy_io.py | 4 +- .../python/estimator/inputs/pandas_io.py | 4 +- tensorflow/python/estimator/model_fn.py | 6 +- tensorflow/python/estimator/run_config.py | 4 +- tensorflow/python/estimator/training.py | 8 +-- tensorflow/python/util/tf_export.py | 58 
++++++++++++------- tensorflow/python/util/tf_export_test.py | 7 --- tensorflow/tools/api/generator/api_gen.bzl | 20 +++++-- .../tools/api/generator/create_python_api.py | 35 ++++++----- .../api/generator/create_python_api_test.py | 9 ++- 25 files changed, 218 insertions(+), 105 deletions(-) create mode 100644 tensorflow/python/estimator/api/BUILD diff --git a/tensorflow/BUILD b/tensorflow/BUILD index e0bce820d1..a73c4ca3aa 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -541,14 +541,17 @@ exports_files( ) gen_api_init_files( - name = "python_api_gen", + name = "tensorflow_python_api_gen", srcs = ["api_template.__init__.py"], root_init_template = "api_template.__init__.py", ) py_library( name = "tensorflow_py", - srcs = [":python_api_gen"], + srcs = [ + ":tensorflow_python_api_gen", + "//tensorflow/python/estimator/api:estimator_python_api_gen", + ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = ["//tensorflow/python"], diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 9b0d7d48af..9662d7b478 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -22,7 +22,22 @@ from __future__ import print_function from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import # API IMPORTS PLACEHOLDER -from tensorflow.python.util.lazy_loader import LazyLoader +try: + import os # pylint: disable=g-import-not-at-top + # Add `estimator` attribute to allow access to estimator APIs via + # "tf.estimator..." + from tensorflow.python.estimator.api import estimator # pylint: disable=g-import-not-at-top + + # Add `estimator` to the __path__ to allow "from tensorflow.estimator..." + # style imports. 
+ from tensorflow.python.estimator import api as estimator_api # pylint: disable=g-import-not-at-top + __path__ += [os.path.dirname(estimator_api.__file__)] + del estimator_api + del os +except (ImportError, AttributeError): + print('tf.estimator package not installed.') + +from tensorflow.python.util.lazy_loader import LazyLoader # pylint: disable=g-import-not-at-top contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib') del LazyLoader diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index d019dd48f2..a0c3ddd28b 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -756,6 +756,8 @@ add_custom_command( "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py" "--root_init_template=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/api_template.__init__.py" "--apidir=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow" + "--package=tensorflow.python" + "--apiname=tensorflow" "${api_init_list_file}" COMMENT "Generating __init__.py files for Python API." @@ -765,7 +767,49 @@ add_custom_command( add_custom_target(tf_python_api SOURCES ${api_init_files}) add_dependencies(tf_python_api tf_python_ops) +# TODO(mikecase): This can be removed once tf.estimator is moved +# out of TensorFlow. +######################################################## +# Generate API __init__.py files for tf.estimator. +######################################################## + +# Parse tensorflow/tools/api/generator/BUILD to get list of generated files. 
+FILE(READ ${tensorflow_source_dir}/tensorflow/tools/api/generator/api_gen.bzl api_generator_BUILD_text) +STRING(REGEX MATCH "# BEGIN GENERATED ESTIMATOR FILES.*# END GENERATED ESTIMATOR FILES" api_init_files_text ${api_generator_BUILD_text}) +string(REPLACE "# BEGIN GENERATED ESTIMATOR FILES" "" api_init_files_text ${api_init_files_text}) +string(REPLACE "# END GENERATED ESTIMATOR FILES" "" api_init_files_text ${api_init_files_text}) +string(REPLACE "," ";" api_init_files_list ${api_init_files_text}) + +set(api_init_files "") +foreach(api_init_file ${api_init_files_list}) + string(STRIP "${api_init_file}" api_init_file) + if(api_init_file) + string(REPLACE "\"" "" api_init_file "${api_init_file}") # Remove quotes + list(APPEND api_init_files "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/estimator/api/${api_init_file}") + endif() +endforeach(api_init_file) +set(estimator_api_init_list_file "${tensorflow_source_dir}/estimator_api_init_files_list.txt") +file(WRITE "${estimator_api_init_list_file}" "${api_init_files}") + +# Run create_python_api.py to generate __init__.py files. +add_custom_command( + OUTPUT ${api_init_files} + DEPENDS tf_python_ops tf_python_copy_scripts_to_destination pywrap_tensorflow_internal tf_python_touchup_modules tf_extension_ops + + # Run create_python_api.py to generate API init files. + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python ${PYTHON_EXECUTABLE} + "${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/tools/api/generator/create_python_api.py" + "--apidir=${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/estimator/api" + "--package=tensorflow.python.estimator" + "--apiname=estimator" + "${estimator_api_init_list_file}" + + COMMENT "Generating __init__.py files for Python API." 
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tf_python" +) +add_custom_target(estimator_python_api SOURCES ${api_init_files}) +add_dependencies(estimator_python_api tf_python_ops) ############################################################ # Build a PIP package containing the TensorFlow runtime. ############################################################ @@ -776,6 +820,7 @@ add_dependencies(tf_python_build_pip_package tf_python_touchup_modules tf_python_ops tf_python_api + estimator_python_api tf_extension_ops) # Fix-up Python files that were not included by the add_python_module() macros. diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index d538c6c415..c0d63b79a6 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -12,6 +12,10 @@ py_library( name = "estimator_py", srcs = ["estimator_lib.py"], srcs_version = "PY2AND3", + visibility = [ + "//tensorflow:__pkg__", + "//tensorflow:internal", + ], deps = [ ":baseline", ":boosted_trees", diff --git a/tensorflow/python/estimator/api/BUILD b/tensorflow/python/estimator/api/BUILD new file mode 100644 index 0000000000..cddee9b8f3 --- /dev/null +++ b/tensorflow/python/estimator/api/BUILD @@ -0,0 +1,17 @@ +package( + default_visibility = [ + "//tensorflow:internal", + ], +) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/tools/api/generator:api_gen.bzl", "gen_api_init_files") +load("//tensorflow/tools/api/generator:api_gen.bzl", "ESTIMATOR_API_INIT_FILES") + +gen_api_init_files( + name = "estimator_python_api_gen", + api_name = "estimator", + output_files = ESTIMATOR_API_INIT_FILES, + package = "tensorflow.python.estimator", +) diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py index 980c057372..3c6816cb03 100644 --- a/tensorflow/python/estimator/canned/baseline.py +++ b/tensorflow/python/estimator/canned/baseline.py @@ -59,7 +59,7 @@ from tensorflow.python.ops import math_ops 
from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.training import training_util -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # The default learning rate of 0.3 is a historical artifact of the initial # implementation, but seems a reasonable choice. @@ -174,7 +174,7 @@ def _baseline_model_fn(features, labels, mode, head, optimizer, train_op_fn=train_op_fn) -@tf_export('estimator.BaselineClassifier') +@estimator_export('estimator.BaselineClassifier') class BaselineClassifier(estimator.Estimator): """A classifier that can establish a simple baseline. @@ -277,7 +277,7 @@ class BaselineClassifier(estimator.Estimator): config=config) -@tf_export('estimator.BaselineRegressor') +@estimator_export('estimator.BaselineRegressor') class BaselineRegressor(estimator.Estimator): """A regressor that can establish a simple baseline. diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 4e6010a162..6b54f51ca6 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -39,7 +39,7 @@ from tensorflow.python.summary import summary from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # TODO(nponomareva): Reveal pruning params here. 
_TreeHParams = collections.namedtuple('TreeHParams', [ @@ -712,7 +712,7 @@ def _create_regression_head(label_dimension, weight_column=None): # pylint: enable=protected-access -@tf_export('estimator.BoostedTreesClassifier') +@estimator_export('estimator.BoostedTreesClassifier') class BoostedTreesClassifier(estimator.Estimator): """A Classifier for Tensorflow Boosted Trees models.""" @@ -830,7 +830,7 @@ class BoostedTreesClassifier(estimator.Estimator): model_fn=_model_fn, model_dir=model_dir, config=config) -@tf_export('estimator.BoostedTreesRegressor') +@estimator_export('estimator.BoostedTreesRegressor') class BoostedTreesRegressor(estimator.Estimator): """A Regressor for Tensorflow Boosted Trees models.""" diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py index 1feac36f35..b924ad5df4 100644 --- a/tensorflow/python/estimator/canned/dnn.py +++ b/tensorflow/python/estimator/canned/dnn.py @@ -32,7 +32,7 @@ from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # The default learning rate of 0.05 is a historical artifact of the initial # implementation, but seems a reasonable choice. @@ -201,7 +201,7 @@ def _dnn_model_fn(features, logits=logits) -@tf_export('estimator.DNNClassifier') +@estimator_export('estimator.DNNClassifier') class DNNClassifier(estimator.Estimator): """A classifier for TensorFlow DNN models. @@ -353,7 +353,7 @@ class DNNClassifier(estimator.Estimator): warm_start_from=warm_start_from) -@tf_export('estimator.DNNRegressor') +@estimator_export('estimator.DNNRegressor') class DNNRegressor(estimator.Estimator): """A regressor for TensorFlow DNN models. 
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py index 95efc0a028..64d81c46ce 100644 --- a/tensorflow/python/estimator/canned/dnn_linear_combined.py +++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py @@ -37,7 +37,7 @@ from tensorflow.python.summary import summary from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import sync_replicas_optimizer from tensorflow.python.training import training_util -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # The default learning rates are a historical artifact of the initial # implementation. @@ -225,7 +225,7 @@ def _dnn_linear_combined_model_fn(features, logits=logits) -@tf_export('estimator.DNNLinearCombinedClassifier') +@estimator_export('estimator.DNNLinearCombinedClassifier') class DNNLinearCombinedClassifier(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined classification models. @@ -406,7 +406,7 @@ class DNNLinearCombinedClassifier(estimator.Estimator): warm_start_from=warm_start_from) -@tf_export('estimator.DNNLinearCombinedRegressor') +@estimator_export('estimator.DNNLinearCombinedRegressor') class DNNLinearCombinedRegressor(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined models for regression. 
diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py index 81657f0c01..705fc3ce06 100644 --- a/tensorflow/python/estimator/canned/linear.py +++ b/tensorflow/python/estimator/canned/linear.py @@ -33,7 +33,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary from tensorflow.python.training import ftrl -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # The default learning rate of 0.2 is a historical artifact of the initial @@ -164,7 +164,7 @@ def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, logits=logits) -@tf_export('estimator.LinearClassifier') +@estimator_export('estimator.LinearClassifier') class LinearClassifier(estimator.Estimator): """Linear classifier model. @@ -317,7 +317,7 @@ class LinearClassifier(estimator.Estimator): warm_start_from=warm_start_from) -@tf_export('estimator.LinearRegressor') +@estimator_export('estimator.LinearRegressor') class LinearRegressor(estimator.Estimator): """An estimator for TensorFlow Linear regression problems. 
diff --git a/tensorflow/python/estimator/canned/parsing_utils.py b/tensorflow/python/estimator/canned/parsing_utils.py index 74e5e5a1be..1ae0f1e9f7 100644 --- a/tensorflow/python/estimator/canned/parsing_utils.py +++ b/tensorflow/python/estimator/canned/parsing_utils.py @@ -23,10 +23,10 @@ import six from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import dtypes from tensorflow.python.ops import parsing_ops -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export -@tf_export('estimator.classifier_parse_example_spec') +@estimator_export('estimator.classifier_parse_example_spec') def classifier_parse_example_spec(feature_columns, label_key, label_dtype=dtypes.int64, @@ -166,7 +166,7 @@ def classifier_parse_example_spec(feature_columns, return parsing_spec -@tf_export('estimator.regressor_parse_example_spec') +@estimator_export('estimator.regressor_parse_example_spec') def regressor_parse_example_spec(feature_columns, label_key, label_dtype=dtypes.float32, diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 4be1af1e66..41c25f1c73 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -66,14 +66,14 @@ from tensorflow.python.util import compat from tensorflow.python.util import compat_internal from tensorflow.python.util import function_utils from tensorflow.python.util import nest -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export _VALID_MODEL_FN_ARGS = set( ['features', 'labels', 'mode', 'params', 'self', 'config']) -@tf_export('estimator.Estimator') +@estimator_export('estimator.Estimator') class Estimator(object): """Estimator class to train and evaluate TensorFlow models. 
@@ -566,7 +566,8 @@ class Estimator(object): allowed_overrides = set([ '_call_input_fn', '_create_global_step', '_convert_train_steps_to_hooks', '_convert_eval_steps_to_hooks', - '_tf_api_names', '_validate_features_in_predict_input', + '_tf_api_names', '_estimator_api_names', '_estimator_api_constants', + '_validate_features_in_predict_input', '_call_model_fn', '_add_meta_graph_for_mode' ]) estimator_members = set([m for m in Estimator.__dict__.keys() @@ -1634,11 +1635,12 @@ def _has_dataset_or_queue_runner(maybe_tensor): # Now, check queue. return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS) + VocabInfo = warm_starting_util.VocabInfo # pylint: disable=invalid-name -tf_export('estimator.VocabInfo', allow_multiple_exports=True)(VocabInfo) +estimator_export('estimator.VocabInfo')(VocabInfo) -@tf_export('estimator.WarmStartSettings') +@estimator_export('estimator.WarmStartSettings') class WarmStartSettings( collections.namedtuple('WarmStartSettings', [ 'ckpt_to_initialize_from', diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index ff19a0a7f4..010c0f3f59 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -34,7 +34,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.util import compat -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export _SINGLE_FEATURE_DEFAULT_NAME = 'feature' _SINGLE_RECEIVER_DEFAULT_NAME = 'input' @@ -93,7 +93,7 @@ def _check_tensor_key(name, error_label='feature'): raise ValueError('{} keys must be strings: {}.'.format(error_label, name)) -@tf_export('estimator.export.ServingInputReceiver') +@estimator_export('estimator.export.ServingInputReceiver') class ServingInputReceiver( 
collections.namedtuple( 'ServingInputReceiver', @@ -161,7 +161,7 @@ class ServingInputReceiver( receiver_tensors_alternatives=receiver_tensors_alternatives) -@tf_export('estimator.export.TensorServingInputReceiver') +@estimator_export('estimator.export.TensorServingInputReceiver') class TensorServingInputReceiver( collections.namedtuple( 'TensorServingInputReceiver', @@ -263,7 +263,7 @@ class SupervisedInputReceiver( receiver_tensors=receiver_tensors) -@tf_export('estimator.export.build_parsing_serving_input_receiver_fn') +@estimator_export('estimator.export.build_parsing_serving_input_receiver_fn') def build_parsing_serving_input_receiver_fn(feature_spec, default_batch_size=None): """Build a serving_input_receiver_fn expecting fed tf.Examples. @@ -313,7 +313,7 @@ def _placeholders_from_receiver_tensors_dict(input_vals, } -@tf_export('estimator.export.build_raw_serving_input_receiver_fn') +@estimator_export('estimator.export.build_raw_serving_input_receiver_fn') def build_raw_serving_input_receiver_fn(features, default_batch_size=None): """Build a serving_input_receiver_fn expecting feature Tensors. diff --git a/tensorflow/python/estimator/export/export_output.py b/tensorflow/python/estimator/export/export_output.py index d387ea2940..6c26d29985 100644 --- a/tensorflow/python/estimator/export/export_output.py +++ b/tensorflow/python/estimator/export/export_output.py @@ -26,10 +26,10 @@ import six from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.saved_model import signature_def_utils -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export -@tf_export('estimator.export.ExportOutput') +@estimator_export('estimator.export.ExportOutput') class ExportOutput(object): """Represents an output of a model that can be served. 
@@ -100,7 +100,7 @@ class ExportOutput(object): return output_dict -@tf_export('estimator.export.ClassificationOutput') +@estimator_export('estimator.export.ClassificationOutput') class ClassificationOutput(ExportOutput): """Represents the output of a classification head. @@ -169,7 +169,7 @@ class ClassificationOutput(ExportOutput): examples, self.classes, self.scores) -@tf_export('estimator.export.RegressionOutput') +@estimator_export('estimator.export.RegressionOutput') class RegressionOutput(ExportOutput): """Represents the output of a regression head.""" @@ -202,7 +202,7 @@ class RegressionOutput(ExportOutput): return signature_def_utils.regression_signature_def(examples, self.value) -@tf_export('estimator.export.PredictOutput') +@estimator_export('estimator.export.PredictOutput') class PredictOutput(ExportOutput): """Represents the output of a generic prediction head. diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 5981fa59b7..7cdf840c97 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -28,10 +28,10 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging from tensorflow.python.summary import summary_iterator -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export -@tf_export('estimator.Exporter') +@estimator_export('estimator.Exporter') class Exporter(object): """A class representing a type of model export.""" @@ -172,7 +172,7 @@ def _verify_compre_fn_args(compare_fn): (compare_fn, non_valid_args)) -@tf_export('estimator.BestExporter') +@estimator_export('estimator.BestExporter') class BestExporter(Exporter): """This class exports the serving graph and checkpoints of the best models. 
@@ -367,7 +367,7 @@ class BestExporter(Exporter): return best_eval_result -@tf_export('estimator.FinalExporter') +@estimator_export('estimator.FinalExporter') class FinalExporter(Exporter): """This class exports the serving graph and checkpoints in the end. @@ -418,7 +418,7 @@ class FinalExporter(Exporter): is_the_final_export) -@tf_export('estimator.LatestExporter') +@estimator_export('estimator.LatestExporter') class LatestExporter(Exporter): """This class regularly exports the serving graph and checkpoints. diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index a6f4712910..035c7c148c 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -24,7 +24,7 @@ import numpy as np from six import string_types from tensorflow.python.estimator.inputs.queues import feeding_functions -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export # Key name to pack the target into dict of `features`. See # `_get_unique_target_key` for details. 
@@ -87,7 +87,7 @@ def _validate_and_convert_features(x): return ordered_dict_data -@tf_export('estimator.inputs.numpy_input_fn') +@estimator_export('estimator.inputs.numpy_input_fn') def numpy_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index bd06843021..938e244fb3 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -21,7 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.python.estimator.inputs.queues import feeding_functions -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export try: # pylint: disable=g-import-not-at-top @@ -35,7 +35,7 @@ except ImportError: HAS_PANDAS = False -@tf_export('estimator.inputs.pandas_input_fn') +@estimator_export('estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py index 3edf9fe940..c60c7f63ba 100644 --- a/tensorflow/python/estimator/model_fn.py +++ b/tensorflow/python/estimator/model_fn.py @@ -32,10 +32,10 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.training import monitored_session from tensorflow.python.training import session_run_hook from tensorflow.python.util import nest -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export -@tf_export('estimator.ModeKeys') +@estimator_export('estimator.ModeKeys') class ModeKeys(object): """Standard names for model modes. 
@@ -62,7 +62,7 @@ EXPORT_TAG_MAP = { } -@tf_export('estimator.EstimatorSpec') +@estimator_export('estimator.EstimatorSpec') class EstimatorSpec( collections.namedtuple('EstimatorSpec', [ 'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops', diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index c7707be839..b948ce96e0 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -29,7 +29,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.util import compat_internal from tensorflow.python.util import function_utils -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export _USE_DEFAULT = object() @@ -296,7 +296,7 @@ class TaskType(object): EVALUATOR = 'evaluator' -@tf_export('estimator.RunConfig') +@estimator_export('estimator.RunConfig') class RunConfig(object): """This class specifies the configurations for an `Estimator` run.""" diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index fb6a68b4f7..1572af579b 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -35,7 +35,7 @@ from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import server_lib from tensorflow.python.training import session_run_hook from tensorflow.python.util import compat -from tensorflow.python.util.tf_export import tf_export +from tensorflow.python.util.tf_export import estimator_export _MAX_DELAY_SECS = 60 _DELAY_SECS_PER_WORKER = 5 @@ -115,7 +115,7 @@ def _is_google_env(): return tf_config.get(_ENVIRONMENT_KEY) == _ENVIRONMENT_GOOGLE_VALUE -@tf_export('estimator.TrainSpec') +@estimator_export('estimator.TrainSpec') class TrainSpec( collections.namedtuple('TrainSpec', ['input_fn', 'max_steps', 'hooks'])): 
"""Configuration for the "train" part for the `train_and_evaluate` call. @@ -167,7 +167,7 @@ class TrainSpec( cls, input_fn=input_fn, max_steps=max_steps, hooks=hooks) -@tf_export('estimator.EvalSpec') +@estimator_export('estimator.EvalSpec') class EvalSpec( collections.namedtuple('EvalSpec', [ 'input_fn', 'steps', 'name', 'hooks', 'exporters', 'start_delay_secs', @@ -263,7 +263,7 @@ class EvalSpec( throttle_secs=throttle_secs) -@tf_export('estimator.train_and_evaluate') +@estimator_export('estimator.train_and_evaluate') def train_and_evaluate(estimator, train_spec, eval_spec): """Train and evaluate the `estimator`. diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py index bf3961c692..e154ffb68a 100644 --- a/tensorflow/python/util/tf_export.py +++ b/tensorflow/python/util/tf_export.py @@ -41,17 +41,35 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections +import functools import sys from tensorflow.python.util import tf_decorator +ESTIMATOR_API_NAME = 'estimator' +TENSORFLOW_API_NAME = 'tensorflow' + +_Attributes = collections.namedtuple( + 'ExportedApiAttributes', ['names', 'constants']) + +# Attribute values must be unique to each API. +API_ATTRS = { + TENSORFLOW_API_NAME: _Attributes( + '_tf_api_names', + '_tf_api_constants'), + ESTIMATOR_API_NAME: _Attributes( + '_estimator_api_names', + '_estimator_api_constants') +} + class SymbolAlreadyExposedError(Exception): """Raised when adding API names to symbol that already has API names.""" pass -class tf_export(object): # pylint: disable=invalid-name +class api_export(object): # pylint: disable=invalid-name """Provides ways to export symbols to the TensorFlow API.""" def __init__(self, *args, **kwargs): @@ -63,15 +81,12 @@ class tf_export(object): # pylint: disable=invalid-name overrides: List of symbols that this is overriding (those overrided api exports will be removed). 
Note: passing overrides has no effect on exporting a constant. - allow_multiple_exports: Allows exporting the same symbol multiple - times with multiple `tf_export` usages. Prefer however, to list all - of the exported names in a single `tf_export` usage when possible. - + api_name: Name of the API you want to generate (e.g. `tensorflow` or + `estimator`). Default is `tensorflow`. """ self._names = args + self._api_name = kwargs.get('api_name', TENSORFLOW_API_NAME) self._overrides = kwargs.get('overrides', []) - self._allow_multiple_exports = kwargs.get( - 'allow_multiple_exports', False) def __call__(self, func): """Calls this decorator. @@ -86,25 +101,24 @@ class tf_export(object): # pylint: disable=invalid-name SymbolAlreadyExposedError: Raised when a symbol already has API names and kwarg `allow_multiple_exports` not set. """ + api_names_attr = API_ATTRS[self._api_name].names + # Undecorate overridden names for f in self._overrides: _, undecorated_f = tf_decorator.unwrap(f) - del undecorated_f._tf_api_names # pylint: disable=protected-access + delattr(undecorated_f, api_names_attr) _, undecorated_func = tf_decorator.unwrap(func) # Check for an existing api. We check if attribute name is in # __dict__ instead of using hasattr to verify that subclasses have # their own _tf_api_names as opposed to just inheriting it. - if '_tf_api_names' in undecorated_func.__dict__: - if self._allow_multiple_exports: - undecorated_func._tf_api_names += self._names # pylint: disable=protected-access - else: - raise SymbolAlreadyExposedError( - 'Symbol %s is already exposed as %s.' % - (undecorated_func.__name__, undecorated_func._tf_api_names)) # pylint: disable=protected-access - else: - undecorated_func._tf_api_names = self._names # pylint: disable=protected-access + if api_names_attr in undecorated_func.__dict__: + raise SymbolAlreadyExposedError( + 'Symbol %s is already exposed as %s.' 
% + (undecorated_func.__name__, getattr( + undecorated_func, api_names_attr))) # pylint: disable=protected-access + setattr(undecorated_func, api_names_attr, self._names) return func def export_constant(self, module_name, name): @@ -126,8 +140,12 @@ class tf_export(object): # pylint: disable=invalid-name name: (string) Current constant name. """ module = sys.modules[module_name] - if not hasattr(module, '_tf_api_constants'): - module._tf_api_constants = [] # pylint: disable=protected-access + if not hasattr(module, API_ATTRS[self._api_name].constants): + setattr(module, API_ATTRS[self._api_name].constants, []) # pylint: disable=protected-access - module._tf_api_constants.append((self._names, name)) + getattr(module, API_ATTRS[self._api_name].constants).append( + (self._names, name)) + +tf_export = functools.partial(api_export, api_name=TENSORFLOW_API_NAME) +estimator_export = functools.partial(tf_export, api_name=ESTIMATOR_API_NAME) diff --git a/tensorflow/python/util/tf_export_test.py b/tensorflow/python/util/tf_export_test.py index ace3f054ba..b9e26ecb33 100644 --- a/tensorflow/python/util/tf_export_test.py +++ b/tensorflow/python/util/tf_export_test.py @@ -128,13 +128,6 @@ class ValidateExportTest(test.TestCase): with self.assertRaises(tf_export.SymbolAlreadyExposedError): export_decorator(_test_function) - def testEAllowMultipleExports(self): - _test_function._tf_api_names = ['name1', 'name2'] - tf_export.tf_export('nameRed', 'nameBlue', allow_multiple_exports=True)( - _test_function) - self.assertEquals(['name1', 'name2', 'nameRed', 'nameBlue'], - _test_function._tf_api_names) - def testOverridesFunction(self): _test_function2._tf_api_names = ['abc'] diff --git a/tensorflow/tools/api/generator/api_gen.bzl b/tensorflow/tools/api/generator/api_gen.bzl index fe3e4d1434..41713a94ec 100644 --- a/tensorflow/tools/api/generator/api_gen.bzl +++ b/tensorflow/tools/api/generator/api_gen.bzl @@ -11,9 +11,6 @@ TENSORFLOW_API_INIT_FILES = [ "distributions/__init__.py", 
"distributions/bijectors/__init__.py", "errors/__init__.py", - "estimator/__init__.py", - "estimator/export/__init__.py", - "estimator/inputs/__init__.py", "feature_column/__init__.py", "gfile/__init__.py", "graph_util/__init__.py", @@ -91,6 +88,16 @@ TENSORFLOW_API_INIT_FILES = [ # END GENERATED FILES ] +# keep sorted +ESTIMATOR_API_INIT_FILES = [ + # BEGIN GENERATED ESTIMATOR FILES + "__init__.py", + "estimator/__init__.py", + "estimator/export/__init__.py", + "estimator/inputs/__init__.py", + # END GENERATED ESTIMATOR FILES +] + # Creates a genrule that generates a directory structure with __init__.py # files that import all exported modules (i.e. modules with tf_export # decorators). @@ -110,7 +117,9 @@ TENSORFLOW_API_INIT_FILES = [ def gen_api_init_files(name, output_files=TENSORFLOW_API_INIT_FILES, root_init_template=None, - srcs=[]): + srcs=[], + api_name="tensorflow", + package="tensorflow.python"): root_init_template_flag = "" if root_init_template: root_init_template_flag = "--root_init_template=$(location " + root_init_template + ")" @@ -119,7 +128,8 @@ def gen_api_init_files(name, outs = output_files, cmd = ( "$(location //tensorflow/tools/api/generator:create_python_api) " + - root_init_template_flag + " --apidir=$(@D) $(OUTS)"), + root_init_template_flag + " --apidir=$(@D) --apiname=" + api_name + " --package=" + package + " $(OUTS)"), srcs = srcs, tools = ["//tensorflow/tools/api/generator:create_python_api"], + visibility = ["//tensorflow:__pkg__"], ) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index de0a50ab44..972bdc84ae 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -25,10 +25,10 @@ import os import sys from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_export +API_ATTRS = tf_export.API_ATTRS -_API_CONSTANTS_ATTR = '_tf_api_constants' -_API_NAMES_ATTR = 
'_tf_api_names' _DEFAULT_PACKAGE = 'tensorflow.python' _GENFILES_DIR_SUFFIX = 'genfiles/' _SYMBOLS_TO_SKIP_EXPLICITLY = { @@ -154,12 +154,13 @@ __all__.extend([_s for _s in _names_with_underscore]) return module_text_map -def get_api_init_text(package): +def get_api_init_text(package, api_name): """Get a map from destination module to __init__.py code for that module. Args: package: Base python package containing python with target tf_export decorators. + api_name: API you want to generate (e.g. `tensorflow` or `estimator`). Returns: A dictionary where @@ -187,7 +188,7 @@ def get_api_init_text(package): attr = getattr(module, module_contents_name) # If attr is _tf_api_constants attribute, then add the constants. - if module_contents_name == _API_CONSTANTS_ATTR: + if module_contents_name == API_ATTRS[api_name].constants: for exports, value in attr: for export in exports: names = export.split('.') @@ -196,15 +197,12 @@ def get_api_init_text(package): -1, dest_module, module.__name__, value, names[-1]) continue - try: - _, attr = tf_decorator.unwrap(attr) - except Exception as e: - print('5555: %s %s' % (module, module_contents_name), file=sys.stderr) - raise e + _, attr = tf_decorator.unwrap(attr) # If attr is a symbol with _tf_api_names attribute, then # add import for it. - if hasattr(attr, '__dict__') and _API_NAMES_ATTR in attr.__dict__: - for export in attr._tf_api_names: # pylint: disable=protected-access + if (hasattr(attr, '__dict__') and + API_ATTRS[api_name].names in attr.__dict__): + for export in getattr(attr, API_ATTRS[api_name].names): # pylint: disable=protected-access names = export.split('.') dest_module = '.'.join(names[:-1]) module_code_builder.add_import( @@ -241,7 +239,7 @@ def get_module(dir_path, relative_to_dir): relative_to_dir: Get module relative to this directory. Returns: - module that corresponds to the given directory. + Name of module that corresponds to the given directory. 
""" dir_path = dir_path[len(relative_to_dir):] # Convert path separators to '/' for easier parsing below. @@ -250,7 +248,7 @@ def get_module(dir_path, relative_to_dir): def create_api_files( - output_files, package, root_init_template, output_dir): + output_files, package, root_init_template, output_dir, api_name): """Creates __init__.py files for the Python API. Args: @@ -262,6 +260,7 @@ def create_api_files( "#API IMPORTS PLACEHOLDER" comment in the template file will be replaced with imports. output_dir: output API root directory. + api_name: API you want to generate (e.g. `tensorflow` or `estimator`). Raises: ValueError: if an output file is not under api/ directory, @@ -278,7 +277,7 @@ def create_api_files( os.makedirs(os.path.dirname(file_path)) open(file_path, 'a').close() - module_text_map = get_api_init_text(package) + module_text_map = get_api_init_text(package, api_name) # Add imports to output files. missing_output_files = [] @@ -329,6 +328,10 @@ def main(): help='Directory where generated output files are placed. ' 'gendir should be a prefix of apidir. Also, apidir ' 'should be a prefix of every directory in outputs.') + parser.add_argument( + '--apiname', required=True, type=str, + choices=API_ATTRS.keys(), + help='The API you want to generate.') args = parser.parse_args() @@ -342,8 +345,8 @@ def main(): # Populate `sys.modules` with modules containing tf_export(). 
importlib.import_module(args.package) - create_api_files( - outputs, args.package, args.root_init_template, args.apidir) + create_api_files(outputs, args.package, args.root_init_template, + args.apidir, args.apiname) if __name__ == '__main__': diff --git a/tensorflow/tools/api/generator/create_python_api_test.py b/tensorflow/tools/api/generator/create_python_api_test.py index 986340cf6d..651ec9d040 100644 --- a/tensorflow/tools/api/generator/create_python_api_test.py +++ b/tensorflow/tools/api/generator/create_python_api_test.py @@ -57,7 +57,8 @@ class CreatePythonApiTest(test.TestCase): def testFunctionImportIsAdded(self): imports = create_python_api.get_api_init_text( - package=create_python_api._DEFAULT_PACKAGE) + package=create_python_api._DEFAULT_PACKAGE, + api_name='tensorflow') expected_import = ( 'from tensorflow.python.test_module ' 'import test_op as test_op1') @@ -73,7 +74,8 @@ class CreatePythonApiTest(test.TestCase): def testClassImportIsAdded(self): imports = create_python_api.get_api_init_text( - package=create_python_api._DEFAULT_PACKAGE) + package=create_python_api._DEFAULT_PACKAGE, + api_name='tensorflow') expected_import = ('from tensorflow.python.test_module ' 'import TestClass') self.assertTrue( @@ -82,7 +84,8 @@ class CreatePythonApiTest(test.TestCase): def testConstantIsAdded(self): imports = create_python_api.get_api_init_text( - package=create_python_api._DEFAULT_PACKAGE) + package=create_python_api._DEFAULT_PACKAGE, + api_name='tensorflow') expected = ('from tensorflow.python.test_module ' 'import _TEST_CONSTANT') self.assertTrue(expected in str(imports), -- GitLab From 0dab0f538b78b0a0f1ec4f7dc5fb3005b5efdc94 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 7 Jun 2018 12:07:18 -0700 Subject: [PATCH 015/365] Avoid unnecessary `DoneCallback` copies in functional_ops.cc. 
PiperOrigin-RevId: 199674121 --- tensorflow/core/kernels/functional_ops.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/functional_ops.cc b/tensorflow/core/kernels/functional_ops.cc index e0d594fa25..e0be57f972 100644 --- a/tensorflow/core/kernels/functional_ops.cc +++ b/tensorflow/core/kernels/functional_ops.cc @@ -152,7 +152,7 @@ class IfOp : public AsyncOpKernel { : kernel_(kernel), ctx_(ctx), cond_(cond), - done_(done), + done_(std::move(done)), lib_(CHECK_NOTNULL(ctx_->function_library())) { SetRunOptions(ctx_, &opts_, true /* always_collect_stats */); for (int i = 1; i < ctx_->num_inputs(); ++i) { @@ -174,9 +174,9 @@ class IfOp : public AsyncOpKernel { s = SetOutputs(kernel_, ctx_, rets_); } ctx_->SetStatus(s); - auto done = done_; + DoneCallback captured_done(std::move(done_)); delete this; - done(); + captured_done(); }); } @@ -257,7 +257,7 @@ class WhileOp : public AsyncOpKernel { ctx_(ctx), cond_handle_(cond_handle), body_handle_(body_handle), - done_(done), + done_(std::move(done)), lib_(CHECK_NOTNULL(ctx_->function_library())) { SetRunOptions(ctx_, &opts_, false /* always_collect_stats */); for (int i = 0; i < ctx_->num_inputs(); ++i) { -- GitLab From 5c74172fa5bd9f2ae6275d536f70971810a40548 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 7 Jun 2018 12:20:28 -0700 Subject: [PATCH 016/365] Add features to TOCO Python API. 
PiperOrigin-RevId: 199676295 --- tensorflow/contrib/lite/python/convert.py | 13 ++++++- tensorflow/contrib/lite/python/lite.py | 12 ++++++- tensorflow/contrib/lite/python/lite_test.py | 34 +++++++++++++++++++ .../contrib/lite/python/tflite_convert.py | 22 ++++++++++++ tensorflow/contrib/lite/toco/python/BUILD | 1 + .../lite/toco/python/toco_python_api.cc | 13 ++++++- tensorflow/contrib/lite/toco/toco_flags.proto | 9 +++++ 7 files changed, 101 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py index 08f3f8bf32..fce8ffb54a 100644 --- a/tensorflow/contrib/lite/python/convert.py +++ b/tensorflow/contrib/lite/python/convert.py @@ -124,7 +124,9 @@ def toco_convert(input_data, reorder_across_fake_quant=False, allow_custom_ops=False, change_concat_input_ranges=False, - quantize_weights=False): + quantize_weights=False, + dump_graphviz_dir=None, + dump_graphviz_video=False): """Convert a model using TOCO from `input_format` to `output_format`. Typically this is to convert from TensorFlow GraphDef to TFLite, in which @@ -170,6 +172,12 @@ def toco_convert(input_data, weights followed by dequantize operations. Computation is still done in float, but reduces model size (at the cost of accuracy and latency). (default False) + dump_graphviz_dir: Full filepath of folder to dump the graphs at various + stages of processing GraphViz .dot files. Preferred over + --output_format=GRAPHVIZ_DOT in order to keep the requirements of the + output file. (default None) + dump_graphviz_video: Boolean indicating whether to dump the graph after + every graph transformation. (default False) Returns: The converted data. 
For example if TFLite was the destination, then @@ -193,6 +201,9 @@ def toco_convert(input_data, if default_ranges_stats: toco.default_ranges_min = default_ranges_stats[0] toco.default_ranges_max = default_ranges_stats[1] + if dump_graphviz_dir: + toco.dump_graphviz_dir = dump_graphviz_dir + toco.dump_graphviz_include_video = dump_graphviz_video model = _model_flags_pb2.ModelFlags() model.change_concat_input_ranges = change_concat_input_ranges diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index e3a2d19e05..4fb88c1ad6 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -96,6 +96,12 @@ class TocoConverter(object): weights followed by dequantize operations. Computation is still done in float, but reduces model size (at the cost of accuracy and latency). (default False) + dump_graphviz_dir: Full filepath of folder to dump the graphs at various + stages of processing GraphViz .dot files. Preferred over + --output_format=GRAPHVIZ_DOT in order to keep the requirements of the + output file. (default None) + dump_graphviz_video: Boolean indicating whether to dump the graph after + every graph transformation. 
(default False) Example usage: @@ -138,6 +144,8 @@ class TocoConverter(object): self.change_concat_input_ranges = False self.allow_custom_ops = False self.quantize_weights = False + self.dump_graphviz_dir = None + self.dump_graphviz_video = False @classmethod def from_session(cls, sess, input_tensors, output_tensors): @@ -308,7 +316,9 @@ class TocoConverter(object): reorder_across_fake_quant=self.reorder_across_fake_quant, change_concat_input_ranges=self.change_concat_input_ranges, allow_custom_ops=self.allow_custom_ops, - quantize_weights=self.quantize_weights) + quantize_weights=self.quantize_weights, + dump_graphviz_dir=self.dump_graphviz_dir, + dump_graphviz_video=self.dump_graphviz_video) return result def get_input_arrays(self): diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index b04caaf263..8c9d2c1651 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -220,6 +220,7 @@ class FromSessionTest(test_util.TensorFlowTestCase): self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) self.assertEqual((0., 0.), output_details[0]['quantization']) + # TODO(nupurgarg): Verify value of contents in GraphViz. def testGraphviz(self): in_tensor = array_ops.placeholder( shape=[1, 16, 16, 3], dtype=dtypes.float32) @@ -232,6 +233,39 @@ class FromSessionTest(test_util.TensorFlowTestCase): graphviz_output = converter.convert() self.assertTrue(graphviz_output) + # TODO(nupurgarg): Verify value of contents in GraphViz. + def testDumpGraphviz(self): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Convert model and ensure model is not None. 
+ converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + graphviz_dir = self.get_temp_dir() + converter.dump_graphviz_dir = graphviz_dir + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Ensure interpreter is able to allocate and check graphviz data. + interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + num_items_graphviz = len(os.listdir(graphviz_dir)) + self.assertTrue(num_items_graphviz) + + # Convert model and ensure model is not None. + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + graphviz_dir = self.get_temp_dir() + converter.dump_graphviz_dir = graphviz_dir + converter.dump_graphviz_video = True + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Ensure graphviz folder has more data after using video flag. + num_items_graphviz_video = len(os.listdir(graphviz_dir)) + self.assertTrue(num_items_graphviz_video > num_items_graphviz) + def testInferenceInputType(self): in_tensor = array_ops.placeholder(shape=[1, 16, 16, 3], dtype=dtypes.uint8) out_tensor = in_tensor + in_tensor diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py index 4c215b62b2..492d2632fe 100644 --- a/tensorflow/contrib/lite/python/tflite_convert.py +++ b/tensorflow/contrib/lite/python/tflite_convert.py @@ -130,6 +130,10 @@ def _convert_model(flags): converter.allow_custom_ops = flags.allow_custom_ops if flags.quantize_weights: converter.quantize_weights = flags.quantize_weights + if flags.dump_graphviz_dir: + converter.dump_graphviz_dir = flags.dump_graphviz_dir + if flags.dump_graphviz_video: + converter.dump_graphviz_vode = flags.dump_graphviz_video # Convert model. 
output_data = converter.convert() @@ -161,8 +165,12 @@ def _check_flags(flags, unparsed): output = "" for flag in unparsed: output += _get_message_unparsed(flag, "--input_file", "--graph_def_file") + output += _get_message_unparsed(flag, "--savedmodel_directory", + "--saved_model_dir") output += _get_message_unparsed(flag, "--std_value", "--std_dev_values") output += _get_message_unparsed(flag, "--batch_size", "--input_shapes") + output += _get_message_unparsed(flag, "--dump_graphviz", + "--dump_graphviz_dir") if output: raise ValueError(output) @@ -322,6 +330,20 @@ def run_main(_): "provide these to the TensorFlow Lite runtime with a custom " "resolver. (default False)")) + # Logging flags. + parser.add_argument( + "--dump_graphviz_dir", + type=str, + help=("Full filepath of folder to dump the graphs at various stages of " + "processing GraphViz .dot files. Preferred over --output_format=" + "GRAPHVIZ_DOT in order to keep the requirements of the output " + "file.")) + parser.add_argument( + "--dump_graphviz_video", + action="store_true", + help=("Boolean indicating whether to dump the graph after every graph " + "transformation")) + tflite_flags, unparsed = parser.parse_known_args(args=sys.argv[1:]) try: _check_flags(tflite_flags, unparsed) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index a954f1d6ba..93fe756a55 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -12,6 +12,7 @@ cc_library( deps = [ "//tensorflow/contrib/lite/toco:model_flags_proto_cc", "//tensorflow/contrib/lite/toco:toco_flags_proto_cc", + "//tensorflow/contrib/lite/toco:toco_graphviz_dump_options", "//tensorflow/contrib/lite/toco:toco_port", "//tensorflow/contrib/lite/toco:toco_tooling", "//tensorflow/core:lib", diff --git a/tensorflow/contrib/lite/toco/python/toco_python_api.cc b/tensorflow/contrib/lite/toco/python/toco_python_api.cc index 5b1db852b4..d93e104038 100644 --- 
a/tensorflow/contrib/lite/toco/python/toco_python_api.cc +++ b/tensorflow/contrib/lite/toco/python/toco_python_api.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/python/toco_python_api.h" #include "tensorflow/contrib/lite/toco/toco_flags.pb.h" +#include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" #include "tensorflow/contrib/lite/toco/toco_port.h" #include "tensorflow/contrib/lite/toco/toco_tooling.h" #include "tensorflow/contrib/lite/toco/toco_types.h" @@ -62,7 +63,7 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, std::string input_contents_txt = ConvertArg(input_contents_txt_raw, &error); if (error) return nullptr; - // Use toco to produce new outputs + // Use TOCO to produce new outputs. toco::ModelFlags model_flags; if (!model_flags.ParseFromString(model_flags_proto_txt)) { LOG(FATAL) << "Model proto failed to parse." << std::endl; @@ -71,6 +72,16 @@ PyObject* TocoConvert(PyObject* model_flags_proto_txt_raw, if (!toco_flags.ParseFromString(toco_flags_proto_txt)) { LOG(FATAL) << "Toco proto failed to parse." << std::endl; } + + auto& dump_options = *GraphVizDumpOptions::singleton(); + if (toco_flags.has_dump_graphviz_dir()) { + dump_options.dump_graphviz = toco_flags.dump_graphviz_dir(); + } + if (toco_flags.has_dump_graphviz_include_video()) { + dump_options.dump_graphviz_video = toco_flags.dump_graphviz_include_video(); + } + + // Convert model. 
std::unique_ptr model = toco::Import(toco_flags, model_flags, input_contents_txt); toco::Transform(toco_flags, model.get()); diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 4fe57879fb..ad4e94ded9 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -174,4 +174,13 @@ message TocoFlags { // Computation is still done in float, but reduces model size (at the cost of // accuracy and latency). optional bool quantize_weights = 20 [default = false]; + + // Full filepath of folder to dump the graphs at various stages of processing + // GraphViz .dot files. Preferred over --output_format=GRAPHVIZ_DOT in order + // to keep the requirements of the output file. + optional string dump_graphviz_dir = 24; + + // Boolean indicating whether to dump the graph after every graph + // transformation. + optional bool dump_graphviz_include_video = 25; } -- GitLab From 6f20926fb7a181c44cca6191eec8961040d83cd1 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 7 Jun 2018 12:21:29 -0700 Subject: [PATCH 017/365] [XLA] Don't de-emphasize copy nodes in graph dumps. 
PiperOrigin-RevId: 199676435 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 61612bebd1..a6750460e5 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -975,7 +975,6 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { } return kGreen; case HloOpcode::kConcatenate: - case HloOpcode::kCopy: case HloOpcode::kDynamicSlice: case HloOpcode::kGather: case HloOpcode::kPad: @@ -997,6 +996,10 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { return kWhite; } return kGreen; + case HloOpcode::kCopy: + // Emphasize copy nodes, which are either physical transposes (and thus + // significant), or copies of read-only buffers (and thus dead weight). + return kGreen; case HloOpcode::kConvolution: case HloOpcode::kDot: case HloOpcode::kFft: -- GitLab From 2857228ba6c7b357185e7a0af346f4fc93a10f74 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Thu, 7 Jun 2018 12:23:10 -0700 Subject: [PATCH 018/365] Misc fixes to benchmarks. 
PiperOrigin-RevId: 199676652 --- .../contrib/lite/profiling/profile_summarizer.cc | 11 +++++++++-- tensorflow/contrib/lite/tools/benchmark/BUILD | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.cc b/tensorflow/contrib/lite/profiling/profile_summarizer.cc index 6f2c9cd2b3..45388b500c 100644 --- a/tensorflow/contrib/lite/profiling/profile_summarizer.cc +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.cc @@ -85,11 +85,18 @@ OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, return details; } +tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() { + auto options = tensorflow::StatSummarizerOptions(); + options.show_summary = true; + options.show_memory = false; + return options; +} + } // namespace ProfileSummarizer::ProfileSummarizer() - : stats_calculator_(new ::tensorflow::StatsCalculator( - tensorflow::StatSummarizerOptions())) {} + : stats_calculator_( + new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {} void ProfileSummarizer::ProcessProfiles( const std::vector& profile_stats, diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD index c5aa27d07c..f918010e2b 100644 --- a/tensorflow/contrib/lite/tools/benchmark/BUILD +++ b/tensorflow/contrib/lite/tools/benchmark/BUILD @@ -6,6 +6,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") load("//tensorflow/contrib/lite:build_def.bzl", "tflite_linkopts") +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") common_copts = ["-Wall"] @@ -15,7 +16,7 @@ cc_binary( "benchmark_main.cc", "logging.h", ], - copts = common_copts, + copts = tflite_copts() + common_copts, linkopts = select({ "//tensorflow:android": [ "-pie", -- GitLab From 9639db8d18d979e98061504a2c6ee4bba0f74610 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 7 Jun 2018 12:52:35 -0700 Subject: [PATCH 019/365] Add TransformDiagonal higher-order bijector to transform only the diagonal of a matrix. PiperOrigin-RevId: 199680859 --- tensorflow/contrib/distributions/BUILD | 19 ++++ .../bijectors/transform_diagonal_test.py | 66 ++++++++++++ .../python/ops/bijectors/__init__.py | 2 + .../ops/bijectors/transform_diagonal.py | 102 ++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/transform_diagonal_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/transform_diagonal.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index d8baf49e81..61d4e90ea2 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -1254,6 +1254,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "transform_diagonal_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/transform_diagonal_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "weibull_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/transform_diagonal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/transform_diagonal_test.py new file mode 100644 index 0000000000..6428a68702 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/transform_diagonal_test.py @@ -0,0 +1,66 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for TransformDiagonal bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import bijectors +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class TransformDiagonalBijectorTest(test.TestCase): + """Tests correctness of the TransformDiagonal bijector.""" + + def setUp(self): + self._rng = np.random.RandomState(42) + + @test_util.run_in_graph_and_eager_modes() + def testBijector(self): + x = np.float32(np.random.randn(3, 4, 4)) + + y = x.copy() + for i in range(x.shape[0]): + np.fill_diagonal(y[i, :, :], np.exp(np.diag(x[i, :, :]))) + + exp = bijectors.Exp() + b = bijectors.TransformDiagonal(diag_bijector=exp) + + y_ = self.evaluate(b.forward(x)) + self.assertAllClose(y, y_) + + x_ = self.evaluate(b.inverse(y)) + self.assertAllClose(x, x_) + + fldj = self.evaluate(b.forward_log_det_jacobian(x, event_ndims=2)) + ildj = self.evaluate(b.inverse_log_det_jacobian(y, event_ndims=2)) + self.assertAllEqual( + fldj, + self.evaluate(exp.forward_log_det_jacobian( + np.array([np.diag(x_mat) for x_mat in x]), + event_ndims=1))) + self.assertAllEqual( + ildj, + self.evaluate(exp.inverse_log_det_jacobian( + np.array([np.diag(y_mat) for y_mat in y]), + 
event_ndims=1))) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 59b8cf1bb2..d97a1f0d30 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -43,6 +43,7 @@ @@Softplus @@Softsign @@Square +@@TransformDiagonal @@Weibull @@masked_autoregressive_default_template @@ -83,6 +84,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered impo from tensorflow.contrib.distributions.python.ops.bijectors.softplus import * from tensorflow.contrib.distributions.python.ops.bijectors.softsign import * from tensorflow.contrib.distributions.python.ops.bijectors.square import * +from tensorflow.contrib.distributions.python.ops.bijectors.transform_diagonal import * from tensorflow.python.ops.distributions.bijector import * from tensorflow.python.ops.distributions.identity_bijector import Identity diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/transform_diagonal.py b/tensorflow/contrib/distributions/python/ops/bijectors/transform_diagonal.py new file mode 100644 index 0000000000..65669fc2bf --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/transform_diagonal.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TransformDiagonal bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.distributions import bijector + +__all__ = [ + "TransformDiagonal", +] + + +class TransformDiagonal(bijector.Bijector): + """Applies a Bijector to the diagonal of a matrix. + + #### Example + + ```python + b = tfb.TransformDiagonal(diag_bijector=tfb.Exp()) + + b.forward([[1., 0.], + [0., 1.]]) + # ==> [[2.718, 0.], + [0., 2.718]] + ``` + + """ + + def __init__(self, + diag_bijector, + validate_args=False, + name="transform_diagonal"): + """Instantiates the `TransformDiagonal` bijector. + + Args: + diag_bijector: `Bijector` instance used to transform the diagonal. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. + """ + self._diag_bijector = diag_bijector + super(TransformDiagonal, self).__init__( + forward_min_event_ndims=2, + inverse_min_event_ndims=2, + validate_args=validate_args, + name=name) + + def _forward(self, x): + diag = self._diag_bijector.forward(array_ops.matrix_diag_part(x)) + return array_ops.matrix_set_diag(x, diag) + + def _inverse(self, y): + diag = self._diag_bijector.inverse(array_ops.matrix_diag_part(y)) + return array_ops.matrix_set_diag(y, diag) + + def _forward_log_det_jacobian(self, x): + # We formulate the Jacobian with respect to the flattened matrices + # `vec(x)` and `vec(y)`. Suppose for notational convenience that + # the first `n` entries of `vec(x)` are the diagonal of `x`, and + # the remaining `n**2-n` entries are the off-diagonals in + # arbitrary order. 
Then the Jacobian is a block-diagonal matrix, + # with the Jacobian of the diagonal bijector in the first block, + # and the identity Jacobian for the remaining entries (since this + # bijector acts as the identity on non-diagonal entries): + # + # J_vec(x) (vec(y)) = + # ------------------------------- + # | J_diag(x) (diag(y)) 0 | n entries + # | | + # | 0 I | n**2-n entries + # ------------------------------- + # n n**2-n + # + # Since the log-det of the second (identity) block is zero, the + # overall log-det-jacobian is just the log-det of first block, + # from the diagonal bijector. + # + # Note that for elementwise operations (exp, softplus, etc) the + # first block of the Jacobian will itself be a diagonal matrix, + # but our implementation does not require this to be true. + return self._diag_bijector.forward_log_det_jacobian( + array_ops.matrix_diag_part(x), event_ndims=1) + + def _inverse_log_det_jacobian(self, y): + return self._diag_bijector.inverse_log_det_jacobian( + array_ops.matrix_diag_part(y), event_ndims=1) -- GitLab From 09c25a87cf321f317662f67d1b08deb3585e9abe Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Thu, 7 Jun 2018 12:55:59 -0700 Subject: [PATCH 020/365] Update documentation. 
PiperOrigin-RevId: 199681316 --- .../contrib/lite/tools/benchmark/README.md | 104 ++++++++---------- 1 file changed, 45 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/lite/tools/benchmark/README.md b/tensorflow/contrib/lite/tools/benchmark/README.md index e6f333aa5b..2788f76faf 100644 --- a/tensorflow/contrib/lite/tools/benchmark/README.md +++ b/tensorflow/contrib/lite/tools/benchmark/README.md @@ -93,80 +93,66 @@ This compiles TFLite with profiling enabled, now you can run the benchmark binar ============================== Run Order ============================== [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] - CONV_2D 0.000 9.132 9.132 0.121% 0.121% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] - DEPTHWISE_CONV_2D 9.135 3.280 3.280 0.043% 0.165% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6] - CONV_2D 12.419 6.877 6.877 0.091% 0.256% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] - DEPTHWISE_CONV_2D 19.299 1.708 1.708 0.023% 0.278% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6] - CONV_2D 21.012 4.162 4.162 0.055% 0.334% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6] - DEPTHWISE_CONV_2D 25.177 3.520 3.520 0.047% 0.380% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6] - CONV_2D 28.701 10.218 10.218 0.136% 0.516% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] - DEPTHWISE_CONV_2D 38.922 0.827 0.827 0.011% 0.527% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6] - CONV_2D 39.752 1.401 1.401 0.019% 0.545% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6] - DEPTHWISE_CONV_2D 41.156 1.290 1.290 0.017% 0.563% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6] - CONV_2D 42.448 5.995 5.995 0.080% 0.642% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] - DEPTHWISE_CONV_2D 48.445 0.409 0.409 0.005% 0.647% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] - CONV_2D 48.856 6.167 6.167 0.082% 0.729% 0.000 0 
[MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] - DEPTHWISE_CONV_2D 55.026 0.629 0.629 0.008% 0.738% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] - CONV_2D 55.656 6.464 6.464 0.086% 0.823% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] - DEPTHWISE_CONV_2D 62.124 0.647 0.647 0.009% 0.832% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] - CONV_2D 62.774 14.666 14.666 0.195% 1.026% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] - DEPTHWISE_CONV_2D 77.444 0.635 0.635 0.008% 1.035% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] - CONV_2D 78.081 7.186 7.186 0.095% 1.130% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] - DEPTHWISE_CONV_2D 85.270 0.646 0.646 0.009% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] - CONV_2D 85.918 9.529 9.529 0.126% 1.265% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] - DEPTHWISE_CONV_2D 95.451 0.628 0.628 0.008% 1.273% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] - CONV_2D 96.081 2.077 2.077 0.028% 1.301% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] - DEPTHWISE_CONV_2D 98.162 0.168 0.168 0.002% 1.303% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] - CONV_2D 98.332 1.007 1.007 0.013% 1.317% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] - DEPTHWISE_CONV_2D 99.342 0.288 0.288 0.004% 1.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] - CONV_2D 99.632 8.197 8.197 0.109% 1.429% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] - AVERAGE_POOL_2D 107.832 0.045 0.045 0.001% 1.430% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] - CONV_2D 107.878 0.325 0.325 0.004% 1.434% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] - RESHAPE 108.206 0.003 0.003 0.000% 1.434% 0.000 0 [MobilenetV1/Predictions/Reshape] - SOFTMAX 108.211 0.038 0.038 0.001% 1.434% 0.000 0 [MobilenetV1/Predictions/Softmax] + CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 
[MobilenetV1/MobilenetV1/Conv2d_0/Relu6] + DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6] + CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6] + CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6] + DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6] + CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6] + CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6] + DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6] + CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] + CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] + DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] + CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] + CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] + CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + DEPTHWISE_CONV_2D 57.126 0.366 
0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] + CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] + DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] + CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] + RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape] + SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax] ============================== Top by Computation Time ============================== [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] - CONV_2D 62.774 14.666 14.666 0.195% 0.195% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] - CONV_2D 28.701 10.218 10.218 0.136% 0.330% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] - CONV_2D 85.918 9.529 9.529 0.126% 0.456% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] - CONV_2D 0.000 9.132 9.132 0.121% 0.578% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] - CONV_2D 99.632 8.197 8.197 0.109% 0.686% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] - CONV_2D 78.081 7.186 7.186 0.095% 0.782% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] - CONV_2D 12.419 6.877 6.877 
0.091% 0.873% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] - CONV_2D 55.656 6.464 6.464 0.086% 0.958% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] - CONV_2D 48.856 6.167 6.167 0.082% 1.040% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] - CONV_2D 42.448 5.995 5.995 0.080% 1.120% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] - -============================== Top by Memory Use ============================== - [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] - SOFTMAX 108.211 0.038 0.038 0.001% 0.001% 0.000 0 [MobilenetV1/Predictions/Softmax] - RESHAPE 108.206 0.003 0.003 0.000% 0.001% 0.000 0 [MobilenetV1/Predictions/Reshape] - CONV_2D 78.081 7.186 7.186 0.095% 0.096% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] - DEPTHWISE_CONV_2D 77.444 0.635 0.635 0.008% 0.104% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] - CONV_2D 62.774 14.666 14.666 0.195% 0.299% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] - DEPTHWISE_CONV_2D 62.124 0.647 0.647 0.009% 0.307% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] - CONV_2D 55.656 6.464 6.464 0.086% 0.393% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] - DEPTHWISE_CONV_2D 55.026 0.629 0.629 0.008% 0.401% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] - CONV_2D 48.856 6.167 6.167 0.082% 0.483% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] - DEPTHWISE_CONV_2D 48.445 0.409 0.409 0.005% 0.489% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] + CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + CONV_2D 51.432 
5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] Number of nodes executed: 31 ============================== Summary by node type ============================== [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] - CONV_2D 15 1.861 86.679% 86.679% 0.000 0 - DEPTHWISE_CONV_2D 13 0.286 13.321% 100.000% 0.000 0 + CONV_2D 15 1.406 89.270% 89.270% 0.000 0 + DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0 SOFTMAX 1 0.000 0.000% 100.000% 0.000 0 RESHAPE 1 0.000 0.000% 100.000% 0.000 0 AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0 -Timings (microseconds): count=50 first=108164 curr=128308 min=102850 max=197072 avg=150805 std=24368 +Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929 Memory (bytes): count=0 31 nodes observed -Average inference timings in us: Warmup: 135310, Init: 12123, no stats: 150988 - +Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9 ``` -- GitLab From 5174b67f70645210429db837df3047c7d52637bf Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Thu, 7 Jun 2018 13:03:54 -0700 Subject: [PATCH 021/365] [TF:XLA] Introduce a new HostTensorToBorrowingLiteral path without the memcpy from Tensor to Literal, and use it in xla_helpers. 
PiperOrigin-RevId: 199682452 --- tensorflow/compiler/tf2xla/literal_util.cc | 31 ++++++++++++++++++++ tensorflow/compiler/tf2xla/literal_util.h | 12 ++++++++ tensorflow/compiler/tf2xla/xla_helpers.cc | 11 ++++--- tensorflow/compiler/xla/literal_util.cc | 22 +++++++------- tensorflow/compiler/xla/literal_util.h | 6 ++-- tensorflow/compiler/xla/literal_util_test.cc | 4 +-- 6 files changed, 67 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/tf2xla/literal_util.cc b/tensorflow/compiler/tf2xla/literal_util.cc index 43e1c1e9fe..db56b12837 100644 --- a/tensorflow/compiler/tf2xla/literal_util.cc +++ b/tensorflow/compiler/tf2xla/literal_util.cc @@ -40,6 +40,37 @@ Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal) { return Status::OK(); } +Status HostTensorToBorrowingLiteral(const Tensor& host_tensor, + xla::BorrowingLiteral* literal) { + xla::Shape xla_shape; + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(), + host_tensor.shape(), &xla_shape)); + *literal = xla::BorrowingLiteral( + static_cast(DMAHelper::base(&host_tensor)), xla_shape); + return Status::OK(); +} + +Status HostTensorsToBorrowingLiteralTuple( + tensorflow::gtl::ArraySlice host_tensors, + xla::BorrowingLiteral* literal) { + std::vector buf_ptrs; + buf_ptrs.reserve(host_tensors.size()); + std::vector tensor_shapes(host_tensors.size()); + + for (int i = 0; i < host_tensors.size(); i++) { + // Validate runtime shapes and fail if it doesn't match the contract. 
+ const Tensor* tensor = &host_tensors[i]; + buf_ptrs.emplace_back(static_cast(DMAHelper::base(tensor))); + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(tensor->dtype(), tensor->shape(), + &tensor_shapes[i])); + } + + *literal = xla::BorrowingLiteral( + buf_ptrs, xla::ShapeUtil::MakeTupleShape(tensor_shapes)); + + return Status::OK(); +} + Status CopyLiteralToHostTensor(const xla::LiteralSlice& literal, Tensor* host_tensor) { TF_RET_CHECK(xla::ShapeUtil::IsArray(literal.shape()) && diff --git a/tensorflow/compiler/tf2xla/literal_util.h b/tensorflow/compiler/tf2xla/literal_util.h index 220bec1553..74685025c1 100644 --- a/tensorflow/compiler/tf2xla/literal_util.h +++ b/tensorflow/compiler/tf2xla/literal_util.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/array_slice.h" namespace tensorflow { @@ -29,6 +30,17 @@ namespace tensorflow { // unsupported type. Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal); +// Returns a BorrowingLiteral that utilizes the same underlying buffer owned by +// 'host_tensor'. +Status HostTensorToBorrowingLiteral(const Tensor& host_tensor, + xla::BorrowingLiteral* literal); + +// Returns a BorrowingLiteral tuple that utilizes the same underlying buffers +// owned by 'host_tensors'. +Status HostTensorsToBorrowingLiteralTuple( + tensorflow::gtl::ArraySlice host_tensors, + xla::BorrowingLiteral* literal); + // Copies 'literal' to freshly allocated 'host_tensor', which is allocated of // type . // Fails if the literal's primitive type != diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index f1594193af..a1da176fe3 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -19,11 +19,13 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/lib/util.h" #include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -210,8 +212,9 @@ Status XlaHelpers::Iota(xla::XlaBuilder* builder, DataType dtype, int64 size, return errors::InvalidArgument("Invalid argument type ", DataTypeString(dtype)); } - xla::Literal linspace_literal; - TF_RETURN_IF_ERROR(HostTensorToLiteral(linspace, &linspace_literal)); + xla::BorrowingLiteral linspace_literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(linspace, &linspace_literal)); + *iota = builder->ConstantLiteral(linspace_literal); return Status::OK(); } @@ -245,8 +248,8 @@ Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis, return errors::InvalidArgument("Invalid argument type ", DataTypeString(index_type)); } - xla::Literal linspace_literal; - TF_RETURN_IF_ERROR(HostTensorToLiteral(linspace, &linspace_literal)); + xla::BorrowingLiteral linspace_literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(linspace, &linspace_literal)); // Broadcast the linspace constant across the indices along the new axis, // and test equality at each position. 
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 61afc311a7..6b29589700 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -2341,28 +2341,28 @@ LiteralSlice::LiteralSlice(const LiteralBase& literal, : LiteralBase(), root_piece_(&literal.piece(view_root)) {} BorrowingLiteral::BorrowingLiteral(const char* src_buf_ptr, const Shape& shape) - : LiteralBase(), shape_(shape) { - CHECK(ShapeUtil::IsArray(shape_)); + : LiteralBase(), shape_(MakeUnique(shape)) { + CHECK(ShapeUtil::IsArray(*shape_)); CHECK_NE(src_buf_ptr, nullptr); - CHECK(LayoutUtil::HasLayout(shape_)); + CHECK(LayoutUtil::HasLayout(*shape_)); root_piece_ = Piece(); root_piece_.set_buffer(const_cast(src_buf_ptr)); - root_piece_.set_subshape(&shape_); + root_piece_.set_subshape(shape_.get()); } BorrowingLiteral::BorrowingLiteral( tensorflow::gtl::ArraySlice src_buf_ptrs, const Shape& shape) - : LiteralBase(), shape_(shape) { - CHECK(ShapeUtil::IsTuple(shape_)); - CHECK(!ShapeUtil::IsNestedTuple(shape_)); - CHECK_EQ(src_buf_ptrs.size(), ShapeUtil::TupleElementCount(shape_)); + : LiteralBase(), shape_(MakeUnique(shape)) { + CHECK(ShapeUtil::IsTuple(*shape_)); + CHECK(!ShapeUtil::IsNestedTuple(*shape_)); + CHECK_EQ(src_buf_ptrs.size(), ShapeUtil::TupleElementCount(*shape_)); root_piece_ = Piece(); - root_piece_.set_subshape(&shape_); - BuildPieceSubtree(shape_, &root_piece_); + root_piece_.set_subshape(shape_.get()); + BuildPieceSubtree(*shape_, &root_piece_); for (int i = 0; i < src_buf_ptrs.size(); ++i) { - const auto& src_shape = shape_.tuple_shapes(i); + const auto& src_shape = shape_->tuple_shapes(i); CHECK(ShapeUtil::IsArray(src_shape)); root_piece_.child(i).set_buffer(const_cast(src_buf_ptrs[i])); } diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 1e26eb7ad4..8e4159e360 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ 
b/tensorflow/compiler/xla/literal_util.h @@ -1099,8 +1099,10 @@ class BorrowingLiteral : public LiteralBase { const Piece& root_piece() const override { return root_piece_; }; Piece root_piece_; - // Shape of this literal. - const Shape shape_; + // Shape of this literal. Stored as unique_ptr so such that the (default) + // move construction of this class would be trivially correct: the pointer to + // Shape root_piece_ stores will still point to the correct address. + std::unique_ptr shape_; }; template diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index f127cee0fd..53b926163c 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -1431,7 +1431,7 @@ TEST_F(LiteralUtilTest, LiteralSliceOfALiteralSlice) { EXPECT_EQ(matrix_view, *Literal::CreateR2({{1.0, 2.0}, {3.0, 4.0}})); } -TEST_F(LiteralUtilTest, BorrowingLiteralFromOneBufferPtrTest) { +TEST_F(LiteralUtilTest, BorrowingLiteralFromOneBufferPtr) { std::vector int64_values = {1, 2, 3}; const Shape literal_shape = ShapeUtil::MakeShape(S64, {3}); @@ -1443,7 +1443,7 @@ TEST_F(LiteralUtilTest, BorrowingLiteralFromOneBufferPtrTest) { EXPECT_EQ(literal.Get({2}), 3); } -TEST_F(LiteralUtilTest, BorrowingLiteralFromMultipleBufferPtrsTest) { +TEST_F(LiteralUtilTest, BorrowingLiteralFromMultipleBufferPtrs) { std::vector one_two_three = {1, 2, 3}; const Shape one_two_three_shape = ShapeUtil::MakeShape(S64, {3}); -- GitLab From d736c6622aec39d874fe77d8b2d03a57bbdcbb78 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 7 Jun 2018 13:46:56 -0700 Subject: [PATCH 022/365] Make TOCO cmdline inputs case insensitive. 
PiperOrigin-RevId: 199689105 --- tensorflow/contrib/lite/python/tflite_convert.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py index 492d2632fe..32ad84ec3c 100644 --- a/tensorflow/contrib/lite/python/tflite_convert.py +++ b/tensorflow/contrib/lite/python/tflite_convert.py @@ -227,17 +227,17 @@ def run_main(_): # Model format flags. parser.add_argument( "--output_format", - type=str, + type=str.upper, choices=["TFLITE", "GRAPHVIZ_DOT"], help="Output file format.") parser.add_argument( "--inference_type", - type=str, + type=str.upper, choices=["FLOAT", "QUANTIZED_UINT8"], help="Target data type of arrays in the output file.") parser.add_argument( "--inference_input_type", - type=str, + type=str.upper, choices=["FLOAT", "QUANTIZED_UINT8"], help=("Target data type of input arrays. Allows for a different type for " "input arrays in the case of quantization.")) -- GitLab From a0dc8144f09da4d0597c423c2d786e206fb462ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 14:42:24 -0700 Subject: [PATCH 023/365] Internal change. 
PiperOrigin-RevId: 199698515 --- tensorflow/contrib/lite/kernels/internal/kernel_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 6e62183975..09044193c1 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -350,7 +350,7 @@ void LstmStep( for (int b = 0; b < n_batch; ++b) { product_scaling_factors[b] = - scaling_factors[b] * input_to_cell_weights_scale; + scaling_factors[b] * input_to_output_weights_scale; } tensor_utils::MatrixBatchVectorMultiplyAccumulate( input_to_output_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, -- GitLab From ae6e7c90611903591270f5221c51dca556a4759b Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 7 Jun 2018 15:02:49 -0700 Subject: [PATCH 024/365] Avoid unintentional copy of a const function when capturing it. PiperOrigin-RevId: 199702086 --- tensorflow/core/kernels/functional_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/functional_ops.cc b/tensorflow/core/kernels/functional_ops.cc index e0be57f972..519c475332 100644 --- a/tensorflow/core/kernels/functional_ops.cc +++ b/tensorflow/core/kernels/functional_ops.cc @@ -184,7 +184,7 @@ class IfOp : public AsyncOpKernel { IfOp* const kernel_; OpKernelContext* const ctx_; const bool cond_; - const DoneCallback done_; + DoneCallback done_; FunctionLibraryRuntime* const lib_; FunctionLibraryRuntime::Options opts_; TensorVec args_; -- GitLab From ed15a7b00f9dd0094cd784a823a65db7aef9d79c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 15:21:17 -0700 Subject: [PATCH 025/365] Fix and enable TFlite label_image_test Resolve memory leaks from read_bmp() calls. 
PiperOrigin-RevId: 199705513 --- .../contrib/lite/examples/label_image/BUILD | 31 +++++++++---------- .../examples/label_image/bitmap_helpers.cc | 28 ++++++++--------- .../examples/label_image/bitmap_helpers.h | 4 +-- .../lite/examples/label_image/label_image.cc | 12 +++---- .../examples/label_image/label_image_test.cc | 16 +++++----- 5 files changed, 45 insertions(+), 46 deletions(-) diff --git a/tensorflow/contrib/lite/examples/label_image/BUILD b/tensorflow/contrib/lite/examples/label_image/BUILD index 9322e186a2..c61445114e 100644 --- a/tensorflow/contrib/lite/examples/label_image/BUILD +++ b/tensorflow/contrib/lite/examples/label_image/BUILD @@ -53,19 +53,18 @@ cc_library( ], ) -# TODO(ahentz): Test disabled as it has a memory leek from read_bmp -# cc_test( -# name = "label_image_test", -# srcs = [ -# "get_top_n.h", -# "get_top_n_impl.h", -# "label_image_test.cc", -# ], -# data = [ -# "testdata/grace_hopper.bmp", -# ], -# deps = [ -# ":bitmap_helpers", -# "//testing/base/public:gunit", -# ], -# ) +cc_test( + name = "label_image_test", + srcs = [ + "get_top_n.h", + "get_top_n_impl.h", + "label_image_test.cc", + ], + data = [ + "testdata/grace_hopper.bmp", + ], + deps = [ + ":bitmap_helpers", + "@com_google_googletest//:gtest", + ], +) diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc index 0b38cd38c8..2735d1f5ea 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc +++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.cc @@ -28,8 +28,9 @@ limitations under the License. 
namespace tflite { namespace label_image { -uint8_t* decode_bmp(const uint8_t* input, int row_size, uint8_t* const output, - int width, int height, int channels, bool top_down) { +std::vector decode_bmp(const uint8_t* input, int row_size, int width, + int height, int channels, bool top_down) { + std::vector output(height * width * channels); for (int i = 0; i < height; i++) { int src_pos; int dst_pos; @@ -66,12 +67,11 @@ uint8_t* decode_bmp(const uint8_t* input, int row_size, uint8_t* const output, } } } - return output; } -uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height, - int* channels, Settings* s) { +std::vector read_bmp(const std::string& input_bmp_name, int* width, + int* height, int* channels, Settings* s) { int begin, end; std::ifstream file(input_bmp_name, std::ios::in | std::ios::binary); @@ -87,14 +87,15 @@ uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height, if (s->verbose) LOG(INFO) << "len: " << len << "\n"; - const uint8_t* img_bytes = new uint8_t[len]; + std::vector img_bytes(len); file.seekg(0, std::ios::beg); - file.read((char*)img_bytes, len); + file.read(reinterpret_cast(img_bytes.data()), len); const int32_t header_size = - *(reinterpret_cast(img_bytes + 10)); - *width = *(reinterpret_cast(img_bytes + 18)); - *height = *(reinterpret_cast(img_bytes + 22)); - const int32_t bpp = *(reinterpret_cast(img_bytes + 28)); + *(reinterpret_cast(img_bytes.data() + 10)); + *width = *(reinterpret_cast(img_bytes.data() + 18)); + *height = *(reinterpret_cast(img_bytes.data() + 22)); + const int32_t bpp = + *(reinterpret_cast(img_bytes.data() + 28)); *channels = bpp / 8; if (s->verbose) @@ -110,10 +111,9 @@ uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height, bool top_down = (*height < 0); // Decode image, allocating tensor once the image size is known - uint8_t* output = new uint8_t[abs(*height) * *width * *channels]; const uint8_t* bmp_pixels = &img_bytes[header_size]; - return 
decode_bmp(bmp_pixels, row_size, output, *width, abs(*height), - *channels, top_down); + return decode_bmp(bmp_pixels, row_size, *width, abs(*height), *channels, + top_down); } } // namespace label_image diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h index 97343dde6b..5fc75b1f72 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h +++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers.h @@ -22,8 +22,8 @@ limitations under the License. namespace tflite { namespace label_image { -uint8_t* read_bmp(const std::string& input_bmp_name, int* width, int* height, - int* channels, Settings* s); +std::vector read_bmp(const std::string& input_bmp_name, int* width, + int* height, int* channels, Settings* s); template void resize(T* out, uint8_t* in, int image_height, int image_width, diff --git a/tensorflow/contrib/lite/examples/label_image/label_image.cc b/tensorflow/contrib/lite/examples/label_image/label_image.cc index 966fcd2a31..86d7d1cc4a 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image.cc +++ b/tensorflow/contrib/lite/examples/label_image/label_image.cc @@ -138,8 +138,8 @@ void RunInference(Settings* s) { int image_width = 224; int image_height = 224; int image_channels = 3; - uint8_t* in = read_bmp(s->input_bmp_name, &image_width, &image_height, - &image_channels, s); + std::vector in = read_bmp(s->input_bmp_name, &image_width, + &image_height, &image_channels, s); int input = interpreter->inputs()[0]; if (s->verbose) LOG(INFO) << "input: " << input << "\n"; @@ -168,12 +168,12 @@ void RunInference(Settings* s) { switch (interpreter->tensor(input)->type) { case kTfLiteFloat32: s->input_floating = true; - resize(interpreter->typed_tensor(input), in, image_height, - image_width, image_channels, wanted_height, wanted_width, - wanted_channels, s); + resize(interpreter->typed_tensor(input), in.data(), + image_height, image_width, 
image_channels, wanted_height, + wanted_width, wanted_channels, s); break; case kTfLiteUInt8: - resize(interpreter->typed_tensor(input), in, + resize(interpreter->typed_tensor(input), in.data(), image_height, image_width, image_channels, wanted_height, wanted_width, wanted_channels, s); break; diff --git a/tensorflow/contrib/lite/examples/label_image/label_image_test.cc b/tensorflow/contrib/lite/examples/label_image/label_image_test.cc index ce35483f76..de7de21f77 100644 --- a/tensorflow/contrib/lite/examples/label_image/label_image_test.cc +++ b/tensorflow/contrib/lite/examples/label_image/label_image_test.cc @@ -27,20 +27,20 @@ namespace label_image { TEST(LabelImageTest, GraceHopper) { std::string lena_file = - "tensorflow/contrib/lite/examples/label_image/testdata/grace_hopper.bmp"; + "tensorflow/contrib/lite/examples/label_image/testdata/" + "grace_hopper.bmp"; int height, width, channels; Settings s; - uint8_t *data; - - data = read_bmp(lena_file, &width, &height, &channels, &s); + std::vector input = + read_bmp(lena_file, &width, &height, &channels, &s); ASSERT_EQ(height, 606); ASSERT_EQ(width, 517); ASSERT_EQ(channels, 3); - uint8_t *out = new uint8_t[606 * 517 * 3]; - downsize(out, data, 606, 517, 3, 214, 214, 3, &s); - ASSERT_EQ(out[0], 0x15); - ASSERT_EQ(out[214 * 214 * 3 - 1], 0x12); + std::vector output(606 * 517 * 3); + resize(output.data(), input.data(), 606, 517, 3, 214, 214, 3, &s); + ASSERT_EQ(output[0], 0x15); + ASSERT_EQ(output[214 * 214 * 3 - 1], 0x11); } TEST(LabelImageTest, GetTopN) { -- GitLab From 9f640dc874dba2e10b634cb7e87837f040fa83dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 15:21:40 -0700 Subject: [PATCH 026/365] [TF:XLA] Fix invalid HLO graph in hlo_rematerialization_test. The shape of the while-init did not match the body computation parameter's shape. Also, invoke the HLO verifier in the test to verify shapes. 
PiperOrigin-RevId: 199705580 --- tensorflow/compiler/xla/service/BUILD | 1 + .../xla/service/hlo_rematerialization_test.cc | 122 +++++++++--------- 2 files changed, 62 insertions(+), 61 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 89de302f4d..29718e057b 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2139,6 +2139,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc index 83de54f3fa..e81334d5a8 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" namespace xla { namespace { @@ -40,7 +41,8 @@ class HloRematerializationTest : public HloTestBase { // Creates and returns a computation which can benefit from // rematerialization. 
The computation looks like: // - // F32[] %param = {...} + // F32[1] %param = {...} + // F32[] %reshape = reshape(F32[], param) // F32[1024] %bcast = broadcast(%param) // F32[1024] %negate = negate(%bcast) // F32[2048] %concat_1 = concat({%negate, %negate}) @@ -57,9 +59,11 @@ class HloRematerializationTest : public HloTestBase { const string& suffix = "") { auto builder = HloComputation::Builder(TestName() + suffix); auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); + HloInstruction::CreateParameter(0, vec1_shape_, "param")); + auto reshape = builder.AddInstruction( + HloInstruction::CreateReshape(scalar_shape_, param)); auto bcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(vec1024_shape_, param, {})); + HloInstruction::CreateBroadcast(vec1024_shape_, reshape, {})); auto negate = builder.AddInstruction( HloInstruction::CreateUnary(vec1024_shape_, HloOpcode::kNegate, bcast)); auto concat_1 = builder.AddInstruction(HloInstruction::CreateConcatenate( @@ -100,9 +104,11 @@ class HloRematerializationTest : public HloTestBase { const string& suffix = "") { auto builder = HloComputation::Builder(TestName() + suffix); auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); + HloInstruction::CreateParameter(0, vec1_shape_, "param")); + auto reshape = builder.AddInstruction( + HloInstruction::CreateReshape(scalar_shape_, param)); auto bcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(vec1024_shape_, param, {})); + HloInstruction::CreateBroadcast(vec1024_shape_, reshape, {})); auto slice_1 = builder.AddInstruction( HloInstruction::CreateSlice(vec1_shape_, bcast, /*start_indices=*/{0}, /*limit_indices=*/{1}, @@ -135,6 +141,15 @@ class HloRematerializationTest : public HloTestBase { return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); } + StatusOr RunHloRematerialization( + int64 memory_limit_bytes, HloModule* module, + 
SequentialHloOrdering::HloModuleSequence* sequence) { + TF_EXPECT_OK(verifier().Run(module).status()); + return HloRematerialization::RematerializeAndSchedule( + ByteSizeOf, memory_limit_bytes, module, DefaultMemoryScheduler, + sequence); + } + // Various shapes used in the canned computations. const Shape scalar_shape_ = ShapeUtil::MakeShape(xla::F32, {}); const Shape vec1_shape_ = ShapeUtil::MakeShape(xla::F32, {1}); @@ -158,11 +173,9 @@ TEST_F(HloRematerializationTest, SingleComputation) { SequentialHloOrdering::HloModuleSequence sequence; // Computation requires 16KB without rematerialization, but uses only 12KB // with rematerialization so pick a memory limit between these values (14KB). - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/14 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/14 * 1024, + module.get(), &sequence)); EXPECT_TRUE(changed); // Root should not have changed. @@ -188,18 +201,16 @@ TEST_F(HloRematerializationTest, SingleComputationNoRematerialization) { HloComputation* computation = module->AddEntryComputation(MakeRematerializableComputation()); - EXPECT_EQ(computation->instruction_count(), 7); + EXPECT_EQ(computation->instruction_count(), 8); SequentialHloOrdering::HloModuleSequence sequence; - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/20 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/20 * 1024, + module.get(), &sequence)); // No instructions should have been materialized. 
EXPECT_FALSE(changed); - EXPECT_EQ(computation->instruction_count(), 7); + EXPECT_EQ(computation->instruction_count(), 8); } // Test rematerialization of a computation which calls another computation via a @@ -225,23 +236,21 @@ TEST_F(HloRematerializationTest, RematerializeAroundWhile) { module->AddEntryComputation(MakeRematerializableWhileComputation( while_cond, /*while_body=*/body_computation)); - EXPECT_EQ(entry_computation->instruction_count(), 6); - EXPECT_EQ(body_computation->instruction_count(), 7); + EXPECT_EQ(entry_computation->instruction_count(), 7); + EXPECT_EQ(body_computation->instruction_count(), 8); // The body computation uses 16KB and the entry computation uses 2KB at the // while so the peak memory use of the module is 18KB. Set the memory limit a // bit lower (17KB) to force rematerialization of the entry computation. SequentialHloOrdering::HloModuleSequence sequence; - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/17 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/17 * 1024, + module.get(), &sequence)); EXPECT_TRUE(changed); // Only the entry computation should have a rematerialized instruction added. 
- EXPECT_EQ(entry_computation->instruction_count(), 7); - EXPECT_EQ(body_computation->instruction_count(), 7); + EXPECT_EQ(entry_computation->instruction_count(), 8); + EXPECT_EQ(body_computation->instruction_count(), 8); } // Test rematerialization of a computation which calls another computation via a @@ -264,20 +273,18 @@ TEST_F(HloRematerializationTest, RematerializeEntryAndWhileBody) { module->AddEntryComputation(MakeRematerializableWhileComputation( while_cond, /*while_body=*/body_computation)); - EXPECT_EQ(entry_computation->instruction_count(), 6); - EXPECT_EQ(body_computation->instruction_count(), 7); + EXPECT_EQ(entry_computation->instruction_count(), 7); + EXPECT_EQ(body_computation->instruction_count(), 8); SequentialHloOrdering::HloModuleSequence sequence; - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/15 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/15 * 1024, + module.get(), &sequence)); EXPECT_TRUE(changed); - // Both computations should have a rematerialized instruction added. - EXPECT_EQ(entry_computation->instruction_count(), 7); - EXPECT_EQ(body_computation->instruction_count(), 8); + // Both computations should have rematerialized instructions added. + EXPECT_EQ(entry_computation->instruction_count(), 9); + EXPECT_EQ(body_computation->instruction_count(), 9); } // Test rematerialization of a doubly nested computation. 
All computations @@ -303,24 +310,22 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) { module->AddEntryComputation(MakeRematerializableWhileComputation( while_cond, /*while_body=*/middle_computation)); - EXPECT_EQ(entry_computation->instruction_count(), 6); - EXPECT_EQ(middle_computation->instruction_count(), 6); - EXPECT_EQ(inner_computation->instruction_count(), 7); + EXPECT_EQ(entry_computation->instruction_count(), 7); + EXPECT_EQ(middle_computation->instruction_count(), 7); + EXPECT_EQ(inner_computation->instruction_count(), 8); // If all computations are maximally rematerialized then peak memory usage is // ~12K so pick something slightly larger. SequentialHloOrdering::HloModuleSequence sequence; - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/13 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/13 * 1024, + module.get(), &sequence)); EXPECT_TRUE(changed); - // All computations should have a rematerialized instruction added. - EXPECT_EQ(entry_computation->instruction_count(), 7); - EXPECT_EQ(middle_computation->instruction_count(), 7); - EXPECT_EQ(inner_computation->instruction_count(), 8); + // All computations should have rematerialized instructions added. + EXPECT_EQ(entry_computation->instruction_count(), 9); + EXPECT_EQ(middle_computation->instruction_count(), 9); + EXPECT_EQ(inner_computation->instruction_count(), 9); } TEST_F(HloRematerializationTest, RngNotRematerialized) { @@ -382,10 +387,9 @@ TEST_F(HloRematerializationTest, RngNotRematerialized) { // parameter and output) and 20KB (peak memory possible with // rematerialization). 
TF_ASSERT_OK_AND_ASSIGN( - bool changed, HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, + bool changed, RunHloRematerialization( /*memory_limit_bytes=*/4 * ByteSizeOf(vec1024_shape_), - module.get(), DefaultMemoryScheduler, &sequence)); + module.get(), &sequence)); EXPECT_TRUE(changed); // The rng should not have been rematerialized. EXPECT_EQ(count_rngs(entry_computation), 1); @@ -476,11 +480,9 @@ TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) { // Pick a memory limit some where between 24KB (initial peak memory including // parameter and output) and 20KB (peak memory possible with // rematerialization). - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/22 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/22 * 1024, + module.get(), &sequence)); EXPECT_TRUE(changed); // The broadcast should have been rematerialized 3 times. @@ -573,11 +575,9 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) { // Pick a memory limit some where between 24KB (initial peak memory including // parameter and output) and 20KB (peak memory possible with // rematerialization). - TF_ASSERT_OK_AND_ASSIGN(bool changed, - HloRematerialization::RematerializeAndSchedule( - ByteSizeOf, - /*memory_limit_bytes=*/22 * 1024, module.get(), - DefaultMemoryScheduler, &sequence)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloRematerialization( + /*memory_limit_bytes=*/22 * 1024, + module.get(), &sequence)); // Rematerialization should only occur if the rematerializable instruction has // no indirect uses. if (indirectly_used) { -- GitLab From e73c66f8152690b9f2466bfcca887283ed380980 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 15:28:16 -0700 Subject: [PATCH 027/365] Add ScaleTriL Bijector to enable transformed distributions over PSD matrices. 
PiperOrigin-RevId: 199706732 --- tensorflow/contrib/distributions/BUILD | 19 +++ .../kernel_tests/bijectors/scale_tril_test.py | 69 +++++++++++ .../python/ops/bijectors/__init__.py | 2 + .../python/ops/bijectors/scale_tril.py | 114 ++++++++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/scale_tril_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 61d4e90ea2..51f7028566 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -1137,6 +1137,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "scale_tril_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/scale_tril_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "sigmoid_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/scale_tril_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/scale_tril_test.py new file mode 100644 index 0000000000..566a7b3dff --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/scale_tril_test.py @@ -0,0 +1,69 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for ScaleTriL bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import bijectors +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class ScaleTriLBijectorTest(test.TestCase): + """Tests the correctness of the ScaleTriL bijector.""" + + def setUp(self): + self._rng = np.random.RandomState(42) + + def testComputesCorrectValues(self): + shift = 1.61803398875 + x = np.float32(np.array([-1, .5, 2])) + y = np.float32(np.array([[np.exp(2) + shift, 0.], + [.5, np.exp(-1) + shift]])) + + b = bijectors.ScaleTriL(diag_bijector=bijectors.Exp(), + diag_shift=shift) + + y_ = self.evaluate(b.forward(x)) + self.assertAllClose(y, y_) + + x_ = self.evaluate(b.inverse(y)) + self.assertAllClose(x, x_) + + @test_util.run_in_graph_and_eager_modes() + def testInvertible(self): + + # Generate random inputs from an unconstrained space, with + # event size 6 to specify 3x3 triangular matrices. 
+ batch_shape = [2, 1] + x = np.float32(np.random.randn(*(batch_shape + [6]))) + b = bijectors.ScaleTriL(diag_bijector=bijectors.Softplus(), + diag_shift=3.14159) + y = self.evaluate(b.forward(x)) + self.assertAllEqual(y.shape, batch_shape + [3, 3]) + + x_ = self.evaluate(b.inverse(y)) + self.assertAllClose(x, x_) + + fldj = self.evaluate(b.forward_log_det_jacobian(x, event_ndims=1)) + ildj = self.evaluate(b.inverse_log_det_jacobian(y, event_ndims=2)) + self.assertAllClose(fldj, -ildj) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index d97a1f0d30..e141f8b5c6 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -37,6 +37,7 @@ @@PowerTransform @@RealNVP @@Reshape +@@ScaleTriL @@Sigmoid @@SinhArcsinh @@SoftmaxCentered @@ -78,6 +79,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.permute import * from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import * from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import * from tensorflow.contrib.distributions.python.ops.bijectors.reshape import * +from tensorflow.contrib.distributions.python.ops.bijectors.scale_tril import * from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid import * from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh import * from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py b/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py new file mode 100644 index 0000000000..96bd242c63 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/scale_tril.py @@ -0,0 +1,114 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""ScaleTriL bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distributions.python.ops.bijectors import affine_scalar +from tensorflow.contrib.distributions.python.ops.bijectors import chain +from tensorflow.contrib.distributions.python.ops.bijectors import fill_triangular +from tensorflow.contrib.distributions.python.ops.bijectors import softplus +from tensorflow.contrib.distributions.python.ops.bijectors import transform_diagonal + +__all__ = [ + "ScaleTriL", +] + + +class ScaleTriL(chain.Chain): + """Transforms unconstrained vectors to TriL matrices with positive diagonal. + + This is implemented as a simple `tfb.Chain` of `tfb.FillTriangular` + followed by `tfb.TransformDiagonal`, and provided mostly as a + convenience. The default setup is somewhat opinionated, using a + Softplus transformation followed by a small shift (`1e-5`) which + attempts to avoid numerical issues from zeros on the diagonal. 
+ + #### Examples + + ```python + tfb = tf.contrib.distributions.bijectors + b = tfb.ScaleTriL( + diag_bijector=tfb.Exp(), + diag_shift=None) + b.forward(x=[0., 0., 0.]) + # Result: [[1., 0.], + # [0., 1.]] + b.inverse(y=[[1., 0], + [.5, 2]]) + # Result: [log(2), .5, log(1)] + + # Define a distribution over PSD matrices of shape `[3, 3]`, + # with `1 + 2 + 3 = 6` degrees of freedom. + dist = tfd.TransformedDistribution( + tfd.Normal(tf.zeros(6), tf.ones(6)), + tfb.Chain([tfb.CholeskyOuterProduct(), tfb.ScaleTriL()])) + + # Using an identity transformation, ScaleTriL is equivalent to + # tfb.FillTriangular. + b = tfb.ScaleTriL( + diag_bijector=tfb.Identity(), + diag_shift=None) + + # For greater control over initialization, one can manually encode + # pre- and post- shifts inside of `diag_bijector`. + b = tfb.ScaleTriL( + diag_bijector=tfb.Chain([ + tfb.AffineScalar(shift=1e-3), + tfb.Softplus(), + tfb.AffineScalar(shift=0.5413)]), # softplus_inverse(1.) + # = log(expm1(1.)) = 0.5413 + diag_shift=None) + ``` + """ + + def __init__(self, + diag_bijector=None, + diag_shift=1e-5, + validate_args=False, + name="scale_tril"): + """Instantiates the `ScaleTriL` bijector. + + Args: + diag_bijector: `Bijector` instance, used to transform the output diagonal + to be positive. + Default value: `None` (i.e., `tfb.Softplus()`). + diag_shift: Float value broadcastable and added to all diagonal entries + after applying the `diag_bijector`. Setting a positive + value forces the output diagonal entries to be positive, but + prevents inverting the transformation for matrices with + diagonal entries less than this value. + Default value: `1e-5` (i.e., no shift is applied). + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + Default value: `False` (i.e., arguments are not validated). + name: Python `str` name given to ops managed by this object. + Default value: `scale_tril`. 
+ """ + + if diag_bijector is None: + diag_bijector = softplus.Softplus(validate_args=validate_args) + + if diag_shift is not None: + diag_bijector = chain.Chain([affine_scalar.AffineScalar(shift=diag_shift), + diag_bijector]) + + super(ScaleTriL, self).__init__( + [transform_diagonal.TransformDiagonal(diag_bijector=diag_bijector), + fill_triangular.FillTriangular()], + validate_args=validate_args, + name=name) -- GitLab From 5ad9d9cb933864e5eb938c31551d5ba861ced0f2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 16:02:37 -0700 Subject: [PATCH 028/365] Split out HloFftInstruction and HloSendRecvInstruction as subclasses from HloInstruction. PiperOrigin-RevId: 199712253 --- .../compiler/xla/service/hlo_instruction.cc | 154 ++++++++---------- .../compiler/xla/service/hlo_instruction.h | 62 +++---- .../compiler/xla/service/hlo_instructions.cc | 150 ++++++++++++++++- .../compiler/xla/service/hlo_instructions.h | 145 ++++++++++++++--- 4 files changed, 358 insertions(+), 153 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 8d7604fae1..cf1530abe1 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -86,6 +86,31 @@ StatusOr> HloInstruction::CreateFromProto( operands(2), operands(3), operands(4), proto.epsilon(), proto.feature_index()); break; + case HloOpcode::kFft: { + CHECK_EQ(proto.operand_ids_size(), 1); + std::vector fft_length(proto.fft_length().begin(), + proto.fft_length().end()); + instruction = CreateFft(proto.shape(), operands(0), proto.fft_type(), + tensorflow::gtl::ArraySlice(fft_length)); + break; + } + case HloOpcode::kSend: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = CreateSend(operands(0), proto.channel_id()); + break; + case HloOpcode::kSendDone: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = CreateSendDone(operands(0)); + break; + case 
HloOpcode::kRecv: + CHECK_EQ(proto.operand_ids_size(), 0); + instruction = + CreateRecv(proto.shape().tuple_shapes(0), proto.channel_id()); + break; + case HloOpcode::kRecvDone: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = CreateRecvDone(operands(0)); + break; default: { instruction = WrapUnique(new HloInstruction(opcode, proto.shape())); for (const int64 operand_id : proto.operand_ids()) { @@ -181,14 +206,9 @@ StatusOr> HloInstruction::CreateFromProto( } instruction->outfeed_config_ = proto.outfeed_config(); instruction->distribution_ = proto.distribution(); - instruction->channel_id_ = proto.channel_id(); instruction->infeed_config_ = proto.infeed_config(); instruction->custom_call_target_ = proto.custom_call_target(); instruction->outfeed_shape_ = proto.outfeed_shape(); - instruction->fft_type_ = proto.fft_type(); - for (int64 fft_len : proto.fft_length()) { - instruction->fft_length_.push_back(fft_len); - } if (proto.has_sharding()) { TF_ASSIGN_OR_RETURN(const auto& sharding, @@ -404,11 +424,7 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateFft( const Shape& shape, HloInstruction* operand, FftType fft_type, tensorflow::gtl::ArraySlice fft_length) { - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kFft, shape)); - instruction->AppendOperand(operand); - instruction->fft_type_ = fft_type; - instruction->fft_length_.assign(fft_length.begin(), fft_length.end()); - return instruction; + return MakeUnique(shape, operand, fft_type, fft_length); } /* static */ std::unique_ptr HloInstruction::CreateDot( @@ -490,48 +506,28 @@ HloInstruction::CreateCrossReplicaSum( /* static */ std::unique_ptr HloInstruction::CreateSend( HloInstruction* operand, int64 channel_id) { - // Send instruction produces a tuple of {aliased operand, U32 context}. 
- Shape output_shape = ShapeUtil::MakeTupleShape( - {operand->shape(), ShapeUtil::MakeShape(U32, {})}); - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kSend, output_shape)); - instruction->AppendOperand(operand); - instruction->channel_id_ = channel_id; - return instruction; + return MakeUnique(operand, channel_id); } /* static */ std::unique_ptr HloInstruction::CreateSendDone( HloInstruction* operand) { - CHECK(operand->opcode() == HloOpcode::kSend) + auto send_operand = DynCast(operand); + CHECK(send_operand != nullptr) << "SendDone must take the context operand from Send"; - auto instruction = WrapUnique( - new HloInstruction(HloOpcode::kSendDone, ShapeUtil::MakeNil())); - instruction->AppendOperand(operand); - instruction->channel_id_ = operand->channel_id(); - return instruction; + return MakeUnique(send_operand); } /* static */ std::unique_ptr HloInstruction::CreateRecv( const Shape& shape, int64 channel_id) { - // Recv instruction produces a tuple of {receive buffer, U32 context}. 
- Shape output_shape = - ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}); - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kRecv, output_shape)); - instruction->channel_id_ = channel_id; - return instruction; + return MakeUnique(shape, channel_id); } /* static */ std::unique_ptr HloInstruction::CreateRecvDone( HloInstruction* operand) { - CHECK(operand->opcode() == HloOpcode::kRecv) + auto recv_operand = DynCast(operand); + CHECK(recv_operand != nullptr) << "RecvDone must take the context operand from Recv"; - Shape output_shape = ShapeUtil::GetTupleElementShape(operand->shape(), 0); - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kRecvDone, output_shape)); - instruction->AppendOperand(operand); - instruction->channel_id_ = operand->channel_id(); - return instruction; + return MakeUnique(recv_operand); } /* static */ std::unique_ptr HloInstruction::CreateReverse( @@ -674,8 +670,8 @@ HloInstruction::CreateBatchNormTraining(const Shape& shape, HloInstruction* scale, HloInstruction* offset, float epsilon, int64 feature_index) { - return WrapUnique(new HloBatchNormTrainingInstruction( - shape, operand, scale, offset, epsilon, feature_index)); + return MakeUnique( + shape, operand, scale, offset, epsilon, feature_index); } /* static */ std::unique_ptr @@ -683,8 +679,8 @@ HloInstruction::CreateBatchNormInference( const Shape& shape, HloInstruction* operand, HloInstruction* scale, HloInstruction* offset, HloInstruction* mean, HloInstruction* variance, float epsilon, int64 feature_index) { - return WrapUnique(new HloBatchNormInferenceInstruction( - shape, operand, scale, offset, mean, variance, epsilon, feature_index)); + return MakeUnique( + shape, operand, scale, offset, mean, variance, epsilon, feature_index); } /* static */ std::unique_ptr @@ -693,9 +689,9 @@ HloInstruction::CreateBatchNormGrad(const Shape& shape, HloInstruction* operand, HloInstruction* variance, HloInstruction* grad_output, float epsilon, int64 
feature_index) { - return WrapUnique( - new HloBatchNormGradInstruction(shape, operand, scale, mean, variance, - grad_output, epsilon, feature_index)); + return MakeUnique(shape, operand, scale, mean, + variance, grad_output, epsilon, + feature_index); } /* static */ std::unique_ptr @@ -1287,6 +1283,11 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kBatchNormTraining: case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: + case HloOpcode::kFft: + case HloOpcode::kSend: + case HloOpcode::kSendDone: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: clone = CloneWithNewOperandsImpl(shape, new_operands, context); break; // Unary ops. @@ -1395,10 +1396,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( clone = CreateDot(shape, new_operands[0], new_operands[1], *dot_dimension_numbers_); break; - case HloOpcode::kFft: - CHECK_EQ(new_operands.size(), 1); - clone = CreateFft(shape, new_operands[0], fft_type_, fft_length_); - break; case HloOpcode::kCrossReplicaSum: clone = CreateCrossReplicaSum(shape, new_operands, to_apply()); break; @@ -1504,24 +1501,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( true_computation(), new_operands[2], false_computation()); break; - case HloOpcode::kSend: - CHECK_EQ(new_operands.size(), 1); - clone = CreateSend(new_operands[0], channel_id()); - break; - case HloOpcode::kSendDone: - CHECK_EQ(new_operands.size(), 1); - clone = CreateSendDone(new_operands[0]); - break; - case HloOpcode::kRecv: - CHECK_EQ(new_operands.size(), 0); - // The shape is a tuple, but CreateRecv() wants the raw data shape. 
- clone = - CreateRecv(ShapeUtil::GetTupleElementShape(shape, 0), channel_id()); - break; - case HloOpcode::kRecvDone: - CHECK_EQ(new_operands.size(), 1); - clone = CreateRecvDone(new_operands[0]); - break; case HloOpcode::kGather: CHECK_EQ(new_operands.size(), 2); clone = CreateGather(shape, new_operands[0], new_operands[1], @@ -1855,11 +1834,6 @@ bool HloInstruction::IdenticalSlowPath( other.gather_dimension_numbers()) && gather_window_bounds() == other.gather_window_bounds(); - // FFT has various types & lengths. - case HloOpcode::kFft: - return fft_type() == other.fft_type() && - fft_length() == other.fft_length(); - // Reduction results are determined by the reduction dimension and the // reduction computation. case HloOpcode::kReduce: @@ -1915,10 +1889,6 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kSort: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: - case HloOpcode::kSend: - case HloOpcode::kSendDone: case HloOpcode::kHostCompute: return false; @@ -1927,6 +1897,11 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kBatchNormTraining: case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: + case HloOpcode::kFft: + case HloOpcode::kSend: + case HloOpcode::kSendDone: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: LOG(FATAL) << "Base class impl called for opcode with subclass: " << opcode(); } @@ -2292,7 +2267,8 @@ string HloInstruction::OperandsToStringWithCanonicalNameMap( std::vector HloInstruction::ExtraAttributesToString( const HloPrintOptions& options) const { - std::vector extra; + std::vector extra = ExtraAttributesToStringImpl(options); + if (opcode() == HloOpcode::kFusion) { extra.push_back(StrCat("kind=", xla::ToString(fusion_kind()))); } @@ -2337,10 +2313,6 @@ std::vector HloInstruction::ExtraAttributesToString( extra.push_back( StrCat("window_bounds={", Join(gather_window_bounds(), ","), "}")); } - if (opcode() == HloOpcode::kFft) { - 
extra.push_back(StrCat("fft_type=", FftType_Name(fft_type()))); - extra.push_back(StrCat("fft_length={", Join(fft_length(), ","), "}")); - } if (options.print_subcomputation_mode() == HloPrintOptions::PrintSubcomputationMode::kNameOnly) { @@ -2411,10 +2383,6 @@ std::vector HloInstruction::ExtraAttributesToString( break; } } - if (opcode() == HloOpcode::kSend || opcode() == HloOpcode::kRecv || - opcode() == HloOpcode::kSendDone || opcode() == HloOpcode::kRecvDone) { - extra.push_back(StrCat("channel_id=", channel_id_)); - } if (opcode() == HloOpcode::kGetTupleElement) { extra.push_back(StrCat("index=", tuple_index())); @@ -2543,14 +2511,9 @@ HloInstructionProto HloInstruction::ToProto() const { if (opcode() == HloOpcode::kRng) { proto.set_distribution(distribution_); } - proto.set_channel_id(channel_id_); proto.set_infeed_config(infeed_config_); proto.set_custom_call_target(custom_call_target_); *proto.mutable_outfeed_shape() = outfeed_shape_; - proto.set_fft_type(fft_type_); - for (int64 fft_len : fft_length_) { - proto.add_fft_length(fft_len); - } if (has_sharding()) { *proto.mutable_sharding() = sharding().ToProto(); @@ -3617,4 +3580,15 @@ float HloInstruction::epsilon() const { return Cast(this)->epsilon(); } +FftType HloInstruction::fft_type() const { + return Cast(this)->fft_type(); +} + +const std::vector& HloInstruction::fft_length() const { + return Cast(this)->fft_length(); +} + +int64 HloInstruction::channel_id() const { + return Cast(this)->channel_id(); +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index b16837eaec..6232d55e1b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -992,7 +992,7 @@ class HloInstruction { string OperandsToString(const HloPrintOptions& options) const; // Returns string representation of op-specific attributes. 
- virtual std::vector ExtraAttributesToString( + std::vector ExtraAttributesToString( const HloPrintOptions& options) const; // As ToString, but returns a shorter string. @@ -1011,27 +1011,12 @@ class HloInstruction { HloInstruction* tracing() const; void set_tracing(HloInstruction* trace_instruction); - // Returns the channel id associated with the instruction. The id is - // shared between each Send/Recv pair and is globally unique to identify each - // channel. - // - // Precondition: opcode() == HloOpcode::kSend or HloOpcode::kRecv - int64 channel_id() const { return channel_id_; } - // Returns the channel name associated with the instruction. The name is // used to identify host Send/Recv operations. // // Precondition: opcode() == HloOpcode::kHostCompute string channel_name() const { return channel_name_; } - // Delegates to HloBatchNormInstruction::feature_index. - // TODO(b/80131774): Remove this code. - int64 feature_index() const; - - // Delegates to HloBatchNormInstruction::epsilon. - // TODO(b/80131774): Remove this code. - float epsilon() const; - // Returns the infeed configuration string. The infeed configuration includes // any metadata needed for the backend compiler (e.g., infeed buffer address) // and is target-dependent. @@ -1318,16 +1303,6 @@ class HloInstruction { MakeUnique(dnums); } - FftType fft_type() const { - CHECK_EQ(HloOpcode::kFft, opcode_); - return fft_type_; - } - - const std::vector& fft_length() const { - CHECK_EQ(HloOpcode::kFft, opcode_); - return fft_length_; - } - // Returns data on the dimension numbers used for a dot operation. const DotDimensionNumbers& dot_dimension_numbers() const { CHECK(dot_dimension_numbers_ != nullptr); @@ -1526,6 +1501,25 @@ class HloInstruction { void RelayoutConstant(const Layout& new_layout, const ShapeIndex& shape_index = {}); + // Old methods kept for smooth subclassing transition BEGIN. + // TODO(b/80131774): Remove this code. + + // Delegates to HloBatchNormInstruction::feature_index. 
+ int64 feature_index() const; + + // Delegates to HloBatchNormInstruction::epsilon. + float epsilon() const; + + // Delegates to HloFftInstruction::fft_type. + FftType fft_type() const; + + // Delegates to HloFftInstruction::fft_length. + const std::vector& fft_length() const; + + // Delegates to HloSendRecvInstruction::channel_id. + int64 channel_id() const; + // Old methods kept for smooth subclassing transition END. + protected: // Internal constructor for a given opcode/shape, other fields must be filled // by factory methods. @@ -1544,6 +1538,12 @@ class HloInstruction { // TODO(b/80131774): This should be pure virtual. LOG(FATAL) << "Unimplemented method."; } + + // Implementation for non-common logic of ExtraAttributesToString. + virtual std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {}; + } // Prints an instruction to a string. // // The canonical string representation needs to name operands and instruction @@ -1675,12 +1675,6 @@ class HloInstruction { std::unique_ptr gather_dimension_numbers_; std::vector gather_window_bounds_; - // Describes FFT type for an FFT instruction. - FftType fft_type_ = FftType::FFT; - - // Indicates the FFT length for an FFT instruction. - std::vector fft_length_; - // Describes the [begin, end) index range for a slice. std::vector slice_starts_; std::vector slice_limits_; @@ -1755,10 +1749,6 @@ class HloInstruction { // Only present for kRng. RandomDistribution distribution_; - // Represents a unique identifier for each Send/Recv instruction pair. - // Only present for kSend or kRecv. - int64 channel_id_ = -1; - // The string representation of the infeed configuration. 
string infeed_config_; diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index adbebb135b..109bf1a9bd 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -15,8 +15,11 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" + namespace xla { +using ::tensorflow::str_util::Join; using ::tensorflow::strings::StrCat; HloBatchNormInstruction::HloBatchNormInstruction( @@ -38,13 +41,6 @@ bool HloBatchNormInstruction::IdenticalSlowPath( epsilon() == casted_other.epsilon(); } -std::vector HloBatchNormInstruction::ExtraAttributesToString( - const HloPrintOptions& options) const { - std::vector extra = {StrCat("epsilon=", epsilon()), - StrCat("feature_index=", feature_index())}; - return extra; -} - HloInstructionProto HloBatchNormInstruction::ToProto() const { HloInstructionProto proto = HloInstruction::ToProto(); proto.set_epsilon(epsilon_); @@ -52,6 +48,12 @@ HloInstructionProto HloBatchNormInstruction::ToProto() const { return proto; } +std::vector HloBatchNormInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("epsilon=", epsilon()), + StrCat("feature_index=", feature_index())}; +} + HloBatchNormTrainingInstruction::HloBatchNormTrainingInstruction( const Shape& shape, HloInstruction* operand, HloInstruction* scale, HloInstruction* offset, float epsilon, int64 feature_index) @@ -115,4 +117,138 @@ HloBatchNormGradInstruction::CloneWithNewOperandsImpl( new_operands[4], epsilon(), feature_index()); } +HloFftInstruction::HloFftInstruction( + const Shape& shape, HloInstruction* operand, FftType fft_type, + tensorflow::gtl::ArraySlice fft_length) + : HloInstruction(HloOpcode::kFft, shape), fft_type_(fft_type) { + fft_length_.assign(fft_length.begin(), fft_length.end()); + 
AppendOperand(operand); +} + +HloInstructionProto HloFftInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + proto.set_fft_type(fft_type_); + for (int64 fft_len : fft_length_) { + proto.add_fft_length(fft_len); + } + return proto; +} + +std::vector HloFftInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("fft_type=", FftType_Name(fft_type())), + StrCat("fft_length={", Join(fft_length(), ","), "}")}; +} + +bool HloFftInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return fft_type() == casted_other.fft_type() && + fft_length() == casted_other.fft_length(); +} + +std::unique_ptr HloFftInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique(shape, new_operands[0], fft_type_, + fft_length_); +} + +HloSendRecvInstruction::HloSendRecvInstruction(HloOpcode opcode, + const Shape& shape, + int64 channel_id) + : HloInstruction(opcode, shape), channel_id_(channel_id) {} + +HloInstructionProto HloSendRecvInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + proto.set_channel_id(channel_id_); + return proto; +} + +std::vector HloSendRecvInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("channel_id=", channel_id_)}; +} + +bool HloSendRecvInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + // Not yet supported. + return false; +} + +// Send instruction produces a tuple of {aliased operand, U32 context}. 
+HloSendInstruction::HloSendInstruction(HloInstruction* operand, + int64 channel_id) + : HloSendRecvInstruction( + HloOpcode::kSend, + ShapeUtil::MakeTupleShape( + {CHECK_NOTNULL(operand)->shape(), ShapeUtil::MakeShape(U32, {})}), + channel_id) { + AppendOperand(operand); +} + +std::unique_ptr HloSendInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique(new_operands[0], channel_id()); +} + +HloSendDoneInstruction::HloSendDoneInstruction(HloSendInstruction* operand) + : HloSendRecvInstruction(HloOpcode::kSendDone, ShapeUtil::MakeNil(), + CHECK_NOTNULL(operand)->channel_id()) { + AppendOperand(operand); +} + +std::unique_ptr +HloSendDoneInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique( + Cast(new_operands[0])); +} + +// Recv instruction produces a tuple of {receive buffer, U32 context}. 
+HloRecvInstruction::HloRecvInstruction(const Shape& shape, int64 channel_id) + : HloSendRecvInstruction( + HloOpcode::kRecv, + ShapeUtil::MakeTupleShape({shape, ShapeUtil::MakeShape(U32, {})}), + channel_id) {} + +std::unique_ptr HloRecvInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 0); + return MakeUnique( + ShapeUtil::GetTupleElementShape(shape, 0), channel_id()); +} + +HloRecvDoneInstruction::HloRecvDoneInstruction(HloRecvInstruction* operand) + : HloSendRecvInstruction( + HloOpcode::kRecvDone, + ShapeUtil::GetTupleElementShape(operand->shape(), 0), + CHECK_NOTNULL(operand)->channel_id()) { + AppendOperand(operand); +} + +std::unique_ptr +HloRecvDoneInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique( + Cast(new_operands[0])); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 6fcd96a8c6..22d2fe6b27 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -32,19 +32,18 @@ class HloBatchNormInstruction : public HloInstruction { // number added to the variance to avoid divide-by-zero error. float epsilon() const { return epsilon_; } - // Returns string representation of op-specific attributes. - std::vector ExtraAttributesToString( - const HloPrintOptions& options) const override; - // Returns a serialized representation of this instruction. 
HloInstructionProto ToProto() const override; protected: - HloBatchNormInstruction(HloOpcode opcode, const Shape& shape, - HloInstruction* operand, HloInstruction* scale, - float epsilon, int64 feature_index); + explicit HloBatchNormInstruction(HloOpcode opcode, const Shape& shape, + HloInstruction* operand, + HloInstruction* scale, float epsilon, + int64 feature_index); private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; bool IdenticalSlowPath( const HloInstruction& other, const std::function& @@ -58,9 +57,11 @@ class HloBatchNormInstruction : public HloInstruction { class HloBatchNormTrainingInstruction : public HloBatchNormInstruction { public: - HloBatchNormTrainingInstruction(const Shape& shape, HloInstruction* operand, - HloInstruction* scale, HloInstruction* offset, - float epsilon, int64 feature_index); + explicit HloBatchNormTrainingInstruction(const Shape& shape, + HloInstruction* operand, + HloInstruction* scale, + HloInstruction* offset, + float epsilon, int64 feature_index); private: // Implementation for non-common logic of CloneWithNewOperands. @@ -72,11 +73,10 @@ class HloBatchNormTrainingInstruction : public HloBatchNormInstruction { class HloBatchNormInferenceInstruction : public HloBatchNormInstruction { public: - HloBatchNormInferenceInstruction(const Shape& shape, HloInstruction* operand, - HloInstruction* scale, - HloInstruction* offset, HloInstruction* mean, - HloInstruction* variance, float epsilon, - int64 feature_index); + explicit HloBatchNormInferenceInstruction( + const Shape& shape, HloInstruction* operand, HloInstruction* scale, + HloInstruction* offset, HloInstruction* mean, HloInstruction* variance, + float epsilon, int64 feature_index); private: // Implementation for non-common logic of CloneWithNewOperands. 
@@ -88,11 +88,116 @@ class HloBatchNormInferenceInstruction : public HloBatchNormInstruction { class HloBatchNormGradInstruction : public HloBatchNormInstruction { public: - HloBatchNormGradInstruction(const Shape& shape, HloInstruction* operand, - HloInstruction* scale, HloInstruction* mean, - HloInstruction* variance, - HloInstruction* grad_output, float epsilon, - int64 feature_index); + explicit HloBatchNormGradInstruction( + const Shape& shape, HloInstruction* operand, HloInstruction* scale, + HloInstruction* mean, HloInstruction* variance, + HloInstruction* grad_output, float epsilon, int64 feature_index); + + private: + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; +}; + +class HloFftInstruction : public HloInstruction { + public: + explicit HloFftInstruction(const Shape& shape, HloInstruction* operand, + FftType fft_type, + tensorflow::gtl::ArraySlice fft_length); + FftType fft_type() const { return fft_type_; } + + const std::vector& fft_length() const { return fft_length_; } + + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + // Describes FFT type for an FFT instruction. + FftType fft_type_ = FftType::FFT; + + // Indicates the FFT length for an FFT instruction. 
+ std::vector fft_length_; +}; + +class HloSendRecvInstruction : public HloInstruction { + public: + // Returns the channel id associated with the instruction. The id is + // shared between each Send/Recv pair and is globally unique to identify each + // channel. + int64 channel_id() const { return channel_id_; } + + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + protected: + explicit HloSendRecvInstruction(HloOpcode opcode, const Shape& shape, + int64 channel_id); + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Represents a unique identifier for each Send/Recv instruction pair. + int64 channel_id_; +}; + +class HloSendInstruction : public HloSendRecvInstruction { + public: + explicit HloSendInstruction(HloInstruction* operand, int64 channel_id); + + private: + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; +}; + +class HloSendDoneInstruction : public HloSendRecvInstruction { + public: + explicit HloSendDoneInstruction(HloSendInstruction* operand); + + private: + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; +}; + +class HloRecvInstruction : public HloSendRecvInstruction { + public: + explicit HloRecvInstruction(const Shape& shape, int64 channel_id); + + private: + // Implementation for non-common logic of CloneWithNewOperands. 
+ std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; +}; + +class HloRecvDoneInstruction : public HloSendRecvInstruction { + public: + explicit HloRecvDoneInstruction(HloRecvInstruction* operand); private: // Implementation for non-common logic of CloneWithNewOperands. -- GitLab From 80eb65f367c8a5b8a80e752984e001f2479761d6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 16:17:00 -0700 Subject: [PATCH 029/365] TOCO: return Status instead of crashing while converting "Conv". PiperOrigin-RevId: 199714511 --- .../contrib/lite/toco/import_tensorflow.cc | 87 +++++++++++++------ 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index b13a88a9eb..5cc999314c 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -48,6 +48,12 @@ limitations under the License. #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" +#define TOCO_RETURN_IF_ERROR(...) \ + do { \ + const ::toco::port::Status _status = (__VA_ARGS__); \ + if (!_status.ok()) return _status; \ + } while (0) + using tensorflow::AttrValue; using tensorflow::DT_BOOL; using tensorflow::DT_FLOAT; @@ -130,6 +136,37 @@ const AttrValue::ListValue& GetListAttr(const NodeDef& node, return attr.list(); } +Status CheckOptionalAttr(const NodeDef& node, const string& attr_name, + const string& expected_value) { + if (HasAttr(node, attr_name)) { + const string& value = GetStringAttr(node, attr_name); + if (value != expected_value) { + return Status(false, "Unexpected value for attribute '" + attr_name + + "'. 
Expected '" + expected_value + "'"); + } + } + return Status::OK(); +} +Status CheckOptionalAttr(const NodeDef& node, const string& attr_name, + const tensorflow::DataType& expected_value) { + if (HasAttr(node, attr_name)) { + const tensorflow::DataType& value = GetDataTypeAttr(node, attr_name); + if (value != expected_value) { + return Status(false, "Unexpected value for attribute '" + attr_name + + "'. Expected '" + + tensorflow::DataType_Name(expected_value) + "'"); + } + } + return Status::OK(); +} + +template +Status ExpectValue(const T1& v1, const T2& v2, const string& description) { + if (v1 == v2) return Status::OK(); + return Status(false, absl::StrCat("Unexpected ", description, ": got ", v1, + ", expected ", v2)); +} + ArrayDataType ConvertDataType(tensorflow::DataType dtype) { if (dtype == DT_UINT8) return ArrayDataType::kUint8; @@ -466,18 +503,16 @@ Status ConvertConstOperator(const NodeDef& node, return status; } -void ConvertConvOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { +Status ConvertConvOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { CHECK_EQ(node.op(), "Conv2D"); CheckInputsCount(node, tf_import_flags, 2); // We only support NHWC, which is the default data_format. // So if data_format is not defined, we're all good. 
- if (HasAttr(node, "data_format")) { - CHECK_EQ(GetStringAttr(node, "data_format"), "NHWC"); - } - CHECK_EQ(GetDataTypeAttr(node, "T"), DT_FLOAT); + TOCO_RETURN_IF_ERROR(CheckOptionalAttr(node, "data_format", "NHWC")); + TOCO_RETURN_IF_ERROR(CheckOptionalAttr(node, "T", DT_FLOAT)); const auto& input_name = node.input(0); const auto& weights_name = node.input(1); @@ -502,27 +537,27 @@ void ConvertConvOperator(const NodeDef& node, auto* conv = new ConvOperator; conv->inputs = {input_name, reordered_weights_name}; conv->outputs = {node.name()}; + TOCO_RETURN_IF_ERROR( + Status(HasAttr(node, "strides"), "Missing attribute 'strides'")); const auto& strides = GetListAttr(node, "strides"); - CHECK_EQ(strides.i_size(), 4); - CHECK_EQ(strides.i(0), 1); - CHECK_EQ(strides.i(3), 1); + TOCO_RETURN_IF_ERROR(ExpectValue(strides.i_size(), 4, "number of strides")); + TOCO_RETURN_IF_ERROR(ExpectValue(strides.i(0), 1, "strides(0)")); + TOCO_RETURN_IF_ERROR(ExpectValue(strides.i(3), 1, "strides(3)")); conv->stride_height = strides.i(1); conv->stride_width = strides.i(2); if (HasAttr(node, "dilations")) { const auto& dilations = GetListAttr(node, "dilations"); - CHECK_EQ(dilations.i_size(), 4); - CHECK_EQ(dilations.i(0), 1) - << "Can only import Conv ops with dilation along the height (1st) or " - "width (2nd) axis. TensorFlow op \"" - << node.name() << "\" had dilations:[ " << dilations.i(0) << ", " - << dilations.i(1) << ", " << dilations.i(2) << ", " << dilations.i(3) - << "]."; - CHECK_EQ(dilations.i(3), 1) - << "Can only import Conv ops with dilation along the height (1st) or " - "width (2nd) axis. 
TensorFlow op \"" - << node.name() << "\" had dilations:[ " << dilations.i(0) << ", " - << dilations.i(1) << ", " << dilations.i(2) << ", " << dilations.i(3) - << "]."; + TOCO_RETURN_IF_ERROR( + ExpectValue(dilations.i_size(), 4, "number of dilations")); + if (dilations.i(0) != 1 || dilations.i(3) != 1) { + return Status( + false, absl::StrCat( + "Can only import Conv ops with dilation along the height " + "(1st) or width (2nd) axis. TensorFlow op \"", + node.name(), "\" had dilations:[ ", dilations.i(0), ", ", + dilations.i(1), ", ", dilations.i(2), ", ", dilations.i(3), + "].")); + } conv->dilation_height_factor = dilations.i(1); conv->dilation_width_factor = dilations.i(2); } else { @@ -535,9 +570,11 @@ void ConvertConvOperator(const NodeDef& node, } else if (padding == "VALID") { conv->padding.type = PaddingType::kValid; } else { - LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + return Status(false, "Bad padding (only SAME and VALID are supported)"); } model->operators.emplace_back(conv); + + return Status::OK(); } void ConvertDepthwiseConvOperator(const NodeDef& node, @@ -1722,7 +1759,7 @@ Status ImportTensorFlowNode(const tensorflow::NodeDef& node, if (node.op() == "Const") { return ConvertConstOperator(node, tf_import_flags, model); } else if (node.op() == "Conv2D") { - ConvertConvOperator(node, tf_import_flags, model); + return ConvertConvOperator(node, tf_import_flags, model); } else if (node.op() == "Conv2DBackpropInput") { ConvertTransposeConvOperator(node, tf_import_flags, model); } else if (node.op() == "DepthwiseConv2dNative") { -- GitLab From 82f152ee75261afa3ae59ae7c9e18493d7e8b55e Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Thu, 7 Jun 2018 16:44:51 -0700 Subject: [PATCH 030/365] [data-stats] Adds support to collect `features` and `feature-values` statistics from `Example` record of dataset. 
This change-list also applies transformation function `feature_stats()` to collect stats in an associated stats_aggregator (if any) to dataset in `make_batched_feature_dataset()` by default. PiperOrigin-RevId: 199718439 --- .../contrib/data/python/kernel_tests/BUILD | 28 ++- .../kernel_tests/reader_dataset_ops_test.py | 207 +++-------------- .../reader_dataset_ops_test_base.py | 218 ++++++++++++++++++ .../kernel_tests/stats_dataset_ops_test.py | 45 +++- tensorflow/contrib/data/python/ops/BUILD | 4 +- tensorflow/contrib/data/python/ops/readers.py | 3 + .../contrib/data/python/ops/stats_ops.py | 21 ++ .../api_def_FeatureStatsDataset.pbtxt | 3 + .../api_def_FeatureStatsDataset.pbtxt | 4 + tensorflow/core/kernels/data/BUILD | 1 + .../core/kernels/data/stats_dataset_ops.cc | 185 +++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 12 + 12 files changed, 547 insertions(+), 184 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py create mode 100644 tensorflow/core/api_def/base_api/api_def_FeatureStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FeatureStatsDataset.pbtxt diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index ba707d8d6e..fd15103870 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -330,6 +330,26 @@ py_test( ], ) +py_library( + name = "reader_dataset_ops_test_base", + testonly = 1, + srcs = [ + "reader_dataset_ops_test_base.py", + ], + srcs_version = "PY2AND3", + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/contrib/data/python/ops:readers", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:lib", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:readers", + ], +) + py_test( name = 
"reader_dataset_ops_test", size = "medium", @@ -339,8 +359,8 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", + ":reader_dataset_ops_test_base", "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -352,6 +372,7 @@ py_test( "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", "//third_party/py/numpy", ], ) @@ -478,10 +499,15 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", + ":reader_dataset_ops_test_base", "//tensorflow/contrib/data/python/ops:stats_ops", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", "//tensorflow/python/data/ops:dataset_ops", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index e0237198b7..3b07ef290b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -24,9 +24,8 @@ import zlib import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base from tensorflow.contrib.data.python.ops import readers -from tensorflow.core.example import example_pb2 -from tensorflow.core.example import feature_pb2 from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.ops import readers as core_readers from tensorflow.python.framework import constant_op @@ -280,163 +279,8 @@ def _interleave(iterators, cycle_length): num_open -= 1 -class 
ReadBatchFeaturesTest(test.TestCase): - - def setUp(self): - super(ReadBatchFeaturesTest, self).setUp() - self._num_files = 2 - self._num_records = 7 - self.test_filenames = self._createFiles() - - def _read_batch_features(self, - filenames, - num_epochs, - batch_size, - reader_num_threads=1, - parser_num_threads=1, - shuffle=False, - shuffle_seed=None, - drop_final_batch=False): - self.filenames = filenames - self.num_epochs = num_epochs - self.batch_size = batch_size - - return readers.make_batched_features_dataset( - file_pattern=self.filenames, - batch_size=self.batch_size, - features={ - "file": parsing_ops.FixedLenFeature([], dtypes.int64), - "record": parsing_ops.FixedLenFeature([], dtypes.int64), - "keywords": parsing_ops.VarLenFeature(dtypes.string) - }, - reader=core_readers.TFRecordDataset, - num_epochs=self.num_epochs, - shuffle=shuffle, - shuffle_seed=shuffle_seed, - reader_num_threads=reader_num_threads, - parser_num_threads=parser_num_threads, - drop_final_batch=drop_final_batch).make_one_shot_iterator( - ).get_next() - - def _record(self, f, r): - example = example_pb2.Example( - features=feature_pb2.Features( - feature={ - "file": - feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[f])), - "record": - feature_pb2.Feature( - int64_list=feature_pb2.Int64List(value=[r])), - "keywords": - feature_pb2.Feature( - bytes_list=feature_pb2.BytesList( - value=self._get_keywords(f, r))) - })) - return example.SerializeToString() - - def _get_keywords(self, f, r): - num_keywords = 1 + (f + r) % 2 - keywords = [] - for index in range(num_keywords): - keywords.append(compat.as_bytes("keyword%d" % index)) - return keywords - - def _createFiles(self): - filenames = [] - for i in range(self._num_files): - fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i) - filenames.append(fn) - writer = python_io.TFRecordWriter(fn) - for j in range(self._num_records): - writer.write(self._record(i, j)) - writer.close() - return filenames - - def 
_run_actual_batch(self, outputs, sess): - file_op = outputs["file"] - keywords_indices_op = outputs["keywords"].indices - keywords_values_op = outputs["keywords"].values - keywords_dense_shape_op = outputs["keywords"].dense_shape - record_op = outputs["record"] - return sess.run([ - file_op, keywords_indices_op, keywords_values_op, - keywords_dense_shape_op, record_op - ]) - - def _next_actual_batch(self, sess): - return self._run_actual_batch(self.outputs, sess) - - def _next_expected_batch(self, - file_indices, - batch_size, - num_epochs, - cycle_length=1): - - def _next_record(file_indices): - for j in file_indices: - for i in range(self._num_records): - yield j, i - - def _next_record_interleaved(file_indices, cycle_length): - return _interleave([_next_record([i]) for i in file_indices], - cycle_length) - - file_batch = [] - keywords_batch_indices = [] - keywords_batch_values = [] - keywords_batch_max_len = 0 - record_batch = [] - batch_index = 0 - for _ in range(num_epochs): - if cycle_length == 1: - next_records = _next_record(file_indices) - else: - next_records = _next_record_interleaved(file_indices, cycle_length) - for record in next_records: - f = record[0] - r = record[1] - file_batch.append(f) - record_batch.append(r) - keywords = self._get_keywords(f, r) - keywords_batch_values.extend(keywords) - keywords_batch_indices.extend( - [[batch_index, i] for i in range(len(keywords))]) - batch_index += 1 - keywords_batch_max_len = max(keywords_batch_max_len, len(keywords)) - if len(file_batch) == batch_size: - yield [ - file_batch, keywords_batch_indices, keywords_batch_values, - [batch_size, keywords_batch_max_len], record_batch - ] - file_batch = [] - keywords_batch_indices = [] - keywords_batch_values = [] - keywords_batch_max_len = 0 - record_batch = [] - batch_index = 0 - if file_batch: - yield [ - file_batch, keywords_batch_indices, keywords_batch_values, - [len(file_batch), keywords_batch_max_len], record_batch - ] - - def _verify_records(self, - sess, 
- batch_size, - file_index=None, - num_epochs=1, - interleave_cycle_length=1): - if file_index is not None: - file_indices = [file_index] - else: - file_indices = range(self._num_files) - - for expected_batch in self._next_expected_batch( - file_indices, batch_size, num_epochs, interleave_cycle_length): - actual_batch = self._next_actual_batch(sess) - for i in range(len(expected_batch)): - self.assertAllEqual(expected_batch[i], actual_batch[i]) +class ReadBatchFeaturesTest( + reader_dataset_ops_test_base.ReadBatchFeaturesTestBase): def testRead(self): for batch_size in [1, 2]: @@ -444,33 +288,33 @@ class ReadBatchFeaturesTest(test.TestCase): with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: # Basic test: read from file 0. - self.outputs = self._read_batch_features( + self.outputs = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, - batch_size=batch_size) - self._verify_records(sess, batch_size, 0, num_epochs=num_epochs) + batch_size=batch_size).make_one_shot_iterator().get_next() + self.verify_records(sess, batch_size, 0, num_epochs=num_epochs) with self.assertRaises(errors.OutOfRangeError): self._next_actual_batch(sess) with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: # Basic test: read from file 1. - self.outputs = self._read_batch_features( + self.outputs = self.make_batch_feature( filenames=self.test_filenames[1], num_epochs=num_epochs, - batch_size=batch_size) - self._verify_records(sess, batch_size, 1, num_epochs=num_epochs) + batch_size=batch_size).make_one_shot_iterator().get_next() + self.verify_records(sess, batch_size, 1, num_epochs=num_epochs) with self.assertRaises(errors.OutOfRangeError): self._next_actual_batch(sess) with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: # Basic test: read from both files. 
- self.outputs = self._read_batch_features( + self.outputs = self.make_batch_feature( filenames=self.test_filenames, num_epochs=num_epochs, - batch_size=batch_size) - self._verify_records(sess, batch_size, num_epochs=num_epochs) + batch_size=batch_size).make_one_shot_iterator().get_next() + self.verify_records(sess, batch_size, num_epochs=num_epochs) with self.assertRaises(errors.OutOfRangeError): self._next_actual_batch(sess) @@ -504,18 +348,18 @@ class ReadBatchFeaturesTest(test.TestCase): # Test that shuffling with same seed produces the same result. with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: - outputs1 = self._read_batch_features( + outputs1 = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, batch_size=batch_size, shuffle=True, - shuffle_seed=5) - outputs2 = self._read_batch_features( + shuffle_seed=5).make_one_shot_iterator().get_next() + outputs2 = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, batch_size=batch_size, shuffle=True, - shuffle_seed=5) + shuffle_seed=5).make_one_shot_iterator().get_next() for _ in range(total_records // batch_size): batch1 = self._run_actual_batch(outputs1, sess) batch2 = self._run_actual_batch(outputs2, sess) @@ -525,18 +369,18 @@ class ReadBatchFeaturesTest(test.TestCase): # Test that shuffling with different seeds produces a different order. 
with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: - outputs1 = self._read_batch_features( + outputs1 = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, batch_size=batch_size, shuffle=True, - shuffle_seed=5) - outputs2 = self._read_batch_features( + shuffle_seed=5).make_one_shot_iterator().get_next() + outputs2 = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, batch_size=batch_size, shuffle=True, - shuffle_seed=15) + shuffle_seed=15).make_one_shot_iterator().get_next() all_equal = True for _ in range(total_records // batch_size): batch1 = self._run_actual_batch(outputs1, sess) @@ -552,13 +396,14 @@ class ReadBatchFeaturesTest(test.TestCase): for parser_num_threads in [2, 4]: with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: - self.outputs = self._read_batch_features( + self.outputs = self.make_batch_feature( filenames=self.test_filenames, num_epochs=num_epochs, batch_size=batch_size, reader_num_threads=reader_num_threads, - parser_num_threads=parser_num_threads) - self._verify_records( + parser_num_threads=parser_num_threads).make_one_shot_iterator( + ).get_next() + self.verify_records( sess, batch_size, num_epochs=num_epochs, @@ -571,11 +416,11 @@ class ReadBatchFeaturesTest(test.TestCase): for num_epochs in [1, 10]: with ops.Graph().as_default(): # Basic test: read from file 0. - self.outputs = self._read_batch_features( + self.outputs = self.make_batch_feature( filenames=self.test_filenames[0], num_epochs=num_epochs, batch_size=batch_size, - drop_final_batch=True) + drop_final_batch=True).make_one_shot_iterator().get_next() for _, tensor in self.outputs.items(): if isinstance(tensor, ops.Tensor): # Guard against SparseTensor. 
self.assertEqual(tensor.shape[0], batch_size) diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py new file mode 100644 index 0000000000..805a7c7b73 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py @@ -0,0 +1,218 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.data.python.ops import readers +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.ops import readers as core_readers +from tensorflow.python.framework import dtypes +from tensorflow.python.lib.io import python_io +from tensorflow.python.ops import parsing_ops +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class ReadBatchFeaturesTestBase(test.TestCase): + """Base class for setting up and testing `make_batched_feature_dataset`.""" + + def setUp(self): + super(ReadBatchFeaturesTestBase, self).setUp() + self._num_files = 2 + self._num_records = 7 + self.test_filenames = self._createFiles() + 
+ def make_batch_feature(self, + filenames, + num_epochs, + batch_size, + reader_num_threads=1, + parser_num_threads=1, + shuffle=False, + shuffle_seed=None, + drop_final_batch=False): + self.filenames = filenames + self.num_epochs = num_epochs + self.batch_size = batch_size + + return readers.make_batched_features_dataset( + file_pattern=self.filenames, + batch_size=self.batch_size, + features={ + "file": parsing_ops.FixedLenFeature([], dtypes.int64), + "record": parsing_ops.FixedLenFeature([], dtypes.int64), + "keywords": parsing_ops.VarLenFeature(dtypes.string) + }, + reader=core_readers.TFRecordDataset, + num_epochs=self.num_epochs, + shuffle=shuffle, + shuffle_seed=shuffle_seed, + reader_num_threads=reader_num_threads, + parser_num_threads=parser_num_threads, + drop_final_batch=drop_final_batch) + + def _record(self, f, r): + example = example_pb2.Example( + features=feature_pb2.Features( + feature={ + "file": + feature_pb2.Feature( + int64_list=feature_pb2.Int64List(value=[f])), + "record": + feature_pb2.Feature( + int64_list=feature_pb2.Int64List(value=[r])), + "keywords": + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=self._get_keywords(f, r))) + })) + return example.SerializeToString() + + def _get_keywords(self, f, r): + num_keywords = 1 + (f + r) % 2 + keywords = [] + for index in range(num_keywords): + keywords.append(compat.as_bytes("keyword%d" % index)) + return keywords + + def _sum_keywords(self, num_files): + sum_keywords = 0 + for i in range(num_files): + for j in range(self._num_records): + sum_keywords += 1 + (i + j) % 2 + return sum_keywords + + def _createFiles(self): + filenames = [] + for i in range(self._num_files): + fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i) + filenames.append(fn) + writer = python_io.TFRecordWriter(fn) + for j in range(self._num_records): + writer.write(self._record(i, j)) + writer.close() + return filenames + + def _run_actual_batch(self, outputs, sess): + file_op = 
outputs["file"] + keywords_indices_op = outputs["keywords"].indices + keywords_values_op = outputs["keywords"].values + keywords_dense_shape_op = outputs["keywords"].dense_shape + record_op = outputs["record"] + return sess.run([ + file_op, keywords_indices_op, keywords_values_op, + keywords_dense_shape_op, record_op + ]) + + def _next_actual_batch(self, sess): + return self._run_actual_batch(self.outputs, sess) + + def _interleave(self, iterators, cycle_length): + pending_iterators = iterators + open_iterators = [] + num_open = 0 + for i in range(cycle_length): + if pending_iterators: + open_iterators.append(pending_iterators.pop(0)) + num_open += 1 + + while num_open: + for i in range(min(cycle_length, len(open_iterators))): + if open_iterators[i] is None: + continue + try: + yield next(open_iterators[i]) + except StopIteration: + if pending_iterators: + open_iterators[i] = pending_iterators.pop(0) + else: + open_iterators[i] = None + num_open -= 1 + + def _next_expected_batch(self, + file_indices, + batch_size, + num_epochs, + cycle_length=1): + + def _next_record(file_indices): + for j in file_indices: + for i in range(self._num_records): + yield j, i + + def _next_record_interleaved(file_indices, cycle_length): + return self._interleave([_next_record([i]) for i in file_indices], + cycle_length) + + file_batch = [] + keywords_batch_indices = [] + keywords_batch_values = [] + keywords_batch_max_len = 0 + record_batch = [] + batch_index = 0 + for _ in range(num_epochs): + if cycle_length == 1: + next_records = _next_record(file_indices) + else: + next_records = _next_record_interleaved(file_indices, cycle_length) + for record in next_records: + f = record[0] + r = record[1] + file_batch.append(f) + record_batch.append(r) + keywords = self._get_keywords(f, r) + keywords_batch_values.extend(keywords) + keywords_batch_indices.extend( + [[batch_index, i] for i in range(len(keywords))]) + batch_index += 1 + keywords_batch_max_len = max(keywords_batch_max_len, 
len(keywords)) + if len(file_batch) == batch_size: + yield [ + file_batch, keywords_batch_indices, keywords_batch_values, + [batch_size, keywords_batch_max_len], record_batch + ] + file_batch = [] + keywords_batch_indices = [] + keywords_batch_values = [] + keywords_batch_max_len = 0 + record_batch = [] + batch_index = 0 + if file_batch: + yield [ + file_batch, keywords_batch_indices, keywords_batch_values, + [len(file_batch), keywords_batch_max_len], record_batch + ] + + def verify_records(self, + sess, + batch_size, + file_index=None, + num_epochs=1, + interleave_cycle_length=1): + if file_index is not None: + file_indices = [file_index] + else: + file_indices = range(self._num_files) + + for expected_batch in self._next_expected_batch( + file_indices, batch_size, num_epochs, interleave_cycle_length): + actual_batch = self._next_actual_batch(sess) + for i in range(len(expected_batch)): + self.assertAllEqual(expected_batch[i], actual_batch[i]) diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py index 5c74ed6ae7..17b6644759 100644 --- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base from tensorflow.contrib.data.python.ops import stats_ops from tensorflow.core.framework import summary_pb2 from tensorflow.python.data.ops import dataset_ops @@ -29,7 +30,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import test -class StatsDatasetTest(test.TestCase): +class StatsDatasetTestBase(test.TestCase): def _assertSummaryHasCount(self, summary_str, tag, expected_value): summary_proto = 
summary_pb2.Summary() @@ -49,6 +50,9 @@ class StatsDatasetTest(test.TestCase): return self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) + +class StatsDatasetTest(StatsDatasetTestBase): + def testBytesProduced(self): stats_aggregator = stats_ops.StatsAggregator() dataset = dataset_ops.Dataset.range(100).map( @@ -193,6 +197,45 @@ class StatsDatasetTest(test.TestCase): self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) +class FeatureStatsDatasetTest( + StatsDatasetTestBase, + reader_dataset_ops_test_base.ReadBatchFeaturesTestBase): + + def testFeaturesStats(self): + num_epochs = 5 + total_records = num_epochs * self._num_records + batch_size = 2 + stats_aggregator = stats_ops.StatsAggregator() + dataset = self.make_batch_feature( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + shuffle=True, + shuffle_seed=5, + drop_final_batch=True).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for _ in range(total_records // batch_size): + sess.run(next_element) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount( + sess.run(summary_t), "record_stats:features", total_records) + self._assertSummaryHasCount( + sess.run(summary_t), "record_stats:feature-values", total_records) + self._assertSummaryHasSum( + sess.run(summary_t), "record_stats:features", total_records * 3) + self._assertSummaryHasSum( + sess.run(summary_t), "record_stats:feature-values", + self._sum_keywords(1) * num_epochs + 2 * total_records) + + class StatsDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 
086661adb7..fc8ec5961c 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -96,8 +96,10 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", + ":gen_dataset_ops", ":interleave_ops", ":shuffle_ops", + ":stats_ops", "//tensorflow/python:constant_op", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", @@ -106,12 +108,12 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:platform", - "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:readers", + "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index f938153f5f..83095c7ba1 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -26,6 +26,7 @@ from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import shuffle_ops +from tensorflow.contrib.data.python.ops import stats_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers as core_readers from tensorflow.python.data.util import convert @@ -754,6 +755,8 @@ def make_batched_features_dataset(file_pattern, dataset = _maybe_shuffle_and_repeat( dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed) + dataset = dataset.apply(stats_ops.feature_stats("record_stats")) + if drop_final_batch: dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size)) else: diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py 
b/tensorflow/contrib/data/python/ops/stats_ops.py index 3cbaab5aff..8c30202ba7 100644 --- a/tensorflow/contrib/data/python/ops/stats_ops.py +++ b/tensorflow/contrib/data/python/ops/stats_ops.py @@ -176,6 +176,27 @@ def latency_stats(tag): return _apply_fn +def feature_stats(tag): + """Records the features stats from `Example` records of the input dataset. + + To consume the statistics, associate a `StatsAggregator` with the output + dataset. + + Args: + tag: String. All statistics recorded by the returned transformation will be + associated with the given `tag`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _StatsDataset(dataset, gen_dataset_ops.feature_stats_dataset, tag) + + return _apply_fn + + class _StatsDataset(dataset_ops.Dataset): """A `Dataset` that acts as an identity, and also records statistics.""" diff --git a/tensorflow/core/api_def/base_api/api_def_FeatureStatsDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_FeatureStatsDataset.pbtxt new file mode 100644 index 0000000000..ffd01ba5cc --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_FeatureStatsDataset.pbtxt @@ -0,0 +1,3 @@ +op { + graph_op_name: "FeatureStatsDataset" +} diff --git a/tensorflow/core/api_def/python_api/api_def_FeatureStatsDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_FeatureStatsDataset.pbtxt new file mode 100644 index 0000000000..7f721f4fb7 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FeatureStatsDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FeatureStatsDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index da330e742e..6d2a04aa25 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -358,6 +358,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + 
"//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/core/kernels/data/stats_dataset_ops.cc b/tensorflow/core/kernels/data/stats_dataset_ops.cc index 7370a24b38..3e0a6ae049 100644 --- a/tensorflow/core/kernels/data/stats_dataset_ops.cc +++ b/tensorflow/core/kernels/data/stats_dataset_ops.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/example/feature.pb.h" #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/stats_aggregator.h" #include "tensorflow/core/framework/tensor.h" @@ -234,6 +236,189 @@ class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel { }; }; +class FeatureStatsDatasetOp : public UnaryDatasetOpKernel { + public: + explicit FeatureStatsDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + string tag; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag)); + OP_REQUIRES(ctx, input->output_dtypes()[0] == DT_STRING, + errors::InvalidArgument("FeatureStatsDataset only supports " + "input with a single `tf.string` " + "component.")); + *output = new Dataset(ctx, input, std::move(tag)); + } + + private: + class Dataset : public GraphDatasetBase { + public: + explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, string tag) + : GraphDatasetBase(ctx), input_(input), tag_(std::move(tag)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + {this, strings::StrCat(prefix, "::FeatureStatsDataset")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const 
std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() const override { + return "FeatureStatsDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_node; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_node)); + Node* tag_node; + TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node)); + TF_RETURN_IF_ERROR(b->AddDataset(this, {input_node, tag_node}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + Status Initialize(IteratorContext* ctx) override { + return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + tf_shared_lock l(mu_); + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + auto stats_aggregator = ctx->stats_aggregator(); + if (stats_aggregator && s.ok() && !*end_of_sequence) { + for (const Tensor& t : *out_tensors) { + auto record_t = t.flat(); + Example example; + // TODO(shivaniagrawal): redundant parsing here, potential solutions + // to improve performance is to a) have a potential + // ParseExampleDataset and collect stats from there and b) make + // changes to parse_example() where it returns stats as well. + for (int i = 0; i < record_t.size(); ++i) { + if (example.ParseFromString(record_t(i))) { + AddStatsFeatures(example, stats_aggregator); + } else { + SequenceExample sequence_example; + if (sequence_example.ParseFromString(record_t(i))) { + AddStatsFeatures(sequence_example, stats_aggregator); + } + } + } + } + } + return s; + } + + // TODO(shivaniagrawal): Add features/feature-values to streamz metrics. 
+ int AddStatsFeatureValues(const Feature& feature) { + int feature_values_list_size = 0; + switch (feature.kind_case()) { + case Feature::kBytesList: { + feature_values_list_size = feature.bytes_list().value().size(); + break; + } + case Feature::kFloatList: { + feature_values_list_size = feature.float_list().value().size(); + break; + } + case Feature::kInt64List: { + feature_values_list_size = feature.int64_list().value().size(); + break; + } + case Feature::KIND_NOT_SET: + break; + } + return feature_values_list_size; + } + + void AddStatsFeatures( + const Example& example, + const std::shared_ptr& stats_aggregator) { + stats_aggregator->AddToHistogram( + strings::StrCat(dataset()->tag_, ":features"), + {static_cast(example.features().feature().size())}); + + int feature_values_list_size_sum = 0; + for (const auto& feature : example.features().feature()) { + feature_values_list_size_sum += AddStatsFeatureValues(feature.second); + } + stats_aggregator->AddToHistogram( + strings::StrCat(dataset()->tag_, ":feature-values"), + {static_cast(feature_values_list_size_sum)}); + } + + void AddStatsFeatures( + const SequenceExample& example, + const std::shared_ptr& stats_aggregator) { + stats_aggregator->AddToHistogram( + strings::StrCat(dataset()->tag_, ":features"), + {static_cast( + example.context().feature().size() + + example.feature_lists().feature_list().size())}); + + int feature_values_list_size_sum = 0; + for (const auto& feature : example.context().feature()) { + feature_values_list_size_sum += AddStatsFeatureValues(feature.second); + } + + for (const auto& feature_list : + example.feature_lists().feature_list()) { + for (const auto& feature : feature_list.second.feature()) { + feature_values_list_size_sum += AddStatsFeatureValues(feature); + } + } + + stats_aggregator->AddToHistogram( + strings::StrCat(dataset()->tag_, ":feature-values"), + {static_cast(feature_values_list_size_sum)}); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) 
override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + return Status::OK(); + } + + private: + mutex mu_; + std::unique_ptr input_impl_ GUARDED_BY(mu_); + }; + + const DatasetBase* const input_; + const string tag_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("FeatureStatsDataset").Device(DEVICE_CPU), + FeatureStatsDatasetOp); REGISTER_KERNEL_BUILDER(Name("LatencyStatsDataset").Device(DEVICE_CPU), LatencyStatsDatasetOp); REGISTER_KERNEL_BUILDER(Name("BytesProducedStatsDataset").Device(DEVICE_CPU), diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 9bc6c9a30d..0e13d41977 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -166,6 +166,18 @@ REGISTER_OP("LatencyStatsDataset") return shape_inference::ScalarShape(c); }); +REGISTER_OP("FeatureStatsDataset") + .Input("input_dataset: variant") + .Input("tag: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle tag_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &tag_shape)); + return shape_inference::ScalarShape(c); + }); + REGISTER_OP("SetStatsAggregatorDataset") .Input("input_dataset: variant") .Input("stats_aggregator: resource") -- GitLab From 2bf2799ee80791107d4fe587ff9b6c7cf6c8b418 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 7 Jun 2018 16:49:27 -0700 Subject: [PATCH 031/365] C API: Fail gracefully if the serialized graph would be too large. See #19657 for some motivation. 
Without this explicit check, a large graph would trigger an assertion failure in the protobuf codebase (https://github.com/google/protobuf/blob/0456e269ee6505766474aa8d7b8bba7ac047f457/src/google/protobuf/message_lite.cc#L68) Pull Request for google/protobuf: https://github.com/google/protobuf/pull/4739 PiperOrigin-RevId: 199719082 --- tensorflow/c/c_api.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index b86b277ac3..cb0b093ad2 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -631,7 +631,22 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in, "Failed to allocate memory to serialize message of type '", in.GetTypeName(), "' and size ", proto_size); } - in.SerializeToArray(buf, proto_size); + // SerializeToArray takes size as an int. + // This next 'if' is a workaround till we update to depend on a version + // of protocol buffers that includes + // https://github.com/google/protobuf/pull/4739 + if (proto_size > std::numeric_limits::max()) { + return InvalidArgument("Cannot serialize protocol buffer of type ", + in.GetTypeName(), " as the serialized size (", + proto_size, + "bytes) would be larger than the limit (", + std::numeric_limits::max(), " bytes)"); + } + if (!in.SerializeToArray(buf, proto_size)) { + return InvalidArgument("Unable to serialize ", in.GetTypeName(), + " protocol buffer, perhaps the serialized size (", + proto_size, " bytes) is too large?"); + } out->data = buf; out->length = proto_size; out->data_deallocator = [](void* data, size_t length) { -- GitLab From 3bb7a913be6ba47df6fb1796dd8ce639cdbf1608 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 17:18:10 -0700 Subject: [PATCH 032/365] Update ops-related pbtxt files. 
PiperOrigin-RevId: 199722844 --- .../core/ops/compat/ops_history.v1.pbtxt | 27 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 27 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 1b4bec7bc8..71f34b3abe 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -22112,6 +22112,33 @@ op { } is_stateful: true } +op { + name: "FeatureStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Fill" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 1dfaeeabad..718c1510ed 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -10269,6 +10269,33 @@ op { } is_stateful: true } +op { + name: "FeatureStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Fill" input_arg { -- GitLab From 138e790ab9cb778430168d2b5f6abac1501aa2d8 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 7 Jun 2018 17:19:25 -0700 Subject: [PATCH 033/365] [XLA] Handle kSlice correctly in HloCostAnalysis Slice doesn't read the entire input. It only reads enough to make the output. 
PiperOrigin-RevId: 199722987 --- .../compiler/xla/service/hlo_cost_analysis.cc | 3 ++- .../xla/service/hlo_cost_analysis_test.cc | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 94c9c7eabc..b9d30ee802 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -172,7 +172,8 @@ Status HloCostAnalysis::HandleReverse(const HloInstruction*) { return Status::OK(); } -Status HloCostAnalysis::HandleSlice(const HloInstruction*) { +Status HloCostAnalysis::HandleSlice(const HloInstruction* slice) { + current_properties_[kBytesAccessedKey] = shape_size_(slice->shape()) * 2; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index 16fdda8a8b..72adf09c83 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -460,5 +460,20 @@ TEST_F(HloCostAnalysisTest, BaseDilatedConvolution) { EXPECT_EQ(analysis.flop_count(), 1472); } +TEST_F(HloCostAnalysisTest, Slice) { + // Test the analysis on a slice. + XlaBuilder builder("slice"); + auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2}), "x"); + auto slice = builder.Slice(x, {0}, {1}, {1}); + auto hlo_module = BuildHloGraph(&builder); + + // Run HLO cost analysis. + HloCostAnalysis analysis(ShapeSize); + ASSERT_IS_OK( + hlo_module->entry_computation()->root_instruction()->Accept(&analysis)); + + EXPECT_EQ(analysis.bytes_accessed(), 8); +} + } // namespace } // namespace xla -- GitLab From fba60ec27f4d415dafdf2ee916e2aa2004fa9635 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 17:50:34 -0700 Subject: [PATCH 034/365] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 199726426 --- tensorflow/go/op/wrappers.go | 196 +++++++++++++++++------------------ 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 6fc7087cb1..cdfd4b30e6 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -7579,6 +7579,69 @@ func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_s return op.Output(0) } +// Returns immutable tensor from memory region. +// +// The current implementation memmaps the tensor from a file. +// +// Arguments: +// dtype: Type of the returned tensor. +// shape: Shape of the returned tensor. +// memory_region_name: Name of readonly memory region used by the tensor, see +// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. +func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} + opspec := tf.OpSpec{ + Type: "ImmutableConst", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StringJoinAttr is an optional argument to StringJoin. +type StringJoinAttr func(optionalAttr) + +// StringJoinSeparator sets the optional separator attribute to value. +// +// value: string, an optional join separator. +// If not specified, defaults to "" +func StringJoinSeparator(value string) StringJoinAttr { + return func(m optionalAttr) { + m["separator"] = value + } +} + +// Joins the strings in the given list of string tensors into one tensor; +// +// with the given separator (default is an empty separator). +// +// Arguments: +// inputs: A list of string tensors. The tensors must all have the same shape, +// or be scalars. Scalars may be mixed in; these will be broadcast to the shape +// of non-scalar inputs. 
+func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StringJoin", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // LRNGradAttr is an optional argument to LRNGrad. type LRNGradAttr func(optionalAttr) @@ -17648,69 +17711,6 @@ func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.D return op.Output(0), op.Output(1), op.Output(2) } -// StringJoinAttr is an optional argument to StringJoin. -type StringJoinAttr func(optionalAttr) - -// StringJoinSeparator sets the optional separator attribute to value. -// -// value: string, an optional join separator. -// If not specified, defaults to "" -func StringJoinSeparator(value string) StringJoinAttr { - return func(m optionalAttr) { - m["separator"] = value - } -} - -// Joins the strings in the given list of string tensors into one tensor; -// -// with the given separator (default is an empty separator). -// -// Arguments: -// inputs: A list of string tensors. The tensors must all have the same shape, -// or be scalars. Scalars may be mixed in; these will be broadcast to the shape -// of non-scalar inputs. -func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringJoin", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns immutable tensor from memory region. -// -// The current implementation memmaps the tensor from a file. -// -// Arguments: -// dtype: Type of the returned tensor. -// shape: Shape of the returned tensor. 
-// memory_region_name: Name of readonly memory region used by the tensor, see -// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. -func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} - opspec := tf.OpSpec{ - Type: "ImmutableConst", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Inverse real-valued fast Fourier transform. // // Computes the inverse 1-dimensional discrete Fourier transform of a real-valued @@ -25053,6 +25053,41 @@ func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, o return op.Output(0) } +// Runs multiple additive regression ensemble predictors on input instances and +// +// computes the update to cached logits. It is designed to be used during training. +// It traverses the trees starting from cached tree id and cached node id and +// calculates the updates to be pushed to the cache. +// +// Arguments: +// +// cached_tree_ids: Rank 1 Tensor containing cached tree ids which is the starting +// tree of prediction. +// cached_node_ids: Rank 1 Tensor containing cached node id which is the starting +// node of prediction. +// bucketized_features: A list of rank 1 Tensors containing bucket id for each +// feature. +// logits_dimension: scalar, dimension of the logits, to be used for partial logits +// shape. +// +// Returns Rank 2 Tensor containing logits update (with respect to cached +// values stored) for each example.Rank 1 Tensor containing new tree ids for each example.Rank 1 Tensor containing new node ids in the new tree_ids. 
+func BoostedTreesTrainingPredict(scope *Scope, tree_ensemble_handle tf.Output, cached_tree_ids tf.Output, cached_node_ids tf.Output, bucketized_features []tf.Output, logits_dimension int64) (partial_logits tf.Output, tree_ids tf.Output, node_ids tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"logits_dimension": logits_dimension} + opspec := tf.OpSpec{ + Type: "BoostedTreesTrainingPredict", + Input: []tf.Input{ + tree_ensemble_handle, cached_tree_ids, cached_node_ids, tf.OutputList(bucketized_features), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // MapSizeAttr is an optional argument to MapSize. type MapSizeAttr func(optionalAttr) @@ -29812,41 +29847,6 @@ func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Outpu return scope.AddOperation(opspec) } -// Runs multiple additive regression ensemble predictors on input instances and -// -// computes the update to cached logits. It is designed to be used during training. -// It traverses the trees starting from cached tree id and cached node id and -// calculates the updates to be pushed to the cache. -// -// Arguments: -// -// cached_tree_ids: Rank 1 Tensor containing cached tree ids which is the starting -// tree of prediction. -// cached_node_ids: Rank 1 Tensor containing cached node id which is the starting -// node of prediction. -// bucketized_features: A list of rank 1 Tensors containing bucket id for each -// feature. -// logits_dimension: scalar, dimension of the logits, to be used for partial logits -// shape. -// -// Returns Rank 2 Tensor containing logits update (with respect to cached -// values stored) for each example.Rank 1 Tensor containing new tree ids for each example.Rank 1 Tensor containing new node ids in the new tree_ids. 
-func BoostedTreesTrainingPredict(scope *Scope, tree_ensemble_handle tf.Output, cached_tree_ids tf.Output, cached_node_ids tf.Output, bucketized_features []tf.Output, logits_dimension int64) (partial_logits tf.Output, tree_ids tf.Output, node_ids tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesTrainingPredict", - Input: []tf.Input{ - tree_ensemble_handle, cached_tree_ids, cached_node_ids, tf.OutputList(bucketized_features), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Elementwise computes the bitwise AND of `x` and `y`. // // The result will have those bits set, that are set in both `x` and `y`. The -- GitLab From b941a031e8a2eb67e0083d8aa6ffe5a3ffe96f7b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 18:07:36 -0700 Subject: [PATCH 035/365] Pass checkpoint_path to predicate functions for experiment.continuous_eval even in the case of falsy eval_results PiperOrigin-RevId: 199728382 --- tensorflow/contrib/learn/python/learn/experiment.py | 2 +- tensorflow/contrib/learn/python/learn/experiment_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 541da90617..f8a3709ee5 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -505,7 +505,7 @@ class Experiment(object): eval_result = None last_warning_time = 0 while (not predicate_fn or predicate_fn( - eval_result, checkpoint_path=previous_path if eval_result else None)): + eval_result, checkpoint_path=previous_path)): # Exit if we have already reached number of steps to train. 
if self._has_training_stopped(eval_result): logging.info("Exiting continuous eval, global_step=%s >= " diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index d10927a0cd..fb16c94c29 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -500,7 +500,7 @@ class ExperimentTest(test.TestCase): noop_hook = _NoopHook() def _predicate_fn(eval_result, checkpoint_path): - self.assertEqual(not eval_result, + self.assertEqual(eval_result is None, checkpoint_path is None) return est.eval_count < 3 # pylint: disable=cell-var-from-loop -- GitLab From 7b9c723c8f5f732f014ba181daf0b96747f291a9 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 7 Jun 2018 18:19:32 -0700 Subject: [PATCH 036/365] Java: Release 1.9.0-rc0 (and update protbuf dependency) PiperOrigin-RevId: 199729533 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 4 ++-- tensorflow/java/maven/run_inside_container.sh | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 08cc860f57..38e87b1639 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index fcc7eacc33..36c984e280 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 ../ libtensorflow_jni diff 
--git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index 3d22d86a49..4c846de05a 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a09a5ea7c..f2a0a97eae 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 77ec6a0ddb..eb0a952c7d 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 ../ proto @@ -16,7 +16,7 @@ com.google.protobuf protobuf-java - 3.3.1 + 3.5.1 diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 6136ccfdfb..bf19c09b1d 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -31,7 +31,7 @@ if [[ "${TF_VERSION}" == *"-SNAPSHOT" ]]; then # Bintray does not allow snapshots. 
DEPLOY_BINTRAY="false" fi -PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" +PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip" if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then echo "Must deploy to at least one of Bintray or OSSRH" >&2 exit 2 diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 0df1f28149..48668a47f2 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.8.0 + 1.9.0-rc0 ../ tensorflow -- GitLab From 2f41346cbc0c8ecb915983a1f8711fd0d0ccc50e Mon Sep 17 00:00:00 2001 From: Vinu Rajashekhar Date: Thu, 7 Jun 2018 18:21:25 -0700 Subject: [PATCH 037/365] Changes the batch_function decorator implementation to use the newly added BatchFunction op. o Renames the previous version to batch_function_v1. PiperOrigin-RevId: 199729701 --- tensorflow/contrib/batching/__init__.py | 1 + .../contrib/batching/python/ops/batch_ops.py | 69 +++++++++++++++++++ .../batching/python/ops/batch_ops_test.py | 50 ++++++++++++++ 3 files changed, 120 insertions(+) diff --git a/tensorflow/contrib/batching/__init__.py b/tensorflow/contrib/batching/__init__.py index 44fa5f42a7..1e503a097a 100644 --- a/tensorflow/contrib/batching/__init__.py +++ b/tensorflow/contrib/batching/__init__.py @@ -14,6 +14,7 @@ # ============================================================================== """Ops and modules related to batch. 
+@@batch_function_v1 @@batch_function """ from __future__ import absolute_import diff --git a/tensorflow/contrib/batching/python/ops/batch_ops.py b/tensorflow/contrib/batching/python/ops/batch_ops.py index 921d6917a4..012a51f711 100644 --- a/tensorflow/contrib/batching/python/ops/batch_ops.py +++ b/tensorflow/contrib/batching/python/ops/batch_ops.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_batch_ops # go/tf-wildcard-import @@ -83,6 +84,74 @@ def batch_function(num_batch_threads, SparseTensor is not supported. The return value of the decorated function must be a Tensor or a list/tuple of Tensors. + Args: + num_batch_threads: Number of scheduling threads for processing batches + of work. Determines the number of batches processed in parallel. + max_batch_size: Batch sizes will never be bigger than this. + batch_timeout_micros: Maximum number of microseconds to wait before + outputting an incomplete batch. + allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, + does nothing. Otherwise, supplies a list of batch sizes, causing the op + to pad batches up to one of those sizes. The entries must increase + monotonically, and the final entry must equal max_batch_size. + grad_timeout_micros: The timeout to use for the gradient. See the + documentation of the unbatch op for more details. Defaults to 60s. + unbatch_timeout_micros: The timeout to use for unbatching. See the + documentation of the unbatch op for more details. Defaults to 60s. + max_enqueued_batches: The maximum depth of the batch queue. Defaults to 10. + + Returns: + The decorated function will return the unbatched computation output Tensors. 
+ """ + + def decorator(fn): # pylint: disable=missing-docstring + + def decorated(*args): # pylint: disable=missing-docstring + types = [arg.dtype for arg in args] + + @function.Defun(*types) + def computation(*computation_args): + return fn(*computation_args) + + with ops.name_scope("batch") as name: + for a in args: + if not isinstance(a, ops.Tensor): + raise ValueError("All arguments to functions decorated with " + "`batch_function` are supposed to be Tensors; " + "found %s" % repr(a)) + for inp in computation.captured_inputs: + print("inp: %s" % inp) + for op in inp.consumers(): + print("op: %s" % op) + return gen_batch_ops.batch_function( + num_batch_threads=num_batch_threads, + max_batch_size=max_batch_size, + batch_timeout_micros=batch_timeout_micros, + allowed_batch_sizes=allowed_batch_sizes, + max_enqueued_batches=max_enqueued_batches, + shared_name=name, + f=computation, + in_tensors=list(args), + captured_tensors=computation.captured_inputs, + Tout=[o.type for o in computation.definition.signature.output_arg]) + + return decorated + + return decorator + + +def batch_function_v1(num_batch_threads, + max_batch_size, + batch_timeout_micros, + allowed_batch_sizes=None, + grad_timeout_micros=60 * 1000 * 1000, + unbatch_timeout_micros=60 * 1000 * 1000, + max_enqueued_batches=10): + """Batches the computation done by the decorated function. + + This is the older version of batch_function(). Please use the former instead + of this. + Args: num_batch_threads: Number of scheduling threads for processing batches of work. Determines the number of batches processed in parallel. 
diff --git a/tensorflow/contrib/batching/python/ops/batch_ops_test.py b/tensorflow/contrib/batching/python/ops/batch_ops_test.py index ea8339334f..7846814546 100644 --- a/tensorflow/contrib/batching/python/ops/batch_ops_test.py +++ b/tensorflow/contrib/batching/python/ops/batch_ops_test.py @@ -188,12 +188,62 @@ class BatchOpsTest(test.TestCase): self.assertEqual(thread_results[0], [2]) self.assertEqual(main_results[0], [3]) + def testBasicUnbatchV1Decorated(self): + """Tests that the batch_function_v1 decorator works.""" + with self.test_session() as sess: + @batch_ops.batch_function_v1(1, 10, 100000) + def computation(in_t): + return in_t + 1 + + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) + result = computation(inp) + thread_results = [] + + def worker(): + thread_results.extend(sess.run([result], feed_dict={inp: [1]})) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + main_results = sess.run([result], feed_dict={inp: [2]}) + worker_thread.join() + self.assertEqual(thread_results[0], [2]) + self.assertEqual(main_results[0], [3]) + def testBasicUnbatchDecorated(self): """Tests that the batch_function decorator works.""" with self.test_session() as sess: + # TODO(apassos): Removing this line causes test flakiness! Ideally should + # be investigated. 
+ default_inp = array_ops.placeholder_with_default(2, shape=[]) # pylint: disable=unused-variable + @batch_ops.batch_function(1, 10, 100000) def computation(in_t): return in_t + 1 + + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) + result = computation(inp) + thread_results = [] + + def worker(): + thread_results.extend(sess.run([result], feed_dict={inp: [1]})) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + main_results = sess.run([result], feed_dict={inp: [2]}) + worker_thread.join() + self.assertEqual(thread_results[0], [2]) + self.assertEqual(main_results[0], [3]) + + def testBatchDecoratedWithCapturedInput(self): + """Tests that the batch_function decorator works.""" + with self.test_session() as sess: + captured_inp0 = array_ops.placeholder_with_default(2, shape=[]) + captured_inp1 = array_ops.placeholder_with_default(1, shape=[]) + + @batch_ops.batch_function(1, 10, 100000) + def computation(in_t): + return in_t + captured_inp0 - captured_inp1 + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) result = computation(inp) thread_results = [] -- GitLab From a9ddfe50eee83b2f18293241ab96f0a1e2b4b05b Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 7 Jun 2018 18:42:30 -0700 Subject: [PATCH 038/365] [DataFlowAnalysis] Be less conservative on loop fusion nodes when reusing buffer. - Previously, we say we cannot reuse operand buffer for a loop fusion node if any of the fusion's inputs is a broadcast or reshape. That's too conservative since in theory we can still reuse the operand's buffer if all the users of that particular operand are elementwise. This CL implements that. - Also fixed a bug in previous code where a dynamic update fusion node that ends with convert (added for bf16) is not caught by the if condition currectly. 
PiperOrigin-RevId: 199731488 --- .../xla/service/hlo_dataflow_analysis.cc | 31 +++-- .../xla/service/hlo_dataflow_analysis_test.cc | 123 ++++++++++++++++++ .../compiler/xla/service/hlo_instruction.cc | 19 ++- .../xla/service/hlo_instruction_test.cc | 17 +++ tensorflow/compiler/xla/service/hlo_parser.cc | 3 + .../compiler/xla/service/hlo_parser_test.cc | 2 +- 6 files changed, 181 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index cc130a4900..d020005868 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -931,16 +931,17 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( } const HloUse& use = value.uses()[0]; - if (user->fusion_kind() == HloInstruction::FusionKind::kLoop && - user->fused_expression_root()->opcode() == - HloOpcode::kDynamicUpdateSlice) { - // Loop fusion with kDynamicUpdateSlice fused root. - // - // Returns true iff there is exactly one use of 'operand' at shape index - // 'operand_index', and this singleton use is the fused root at operand - // index 0. - return use.instruction == user->fused_expression_root() && - use.operand_number == 0; + if (user->fusion_kind() == HloInstruction::FusionKind::kLoop) { + if (user->fused_expression_root()->opcode() == + HloOpcode::kDynamicUpdateSlice) { + // Loop fusion with kDynamicUpdateSlice fused root. + // + // Returns true iff there is exactly one use of 'operand' at shape index + // 'operand_index', and this singleton use is the fused root at operand + // index 0. + return use.instruction == user->fused_expression_root() && + use.operand_number == 0; + } } else if (user->fusion_kind() == HloInstruction::FusionKind::kOutput && user->fused_expression_root()->opcode() == HloOpcode::kAdd) { // Output fusion with kAdd fused root. 
@@ -967,6 +968,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( use.operand_number == other_add_operand_index; } } + if (user->opcode() == HloOpcode::kDynamicUpdateSlice || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, @@ -998,8 +1000,13 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( }) != uses.end(); return uses.size() == 2 && found_caller_use && found_elementwise_callee_use; } - // Check if 'user' is element-wise. - return user->IsElementwise(); + + // Loop fusions that contain transposing copies won't reach here as they have + // different layouts, which fails the check in the beginning of this function. + // + // Multi-output fusion will fail the check here as tuples are not considered + // an elementwise operation. + return user->IsElementwiseOnOperand(user->operand_index(operand)); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 5798326dcb..db1822ec47 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1974,6 +1974,89 @@ TEST_F(CanShareOperandBufferWithUserTest, ElementWiseSameShape) { dataflow_analysis_->CanShareOperandBufferWithUser(exp, {}, log, {})); } +TEST_F(CanShareOperandBufferWithUserTest, + NonElementwiseLoopFusionCantAliasOperandBuffer) { + auto builder = HloComputation::Builder(TestName()); + Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); + + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, data_shape, "param0")); + + auto neg = builder.AddInstruction( + HloInstruction::CreateUnary(data_shape, HloOpcode::kNegate, param0)); + + auto reverse = builder.AddInstruction( + HloInstruction::CreateReverse(data_shape, neg, {0, 1})); + + BuildModule(builder.Build()); + auto fusion = computation_->CreateFusionInstruction( + 
{reverse, neg}, HloInstruction::FusionKind::kLoop); + RunAnalysis(); + + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(param0, {}, + fusion, {})); +} + +TEST_F(CanShareOperandBufferWithUserTest, + MultiOutputFusionCantAliasOperandBuffer) { + auto builder = HloComputation::Builder(TestName()); + Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); + + Shape in_shape = ShapeUtil::MakeShape(F32, {8}); + Shape out_shape = ShapeUtil::MakeShape(PRED, {8}); + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, in_shape, "param0")); + auto param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, in_shape, "param1")); + + auto copy0 = builder.AddInstruction( + HloInstruction::CreateUnary(in_shape, HloOpcode::kCopy, param0)); + auto copy1 = builder.AddInstruction( + HloInstruction::CreateUnary(in_shape, HloOpcode::kCopy, param1)); + + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({copy1, copy0})); + + BuildModule(builder.Build()); + auto fusion = computation_->CreateFusionInstruction( + {tuple, copy1, copy0}, HloInstruction::FusionKind::kLoop); + RunAnalysis(); + + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(param0, {}, + fusion, {0})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(param0, {}, + fusion, {1})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(param1, {}, + fusion, {0})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(param1, {}, + fusion, {1})); +} + +TEST_F(CanShareOperandBufferWithUserTest, + ElementwiseLoopFusionCantAliasOperandBuffer) { + auto builder = HloComputation::Builder(TestName()); + Shape data_shape = ShapeUtil::MakeShape(F32, {2, 2}); + + auto one = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto operand = builder.AddInstruction( + HloInstruction::CreateBroadcast(data_shape, one, {1})); + + auto neg = builder.AddInstruction( + 
HloInstruction::CreateUnary(data_shape, HloOpcode::kNegate, operand)); + + auto exp = builder.AddInstruction( + HloInstruction::CreateUnary(data_shape, HloOpcode::kExp, neg)); + + BuildModule(builder.Build()); + auto fusion = computation_->CreateFusionInstruction( + {exp, neg}, HloInstruction::FusionKind::kLoop); + RunAnalysis(); + + EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser(operand, {}, + fusion, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, ElementWiseDifferentShape) { auto builder = HloComputation::Builder(TestName()); @@ -2048,6 +2131,46 @@ TEST_F(CanShareOperandBufferWithUserTest, FusedDynamicUpdateSlice) { fusion, {})); } +TEST_F(CanShareOperandBufferWithUserTest, + FusedDynamicUpdateSliceWithConvertCantShare) { + auto builder = HloComputation::Builder(TestName()); + + Shape data_shape = ShapeUtil::MakeShape(F32, {8}); + Shape data_shape_bf16 = ShapeUtil::MakeShape(BF16, {8}); + auto tuple = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeTupleShape({data_shape, data_shape}), "tuple")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape, tuple, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape, tuple, 1)); + + auto convert1 = builder.AddInstruction( + HloInstruction::CreateConvert(data_shape_bf16, gte1)); + + // Create a DynamicUpdateSlice instruction of tuple element 1. 
+ auto starts = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({2}))); + auto update = builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({2.f, 2.f, 2.f}))); + auto dynamic_update_slice = + builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + data_shape_bf16, convert1, update, starts)); + + auto convert2 = builder.AddInstruction( + HloInstruction::CreateConvert(data_shape, dynamic_update_slice)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, convert2})); + + BuildModule(builder.Build()); + auto fusion = computation_->CreateFusionInstruction( + {convert2, dynamic_update_slice, starts, update, convert1}, + HloInstruction::FusionKind::kLoop); + RunAnalysis(); + + // The fusion instruction can't share with tuple element 1. + EXPECT_FALSE( + dataflow_analysis_->CanShareOperandBufferWithUser(gte1, {}, fusion, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { auto builder = HloComputation::Builder(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index cf1530abe1..570ad5459a 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -398,6 +398,11 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, instruction->AppendOperand(operand); } instruction->called_computations_.push_back(map_computation); + // TODO(b/65689298) Remove code below once Map is generalized to accept + // arbitrary map dimensions. 
+ instruction->dimensions_.resize(ShapeUtil::Rank(shape)); + std::iota(instruction->dimensions_.begin(), instruction->dimensions_.end(), + 0); return instruction; } @@ -1603,7 +1608,7 @@ bool HloInstruction::HasLiteral() const { return literal_ != nullptr; } bool HloInstruction::CanHaveDimensionsField() const { return (opcode() == HloOpcode::kReverse || - opcode() == HloOpcode::kConcatenate || + opcode() == HloOpcode::kConcatenate || opcode() == HloOpcode::kMap || opcode() == HloOpcode::kReduce || opcode() == HloOpcode::kBroadcast || opcode() == HloOpcode::kTranspose); } @@ -3151,7 +3156,19 @@ bool HloInstruction::IsElementwise() const { // Other operations. case HloOpcode::kRng: + return true; case HloOpcode::kMap: + if (!dimensions().empty()) { + // Check that the map is executed in elementwise compatible dimensions. + if (dimensions().size() != operand(0)->shape().dimensions_size()) { + return false; + } + for (int i = 0; i < dimensions().size(); ++i) { + if (dimensions()[i] != i) { + return false; + } + } + } return true; case HloOpcode::kFusion: if (fusion_kind() != FusionKind::kLoop) { diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 313033ddad..76349c4099 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -980,6 +980,23 @@ TEST_F(HloInstructionTest, FullyElementwise) { } } +TEST_F(HloInstructionTest, MapIsElementwise) { + auto module = CreateNewModule(); + const Shape r2f32 = ShapeUtil::MakeShapeWithLayout(F32, {10, 10}, {1, 0}); + HloComputation::Builder builder(TestName()); + HloComputation::Builder map_builder("id"); + map_builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "p0")); + auto map_computation = module->AddEmbeddedComputation(map_builder.Build()); + auto x = + builder.AddInstruction(HloInstruction::CreateParameter(0, r2f32, "x")); + auto map 
= builder.AddInstruction( + HloInstruction::CreateMap(r2f32, {x}, map_computation)); + module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(map->IsElementwise()); +} + TEST_F(HloInstructionTest, PartiallyElementwise) { const Shape r1f32 = ShapeUtil::MakeShape(F32, {5}); const Shape r2f32 = ShapeUtil::MakeShape(F32, {3, 5}); diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 3eadedfe1f..a1bc269400 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -777,6 +777,9 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional to_apply; attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, &to_apply}; + optional> dimensions; + attrs["dimensions"] = {/*required=*/false, AttrTy::kBracedInt64List, + &dimensions}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 08068dc504..1c5a47c875 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -765,7 +765,7 @@ add_F32.v3 { ENTRY MapBinaryAdder.v3 { param0 = f32[4]{0} parameter(0) param1 = f32[4]{0} parameter(1) - ROOT map = f32[4]{0} map(param0, param1), to_apply=add_F32.v3 + ROOT map = f32[4]{0} map(param0, param1), dimensions={0}, to_apply=add_F32.v3 } )" -- GitLab From 99e6a86480bfb518dea59b4b25f7c9549b227587 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 19:31:38 -0700 Subject: [PATCH 039/365] Implement Log operator. 
PiperOrigin-RevId: 199735191 --- tensorflow/contrib/lite/build_def.bzl | 1 + tensorflow/contrib/lite/builtin_ops.h | 1 + .../lite/g3doc/tf_ops_compatibility.md | 11 ++++ .../contrib/lite/kernels/elementwise.cc | 23 ++++++-- .../contrib/lite/kernels/elementwise_test.cc | 18 +++++-- tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 1 + .../contrib/lite/schema/schema_generated.h | 9 ++-- .../contrib/lite/testing/generate_examples.py | 54 ++++++++++++------- .../contrib/lite/toco/import_tensorflow.cc | 2 + .../contrib/lite/toco/tflite/operator.cc | 10 ++-- .../contrib/lite/toco/tflite/operator_test.cc | 1 + 14 files changed, 100 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 13d9a463fb..30bb604d17 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -220,6 +220,7 @@ def generated_test_models(): "less_equal", "local_response_norm", "log_softmax", + "log", "lstm", "max_pool", "maximum", diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7b10b69f43..f3b2ac77fb 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -98,6 +98,7 @@ typedef enum { kTfLiteBuiltinExpandDims = 70, kTfLiteBuiltinEqual = 71, kTfLiteBuiltinNotEqual = 72, + kTfLiteBuiltinLog = 73, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 19145281fa..bb2e615eac 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -417,6 +417,17 @@ Outputs { } ``` +**LOG** + +``` +Inputs { + 0: a tensor +} +Outputs { + 0: a tensor equivalent to log(input) +} +``` + **LOG_SOFTMAX** ``` diff 
--git a/tensorflow/contrib/lite/kernels/elementwise.cc b/tensorflow/contrib/lite/kernels/elementwise.cc index 0bd5046950..98c21ce9d3 100644 --- a/tensorflow/contrib/lite/kernels/elementwise.cc +++ b/tensorflow/contrib/lite/kernels/elementwise.cc @@ -23,7 +23,7 @@ namespace ops { namespace builtin { namespace elementwise { -TfLiteStatus SinPrepare(TfLiteContext* context, TfLiteNode* node) { +TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, 0); @@ -35,7 +35,8 @@ TfLiteStatus SinPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayCopy(input->dims)); } -TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { +inline TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, + float float_func(float)) { const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); switch (input->type) { @@ -44,7 +45,7 @@ TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { const float* in = GetTensorData(input); const float* in_end = in + elements; float* out = output->data.f; - for (; in < in_end; in++, out++) *out = std::sin(*in); + for (; in < in_end; in++, out++) *out = float_func(*in); return kTfLiteOk; } default: { @@ -55,14 +56,28 @@ TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { } } +TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { + return Eval(context, node, std::sin); +} + +TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) { + return Eval(context, node, std::log); +} + } // namespace elementwise TfLiteRegistration* Register_SIN() { - static TfLiteRegistration r = {nullptr, nullptr, elementwise::SinPrepare, + static TfLiteRegistration r = {nullptr, nullptr, elementwise::GenericPrepare, elementwise::SinEval}; return &r; } +TfLiteRegistration* Register_LOG() { + static 
TfLiteRegistration r = {nullptr, nullptr, elementwise::GenericPrepare, + elementwise::LogEval}; + return &r; +} + } // namespace builtin } // namespace ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/elementwise_test.cc b/tensorflow/contrib/lite/kernels/elementwise_test.cc index 412ffb04b9..10e88d5a31 100644 --- a/tensorflow/contrib/lite/kernels/elementwise_test.cc +++ b/tensorflow/contrib/lite/kernels/elementwise_test.cc @@ -24,12 +24,13 @@ namespace { using ::testing::ElementsAreArray; -class SinOpModel : public SingleOpModel { +class ElementWiseOpModel : public SingleOpModel { public: - SinOpModel(std::initializer_list input_shape) { + ElementWiseOpModel(BuiltinOperator op, + std::initializer_list input_shape) { input_ = AddInput(TensorType_FLOAT32); output_ = AddOutput(TensorType_FLOAT32); - SetBuiltinOp(BuiltinOperator_SIN, BuiltinOptions_NONE, 0); + SetBuiltinOp(op, BuiltinOptions_NONE, 0); BuildInterpreter({input_shape}); } @@ -42,7 +43,7 @@ class SinOpModel : public SingleOpModel { }; TEST(ElementWise, Sin) { - SinOpModel m({1, 1, 4, 1}); + ElementWiseOpModel m(BuiltinOperator_SIN, {1, 1, 4, 1}); m.PopulateTensor(m.input(), {0, 3.1415926, -3.1415926, 1}); m.Invoke(); EXPECT_THAT(m.ExtractVector(m.output()), @@ -50,6 +51,15 @@ TEST(ElementWise, Sin) { EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1})); } +TEST(ElementWise, Log) { + ElementWiseOpModel m(BuiltinOperator_LOG, {1, 1, 4, 1}); + m.PopulateTensor(m.input(), {1, 3.1415926, 1, 1}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray(ArrayFloatNear({0, 1.14473, 0, 0}))); + EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1})); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 6c68bb2f31..7bb28d4de7 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -73,6 
+73,7 @@ TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_EXP(); TfLiteRegistration* Register_TOPK_V2(); +TfLiteRegistration* Register_LOG(); TfLiteRegistration* Register_LOG_SOFTMAX(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_DEQUANTIZE(); @@ -150,6 +151,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); AddBuiltin(BuiltinOperator_EXP, Register_EXP()); AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2()); + AddBuiltin(BuiltinOperator_LOG, Register_LOG()); AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index d78b6eae90..4fb1ada9fd 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -357,6 +357,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_FLOOR: case BuiltinOperator_NEG: case BuiltinOperator_SIN: + case BuiltinOperator_LOG: break; case BuiltinOperator_CAST: { TfLiteCastParams* params = MallocPOD(); diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 605ce7d6fc..99cb40e967 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -490,6 +490,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_SELECT: case tflite::BuiltinOperator_SLICE: case tflite::BuiltinOperator_SIN: + case tflite::BuiltinOperator_LOG: case tflite::BuiltinOperator_TRANSPOSE_CONV: case tflite::BuiltinOperator_TILE: case tflite::BuiltinOperator_EXPAND_DIMS: diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index d12a96df1c..ee5208df14 100644 --- 
a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -150,6 +150,7 @@ enum BuiltinOperator : byte { EXPAND_DIMS = 70, EQUAL = 71, NOT_EQUAL = 72, + LOG = 73, } // Options for the builtin operators. diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 8ddd2f1438..887e47ed1e 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -325,11 +325,12 @@ enum BuiltinOperator { BuiltinOperator_EXPAND_DIMS = 70, BuiltinOperator_EQUAL = 71, BuiltinOperator_NOT_EQUAL = 72, + BuiltinOperator_LOG = 73, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_NOT_EQUAL + BuiltinOperator_MAX = BuiltinOperator_LOG }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[72] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[73] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -402,7 +403,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[72] { BuiltinOperator_TILE, BuiltinOperator_EXPAND_DIMS, BuiltinOperator_EQUAL, - BuiltinOperator_NOT_EQUAL + BuiltinOperator_NOT_EQUAL, + BuiltinOperator_LOG }; return values; } @@ -482,6 +484,7 @@ inline const char **EnumNamesBuiltinOperator() { "EXPAND_DIMS", "EQUAL", "NOT_EQUAL", + "LOG", nullptr }; return names; diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 723b6ae057..f5e25784fa 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -2420,30 +2420,44 @@ def make_neg_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) -def make_sin_tests(zip_path): - """Make a set of tests to do sin.""" +def _make_elementwise_tests(op): + """Make a set of tests to do element-wise operations.""" - test_parameters = 
[{ - "input_dtype": [tf.float32], - "input_shape": [[1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], - }] + def f(zip_path): + """Actual function that generates examples.""" + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape": [[1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], + }] - def build_graph(parameters): - """Build the sin op testing graph.""" - input_value = tf.placeholder( - dtype=parameters["input_dtype"], - name="input1", - shape=parameters["input_shape"]) - out = tf.sin(input_value) - return [input_value], [out] + def build_graph(parameters): + """Build the sin op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input1", + shape=parameters["input_shape"]) + out = op(input_value) + return [input_value], [out] - def build_inputs(parameters, sess, inputs, outputs): - input_value = create_tensor_data(parameters["input_dtype"], - parameters["input_shape"]) - return [input_value], sess.run( - outputs, feed_dict={inputs[0]: input_value}) + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict={inputs[0]: input_value}) - make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + return f + + +def make_sin_tests(zip_path): + """Make a set of tests to do sin.""" + return _make_elementwise_tests(tf.sin)(zip_path) + + +def make_log_tests(zip_path): + """Make a set of tests to do log.""" + return _make_elementwise_tests(tf.log)(zip_path) def make_where_tests(zip_path): diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 5cc999314c..8dd43dda3e 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1941,6 +1941,8 @@ Status ImportTensorFlowNode(const 
tensorflow::NodeDef& node, ConvertRandomUniform(node, tf_import_flags, model); } else if (node.op() == "Sin") { ConvertSimpleOperator(node, tf_import_flags, model); + } else if (node.op() == "Log") { + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Select") { ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "SparseToDense") { diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 8bfd76db6e..7490ab960b 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -1112,16 +1112,18 @@ std::vector> BuildOperatorList() { "LESS", OperatorType::kTensorFlowLess)); ops.emplace_back(new SimpleOperator( "LESS_EQUAL", OperatorType::kTensorFlowLessEqual)); + ops.emplace_back(new SimpleOperator( + "EQUAL", OperatorType::kTensorFlowEqual)); + ops.emplace_back(new SimpleOperator( + "NOT_EQUAL", OperatorType::kTensorFlowNotEqual)); ops.emplace_back(new SimpleOperator("NEG", OperatorType::kNeg)); ops.emplace_back( new SimpleOperator("SELECT", OperatorType::kSelect)); ops.emplace_back( new SimpleOperator("SLICE", OperatorType::kSlice)); + // Element-wise operator ops.emplace_back(new SimpleOperator("SIN", OperatorType::kSin)); - ops.emplace_back(new SimpleOperator( - "EQUAL", OperatorType::kTensorFlowEqual)); - ops.emplace_back(new SimpleOperator( - "NOT_EQUAL", OperatorType::kTensorFlowNotEqual)); + ops.emplace_back(new SimpleOperator("LOG", OperatorType::kLog)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 06bbe53516..e3144ad63e 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -123,6 +123,7 @@ TEST_F(OperatorTest, SimpleOperators) { OperatorType::kTensorFlowEqual); CheckSimpleOperator( "NOT_EQUAL", OperatorType::kTensorFlowNotEqual); 
+ CheckSimpleOperator("LOG", OperatorType::kLog); } TEST_F(OperatorTest, BuiltinAdd) { -- GitLab From a58cdd23d5bd5909b14bddade7ddbf9b6573fc69 Mon Sep 17 00:00:00 2001 From: James Qin Date: Thu, 7 Jun 2018 19:55:07 -0700 Subject: [PATCH 040/365] Replace add_variable() with add_weight() in official keras layers. Make it easier for analysis and code search. PiperOrigin-RevId: 199736646 --- .../python/keras/layers/convolutional.py | 83 ++++++++++--------- tensorflow/python/keras/layers/core.py | 30 +++---- .../python/keras/layers/normalization.py | 6 +- 3 files changed, 63 insertions(+), 56 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index ce1c84e98d..9ea341139e 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -151,21 +151,23 @@ class Conv(Layer): input_dim = int(input_shape[channel_axis]) kernel_shape = self.kernel_size + (input_dim, self.filters) - self.kernel = self.add_variable(name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) + self.kernel = self.add_weight( + name='kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) + self.bias = self.add_weight( + name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) else: self.bias = None self.input_spec = InputSpec(ndim=self.rank + 2, @@ -720,21 +722,23 
@@ class Conv2DTranspose(Conv2D): self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) kernel_shape = self.kernel_size + (self.filters, input_dim) - self.kernel = self.add_variable(name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) + self.kernel = self.add_weight( + name='kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) + self.bias = self.add_weight( + name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) else: self.bias = None self.built = True @@ -961,7 +965,7 @@ class Conv3DTranspose(Conv3D): kernel_shape = self.kernel_size + (self.filters, input_dim) self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) - self.kernel = self.add_variable( + self.kernel = self.add_weight( 'kernel', shape=kernel_shape, initializer=self.kernel_initializer, @@ -970,7 +974,7 @@ class Conv3DTranspose(Conv3D): trainable=True, dtype=self.dtype) if self.use_bias: - self.bias = self.add_variable( + self.bias = self.add_weight( 'bias', shape=(self.filters,), initializer=self.bias_initializer, @@ -1222,7 +1226,7 @@ class SeparableConv(Conv): pointwise_kernel_shape = ( 1,) * self.rank + (self.depth_multiplier * input_dim, self.filters) - self.depthwise_kernel = self.add_variable( + self.depthwise_kernel = self.add_weight( name='depthwise_kernel', shape=depthwise_kernel_shape, initializer=self.depthwise_initializer, @@ -1230,7 +1234,7 
@@ class SeparableConv(Conv): constraint=self.depthwise_constraint, trainable=True, dtype=self.dtype) - self.pointwise_kernel = self.add_variable( + self.pointwise_kernel = self.add_weight( name='pointwise_kernel', shape=pointwise_kernel_shape, initializer=self.pointwise_initializer, @@ -1239,13 +1243,14 @@ class SeparableConv(Conv): trainable=True, dtype=self.dtype) if self.use_bias: - self.bias = self.add_variable(name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) + self.bias = self.add_weight( + name='bias', + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) else: self.bias = None self.built = True diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index df4c3915a3..5061825d38 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -882,21 +882,23 @@ class Dense(Layer): 'should be defined. 
Found `None`.') self.input_spec = InputSpec(min_ndim=2, axes={-1: input_shape[-1].value}) - self.kernel = self.add_variable('kernel', - shape=[input_shape[-1].value, self.units], - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - dtype=self.dtype, - trainable=True) + self.kernel = self.add_weight( + 'kernel', + shape=[input_shape[-1].value, self.units], + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True) if self.use_bias: - self.bias = self.add_variable('bias', - shape=[self.units,], - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - dtype=self.dtype, - trainable=True) + self.bias = self.add_weight( + 'bias', + shape=[self.units,], + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True) else: self.bias = None self.built = True diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py index 7743d00c0f..ff51eadee9 100644 --- a/tensorflow/python/keras/layers/normalization.py +++ b/tensorflow/python/keras/layers/normalization.py @@ -183,7 +183,7 @@ class BatchNormalization(Layer): def _add_tower_local_variable(self, *args, **kwargs): tower_context = distribute_lib.get_tower_context() with tower_context.tower_local_var_scope('mean'): - return self.add_variable(*args, **kwargs) + return self.add_weight(*args, **kwargs) def build(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape) @@ -276,7 +276,7 @@ class BatchNormalization(Layer): self.axis[idx] = x + 1 # Account for added dimension if self.scale: - self.gamma = self.add_variable( + self.gamma = self.add_weight( name='gamma', shape=param_shape, dtype=param_dtype, @@ -291,7 +291,7 @@ class BatchNormalization(Layer): 1.0, 
dtype=param_dtype, shape=param_shape) if self.center: - self.beta = self.add_variable( + self.beta = self.add_weight( name='beta', shape=param_shape, dtype=param_dtype, -- GitLab From 88d52c145b7fab581bc97a9ce99514e149c558dc Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 7 Jun 2018 21:22:55 -0700 Subject: [PATCH 041/365] Enhance row reduction implementation. The current implementation tiles the x-dimension of the tensors to calculate the partial results of the reduction. This change increases such an x-tile size from 8 to 64 if doing so results in all saturated tiles. Otherwise, this change adds z-dimension tiles to increase the number of elements that each thread reduces to a partial result to reduce the number of needed dynamic atomic operations and intra-warp reduction operations. Use a tighter yet safe loop bound for the last unsaturated tile. Avoid generating the atomic operation when the tile size is not smaller than the reduction width. Extend ForLoop emitter to support a request for fully loop unrolling. Add three tests. 
PiperOrigin-RevId: 199744209 --- .../xla/service/cpu/dot_op_emitter.cc | 169 +++++----- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../xla/service/gpu/ir_emitter_unnested.cc | 316 +++++++++++------- .../service/llvm_ir/kernel_support_library.cc | 48 +-- .../service/llvm_ir/kernel_support_library.h | 175 +++++++--- .../compiler/xla/service/llvm_ir/llvm_loop.cc | 33 +- .../compiler/xla/service/llvm_ir/llvm_loop.h | 59 ++-- 7 files changed, 499 insertions(+), 302 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index fe4ba2a070..8eb39d615f 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -324,11 +324,11 @@ void ColumnMajorMatrixVectorProductEmitter::Emit() { int64 column_remainder = k() % tile_cols(); int64 column_limit = k() - column_remainder; - ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(), - [&](llvm::Value* column, bool is_first_column) { - EmitOuterLoopBody(column, tile_cols(), is_first_column); - }); + ksl_.ForReturnVoid("dot.outer.tiled", + /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(), + [&](llvm::Value* column, bool is_first_column) { + EmitOuterLoopBody(column, tile_cols(), is_first_column); + }); if (column_remainder != 0) { EmitOuterLoopBody(ir_builder_->getInt64(column_limit), column_remainder, @@ -341,19 +341,20 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( int64 columns, bool is_first_column) { int64 row_limit = m() - (m() % tile_rows()); - ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, - /*step=*/tile_rows(), [&](llvm::Value* row) { - std::vector lhs_tile = - lhs_memory_tile->LoadTile(/*minor_dim_offset=*/row); - llvm::Value* accumulator = - is_first_column ? (addend_ ? 
vsl_.LoadVector(addend_, row) - : vsl_.GetZeroVector()) - : vsl_.LoadVector(result_, row); - for (int i = 0; i < columns; i++) { - accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator); - } - vsl_.StoreVector(accumulator, result_, row); - }); + ksl_.ForReturnVoid( + "dot.inner.tiled", /*start=*/0, /*end=*/row_limit, + /*step=*/tile_rows(), [&](llvm::Value* row) { + std::vector lhs_tile = + lhs_memory_tile->LoadTile(/*minor_dim_offset=*/row); + llvm::Value* accumulator = + is_first_column ? (addend_ ? vsl_.LoadVector(addend_, row) + : vsl_.GetZeroVector()) + : vsl_.LoadVector(result_, row); + for (int i = 0; i < columns; i++) { + accumulator = vsl_.MulAdd(lhs_tile[i], rhs_tile[i], accumulator); + } + vsl_.StoreVector(accumulator, result_, row); + }); } void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( @@ -372,7 +373,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( // // initialized. // } - ksl_.For( + ksl_.ForReturnVoid( "dot.inner.epilg.outer", /*start=*/current_tile_col, /*end=*/ir_builder_->CreateAdd(columns_llvm, current_tile_col), /*step=*/1, /*peel_first_iteration=*/false, @@ -382,7 +383,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( ir_builder_->CreateMul(col, ir_builder_->getInt64(m())); llvm::Value* lhs_base_pointer = vsl_.ComputeOffsetPointer(lhs_, total_offset); - ksl_.For( + ksl_.ForReturnVoid( "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m(), /*step=*/1, [&](llvm::Value* scalar_row) { llvm::Value* product = vsl_.Mul( @@ -390,7 +391,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( llvm::Value* setting_result_first_time = ir_builder_->CreateAnd( is_first_scalar_col, ir_builder_->getInt1(is_first_tiled_column)); - ksl_.If( + ksl_.IfReturnVoid( setting_result_first_time, /*true_block_generator=*/ [&]() { @@ -571,9 +572,10 @@ void RowMajorMatrixVectorProductEmitter::Emit() { int64 row_remainder = m() % tile_rows(); int64 row_limit = m() - 
row_remainder; - ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(), - [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows()); }); + ksl_.ForReturnVoid( + "dot.outer.tiled", + /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(), + [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows()); }); if (row_remainder != 0) { EmitOuterLoopBody(ir_builder_->getInt64(row_limit), row_remainder); @@ -585,17 +587,17 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( std::vector* vector_accumulators) { int64 column_limit = k() - (k() % tile_cols()); - ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, - /*step=*/tile_cols(), [&](llvm::Value* col) { - std::vector lhs_tile = - lhs_memory_tile->LoadTile(/*minor_dim_offset=*/col); - llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); - for (int i = 0; i < rows; i++) { - llvm::Value* old_sum = (*vector_accumulators)[i].Get(); - (*vector_accumulators)[i].Set( - vsl_.Add(old_sum, vsl_.Mul(rhs_value, lhs_tile[i]))); - } - }); + ksl_.ForReturnVoid("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, + /*step=*/tile_cols(), [&](llvm::Value* col) { + std::vector lhs_tile = + lhs_memory_tile->LoadTile(/*minor_dim_offset=*/col); + llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); + for (int i = 0; i < rows; i++) { + llvm::Value* old_sum = (*vector_accumulators)[i].Get(); + (*vector_accumulators)[i].Set(vsl_.Add( + old_sum, vsl_.Mul(rhs_value, lhs_tile[i]))); + } + }); } void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( @@ -612,14 +614,15 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( ir_builder_->getInt64(k())); llvm::Value* lhs_base_pointer = vsl_.ComputeOffsetPointer(lhs_, total_offset); - ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k(), - /*step=*/1, [&](llvm::Value* scalar_col) { - llvm::Value* product = - vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), - vsl_.LoadScalar(rhs_, scalar_col)); - llvm::Value* 
old_value = (*scalar_accumulators)[r].Get(); - (*scalar_accumulators)[r].Set(vsl_.Add(old_value, product)); - }); + ksl_.ForReturnVoid( + "dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k(), + /*step=*/1, [&](llvm::Value* scalar_col) { + llvm::Value* product = + vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), + vsl_.LoadScalar(rhs_, scalar_col)); + llvm::Value* old_value = (*scalar_accumulators)[r].Get(); + (*scalar_accumulators)[r].Set(vsl_.Add(old_value, product)); + }); } } @@ -817,7 +820,7 @@ void MatrixMatrixBlockPanelEmitter::HandleResiduesOnN() { if (n_start != dims().n()) { VectorSupportLibrary vsl(scalar_type(), 1, ir_builder_, "gebp"); - ksl_.For("epi.n", n_start, dims().n(), 1, [&](llvm::Value* n_i) { + ksl_.ForReturnVoid("epi.n", n_start, dims().n(), 1, [&](llvm::Value* n_i) { llvm::Value* n_i_next = ir_builder_->CreateAdd(n_i, ir_builder_->getInt64(1)); HandleResiduesOnK(&vsl, n_i, n_i_next); @@ -929,39 +932,44 @@ void MatrixMatrixBlockPanelEmitter::EmitTiledGemm( VectorSupportLibrary* vsl, int64 tile_size_k, llvm::Value* k_start, llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end, int64 tile_size_m, llvm::Value* m_start, llvm::Value* m_end) { - ksl_.For("dot.m", m_start, m_end, tile_size_m, [&](llvm::Value* m_i) { - MemoryTile result_memory_tile(vsl, ir_builder_, /*matrix=*/result_, - /*matrix_size_along_minor_dim=*/dims().n(), - /*major_dim_offset=*/m_i, - /*tile_size_along_major_dim=*/tile_size_m); - MemoryTile lhs_memory_tile(vsl, ir_builder_, /*matrix=*/lhs_, - /*matrix_size_along_minor_dim=*/dims().k(), - /*major_dim_offset=*/m_i, - /*tile_size_along_major_dim=*/tile_size_m); - - ksl_.For( - "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { - TileVariable result_tile_var(vsl, result_memory_tile.LoadTile(n_i)); - ksl_.For("dot.k", k_start, k_end, tile_size_k, [&](llvm::Value* k_i) { - MemoryTile rhs_memory_tile(vsl, ir_builder_, rhs_, dims().n(), k_i, - tile_size_k); - std::vector> lhs_tile = - 
lhs_memory_tile.LoadBroadcastTile(k_i, tile_size_k); - std::vector rhs_tile = rhs_memory_tile.LoadTile(n_i); - std::vector result_tile = result_tile_var.Get(); - for (int64 r_m_i = 0; r_m_i < tile_size_m; r_m_i++) { - for (int64 r_k_i = 0; r_k_i < tile_size_k; r_k_i++) { - result_tile[r_m_i] = - vsl->MulAdd(lhs_tile[r_m_i][r_k_i], rhs_tile[r_k_i], - result_tile[r_m_i]); - } - } - result_tile_var.Set(result_tile); - }); - - result_memory_tile.StoreTile(result_tile_var.Get(), n_i); - }); - }); + ksl_.ForReturnVoid( + "dot.m", m_start, m_end, tile_size_m, [&](llvm::Value* m_i) { + MemoryTile result_memory_tile( + vsl, ir_builder_, /*matrix=*/result_, + /*matrix_size_along_minor_dim=*/dims().n(), + /*major_dim_offset=*/m_i, + /*tile_size_along_major_dim=*/tile_size_m); + MemoryTile lhs_memory_tile(vsl, ir_builder_, /*matrix=*/lhs_, + /*matrix_size_along_minor_dim=*/dims().k(), + /*major_dim_offset=*/m_i, + /*tile_size_along_major_dim=*/tile_size_m); + ksl_.ForReturnVoid( + "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { + TileVariable result_tile_var(vsl, + result_memory_tile.LoadTile(n_i)); + ksl_.ForReturnVoid( + "dot.k", k_start, k_end, tile_size_k, [&](llvm::Value* k_i) { + MemoryTile rhs_memory_tile(vsl, ir_builder_, rhs_, + dims().n(), k_i, tile_size_k); + std::vector> lhs_tile = + lhs_memory_tile.LoadBroadcastTile(k_i, tile_size_k); + std::vector rhs_tile = + rhs_memory_tile.LoadTile(n_i); + std::vector result_tile = + result_tile_var.Get(); + for (int64 r_m_i = 0; r_m_i < tile_size_m; r_m_i++) { + for (int64 r_k_i = 0; r_k_i < tile_size_k; r_k_i++) { + result_tile[r_m_i] = + vsl->MulAdd(lhs_tile[r_m_i][r_k_i], rhs_tile[r_k_i], + result_tile[r_m_i]); + } + } + result_tile_var.Set(result_tile); + }); + + result_memory_tile.StoreTile(result_tile_var.Get(), n_i); + }); + }); } } // namespace @@ -1293,8 +1301,11 @@ Status DotOpEmitter::Emit() { // from messing up the vectorization. 
std::unique_ptr reduction_loop = loop_nest.AddLoop( 0, lhs_shape.dimensions(lhs_reduction_dimension), "reduction", - /*prevent_unrolling=*/lhs_reduction_along_minor_dimension && - rhs_reduction_along_minor_dimension); + /*unroll_mode=*/ + (lhs_reduction_along_minor_dimension && + rhs_reduction_along_minor_dimension) + ? xla::llvm_ir::UnrollMode::kNoUnroll + : xla::llvm_ir::UnrollMode::kDefaultUnroll); // The final entry in the rhs and lhs indexes is the indvar of the // reduction loop. diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 6bd9d4c31d..5e5ca7c72c 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -164,6 +164,7 @@ cc_library( "//tensorflow/compiler/xla/service:name_uniquer", "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", + "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library", "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index ed005f6afc..a3c1c06cbc 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -59,6 +59,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" +#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/ops.h" #include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" @@ -1391,6 +1392,30 @@ Status IrEmitterUnnested::EmitColumnReduction( .EmitLoop(IrName(reduce)); } +static std::pair ComputeTilingSchemeForReduction( + int64 depth, int64 width, int64 kWarpSize) { + constexpr int64 kTargetNumElementsPerThread = 64; + int64 x_tile_size = kTargetNumElementsPerThread; + int64 z_tile_size = 1; + + // Only tile along the x dimension with tile size kTargetNumElementsPerThread + // if doing so doesn't require a slow version of loop with bound check on each + // dimension. A more sophisticated heuristics is to enable tile along the + // x dimension with tile size kTargetNumElementsPerThread when either width is + // a factor of (kWarpSize * kTargetNumElementsPerThread) or width is big + // enough so that only a small fraction of the threads execute the slow + // version of loop with bound check. + if (width % (kWarpSize * kTargetNumElementsPerThread) != 0) { + x_tile_size = 8; + z_tile_size = 8; + while (depth % z_tile_size != 0) { + z_tile_size -= 1; + } + } + + return std::pair(x_tile_size, z_tile_size); +} + Status IrEmitterUnnested::EmitRowReduction( int64 depth, int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, @@ -1402,7 +1427,7 @@ Status IrEmitterUnnested::EmitRowReduction( std::pair> extra_output_gens) { // A naive algorithm is: - // 1. Divide the input tensor into tiles of size 1x1xK. + // 1. Divide the x dimension of the input tensor into tiles of size 1x1xX. // 2. Partially reduces each tile to a scalar using one thread. // 3. 
Accumulates that scalar to the output vector using atomic operations. // @@ -1413,15 +1438,15 @@ Status IrEmitterUnnested::EmitRowReduction( // int y = linear_index / width_in_tiles % height; // int z = linear_index / (height * width_in_tiles); // float partial_result = 0; - // for (element_id_in_tile : range(kTileSize)) { - // int x = x_in_tiles * kTileSize + element_id_in_tile; + // for (element_id_in_tile : range(x_tile_size)) { + // int x = x_in_tiles * x_tile_size + element_id_in_tile; // if (x < width) // partial_result = reducer(partial_result, input[z][y][z]); // } // AtomicReducer(&output[y], partial_result); // } // - // Three optimizations are performed. + // Four optimizations are performed. // // 1. To coalesce global memory accesses, dilate the tile with a factor of 32 // (i.e. the warp size). For example, suppose the width is 8x32=256. Instead @@ -1448,29 +1473,44 @@ Status IrEmitterUnnested::EmitRowReduction( // element_id_in_tile, which makes the code more friendly to optimizations // such as LICM. // + // 4. When the width is too small and x_tile_size is less than the target + // number of elements per thread and use a small factor of depth as + // z_tile_size to increase the number of elements calculated by each + // partial sum. This can reduce the needed number of dynamic shfl_down and + // atomic operations. 
+ // // for (linear_index = threadIdx.x + blockIdx.x * blockDim.x; // linear_index < depth * height * width_in_tiles; // linear_index += blockDim.x * gridDim.x) { // int x_in_tiles = linear_index % width_in_tiles; // int y = linear_index / width_in_tiles % height; - // int z = linear_index / (height * width_in_tiles); + // int z_in_tiles = linear_index / (height * width_in_tiles); // int warp_id = x_in_tiles / warpSize; // int lane_id = x_in_tiles % warpSize; // float partial_result = 0; // int x = warp_id * kTileSize * warpSize + lane_id; - // if (width % (kTileSize * warpSize) == 0 || - // x + (kTileSize - 1) * warpSize < width) { - // // The entire tile is in bounds. - // for (int element_id_in_tile = 0; element_id_in_tile < kTileSize; - // ++element_id_in_tile, x += warpSize) { - // partial_result = Reducer(partial_result, input[z][y][x]); + // if (width % (x_tile_size * warpSize) == 0 || + // x + (x_tile_size - 1) * warpSize < width) { + // // The entire x_tile is in bounds. + // for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size; + // ++element_id_in_z_tile) { + // z = z_in_tiles * z_tile_size + element_id_in_z_tile; + // for (int element_id_in_x_tile = 0;element_id_in_x_tile < x_tile_size; + // ++element_id_in_x_tile, x += warpSize) { + // partial_result = Reducer(partial_result, input[z][y][x]); + // } // } // } else { // // The tile is partially in bounds. 
- // for (int element_id_in_tile = 0; element_id_in_tile < kTileSize; + // for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size; + // ++element_id_in_z_tile) { + // z = z_in_tiles * z_tile_size + element_id_in_z_tile; + // for (int element_id_in_x_tile = 0; element_id_in_x_tile < + // x_tile_size; // ++element_id_in_tile, x += warpSize) { - // if (x < width) - // partial_result = Reducer(partial_result, input[z][y][x]); + // if (x < width) + // partial_result = Reducer(partial_result, input[z][y][x]); + // } // } // } // for (shuffle_distance = 16; shuffle_distance > 0; shuffle_distance /= 2) @@ -1481,17 +1521,20 @@ Status IrEmitterUnnested::EmitRowReduction( // AtomicReducer(&output[y], partial_result); // } // - // Choose 8 as the tile size, which matches Eigen's RowReduceKernel. - constexpr int64 kTileSize = 8; + + int64 x_tile_size; + int64 z_tile_size; + std::tie(x_tile_size, z_tile_size) = + ComputeTilingSchemeForReduction(depth, width, kWarpSize); + // Round the width in tiles up to the nearest multiple of kWarpSize, so that // the use of shfl_down is valid. const int64 width_in_tiles = - RoundUpToNearest(CeilOfRatio(width, kTileSize), kWarpSize); + RoundUpToNearest(CeilOfRatio(width, x_tile_size), kWarpSize); - auto loop_body_emitter = - [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) { + // Emit the loop body that reduces one z-x-tile. const int num_reduces = reducers.size(); - // Emit the loop body that reduces one tile. llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType( input_shape.element_type(), ir_emitter_context_->llvm_module()); std::vector partial_reduction_result_addresses; @@ -1506,9 +1549,7 @@ Status IrEmitterUnnested::EmitRowReduction( partial_reduction_result_address); } - // Emit an inner for-loop that partially reduces the elements in the given - // tile. 
- llvm::Value* z = tile_index[0]; + llvm::Value* z_tile = tile_index[0]; llvm::Value* y = tile_index[1]; llvm::Value* x_tile = tile_index[2]; llvm::Value* warp_id = ir_builder_.CreateUDiv( @@ -1516,107 +1557,132 @@ Status IrEmitterUnnested::EmitRowReduction( llvm::Value* lane_id = ir_builder_.CreateURem( x_tile, ir_builder_.getInt64(kWarpSize), "lane_id"); - // The x-location of the last element in this tile. - // last_x = lane_id + warpSize * (kTileSize - 1 + warp_id * kTileSize); + // The x-location of the last element in this z-x-tile. + // last_x = lane_id + warpSize * (x_tile_size - 1 + warp_id * + // x_tile_size); llvm::Value* last_x = ir_builder_.CreateNSWAdd( - lane_id, - ir_builder_.CreateNSWMul( - ir_builder_.getInt64(kWarpSize), - ir_builder_.CreateNSWAdd( - ir_builder_.getInt64(kTileSize - 1), - ir_builder_.CreateNSWMul(warp_id, - ir_builder_.getInt64(kTileSize))))); - - auto emit_tile_element_loop = [=](bool tile_in_bounds) -> Status { - std::unique_ptr tile_element_loop = - llvm_ir::ForLoop::EmitForLoop("element_id_in_tile", - ir_builder_.getInt64(0), - ir_builder_.getInt64(kTileSize), - ir_builder_.getInt64(1), &ir_builder_); - - // Emit the body of the partial reduction loop. - llvm_ir::SetToFirstInsertPoint(tile_element_loop->GetBodyBasicBlock(), - &ir_builder_); - // x = lane_id + warpSize * (element_id_in_tile + warp_id * kTileSize); - llvm::Value* x = ir_builder_.CreateNSWAdd( - lane_id, - ir_builder_.CreateNSWMul( - ir_builder_.getInt64(kWarpSize), - ir_builder_.CreateNSWAdd( - tile_element_loop->GetIndVarValue(), - ir_builder_.CreateNSWMul(warp_id, - ir_builder_.getInt64(kTileSize))))); - - // Unless we know the tile is entirely in bounds, we have to emit a - // x-in-bounds check before reading from the input. - if (!tile_in_bounds) { - llvm_ir::LlvmIfData if_x_in_bounds_data = llvm_ir::EmitIfThenElse( - ir_builder_.CreateICmpULT(x, ir_builder_.getInt64(width)), - "x_in_bounds", &ir_builder_); - - // Points ir_builder_ to the then-block. 
- llvm_ir::SetToFirstInsertPoint(if_x_in_bounds_data.true_block, - &ir_builder_); - } + lane_id, ir_builder_.CreateNSWMul( + ir_builder_.getInt64(kWarpSize), + ir_builder_.CreateNSWAdd( + ir_builder_.getInt64(x_tile_size - 1), + ir_builder_.CreateNSWMul( + warp_id, ir_builder_.getInt64(x_tile_size))))); + + KernelSupportLibrary ksl( + &ir_builder_, + /*unroll_mode=*/xla::llvm_ir::UnrollMode::kFullyUnroll, + /*prevent_vectorization=*/false); + + // Emit a for-loop that partially reduces the elements in the given + // z-x-tile. + auto emit_z_x_tile_element_loop = [&](bool x_tile_in_bounds, + int64 x_tile_loop_bound) -> Status { + auto emit_z_tile_element_loop = [&](llvm::Value* z_indvar) -> Status { + llvm::Value* z = ir_builder_.CreateNSWAdd( + z_indvar, ir_builder_.CreateNSWMul( + ir_builder_.getInt64(z_tile_size), z_tile)); + + TF_RETURN_IF_ERROR(ksl.For( + "x_tile", + /*start=*/0, /*end=*/x_tile_loop_bound, /*step=*/1, + [&](llvm::Value* x_indvar) -> Status { + // x = lane_id + warpSize * (element_id_in_x_tile + warp_id * + // x_tile_size); + llvm::Value* x = ir_builder_.CreateNSWAdd( + lane_id, + ir_builder_.CreateNSWMul( + ir_builder_.getInt64(kWarpSize), + ir_builder_.CreateNSWAdd( + x_indvar, + ir_builder_.CreateNSWMul( + warp_id, ir_builder_.getInt64(x_tile_size))))); + + // Unless we know the x-tile is entirely in bounds, we have to + // emit a x-in-bounds check before reading from the input. + if (!x_tile_in_bounds) { + llvm_ir::LlvmIfData if_x_in_bounds_data = + llvm_ir::EmitIfThenElse(ir_builder_.CreateICmpULT( + x, ir_builder_.getInt64(width)), + "x_in_bounds", &ir_builder_); + // Points ir_builder_ to the then-block. + llvm_ir::SetToFirstInsertPoint(if_x_in_bounds_data.true_block, + &ir_builder_); + } + + // Emit code that reads the input element and accumulates it + // to the partial reduction result. 
+ llvm::Value* input_address = + ir_builder_.CreateAlloca(element_ir_type); + { + // {z,y,x} is an index to input_3d_tensor_shape + // [depth,height,width]. We need to convert that to an index + // to input_shape (the shape of the operand of "reduce"). + // This conversion is composed of a transposition from + // input_shape to normalized_input_shape and a reshape from + // normalized_input_shape to input_3d_tensor_shape. + const Shape normalized_input_shape = ShapeUtil:: + MakeShapeWithDescendingLayoutAndSamePhysicalLayout( + input_shape); + auto input_shape_min2maj = + LayoutUtil::MinorToMajor(input_shape); + const std::vector transpose_dimension_mapping( + input_shape_min2maj.rbegin(), input_shape_min2maj.rend()); + const Shape input_3d_tensor_shape = + ShapeUtil::MakeShapeWithDescendingLayout( + input_shape.element_type(), {depth, height, width}); + const llvm_ir::IrArray::Index input_3d_tensor_index( + {z, y, x}, input_3d_tensor_shape, &ir_builder_); + const llvm_ir::IrArray::Index input_index = + input_3d_tensor_index + .SourceIndexOfReshape(input_3d_tensor_shape, + normalized_input_shape, + &ir_builder_) + .SourceIndexOfTranspose( + normalized_input_shape, input_shape, + transpose_dimension_mapping, &ir_builder_); + + for (int i = 0; i != num_reduces; ++i) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, + input_gens[i](input_index)); + ir_builder_.CreateStore(input_ir_value, input_address); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {partial_reduction_result_addresses[i], input_address}, + partial_reduction_result_addresses[i])); + } + return EmitExtraOutputsForReduce(reduce, input_index, + extra_output_gens); + } + })); + return Status::OK(); + }; - // Emit code that reads the input element and accumulates it to the - // partial reduction result. - llvm::Value* input_address = ir_builder_.CreateAlloca(element_ir_type); - { - // {z,y,x} is an index to input_3d_tensor_shape [depth,height,width]. 
We - // need to convert that to an index to input_shape (the shape of the - // operand of "reduce"). This conversion is composed of a transposition - // from input_shape to normalized_input_shape and a reshape from - // normalized_input_shape to input_3d_tensor_shape. - const Shape normalized_input_shape = - ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( - input_shape); - auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape); - const std::vector transpose_dimension_mapping( - input_shape_min2maj.rbegin(), input_shape_min2maj.rend()); - const Shape input_3d_tensor_shape = - ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(), - {depth, height, width}); - const llvm_ir::IrArray::Index input_3d_tensor_index( - {z, y, x}, input_3d_tensor_shape, &ir_builder_); - const llvm_ir::IrArray::Index input_index = - input_3d_tensor_index - .SourceIndexOfReshape(input_3d_tensor_shape, - normalized_input_shape, &ir_builder_) - .SourceIndexOfTranspose(normalized_input_shape, input_shape, - transpose_dimension_mapping, - &ir_builder_); - for (int i = 0; i != num_reduces; ++i) { - TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, - input_gens[i](input_index)); - ir_builder_.CreateStore(input_ir_value, input_address); - TF_RETURN_IF_ERROR(EmitCallToNestedComputation( - *reducers[i], - {partial_reduction_result_addresses[i], input_address}, - partial_reduction_result_addresses[i])); - } - return EmitExtraOutputsForReduce(reduce, input_index, - extra_output_gens); - } + return ksl.For("z_tile", + /*start=*/0, /*end=*/z_tile_size, /*step=*/1, + emit_z_tile_element_loop); }; llvm::Value* tile_in_bounds = ir_builder_.CreateOr( - ir_builder_.getInt1(width % (kTileSize * kWarpSize) == 0), + ir_builder_.getInt1(width % (x_tile_size * kWarpSize) == 0), ir_builder_.CreateICmpULT(last_x, ir_builder_.getInt64(width))); - llvm_ir::LlvmIfData if_tile_in_bounds_data = - llvm_ir::EmitIfThenElse(tile_in_bounds, "tile_in_bounds", &ir_builder_); - 
llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.true_block, - &ir_builder_); - TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/true)); - llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.false_block, - &ir_builder_); - TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/false)); - // After the if-then-else statement on tile_in_bounds, emit calls to - // shfl_down that accumulate the partial reduction results of all threads - // from the warp. - llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.after_block, - &ir_builder_); + TF_RETURN_IF_ERROR( + ksl.If(tile_in_bounds, + /*true_block_generator=*/ + [&]() -> Status { + return emit_z_x_tile_element_loop(/*x_tile_in_bounds=*/true, + x_tile_size); + }, + /*false_block_generator=*/ + [&]() -> Status { + return emit_z_x_tile_element_loop( + /*x_tile_in_bounds=*/false, + CeilOfRatio(width % (x_tile_size * kWarpSize), kWarpSize)); + })); + + // After accumulating the elements of the z_x_tile, emit calls to + // shfl_down that accumulate the partial reduction results of all + // threads in a warp. int bit_width = llvm_ir::GetSizeInBits(element_ir_type); // bitcast cannot be applied to aggregate types (even packed ones), so we // instead bitcast addresses of load/store to intN* of the same bit-width. 
@@ -1666,16 +1732,24 @@ Status IrEmitterUnnested::EmitRowReduction( reduce_output_shapes[i]), &ir_builder_), &ir_builder_, "output_element_address"); - TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( - *reducers[i], output_address, partial_reduction_result_addresses[i])); + if (x_tile_size * z_tile_size < depth * width) { + TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( + *reducers[i], output_address, + partial_reduction_result_addresses[i])); + } else { + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {output_address, partial_reduction_result_addresses[i]}, + output_address)); + } } return Status::OK(); }; // Emit a parallel loop that iterates through every input tiles. Shape tiled_input_shape = ShapeUtil::MakeShapeWithLayout( - reduce->shape().element_type(), {depth, height, width_in_tiles}, - {2, 1, 0}); + reduce->shape().element_type(), + {depth / z_tile_size, height, width_in_tiles}, {2, 1, 0}); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( tiled_input_shape, ir_emitter_context_->device_description()); CHECK(LastThunk()->kind() == Thunk::Kind::kSequential); diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc index 23d2d4e87d..1f6e3c829f 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc @@ -15,53 +15,57 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" -#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" namespace xla { -void KernelSupportLibrary::For( +Status KernelSupportLibrary::For( tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, llvm::Value* step, - const std::function& for_body_generator) { - If(ir_builder_->CreateICmpSLT(start, end), [&]() { - for_body_generator(start, /*is_first_iteration=*/true); - For(name, ir_builder_->CreateAdd(start, step), end, step, - [&](llvm::Value* iv) { for_body_generator(iv, false); }); + const std::function& for_body_generator) { + return If(ir_builder_->CreateICmpSLT(start, end), [&]() -> Status { + TF_RETURN_IF_ERROR(for_body_generator(start, /*is_first_iteration=*/true)); + return For(name, ir_builder_->CreateAdd(start, step), end, step, + [&](llvm::Value* iv) { return for_body_generator(iv, false); }); }); } -void KernelSupportLibrary::For( +Status KernelSupportLibrary::For( tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, llvm::Value* step, bool peel_first_iteration, - const std::function& for_body_generator) { + const std::function& + for_body_generator) { if (peel_first_iteration) { - For(name, start, end, step, true, - [&](llvm::Value* indvar, bool is_first_iteration) { - for_body_generator(indvar, ir_builder_->getInt1(is_first_iteration)); - }); + return For(name, start, end, step, true, + [&](llvm::Value* indvar, bool is_first_iteration) -> Status { + return for_body_generator( + indvar, ir_builder_->getInt1(is_first_iteration)); + }); } else { std::unique_ptr loop = llvm_ir::ForLoop::EmitForLoop( name, start, end, step, ir_builder_, - /*prevent_unrolling=*/prevent_unrolling_, + /*unroll_mode=*/unroll_mode_, /*prevent_vectorization=*/prevent_vectorization_); ir_builder_->SetInsertPoint(&loop->GetBodyBasicBlock()->back()); - for_body_generator(loop->GetIndVarValue(), - 
/*is_first_iteration=*/ir_builder_->CreateICmpEQ( - loop->GetIndVarValue(), start)); + TF_RETURN_IF_ERROR( + for_body_generator(loop->GetIndVarValue(), + /*is_first_iteration=*/ir_builder_->CreateICmpEQ( + loop->GetIndVarValue(), start))); llvm_ir::SetToLastInsertPoint(loop->GetExitBasicBlock(), ir_builder_); + return Status::OK(); } } -void KernelSupportLibrary::If( - llvm::Value* condition, const std::function& true_block_generator, - const std::function& false_block_generator) { +Status KernelSupportLibrary::If( + llvm::Value* condition, const std::function& true_block_generator, + const std::function& false_block_generator) { llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(condition, "", ir_builder_); ir_builder_->SetInsertPoint(&if_data.true_block->back()); - true_block_generator(); + TF_RETURN_IF_ERROR(true_block_generator()); ir_builder_->SetInsertPoint(&if_data.false_block->back()); - false_block_generator(); + TF_RETURN_IF_ERROR(false_block_generator()); llvm_ir::SetToLastInsertPoint(if_data.after_block, ir_builder_); + return Status::OK(); } void KernelSupportLibrary::EmitAndCallOutlinedKernel( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h index 64b935bbf1..e17c649e52 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -21,6 +21,7 @@ limitations under the License. #include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Value.h" +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -30,13 +31,14 @@ namespace xla { class KernelSupportLibrary { public: // `ir_builder` is the llvm::IRBuilder instance used to generate LLVM IR. 
- // If `prevent_unrolling` is true then unrolling is explicitly disabled on - // every loop generated by this instance of KernelSupportLibrary. - explicit KernelSupportLibrary(llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = true, - bool prevent_vectorization = true) + // `unroll_mode` specifies the desired LLVM unrolling behavior for every loop + // generated by this instance of KernelSupportLibrary. + explicit KernelSupportLibrary( + llvm::IRBuilder<>* ir_builder, + llvm_ir::UnrollMode unroll_mode = llvm_ir::UnrollMode::kNoUnroll, + bool prevent_vectorization = true) : ir_builder_(ir_builder), - prevent_unrolling_(prevent_unrolling), + unroll_mode_(unroll_mode), prevent_vectorization_(prevent_vectorization) {} // Generates the following control flow structure: @@ -46,19 +48,41 @@ class KernelSupportLibrary { // for (i64 i = `start` + `step`; i s< `end`; i += `step`) // `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/false)`; // } - void For( + Status For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator); + + void ForReturnVoid( tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, llvm::Value* step, const std::function& - for_body_generator); + for_body_generator) { + CHECK_EQ(Status::OK(), + For(name, start, end, step, + [&](llvm::Value* ind_var, bool is_first_iteration) -> Status { + for_body_generator(ind_var, is_first_iteration); + return Status::OK(); + })); + } + + Status For(tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& + for_body_generator) { + return For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); + } - void For( + void ForReturnVoid( tensorflow::StringPiece name, int64 start, int64 end, int64 step, const std::function& for_body_generator) { - For(name, /*start=*/ir_builder_->getInt64(start), - 
/*end=*/ir_builder_->getInt64(end), - /*step=*/ir_builder_->getInt64(step), for_body_generator); + ForReturnVoid(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); } // Generates the following control flow structure if `peel_first_iteration` is @@ -75,46 +99,101 @@ class KernelSupportLibrary { // for (i64 i = `start`; i s< `end`; i += `step`) // `for_body_generator(/*ind_var=*/,i, // /*is_first_iteration=*/,(i != `start`))`; - void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - llvm::Value* step, bool peel_first_iteration, - const std::function& + Status For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, bool peel_first_iteration, + const std::function& + for_body_generator); + + void ForReturnVoid(tensorflow::StringPiece name, llvm::Value* start, + llvm::Value* end, llvm::Value* step, + bool peel_first_iteration, + const std::function& + for_body_generator) { + TF_CHECK_OK(For( + name, start, end, step, peel_first_iteration, + [&](llvm::Value* ind_var, llvm::Value* is_first_iteration) -> Status { + for_body_generator(ind_var, is_first_iteration); + return Status::OK(); + })); + } + + Status For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + int64 step, bool peel_first_iteration, + const std::function& + for_body_generator) { + return For(name, /*start=*/start, /*end=*/end, + /*step=*/ir_builder_->getInt64(step), peel_first_iteration, for_body_generator); + } + + void ForReturnVoid(tensorflow::StringPiece name, llvm::Value* start, + llvm::Value* end, int64 step, bool peel_first_iteration, + const std::function& + for_body_generator) { + ForReturnVoid(name, /*start=*/start, /*end=*/end, + /*step=*/ir_builder_->getInt64(step), peel_first_iteration, + for_body_generator); + } - void For(tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, - int64 step, bool 
peel_first_iteration, - const std::function& - for_body_generator) { - For(name, /*start=*/start, /*end=*/end, - /*step=*/ir_builder_->getInt64(step), peel_first_iteration, - for_body_generator); + Status For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + llvm::Value* step, + const std::function& for_body_generator) { + return For(name, start, end, step, + /*peel_first_iteration=*/false, + [&](llvm::Value* indvar, llvm::Value*) -> Status { + return for_body_generator(indvar); + }); } - void For( + void ForReturnVoid( tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, llvm::Value* step, const std::function& for_body_generator) { - For(name, start, end, step, - /*peel_first_iteration=*/false, - [&](llvm::Value* indvar, llvm::Value*) { for_body_generator(indvar); }); + ForReturnVoid(name, start, end, step, + /*peel_first_iteration=*/false, + [&](llvm::Value* indvar, llvm::Value*) { + return for_body_generator(indvar); + }); + } + + Status For( + tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, + int64 step, + const std::function& for_body_generator) { + return For(name, start, end, ir_builder_->getInt64(step), + /*peel_first_iteration=*/false, + [&](llvm::Value* indvar, llvm::Value*) -> Status { + return for_body_generator(indvar); + }); } - void For( + void ForReturnVoid( tensorflow::StringPiece name, llvm::Value* start, llvm::Value* end, int64 step, const std::function& for_body_generator) { - For(name, start, end, ir_builder_->getInt64(step), - /*peel_first_iteration=*/false, - [&](llvm::Value* indvar, llvm::Value*) { for_body_generator(indvar); }); + ForReturnVoid(name, start, end, ir_builder_->getInt64(step), + for_body_generator); + } + + Status For( + tensorflow::StringPiece name, int64 start, int64 end, int64 step, + const std::function& for_body_generator) { + return For(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), 
for_body_generator); } - void For( + void ForReturnVoid( tensorflow::StringPiece name, int64 start, int64 end, int64 step, const std::function& for_body_generator) { - For(name, /*start=*/ir_builder_->getInt64(start), - /*end=*/ir_builder_->getInt64(end), - /*step=*/ir_builder_->getInt64(step), for_body_generator); + ForReturnVoid(name, /*start=*/ir_builder_->getInt64(start), + /*end=*/ir_builder_->getInt64(end), + /*step=*/ir_builder_->getInt64(step), for_body_generator); } // Generates the following control flow structure: @@ -123,9 +202,25 @@ class KernelSupportLibrary { // `true_block_generator()`; // else // `false_block_generator()`; - void If(llvm::Value* condition, - const std::function& true_block_generator, - const std::function& false_block_generator = []() {}); + Status If(llvm::Value* condition, + const std::function& true_block_generator, + const std::function& false_block_generator = + []() -> Status { return Status::OK(); }); + + void IfReturnVoid(llvm::Value* condition, + const std::function& true_block_generator, + const std::function& false_block_generator = []() { + }) { + TF_CHECK_OK(If(condition, + [&]() { + true_block_generator(); + return Status::OK(); + }, + [&]() { + false_block_generator(); + return Status::OK(); + })); + } using ArgumentVector = tensorflow::gtl::ArraySlice; @@ -183,7 +278,7 @@ class KernelSupportLibrary { private: llvm::IRBuilder<>* ir_builder_; - bool prevent_unrolling_; + llvm_ir::UnrollMode unroll_mode_; bool prevent_vectorization_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 497b48ff22..9f867014fb 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -34,7 +34,7 @@ namespace llvm_ir { ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - llvm::Value* step, bool 
prevent_unrolling, + llvm::Value* step, UnrollMode unroll_mode, bool prevent_vectorization) : prefix_(std::string(prefix)), suffix_(std::string(suffix)), @@ -42,15 +42,15 @@ ForLoop::ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, end_index_(end_index), step_(step), insert_before_bb_(nullptr), - prevent_unrolling_(prevent_unrolling), + unroll_mode_(unroll_mode), prevent_vectorization_(prevent_vectorization) {} /* static */ std::unique_ptr ForLoop::EmitForLoop( tensorflow::StringPiece prefix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling, bool prevent_vectorization) { + UnrollMode unroll_mode, bool prevent_vectorization) { std::unique_ptr loop(new ForLoop(prefix, /*suffix=*/"", start_index, - end_index, step, prevent_unrolling, + end_index, step, unroll_mode, prevent_vectorization)); loop->Emit(ir_builder); return loop; @@ -147,11 +147,12 @@ void ForLoop::Emit(llvm::IRBuilder<>* ir_builder) { std::vector ForLoop::GetLoopMetadata( llvm::IRBuilder<>* ir_builder) { const char* const kLlvmLoopUnrollDisableMDName = "llvm.loop.unroll.disable"; + const char* const kLlvmLoopUnrollFullMDName = "llvm.loop.unroll.full"; const char* const kLlvmLoopVectorizeMDName = "llvm.loop.vectorize.enable"; llvm::LLVMContext* ctx = &start_index_->getContext(); std::vector result; - if (prevent_unrolling_) { + if (unroll_mode_ == xla::llvm_ir::UnrollMode::kNoUnroll) { result.push_back(llvm::MDNode::get( *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollDisableMDName)})); } @@ -162,6 +163,10 @@ std::vector ForLoop::GetLoopMetadata( llvm::ConstantAsMetadata::get(ir_builder->getFalse())})); } + if (unroll_mode_ == xla::llvm_ir::UnrollMode::kFullyUnroll) { + result.push_back(llvm::MDNode::get( + *ctx, {llvm::MDString::get(*ctx, kLlvmLoopUnrollFullMDName)})); + } return result; } @@ -178,25 +183,25 @@ llvm::BasicBlock* ForLoop::CreateLoopBB(tensorflow::StringPiece name, std::unique_ptr 
ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, - bool prevent_unrolling, + UnrollMode unroll_mode, bool prevent_vectorization) { return AddLoop(suffix, start_index, end_index, ir_builder_->getInt64(1), - prevent_unrolling, prevent_vectorization); + unroll_mode, prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling, + UnrollMode unroll_mode, bool prevent_vectorization) { if (inner_loop_body_bb_ != nullptr) { // Create this loop inside the previous one. ir_builder_->SetInsertPoint(&*inner_loop_body_bb_->getFirstInsertionPt()); } std::unique_ptr loop(new ForLoop( - /*prefix=*/name_, suffix, start_index, end_index, stride, - prevent_unrolling, prevent_vectorization)); + /*prefix=*/name_, suffix, start_index, end_index, stride, unroll_mode, + prevent_vectorization)); loop->Emit(ir_builder_); if (outer_loop_preheader_bb_ == nullptr) { @@ -215,23 +220,23 @@ std::unique_ptr ForLoopNest::AddLoop(tensorflow::StringPiece suffix, std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, tensorflow::StringPiece suffix, - bool prevent_unrolling, + UnrollMode unroll_mode, bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), - ir_builder_->getInt64(end_index), prevent_unrolling, + ir_builder_->getInt64(end_index), unroll_mode, prevent_vectorization); } std::unique_ptr ForLoopNest::AddLoop(int64 start_index, int64 end_index, int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling, + UnrollMode unroll_mode, bool prevent_vectorization) { CHECK_LE(start_index, end_index); return AddLoop(suffix, ir_builder_->getInt64(start_index), ir_builder_->getInt64(end_index), - ir_builder_->getInt64(stride), prevent_unrolling, + ir_builder_->getInt64(stride), unroll_mode, prevent_vectorization); } diff --git 
a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index d915f95db1..4e403cd994 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -34,6 +34,12 @@ limitations under the License. namespace xla { namespace llvm_ir { +enum class UnrollMode { + kDefaultUnroll, + kFullyUnroll, + kNoUnroll, +}; + // A class for constructing a for-loop in LLVM IR. class ForLoop { public: @@ -69,12 +75,13 @@ class ForLoop { // LLVM IR. If non-empty, it is prepended to the name of the induction // variable value and each basic block created for the loop. // - // If `prevent_unrolling` is true then emit metadata that directs LLVM to not - // unroll the generated loop. + // `unroll_mode` specifies the desired LLVM unrolling behavior for generated + // loop. static std::unique_ptr EmitForLoop( tensorflow::StringPiece prefix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, - bool prevent_unrolling = false, bool prevent_vectorization = false); + UnrollMode unroll_mode = llvm_ir::UnrollMode::kDefaultUnroll, + bool prevent_vectorization = false); // The names of the blocks follow LLVM's conventions. Control flow amongst the // blocks for the example C code looks like: @@ -128,7 +135,7 @@ class ForLoop { ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, - bool prevent_unrolling, bool prevent_vectorization); + UnrollMode unroll_mode, bool prevent_vectorization); // Emit the loop at the insert point of the builder. 
void Emit(llvm::IRBuilder<>* ir_builder); @@ -161,7 +168,7 @@ class ForLoop { llvm::BasicBlock* body_bb_; llvm::BasicBlock* exit_bb_; llvm::Value* indvar_; - bool prevent_unrolling_; + UnrollMode unroll_mode_; bool prevent_vectorization_; TF_DISALLOW_COPY_AND_ASSIGN(ForLoop); @@ -182,34 +189,34 @@ class ForLoopNest { // Adds a loop to the nest. If no loop has been added yet then emit a loop at // the current insert point of the given builder. If one or more loops have - // been added then emit loop inside the body of the last added loop. If - // prevent_unrolling is true, then metadata is emitting directing LLVM to not - // unroll this loop. - std::unique_ptr AddLoop(tensorflow::StringPiece suffix, - llvm::Value* start_index, - llvm::Value* end_index, llvm::Value* stride, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + // been added then emit loop inside the body of the last added loop. + // unroll_mode is used to emit metadata that controls LLVM unrolling. + std::unique_ptr AddLoop( + tensorflow::StringPiece suffix, llvm::Value* start_index, + llvm::Value* end_index, llvm::Value* stride, + UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. - std::unique_ptr AddLoop(tensorflow::StringPiece suffix, - llvm::Value* start_index, - llvm::Value* end_index, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + std::unique_ptr AddLoop( + tensorflow::StringPiece suffix, llvm::Value* start_index, + llvm::Value* end_index, + UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, + bool prevent_vectorization = false); // A convenient wrapper of the other flavor of AddLoop. The given start and // end index are constant. 
- std::unique_ptr AddLoop(int64 start_index, int64 end_index, - int64 stride, tensorflow::StringPiece suffix, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + std::unique_ptr AddLoop( + int64 start_index, int64 end_index, int64 stride, + tensorflow::StringPiece suffix, + UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, + bool prevent_vectorization = false); // Like the above, except that it defaults to a stride of one. - std::unique_ptr AddLoop(int64 start_index, int64 end_index, - tensorflow::StringPiece suffix, - bool prevent_unrolling = false, - bool prevent_vectorization = false); + std::unique_ptr AddLoop( + int64 start_index, int64 end_index, tensorflow::StringPiece suffix, + UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, + bool prevent_vectorization = false); // Add loops to iterate through the indices within the specified // shape. The returned index collects the induction variables of the -- GitLab From 73d6c7bef536d4a15cc1c57d8635d3d670ef34de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 21:31:57 -0700 Subject: [PATCH 042/365] Wire in the kDomain infrastructure brought in by cl/193798254. 
PiperOrigin-RevId: 199745064 --- .../compiler/xla/service/computation_layout.h | 9 +++ tensorflow/compiler/xla/service/hlo_cse.cc | 11 +-- .../compiler/xla/service/hlo_instruction.cc | 8 +-- .../compiler/xla/service/hlo_instruction.h | 5 +- .../compiler/xla/service/hlo_sharding.cc | 27 +++++++ .../compiler/xla/service/hlo_sharding.h | 15 +++- .../xla/service/hlo_sharding_metadata.cc | 71 +++++++++---------- .../compiler/xla/service/tuple_simplifier.cc | 24 ++----- 8 files changed, 102 insertions(+), 68 deletions(-) diff --git a/tensorflow/compiler/xla/service/computation_layout.h b/tensorflow/compiler/xla/service/computation_layout.h index 53c3a3f7b7..6975f387b4 100644 --- a/tensorflow/compiler/xla/service/computation_layout.h +++ b/tensorflow/compiler/xla/service/computation_layout.h @@ -32,12 +32,21 @@ namespace xla { // mutable layouts. class ComputationLayout { public: + // Creates a new ComputationLayout with the given result layout. + explicit ComputationLayout(ShapeLayout result_layout) + : result_layout_(std::move(result_layout)) {} + // Constructs a ComputationLayout from a ProgramShape. The layouts of the // parameters and results are set to the default layout. Layouts in the // ProgramShape are ignored if ignore_layouts is true. explicit ComputationLayout(const ProgramShape& program_shape, bool ignore_layouts = true); + // Adds a new parameter layout to the computation layout. + void add_parameter_layout(ShapeLayout shape_layout) { + parameter_layouts_.push_back(std::move(shape_layout)); + } + // Returns the layout of a particular parameter. 
const ShapeLayout& parameter_layout(int64 param_no) const { return parameter_layouts_[param_no]; diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc index dab946a099..a0ee889623 100644 --- a/tensorflow/compiler/xla/service/hlo_cse.cc +++ b/tensorflow/compiler/xla/service/hlo_cse.cc @@ -135,17 +135,18 @@ StatusOr HloCSE::Run(HloModule* module) { // instruction for each class. tensorflow::gtl::FlatSet - representatives(/*N=*/1024, &CseHash, cse_equal); - + representatives(/*N=*/computation->instruction_count() + 1, &CseHash, + cse_equal); for (auto instruction : computation->MakeInstructionPostOrder()) { // If the instruction has zero operands (constants, parameters, etc.) skip // over it. if (instruction->operand_count() == 0) { continue; } - - // Skip instructions which have side effects. - if (instruction->HasSideEffect()) { + // Skip instructions which have side effects or are a domain (which must + // not be CSE-ed). + if (instruction->HasSideEffect() || + instruction->opcode() == HloOpcode::kDomain) { continue; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 570ad5459a..b6e2056600 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -844,12 +844,12 @@ HloInstruction::CreateBroadcastSequence( return instruction; } -void HloInstruction::set_device_sharding(int64 device) { - HloSharding device_sharding = HloSharding::AssignDevice(device); +void HloInstruction::set_single_sharding(const HloSharding& sharding) { + CHECK(!sharding.IsTuple()) << sharding; if (ShapeUtil::IsTuple(shape())) { - set_sharding(HloSharding::Tuple(device_sharding.GetAsShapeTree(shape()))); + set_sharding(HloSharding::Tuple(sharding.GetAsShapeTree(shape()))); } else { - set_sharding(device_sharding); + set_sharding(sharding); } } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h 
b/tensorflow/compiler/xla/service/hlo_instruction.h index 6232d55e1b..c08806b33b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1119,8 +1119,11 @@ class HloInstruction { void set_sharding(const HloSharding& sharding) { sharding_ = MakeUnique(sharding); } + void set_single_sharding(const HloSharding& sharding); // Sets a sharding that assigns the current instruction to device. - void set_device_sharding(int64 device); + void set_device_sharding(int64 device) { + set_single_sharding(HloSharding::AssignDevice(device)); + } // Remove any sharding from this operator. void clear_sharding() { sharding_ = nullptr; } // Return true if this operator has a sharding assigned. diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 58224ef870..4fbb7f69ac 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -141,6 +141,20 @@ StatusOr> HloSharding::AsShapeTree( } } +StatusOr HloSharding::GetTupleSharding(const Shape& shape) const { + if (IsTuple()) { + // TODO(b/109903108): An empty tuple has one leaf for ShapeTree, while it + // has zero leaves for ShapeUtil. This needs cleanup. + int64 shape_leaves = + ShapeUtil::IsEmptyTuple(shape) ? 
1 : ShapeUtil::GetLeafCount(shape); + TF_RET_CHECK(shape_leaves == tuple_elements_.size()) + << "Shape " << ShapeUtil::HumanString(shape) << " has " << shape_leaves + << " leaf nodes while this sharding has " << tuple_elements_.size(); + return *this; + } + return Tuple(ShapeTree(shape, *this)); +} + StatusOr HloSharding::UniqueDevice() const { if (IsTuple()) { if (tuple_elements_.empty()) { @@ -389,6 +403,19 @@ HloSharding HloSharding::GetSubSharding(const Shape& shape, : sub_shape_tree.element(ShapeIndex({})); } +tensorflow::gtl::optional HloSharding::ExtractSingleSharding() + const { + if (!IsTuple()) { + return *this; + } + for (int64 i = 1; i < tuple_elements_.size(); ++i) { + if (tuple_elements_[0] != tuple_elements_[i]) { + return tensorflow::gtl::optional(); + } + } + return tuple_elements_.front(); +} + std::ostream& operator<<(std::ostream& out, const HloSharding& sharding) { out << sharding.ToString(); return out; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index f4a0fb626f..0a213311b4 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -72,8 +72,7 @@ class HloSharding { // elements for every leaf shape contained in the tuple. static HloSharding Tuple(const ShapeTree& sub_shardings) { std::vector flattened_list; - flattened_list.reserve( - std::distance(sub_shardings.leaf_begin(), sub_shardings.leaf_end())); + flattened_list.reserve(sub_shardings.leaf_count()); for (const auto& index_to_sharding : sub_shardings.leaves()) { flattened_list.push_back(index_to_sharding.second); } @@ -172,6 +171,18 @@ class HloSharding { // REQUIRES: IsTuple() HloSharding GetSubSharding(const Shape& shape, const ShapeIndex& index) const; + // If the current sharding is a tuple sharding, return itself as result. + // Otherwise returns a tuple sharding for the input shape, with all the leaves + // having this object sharding. 
+ StatusOr GetTupleSharding(const Shape& shape) const; + + // Extracts the sharding that is common within the current sharding. + // If the current sharding is not a tuple sharding, the current sharding will + // be returned. If it is a tuple, and all the tuple elements are common, the + // common element will be returned. Otherwise the optional will contain no + // value. + tensorflow::gtl::optional ExtractSingleSharding() const; + bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && ShapeUtil::Compatible(tile_shape_, other.tile_shape_) && diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc index 82cff2a4b7..7b4b071af4 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc @@ -31,32 +31,22 @@ struct PassThrough { HloInstruction* operand = nullptr; }; -void SetDeviceSharding(HloInstruction* instruction, int64 device) { - VLOG(4) << " " << instruction->name() << " to device " << device; - instruction->set_device_sharding(device); -} - -tensorflow::gtl::optional ShardingUniqueDevice( - const HloSharding& sharding) { - if (sharding.IsTileMaximal()) { - auto device = sharding.UniqueDevice(); - if (device.ok()) { - return device.ValueOrDie(); - } - } - return tensorflow::gtl::optional(); +void SetSingleSharding(HloInstruction* instruction, + const HloSharding& sharding) { + VLOG(4) << " " << instruction->name() << " to " << sharding; + instruction->set_single_sharding(sharding); } bool ShardingMatches(const HloSharding& sharding1, const HloSharding& sharding2) { - auto device1 = ShardingUniqueDevice(sharding1); - if (device1) { - auto device2 = ShardingUniqueDevice(sharding2); - if (device2) { - return *device1 == *device2; + auto single_sharding1 = sharding1.ExtractSingleSharding(); + if (single_sharding1) { + auto single_sharding2 = 
sharding2.ExtractSingleSharding(); + if (single_sharding2) { + return *single_sharding1 == single_sharding2; } } - // Anything which is not tile maximal with unique device, gets a full sharding + // Anything which is not unique across all elements, gets a full sharding // compare. return sharding1 == sharding2; } @@ -119,21 +109,21 @@ Status FixupPassThroughDomainLinks(const DomainMetadata::Domain& domain, std::unique_ptr CloneShardingForDomain( const HloSharding& sharding) { - auto device = ShardingUniqueDevice(sharding); - if (!device) { + auto single_sharding = sharding.ExtractSingleSharding(); + if (!single_sharding) { return MakeUnique(sharding); } - return MakeUnique(HloSharding::AssignDevice(*device)); + return MakeUnique(*single_sharding); } -Status ApplyDomainDeviceSharding(const DomainMetadata::Domain& domain, - int64 device) { - VLOG(4) << "Applying device " << device << " sharding"; +Status ApplyDomainSingleSharding(const DomainMetadata::Domain& domain, + const HloSharding& sharding) { + VLOG(4) << "Applying " << sharding << " sharding"; for (HloInstruction* instruction : domain.instructions) { // We only change instructions without sharding, since otherwise we might // mess up with eventual HLO passes which has knowledge of it. 
if (!instruction->has_sharding()) { - SetDeviceSharding(instruction, device); + SetSingleSharding(instruction, sharding); } else { VLOG(4) << " " << instruction->name() << " already has sharding " << instruction->sharding(); @@ -186,12 +176,15 @@ StatusOr ApplyDomainShardingPass(const DomainMetadata::Domain& domain, const HloSharding* tuple_sharding = GetOperandSharding(tuple, domain, sharding); if (tuple_sharding != nullptr) { - TF_RET_CHECK(tuple_sharding->IsTuple()) << tuple->ToString(); - HloSharding sub_sharding = tuple_sharding->GetSubSharding( - tuple->shape(), {instruction->tuple_index()}); - VLOG(4) << " " << instruction->name() << " to sharding " - << sub_sharding; - instruction->set_sharding(sub_sharding); + if (tuple_sharding->IsTuple()) { + HloSharding sub_sharding = tuple_sharding->GetSubSharding( + tuple->shape(), {instruction->tuple_index()}); + VLOG(4) << " " << instruction->name() << " to sharding " + << sub_sharding; + instruction->set_sharding(sub_sharding); + } else { + SetSingleSharding(instruction, *tuple_sharding); + } ++assigned; } } else if (instruction->opcode() == HloOpcode::kTuple) { @@ -242,12 +235,12 @@ StatusOr ApplyDomainShardingPass(const DomainMetadata::Domain& domain, Status ApplyDomainSharding(const DomainMetadata::Domain& domain, const HloSharding& sharding) { - auto device = ShardingUniqueDevice(sharding); - if (device) { - // Shortcut the simple case. We have a unique device sharding, so we call - // the ApplyDomainDeviceSharding() API which will apply array or tuple - // shaped device sharding to the domain instructions. - return ApplyDomainDeviceSharding(domain, *device); + auto single_sharding = sharding.ExtractSingleSharding(); + if (single_sharding) { + // Shortcut the simple case. We have a unique sharding, so we call + // the ApplyDomainSingleSharding() API which will apply array or tuple + // shaped sharding to the domain instructions. 
+ return ApplyDomainSingleSharding(domain, *single_sharding); } VLOG(1) << "Assigning non-trivial sharding " << sharding; for (;;) { diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index d668855084..e536c8afbf 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -69,7 +69,6 @@ StatusOr TupleSimplifier::Run(HloModule* module) { // Tuple // HloInstruction* top_tuple = nullptr; - HloInstruction* first_gte = nullptr; bool can_simplify = true; for (int64 operand_number = 0; operand_number < instruction->operand_count(); ++operand_number) { @@ -79,17 +78,10 @@ StatusOr TupleSimplifier::Run(HloModule* module) { can_simplify = false; break; } - if (first_gte == nullptr) { - first_gte = operand; - } else if (!first_gte->has_compatible_sharding(operand)) { - can_simplify = false; - break; - } if (top_tuple == nullptr) { top_tuple = operand->mutable_operand(0); if (!ShapeUtil::Compatible(top_tuple->shape(), - instruction->shape()) || - !instruction->has_compatible_sharding(top_tuple)) { + instruction->shape())) { can_simplify = false; break; } @@ -118,14 +110,12 @@ StatusOr TupleSimplifier::Run(HloModule* module) { HloInstruction* element_source = instruction->mutable_operand(0)->mutable_operand( instruction->tuple_index()); - if (instruction->has_compatible_sharding(element_source)) { - changed = true; - TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); - for (HloInstruction* user : element_source->users()) { - if (user->opcode() == HloOpcode::kTuple || - user->opcode() == HloOpcode::kGetTupleElement) { - worklist.push(user); - } + changed = true; + TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(element_source)); + for (HloInstruction* user : element_source->users()) { + if (user->opcode() == HloOpcode::kTuple || + user->opcode() == HloOpcode::kGetTupleElement) { + worklist.push(user); } } } -- GitLab From 
4bc01f8f63074337c846a1b60a4a2b88d420bd56 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 7 Jun 2018 22:32:00 -0700 Subject: [PATCH 043/365] Upgrade Eigen version. Remove eigen_fix_cuda_compilation.patch because the fixes in the patch have been incorporated into the Eigen opensource repository with this commit: https://bitbucket.org/eigen/eigen/commits/60ab50654998f1cbe2791d49fea94d0ca5ae08a8 PiperOrigin-RevId: 199749536 --- tensorflow/workspace.bzl | 9 +++-- third_party/eigen_fix_cuda_compilation.patch | 38 -------------------- 2 files changed, 4 insertions(+), 43 deletions(-) delete mode 100644 third_party/eigen_fix_cuda_compilation.patch diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b007d3f597..ce4a009974 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -107,13 +107,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "eigen_archive", urls = [ - "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/6913f0cf7d06.tar.gz", - "https://bitbucket.org/eigen/eigen/get/6913f0cf7d06.tar.gz", + "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/267806ed9b4f.tar.gz", + "https://bitbucket.org/eigen/eigen/get/267806ed9b4f.tar.gz", ], - sha256 = "791b836cacd03e20bae5bdd25f1c4a5505a0a9975ba94a61eb4e2631fbd1d53a", - strip_prefix = "eigen-eigen-6913f0cf7d06", + sha256 = "ade57357093463cab9e4e51cd5749c81483a75451b1471a3ebc73f9c1d14043b", + strip_prefix = "eigen-eigen-267806ed9b4f", build_file = clean_dep("//third_party:eigen.BUILD"), - patch_file = clean_dep("//third_party:eigen_fix_cuda_compilation.patch") ) tf_http_archive( diff --git a/third_party/eigen_fix_cuda_compilation.patch b/third_party/eigen_fix_cuda_compilation.patch deleted file mode 100644 index b921a7c31d..0000000000 --- a/third_party/eigen_fix_cuda_compilation.patch +++ /dev/null @@ -1,38 +0,0 @@ -diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h ---- 
a/Eigen/src/Core/ProductEvaluators.h -+++ b/Eigen/src/Core/ProductEvaluators.h -@@ -137,7 +137,7 @@ struct Assignment::type> - { - typedef Product SrcXprType; -- static EIGEN_STRONG_INLINE -+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { - Index dstRows = src.rows(); -@@ -390,7 +390,7 @@ struct generic_product_impl::Scalar Scalar; - - template -- static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) -+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) - { - // Same as: dst.noalias() = lhs.lazyProduct(rhs); - // but easier on the compiler side -@@ -398,14 +398,14 @@ struct generic_product_impl -- static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) -+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) - { - // dst.noalias() += lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); - } - - template -- static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) -+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) - { - // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); -- GitLab From 8666eff2359ccacd528dfda404a1f8ae35762542 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Thu, 7 Jun 2018 23:42:58 -0700 Subject: [PATCH 044/365] Add checkpointing support for ReshufflingDataset. This allows checkpointing input pipelines with .shuffle(reshuffle_each_iteration=True[default]) and .list_files(). 
PiperOrigin-RevId: 199753836 --- .../contrib/data/python/kernel_tests/BUILD | 2 + .../dataset_serialization_test_base.py | 12 +- .../kernel_tests/shuffle_dataset_op_test.py | 100 +++++++- .../core/kernels/data/shuffle_dataset_op.cc | 217 ++++++++++++------ 4 files changed, 244 insertions(+), 87 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index fd15103870..be834d7dfd 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -462,6 +462,7 @@ py_test( tags = ["no_pip"], deps = [ ":dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:iterator_ops", "//tensorflow/contrib/data/python/ops:shuffle_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -469,6 +470,7 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:training", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:iterator_ops", "//third_party/py/numpy", diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py index 78ecce8f7d..393f08850b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py +++ b/tensorflow/contrib/data/python/kernel_tests/dataset_serialization_test_base.py @@ -467,7 +467,8 @@ class DatasetSerializationTestBase(test.TestCase): ckpt_saved=False, init_before_restore=False, sparse_tensors=False, - verify_exhausted=True): + verify_exhausted=True, + save_checkpoint_at_end=True): """Generates elements from input dataset while stopping at break points. Produces `num_outputs` outputs and saves the state of the iterator in the @@ -490,6 +491,10 @@ class DatasetSerializationTestBase(test.TestCase): sparse_tensors: Whether dataset is built from SparseTensor(s). 
verify_exhausted: Whether to verify that the iterator has been exhausted after producing `num_outputs` elements. + save_checkpoint_at_end: Whether to save a checkpoint after producing all + outputs. If False, checkpoints are saved each break point but not at the + end. Note that checkpoints overwrite each other so there is always only + a single checkpoint available. Defaults to True. Returns: A list of `num_outputs` items. @@ -526,8 +531,9 @@ class DatasetSerializationTestBase(test.TestCase): if i == len(break_points) and verify_exhausted: with self.assertRaises(errors.OutOfRangeError): sess.run(get_next_op) - self._save(sess, saver) - ckpt_saved = True + if save_checkpoint_at_end or i < len(break_points): + self._save(sess, saver) + ckpt_saved = True return outputs diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py index bcc644c097..1b67a33f04 100644 --- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py @@ -20,11 +20,13 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.platform import test +from tensorflow.python.training import saver as saver_lib class ShuffleDatasetSerializationTest( @@ -50,26 +52,100 @@ class ShuffleDatasetSerializationTest( num_repeats = 5 num_outputs = range_limit * num_repeats buffer_sizes = [1, 3, 8, 10, 25, 50] - reshuffle_each_iteration = False # pylint: disable=cell-var-from-loop # pylint: disable=g-long-lambda - for buffer_size in 
buffer_sizes: - self.run_core_tests( - lambda: self._build_shuffle_dataset( + for reshuffle_each_iteration in [True, False]: + for buffer_size in buffer_sizes: + self.run_core_tests( + lambda: self._build_shuffle_dataset( + range_limit=range_limit, + num_repeats=num_repeats, + buffer_size=buffer_size, + seed=seed, + reshuffle_each_iteration=reshuffle_each_iteration), + lambda: self._build_shuffle_dataset( + range_limit=range_limit, + num_repeats=num_repeats, + buffer_size=buffer_size, + seed=10, + reshuffle_each_iteration=reshuffle_each_iteration), + num_outputs) + # pylint: enable=cell-var-from-loop + # pylint: enable=g-long-lambda + + def testNonDeterministicSeeding(self): + + range_limit = 10 + num_repeats = 5 + num_outputs = range_limit * num_repeats + buffer_sizes = [1, 3, 8, 10, 25, 50] + for reshuffle_each_iteration in [True, False]: + for buffer_size in buffer_sizes: + + def ds_fn(): + # pylint: disable=cell-var-from-loop + return self._build_shuffle_dataset( range_limit=range_limit, num_repeats=num_repeats, buffer_size=buffer_size, - seed=seed, - reshuffle_each_iteration=reshuffle_each_iteration), - lambda: self._build_shuffle_dataset( + seed=None, # Iterator seeds are generated non-deterministically. + reshuffle_each_iteration=reshuffle_each_iteration) + # pylint: enable=cell-var-from-loop + + # We checkpoint the initial state of the Dataset so that we can restore + # the seeds in the next run. Since the seeding is non-deterministic + # the dataset gets initialized with different seeds each time. 
+ expected = self.gen_outputs( + ds_fn, + break_points=[0], + num_outputs=num_outputs, + ckpt_saved=False, + verify_exhausted=False, + save_checkpoint_at_end=False) + actual = self.gen_outputs( + ds_fn, + break_points=self.gen_break_points(num_outputs), + num_outputs=num_outputs, + ckpt_saved=True, + verify_exhausted=False) + self.match(expected, actual) + + def testMultipleIterators(self): + range_limit = 10 + num_repeats = 5 + num_outputs = range_limit * num_repeats + buffer_sizes = [1, 3, 8, 10, 25, 50] + + for reshuffle_each_iteration in [True, False]: + for buffer_size in buffer_sizes: + + def ds_fn(): + # pylint: disable=cell-var-from-loop + return self._build_shuffle_dataset( range_limit=range_limit, num_repeats=num_repeats, buffer_size=buffer_size, - seed=10, - reshuffle_each_iteration=reshuffle_each_iteration), - num_outputs) - # pylint: enable=cell-var-from-loop - # pylint: enable=g-long-lambda + seed=None, # Iterator seeds are generated non-deterministically. + reshuffle_each_iteration=reshuffle_each_iteration) + # pylint: enable=cell-var-from-loop + + with ops.Graph().as_default() as g: + ds = ds_fn() + iterators = [ds.make_one_shot_iterator(), ds.make_one_shot_iterator()] + get_next_ops = [it.get_next() for it in iterators] + saveables = [ + contrib_iterator_ops.make_saveable_from_iterator(it) + for it in iterators + ] + for saveable in saveables: + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable) + saver = saver_lib.Saver(allow_empty=True) + with self.test_session(graph=g) as sess: + self._save(sess, saver) + expected = [sess.run(get_next_ops) for _ in range(num_outputs)] + self._restore(saver, sess) + actual = [sess.run(get_next_ops) for _ in range(num_outputs)] + self.match(expected, actual) class ShuffleAndRepeatTest( diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 3438199ebd..b859295fa4 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ 
b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -61,10 +61,12 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { } protected: - class Iterator : public DatasetIterator { + template + class Iterator : public DatasetIterator { public: - explicit Iterator(const Params& params, int64 seed, int64 seed2) - : DatasetIterator(params), + explicit Iterator(const typename DatasetIterator::Params& params, + int64 seed, int64 seed2) + : DatasetIterator(params), input_impl_(nullptr), seed_(seed), seed2_(seed2), @@ -85,26 +87,28 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { bool first_call = false; if (!input_impl_ && epoch_ == 0) { first_call = true; - TF_RETURN_IF_ERROR( - dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); + TF_RETURN_IF_ERROR(this->dataset()->input_->MakeIterator( + ctx, this->prefix(), &input_impl_)); } - while (input_impl_ && num_elements_ < dataset()->buffer_size_) { + while (input_impl_ && num_elements_ < this->dataset()->buffer_size_) { if (ctx->env()->NowMicros() > ((num_log_entries + 1) * kLogIntervalMicros) + start_micros) { num_log_entries++; LOG(INFO) << "Filling up shuffle buffer (this may take a while): " - << num_elements_ << " of " << dataset()->buffer_size_; + << num_elements_ << " of " + << this->dataset()->buffer_size_; } std::vector input_element; bool end_of_input_sequence = false; - while (dataset()->count_ == -1 || epoch_ < dataset()->count_) { + while (this->dataset()->count_ == -1 || + epoch_ < this->dataset()->count_) { TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &input_element, &end_of_input_sequence)); if (!end_of_input_sequence) { first_call = false; break; } - if (first_call && dataset()->count_ == -1) { + if (first_call && this->dataset()->count_ == -1) { // If the first call to GetNext() fails because the end // of sequence has been reached, we terminate the // iteration immediately. 
(Otherwise, this iterator @@ -115,11 +119,11 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { epoch_++; int64 n = slices_.back()->end; slices_.emplace_back(new Slice{n, n}); - TF_RETURN_IF_ERROR( - dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); + TF_RETURN_IF_ERROR(this->dataset()->input_->MakeIterator( + ctx, this->prefix(), &input_impl_)); } if (!end_of_input_sequence) { - buffer_[slices_.back()->end % dataset()->buffer_size_] = + buffer_[slices_.back()->end % this->dataset()->buffer_size_] = std::move(input_element); num_elements_++; slices_.back()->end++; @@ -144,10 +148,11 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { int64 offset = Random() % (slices_.front()->end - slices_.front()->start); int64 index = - (slices_.front()->start + offset) % dataset()->buffer_size_; + (slices_.front()->start + offset) % this->dataset()->buffer_size_; *out_tensors = std::move(buffer_[index]); - std::swap(buffer_[index], - buffer_[slices_.front()->start % dataset()->buffer_size_]); + std::swap( + buffer_[index], + buffer_[slices_.front()->start % this->dataset()->buffer_size_]); slices_.front()->start++; num_elements_--; } else { @@ -160,40 +165,44 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { mutex_lock l(mu_); - // Save state needed to restore the random number generators. - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("num_random_samples"), - num_random_samples_)); + TF_RETURN_IF_ERROR(writer->WriteScalar( + this->full_name("num_random_samples"), num_random_samples_)); + TF_RETURN_IF_ERROR(writer->WriteScalar(this->full_name("seed"), seed_)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(this->full_name("seed2"), seed2_)); // Save input iterator if it hasn't been exhausted else write // "end_of_input_sequence". 
if (!input_impl_) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("end_of_input_sequence"), "")); + TF_RETURN_IF_ERROR(writer->WriteScalar( + this->full_name("end_of_input_sequence"), "")); } else { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + TF_RETURN_IF_ERROR(this->SaveParent(writer, input_impl_)); } // Save the epoch counter, buffer, and buffer slices. - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("epoch"), epoch_)); - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("num_elements"), num_elements_)); TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("slices_size"), slices_.size())); + writer->WriteScalar(this->full_name("epoch"), epoch_)); + TF_RETURN_IF_ERROR(writer->WriteScalar(this->full_name("num_elements"), + num_elements_)); + TF_RETURN_IF_ERROR(writer->WriteScalar(this->full_name("slices_size"), + slices_.size())); for (size_t i = 0; i < slices_.size(); ++i) { TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name(strings::StrCat("slices_start_", i)), + this->full_name(strings::StrCat("slices_start_", i)), slices_[i]->start)); TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name(strings::StrCat("slices_end_", i)), slices_[i]->end)); + this->full_name(strings::StrCat("slices_end_", i)), + slices_[i]->end)); for (size_t j = slices_[i]->start; j < slices_[i]->end; ++j) { - size_t index = j % dataset()->buffer_size_; + size_t index = j % this->dataset()->buffer_size_; TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name(strings::StrCat("buffer_", index, "_size")), + this->full_name(strings::StrCat("buffer_", index, "_size")), buffer_[index].size())); for (size_t k = 0; k < buffer_[index].size(); ++k) { TF_RETURN_IF_ERROR(writer->WriteTensor( - full_name(strings::StrCat("buffer_", index, "_", k)), + this->full_name(strings::StrCat("buffer_", index, "_", k)), buffer_[index][k])); } } @@ -205,51 +214,54 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { Status RestoreInternal(IteratorContext* ctx, IteratorStateReader* reader) 
override { mutex_lock l(mu_); - // Restore the random number generators. - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("num_random_samples"), - &num_random_samples_)); + TF_RETURN_IF_ERROR(reader->ReadScalar( + this->full_name("num_random_samples"), &num_random_samples_)); + TF_RETURN_IF_ERROR(reader->ReadScalar(this->full_name("seed"), &seed_)); + TF_RETURN_IF_ERROR( + reader->ReadScalar(this->full_name("seed2"), &seed2_)); ResetRngs(); // Restore the input iterator if it wasn't already exhausted. - if (!reader->Contains(full_name("end_of_input_sequence"))) { - TF_RETURN_IF_ERROR( - dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + if (!reader->Contains(this->full_name("end_of_input_sequence"))) { + TF_RETURN_IF_ERROR(this->dataset()->input_->MakeIterator( + ctx, this->prefix(), &input_impl_)); + TF_RETURN_IF_ERROR(this->RestoreParent(ctx, reader, input_impl_)); } else { input_impl_.reset(); } // Restore the epoch counter, buffer, and buffer slices. 
- TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("epoch"), &epoch_)); TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("num_elements"), &num_elements_)); + reader->ReadScalar(this->full_name("epoch"), &epoch_)); + TF_RETURN_IF_ERROR(reader->ReadScalar(this->full_name("num_elements"), + &num_elements_)); size_t slices_size; { int64 temp; TF_RETURN_IF_ERROR( - reader->ReadScalar(full_name("slices_size"), &temp)); + reader->ReadScalar(this->full_name("slices_size"), &temp)); slices_size = static_cast(temp); } - buffer_.reset(new std::vector[dataset()->buffer_size_]); + buffer_.reset(new std::vector[this->dataset()->buffer_size_]); for (size_t i = 0; i < slices_size; ++i) { int64 start; TF_RETURN_IF_ERROR(reader->ReadScalar( - full_name(strings::StrCat("slices_start_", i)), &start)); + this->full_name(strings::StrCat("slices_start_", i)), &start)); int64 end; TF_RETURN_IF_ERROR(reader->ReadScalar( - full_name(strings::StrCat("slices_end_", i)), &end)); + this->full_name(strings::StrCat("slices_end_", i)), &end)); slices_.emplace_back(new Slice{start, end}); for (size_t j = start; j < end; ++j) { - size_t index = j % dataset()->buffer_size_; + size_t index = j % this->dataset()->buffer_size_; int64 list_size; TF_RETURN_IF_ERROR(reader->ReadScalar( - full_name(strings::StrCat("buffer_", index, "_size")), + this->full_name(strings::StrCat("buffer_", index, "_size")), &list_size)); buffer_[index] = std::vector(list_size); for (int k = 0; k < list_size; ++k) { TF_RETURN_IF_ERROR(reader->ReadTensor( - full_name(strings::StrCat("buffer_", index, "_", k)), + this->full_name(strings::StrCat("buffer_", index, "_", k)), &buffer_[index][k])); } } @@ -289,8 +301,8 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { mutex mu_; std::unique_ptr[]> buffer_ GUARDED_BY(mu_); std::unique_ptr input_impl_ GUARDED_BY(mu_); - const int64 seed_ GUARDED_BY(mu_); - const int64 seed2_ GUARDED_BY(mu_); + int64 seed_ GUARDED_BY(mu_); + int64 seed2_ GUARDED_BY(mu_); int64 epoch_ 
GUARDED_BY(mu_); int64 num_elements_ GUARDED_BY(mu_); std::deque> slices_ GUARDED_BY(mu_); @@ -360,6 +372,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { generator_(&parent_generator_) {} string DebugString() const override { + mutex_lock l(mu_); return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_, ", ", seed2_, ")::ReshufflingDataset"); } @@ -370,38 +383,96 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { int64 iterator_seed2; { mutex_lock l(mu_); - iterator_seed = generator_(); - iterator_seed2 = generator_(); + iterator_seed = Random(); + iterator_seed2 = Random(); } - return std::unique_ptr(new ShuffleDatasetBase::Iterator( - {this, strings::StrCat(prefix, "::Shuffle")}, iterator_seed, - iterator_seed2)); + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Shuffle")}, + iterator_seed, iterator_seed2)); } protected: + class Iterator : public ShuffleDatasetBase::Iterator { + public: + explicit Iterator(const Params& params, int64 seed, int64 seed2) + : ShuffleDatasetBase::Iterator(params, seed, + seed2) {} + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(dataset()->mu_); + + // Save RNG state of Dataset. + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("ds_num_random_samples"), + dataset()->num_random_samples_)); + + // Save the Iterator. + return ShuffleDatasetBase::Iterator::SaveInternal( + writer); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(dataset()->mu_); + + // Restore RNG state of Dataset. + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("ds_num_random_samples"), + &dataset()->num_random_samples_)); + dataset()->ResetRngs(); + + // Restore the Iterator. 
+ return ShuffleDatasetBase::Iterator< + ReshufflingDataset>::RestoreInternal(ctx, reader); + } + }; + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { - return errors::Unimplemented( - "Checkpointing ShufflingDataset with reshuffle_each_iteration=true " - "is not supported.\n" - "If you have a ds.shuffle(buffer_size).repeat(count) in your input " - "pipeline, replace it with " - "ds.apply(tf.contrib.data.shuffle_and_repeat(buffer_size, count)).\n" - "If you iterate over your dataset once, change shuffle(buffer_size) " - "to shuffle(buffer_size, reshuffle_each_iteration=False).\n" - "If you are using Dataset.list_files(pattern), change it to " - "Dataset.list_files(pattern, shuffle=False) and manually shuffle " - "the list of files using shuffle_and_repeat as above or using " - "ds.shuffle with reshuffle_each_iteration=False."); + mutex_lock l(mu_); + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* buffer_size = nullptr; + Node* seed = nullptr; + Node* seed2 = nullptr; + AttrValue reshuffle_each_iteration; + + TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size)); + TF_RETURN_IF_ERROR(b->AddScalar(seed_, &seed)); + TF_RETURN_IF_ERROR(b->AddScalar(seed2_, &seed2)); + b->BuildAttrValue(true, &reshuffle_each_iteration); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {input_graph_node, buffer_size, seed, seed2}, // Inputs + {std::make_pair("reshuffle_each_iteration", + reshuffle_each_iteration)}, // Attrs + output)); + return Status::OK(); } private: - const int64 seed_; - const int64 seed2_; + random::SingleSampleAdapter::ResultType Random() const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + num_random_samples_++; + auto out = generator_(); + return out; + } + + void ResetRngs() const EXCLUSIVE_LOCKS_REQUIRED(mu_) { + // Reset the generators based on the current seeds. 
+ parent_generator_ = random::PhiloxRandom(seed_, seed2_); + generator_ = + random::SingleSampleAdapter(&parent_generator_); + generator_.Skip(num_random_samples_); + } + + mutable int64 seed_ GUARDED_BY(mu_); + mutable int64 seed2_ GUARDED_BY(mu_); mutable mutex mu_; mutable random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); mutable random::SingleSampleAdapter generator_ GUARDED_BY(mu_); + mutable int64 num_random_samples_ GUARDED_BY(mu_) = 0; }; // A dataset that uses the same fixed seed for all iterators created from it. @@ -421,8 +492,9 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return std::unique_ptr(new ShuffleDatasetBase::Iterator( - {this, strings::StrCat(prefix, "::Shuffle")}, seed_, seed2_)); + return std::unique_ptr( + new ShuffleDatasetBase::Iterator( + {this, strings::StrCat(prefix, "::Shuffle")}, seed_, seed2_)); } protected: @@ -504,9 +576,10 @@ class ShuffleAndRepeatDatasetOp : public ShuffleDatasetOpBase { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - return std::unique_ptr(new ShuffleDatasetBase::Iterator( - {this, strings::StrCat(prefix, "::ShuffleAndRepeat")}, seed_, - seed2_)); + return std::unique_ptr( + new ShuffleDatasetBase::Iterator( + {this, strings::StrCat(prefix, "::ShuffleAndRepeat")}, seed_, + seed2_)); } protected: -- GitLab From f6d62598848d1804cf6c834b51c2a9f7c049ba59 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Fri, 8 Jun 2018 01:53:08 -0700 Subject: [PATCH 045/365] [XLA] Base class for fusing sibling instructions with multiple outputs. 
PiperOrigin-RevId: 199765487 --- tensorflow/compiler/xla/service/BUILD | 13 + .../xla/service/multi_output_fusion.cc | 342 ++++++++++++++++++ .../xla/service/multi_output_fusion.h | 160 ++++++++ 3 files changed, 515 insertions(+) create mode 100644 tensorflow/compiler/xla/service/multi_output_fusion.cc create mode 100644 tensorflow/compiler/xla/service/multi_output_fusion.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 29718e057b..6f34703fec 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1148,6 +1148,19 @@ tf_cc_test( ], ) +cc_library( + name = "multi_output_fusion", + srcs = ["multi_output_fusion.cc"], + hdrs = ["multi_output_fusion.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/core:lib", + ], +) + cc_library( name = "hlo_creation_utils", srcs = ["hlo_creation_utils.cc"], diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc new file mode 100644 index 0000000000..29f787b86b --- /dev/null +++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc @@ -0,0 +1,342 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/multi_output_fusion.h" + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +StatusOr MultiOutputFusion::Run(HloModule* module) { + bool changed = false; + + for (auto* computation : module->MakeNonfusionComputations()) { + computation_ = computation; + reachability_ = computation_->ComputeReachability(); + candidates_.clear(); + candidates_index_.clear(); + all_fusion_candidates_.clear(); + + int64 index = 0; + for (auto it : computation_->MakeInstructionPostOrder()) { + candidates_.emplace_back(it); + InsertOrDie(&candidates_index_, it, index++); + } + + // Create the initial candidate list for each Node. + for (auto& node : candidates_) { + HloInstruction* instruction = node.hlo; + int64 instruction_id = get_candidate_id(instruction); + FusionCandidate& instr_node = candidates_[instruction_id]; + if (!IsFusible(instruction)) { + continue; + } + all_fusion_candidates_.push_back(instruction); + + std::vector candidates; + tensorflow::gtl::FlatSet candidates_set; + VLOG(10) << "Looking at instruction: " << instruction->name(); + for (auto operand : instruction->operands()) { + // Filter out the non-interesting instructions -- they + // will not generate the savings. 
+ if (!IsProfitableOperand(operand)) { + VLOG(10) << "Operand not profitable: " << operand->name(); + continue; + } + VLOG(10) << "Operand profitable: " << operand->name(); + for (auto user : operand->users()) { + VLOG(10) << "User: " << user->name(); + if (user == instruction || !IsFusible(user)) { + VLOG(10) << "User is not fusible, or is the instruction itself: " + << user->name(); + continue; + } + int64 user_id = get_candidate_id(user); + if (is_connected(instruction, user)) { + VLOG(10) << "User is connected: " << user->name(); + continue; + } + if (instruction_id < user_id && + user->opcode() == HloOpcode::kFusion) { + VLOG(10) << "User ID for user: " << user->name() << " is " + << user_id << " which is higher than " << instruction_id; + continue; + } + if (!LegalToFuse(instruction, user)) { + VLOG(10) << "User not legal to fuse: " << user->name(); + continue; + } + if (candidates_set.insert(user).second) { + VLOG(10) << "User added to candidate list: " << user->name(); + candidates.push_back(user); + } + } + } + + // Iterate over candidates rather than candidates_set to avoid + // nondeterminism. + for (auto candidate : candidates) { + int64 profit = GetProfit(instruction, candidate); + if (profit > 0) { + FusionCandidate& candidate_node = + candidates_[get_candidate_id(candidate)]; + instr_node.fusibles.emplace_back(candidate, profit); + candidate_node.fusibles.emplace_back(instruction, profit); + worklist_.emplace(instruction, candidate, profit); + } + } + } + if (Perform()) { + changed = true; + } + } + return changed; +} + +HloInstruction* MultiOutputFusion::Fuse(HloInstruction* instr1, + HloInstruction* instr2) { + HloInstruction* remaining = instr1; + HloInstruction* fused = instr2; + // Make sure that if only one of the instructions is a fusion, or if only one + // of the instructions is a multi-output fusion, it's what will be fused into. + // + // An invariant is that no bitcast nodes will show up in the middle of a + // fusion node. 
This invariant must hold in order for us to lower it. Given + // that, we require that during multi-output fusion, a fusion node ending with + // bitcast to preserve its structure as a nested fusion instead being + // merged and flattened. + if (fused->opcode() == HloOpcode::kFusion && + fused->fused_expression_root()->opcode() != HloOpcode::kBitcast) { + std::swap(remaining, fused); + } + if (fused->IsMultiOutputFusion()) { + std::swap(remaining, fused); + } + + if (fused->opcode() == HloOpcode::kFusion && + fused->fused_expression_root()->opcode() != HloOpcode::kBitcast) { + remaining->MergeFusionInstructionIntoMultiOutput(fused); + } else { + if (remaining->opcode() == HloOpcode::kFusion && + remaining->fused_expression_root()->opcode() == HloOpcode::kBitcast) { + auto parent_computation = remaining->parent(); + // Create a nested fusion node. + auto remaining_nested_fused = + parent_computation->AddInstruction(HloInstruction::CreateFusion( + remaining->shape(), HloInstruction::FusionKind::kLoop, + remaining)); + TF_CHECK_OK(parent_computation->ReplaceInstruction( + remaining, remaining_nested_fused)); + remaining = remaining_nested_fused; + } + remaining->FuseInstructionIntoMultiOutput(fused); + } + + return remaining; +} + +void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) { + HloInstruction* fusion = instr1; + HloInstruction* fused = instr2; + if (is_fused(instr1)) { + fusion = instr2; + fused = instr1; + } + + // Insert the newly created instruction (if any), to candidates_. + for (auto use : fusion->users()) { + if (candidates_index_.find(use) == candidates_index_.end()) { + int64 index = candidates_.size(); + candidates_.emplace_back(use); + InsertOrDie(&candidates_index_, use, index++); + } + } + FusionCandidate& fusion_node = candidates_[get_candidate_id(fusion)]; + FusionCandidate& fused_node = candidates_[get_candidate_id(fused)]; + + // Update the reachability graph. 
+ UpdateReachability(fusion, fused, all_fusion_candidates_, + [this](HloInstruction* instr) { return is_fused(instr); }); + + // Update the fusible list for fusion. Variable new_fusibles keeps + // track of the new or changed entries. + std::vector> new_fusibles; + tensorflow::gtl::FlatSet in_list; + auto it = fusion_node.fusibles.begin(); + while (it != fusion_node.fusibles.end()) { + HloInstruction* instr = it->first; + if (is_fused(instr) || is_connected(fusion, instr)) { + it = fusion_node.fusibles.erase(it); + continue; + } + in_list.insert(instr); + int64 profit = GetProfit(instr, fusion); + if (profit > it->second) { + it->second = profit; + new_fusibles.emplace_back(instr, profit); + } + ++it; + } + + // Fused_node has been fused into fusion_node. Take the fusion candidates + // (fusibles) from fused_nodes and add them to the fusion_node's. Filter + // out those fusibles that no longer valid (or already in the list). + for (const auto& it : fused_node.fusibles) { + HloInstruction* instr = it.first; + if (instr == fusion || is_fused(instr) || is_connected(fusion, instr)) { + continue; + } + if (in_list.count(instr) > 0) { + continue; + } + int64 profit = GetProfit(instr, fusion); + fusion_node.fusibles.emplace_back(instr, profit); + new_fusibles.emplace_back(instr, profit); + } + fused_node.fusibles.clear(); + + // Update the worklist_. + for (auto it : new_fusibles) { + worklist_.emplace(fusion, it.first, it.second); + } +} + +bool MultiOutputFusion::LegalToFuse(HloInstruction* instr1, + HloInstruction* instr2) { + if (instr1 == instr2) { + return false; + } + if (instr1->opcode() != HloOpcode::kFusion) { + return false; + } + + // Fusing nodes with 0 user makes no sense and the rest of the implementation + // doesn't support it either. + if (instr1->user_count() == 0 || instr2->user_count() == 0) { + return false; + } + + // Check if the users of multioutput fusion is not a get-tuple-element. 
+ // If this is the case, we bail out because the transformation assumes + // the users are get-tuple-element. + auto multioutput_user_is_not_gte = [](HloInstruction* instr) { + if (!instr->IsMultiOutputFusion()) { + return false; + } + for (auto user : instr->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + return true; + } + } + return false; + }; + if (multioutput_user_is_not_gte(instr1) || + multioutput_user_is_not_gte(instr2)) { + return false; + } + + if (is_connected(instr1, instr2)) { + return false; + } + if (!ShapesCompatibleForFusion(instr1, instr2)) { + return false; + } + + return true; +} + +void MultiOutputFusion::UpdateReachability( + HloInstruction* instr1, HloInstruction* instr2, + tensorflow::gtl::ArraySlice instrs_to_update, + const std::function& skip) { + for (auto instr : instrs_to_update) { + if (skip != nullptr && skip(instr)) { + continue; + } + if (reachability_->IsReachable(instr2, instr) && + reachability_->IsReachable(instr1, instr)) { + // If a candidate was already reachable by both, no update needed. + continue; + } + if (reachability_->IsReachable(instr2, instr)) { + reachability_->FastSetReachabilityToUnion({instr, instr1}, instr); + } + if (reachability_->IsReachable(instr1, instr)) { + reachability_->FastSetReachabilityToUnion({instr, instr2}, instr); + } + } +} + +bool MultiOutputFusion::Perform() { + int changed = false; + // Pick the top candidate from queue and try to merge. 
+ while (!worklist_.empty()) { + if (fuel_ <= 0) { + VLOG(2) << "No fusing: run out of fuel."; + break; + } + ToBeFused candidate = worklist_.top(); + worklist_.pop(); + + HloInstruction* instr1 = candidate.instr1; + HloInstruction* instr2 = candidate.instr2; + + if (is_fused(instr1) || is_fused(instr2)) { + continue; + } + + VLOG(1) << "Considering candidate profit_score=" << candidate.score + << "\n\t\tinstr1 = " << instr1->ToString() + << "\n\t\tinstr2 = " << instr2->ToString(); + + if (LegalToFuse(instr1, instr2)) { + VLOG(1) << "Fuse!"; + VLOG(2) << "Before multi_output_fusion:"; + VLOG(2) << "instr1: " << instr1->ToString(); + VLOG(2) << "\n" + << instr1->fused_instructions_computation()->ToString( + HloPrintOptions().set_indent_amount(1)); + VLOG(2) << "instr2: " << instr2->ToString(); + if (instr2->opcode() == HloOpcode::kFusion) { + VLOG(2) << "\n" + << instr2->fused_instructions_computation()->ToString( + HloPrintOptions().set_indent_amount(1)); + } + HloInstruction* ret = Fuse(instr1, instr2); + set_is_fused(ret == instr1 ? instr2 : instr1); + Update(instr1, instr2); + changed = true; + VLOG(2) << "After fusion, \t this: " << ret->name() << "\n" + << ret->fused_instructions_computation()->ToString( + HloPrintOptions().set_indent_amount(1)); + auto users = ret->users(); + --fuel_; + } + } + if (DoProducerConsumerMultiOutputFusion(computation_)) { + changed = true; + } + return changed; +} + +bool MultiOutputFusion::DoProducerConsumerMultiOutputFusion( + HloComputation* /*computation*/) { + return false; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h new file mode 100644 index 0000000000..cfdf83cfe8 --- /dev/null +++ b/tensorflow/compiler/xla/service/multi_output_fusion.h @@ -0,0 +1,160 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MULTI_OUTPUT_FUSION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MULTI_OUTPUT_FUSION_H_ + +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { + +// This class implements the fusing of sibling fusion instructions that sharing +// common operands. +// It constructs the following associated data structures. +// (1) candidates_: stores the instruction and the set of instructions it can +// fuse to. +// (2) candidates_index_: maps instruction to id. +// (3) reachability_: reachability map in this computation. +// (4) all_fusion_candidates_: the vector of candidate instructions. +// (5) worklist_: a priority queue that contains pairs of instructions to be +// fused and their fusion profit scores. +// +// Function Perform() applies the optimization. It picks up the most profitable +// pair in the worklist_, check if it's legal to fuse and fuse the pair. +// After fusion, it updates the associated structure such as reachability_, +// candidates_ and worklist_. +// Note that the reachability map is updated based on the original computation. 
+// This works because the reachability is monotonically increasing with +// instruction fusion. +class MultiOutputFusion : public HloPassInterface { + public: + MultiOutputFusion(int64 fuel) : fuel_(fuel) {} + + tensorflow::StringPiece name() const override { + return "multi_output_fusion"; + } + + // Run multi-output fusion on the given module. Returns whether the module + // was changed. + StatusOr Run(HloModule* module) override; + + protected: + // Main entry for the optimization. Returns true if the optimization happens. + bool Perform(); + + // Test if instr1 and instr2 have the compatible shapes that can be legally + // fused. + virtual bool ShapesCompatibleForFusion(HloInstruction* instr1, + HloInstruction* instr2) = 0; + + // Whether the instruction is a candidate for fusion. + virtual bool IsFusible(HloInstruction* instr) = 0; + + // This function estimates the savings by merging instr1 and instr2 into one + // multi-output fusion instruction. + virtual int64 GetProfit(HloInstruction* instr1, HloInstruction* instr2) = 0; + + // Whether fusing the instruction can reduce cost. + virtual bool IsProfitableOperand(HloInstruction* instr) = 0; + + // Test if it's legal to fuse instr1 and instr2 into one fusion instruction. + virtual bool LegalToFuse(HloInstruction* instr1, HloInstruction* instr2); + + // Update the reachability map after fusing instr1 and instr2. + void UpdateReachability( + HloInstruction* instr1, HloInstruction* instr2, + tensorflow::gtl::ArraySlice instrs_to_update, + const std::function& skip = nullptr); + + // Hook for multi-output fusion along producer-consumer edges. + // Returns whether any instructions were fused. + // + // TODO(b/80420762): Perform producer-consumer multi-output fusion in + // InstructionFusion instead. + virtual bool DoProducerConsumerMultiOutputFusion(HloComputation* computation); + + private: + // Fuse HloInstrctuion instr1 and instr2 and return the fused instruction. 
+ // The other instruction is removed from its parent computation. + HloInstruction* Fuse(HloInstruction* instr1, HloInstruction* instr2); + + // Update the internal data structures after instr1 and instr2 are fused into + // one fusion instruction. + void Update(HloInstruction* instr1, HloInstruction* instr2); + + // Optimization fuel is a compiler debugging technique that makes an + // optimization pass stop what it is doing after having made N changes to the + // program, where N is the fuel. By varying N, this can be used to find the + // first single change that makes a test fail. + int64 fuel_; + + // Computation for the pass. + HloComputation* computation_; + + // An internal data structure for each instruction in current computation. + // When an instruction is removed, member 'hlo' is set to nullptr. + struct FusionCandidate { + HloInstruction* hlo; + std::list> fusibles; + explicit FusionCandidate(HloInstruction* hlo) : hlo(hlo) {} + }; + std::vector candidates_; + + // A map that maps an instruction to the index_. + tensorflow::gtl::FlatMap candidates_index_; + + // The reachability map of current computation. + std::unique_ptr reachability_; + + // This stores all the candidate instructions in current computation. + std::vector all_fusion_candidates_; + + // The pair of candidates to be fused and the profit score. 
+ struct ToBeFused { + HloInstruction* instr1; + HloInstruction* instr2; + int64 score; + ToBeFused(HloInstruction* instr1, HloInstruction* instr2, int64 score) + : instr1(instr1), instr2(instr2), score(score) {} + bool operator<(const ToBeFused& rhs) const { return score < rhs.score; } + }; + std::priority_queue worklist_; + + int64 get_candidate_id(HloInstruction* instr) { + return FindOrDie(candidates_index_, instr); + } + + bool is_fused(HloInstruction* instr) { + return candidates_[get_candidate_id(instr)].hlo == nullptr; + } + + void set_is_fused(HloInstruction* instr) { + candidates_[get_candidate_id(instr)].hlo = nullptr; + } + + bool is_connected(HloInstruction* instr1, HloInstruction* instr2) { + return reachability_->IsConnected(instr1, instr2); + } +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MULTI_OUTPUT_FUSION_H_ -- GitLab From c2493ed5aa9eaf375d88331c7cdb70e428614dc8 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 8 Jun 2018 02:22:02 -0700 Subject: [PATCH 046/365] Make tfe.py_func once differentiable. With this change, it is now possible to embed differentiable eager code --- running on either CPU or GPU --- in graphs. Higher-order derivatives are not yet supported. 
PiperOrigin-RevId: 199768301 --- .../python/kernel_tests/py_func_test.py | 81 ++++++++++- tensorflow/python/ops/script_ops.py | 128 +++++++++++++----- 2 files changed, 175 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index dc7399f040..824610323c 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -26,6 +26,7 @@ from six.moves import queue from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.client import session as session_lib +from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import constant_op @@ -34,6 +35,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import script_ops @@ -438,7 +440,7 @@ class PyFuncTest(test.TestCase): c = constant_op.constant([1.], dtypes.float32) _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) - self.assertTrue(script_ops._py_funcs.size() < 100) + self.assertLess(script_ops._py_funcs.size(), 100) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() @@ -515,8 +517,7 @@ class PyFuncTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testEagerReturningVariableRaisesError(self): def return_variable(): - variable = resource_variable_ops.ResourceVariable(0.0) - return variable + return resource_variable_ops.ResourceVariable(0.0) with self.assertRaisesRegexp(errors.UnknownError, "Attempting to return a 
variable"): @@ -524,6 +525,80 @@ class PyFuncTest(test.TestCase): return_variable, inp=[], Tout=dtypes.float32) self.evaluate(output) + @test_util.run_in_graph_and_eager_modes() + def testEagerGradientTape(self): + + def f(x): + return x**2 + + x = constant_op.constant(3.0) + with backprop.GradientTape() as tape: + tape.watch(x) + y = script_ops.eager_py_func(f, inp=[x], Tout=dtypes.float32) + dy_dx = tape.gradient(y, x) + self.assertEqual(self.evaluate(dy_dx), 6.0) + + def testEagerGradientGraph(self): + + def f(x): + return x**2 + + x = constant_op.constant(3.0) + y = script_ops.eager_py_func(f, inp=[x], Tout=dtypes.float32) + dy_dx = gradients_impl.gradients(y, x)[0] + self.assertEqual(self.evaluate(dy_dx), 6.0) + + @test_util.run_in_graph_and_eager_modes() + def testEagerGradientTapeMultipleArgs(self): + + def f(x, y): + return x**2 + y**2 + + x = constant_op.constant(3.0) + y = constant_op.constant(4.0) + with backprop.GradientTape() as tape: + tape.watch(x) + tape.watch(y) + z = script_ops.eager_py_func(f, inp=[x, y], Tout=dtypes.float32) + + dz_dx, dz_dy = tape.gradient(z, [x, y]) + self.assertEqual(self.evaluate(dz_dx), 6.0) + self.assertEqual(self.evaluate(dz_dy), 8.0) + + def testEagerGradientGraphMultipleArgs(self): + + def f(x, y): + return x**2 + y**2 + + x = constant_op.constant(3.0) + y = constant_op.constant(4.0) + z = script_ops.eager_py_func(f, inp=[x, y], Tout=dtypes.float32) + + dz_dx, dz_dy = gradients_impl.gradients(z, [x, y]) + self.assertEqual(self.evaluate(dz_dx), 6.0) + self.assertEqual(self.evaluate(dz_dy), 8.0) + + def testEagerGradientGraphLogHuber(self): + + def log_huber(x, m): + if math_ops.abs(x) <= m: + return x**2 + else: + return m**2 * (1 - 2 * math_ops.log(m) + math_ops.log(x**2)) + + x = array_ops.placeholder(dtypes.float32) + m = array_ops.placeholder(dtypes.float32) + + y = script_ops.eager_py_func( + func=log_huber, inp=[x, m], Tout=dtypes.float32) + dy_dx = gradients_impl.gradients(y, x)[0] + + with self.test_session() as 
sess: + # Takes the first branch of log_huber. + y, dy_dx = sess.run([y, dy_dx], feed_dict={x: 1.0, m: 2.0}) + self.assertEqual(y, 1.0) + self.assertEqual(dy_dx, 2.0) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index f87c5dc5e3..128b43a7ae 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Script Language Operators. See the @{$python/script_ops} guide.""" # pylint: disable=g-bad-name @@ -29,30 +28,54 @@ import numpy as np import six from tensorflow.python import pywrap_tensorflow +from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_script_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export +# Map from EagerPyFunc token to tuple (tape, eager args, eager outputs); +# used for differentiation. +tape_cache = {} + class EagerFunc(object): """A wrapper for a function owned by an EagerPyFunc.""" - def __init__(self, func, Tout): + def __init__(self, func, Tout, is_grad_func): """Constructs an EagerFunc. Args: func: The function to wrap. Tout: A list of datatypes for the output; an empty list if the output is None. + is_grad_func: Whether this EagerFunc is the gradient of another + EagerPyFunc. """ self._func = func self._out_dtypes = Tout + self._is_grad_func = is_grad_func def _convert(self, value, dtype): + """Converts `value` to a tensor of type `dtype`, with error checking. 
+ + Args: + value: The tensor to convert. + dtype: The desired dtype. + + Returns: + A tensor of type `dtype`, or a zeros tensor if value is None and + this function is in fact a grdient function. + + Raises: + RuntimeError: if `value` is a variable. + """ + if isinstance(value, resource_variable_ops.ResourceVariable): raise RuntimeError( "Attempting to return a variable from an eagerly executed py_func. " @@ -60,22 +83,40 @@ class EagerFunc(object): "be returned; to return the value of a variable, make sure to obtain " "the Tensor backing it by calling `.read_value()` on the variable in " "question: %s" % value) + if value is None and self._is_grad_func: + # Gradient functions may legitimately return a list that contains + # both Tensors and Python Nones. Unfortuantely this breaks the + # OpKernel, so for now we replace None objects with zeros, which is + # mathematically correct but will prevent short-circuiting gradient + # computations. + # + # TODO(akshayka): Make it possible to return a list of both Tensors and + # Nones from an EagerPyFunc. + return constant_op.constant(0.0, dtype=dtype) return ops.convert_to_tensor(value, dtype=dtype) - def __call__(self, on_gpu, args): + def __call__(self, on_gpu, token, args): """Passes `args` to `self._func`, which is executed eagerly.""" + with context.eager_mode(): - ret = self._func(*args) - maybe_copy_to_gpu = lambda x: x if not on_gpu else x.gpu() - if isinstance(ret, (tuple, list)): - return [ - maybe_copy_to_gpu(self._convert(x, dtype=dtype)) - for (x, dtype) in zip(ret, self._out_dtypes) - ] - elif ret is None: - return ret - else: - return maybe_copy_to_gpu(self._convert(ret, dtype=self._out_dtypes[0])) + with backprop.GradientTape() as tape: + for tensor in args: + tape.watch(tensor) + ret = self._func(*args) + # NB: The tape needs to watch copies across devices. 
+ maybe_copy_to_gpu = lambda x: x if not on_gpu else x.gpu() + if isinstance(ret, (tuple, list)): + outputs = [ + maybe_copy_to_gpu(self._convert(x, dtype=dtype)) + for (x, dtype) in zip(ret, self._out_dtypes) + ] + elif ret is None: + outputs = None + else: + outputs = maybe_copy_to_gpu( + self._convert(ret, dtype=self._out_dtypes[0])) + tape_cache[compat.as_bytes(token)] = (tape, args, outputs) + return outputs class FuncRegistry(object): @@ -149,7 +190,14 @@ class FuncRegistry(object): if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): - return func(on_gpu, args) + # NB: Different invocations of the same py_func will share the same + # token, and the entries they stash in the tape_cache will collide. + # In practice, when executing a graph, this should only happen if + # the py_func is in a while_loop whose iterations are run in parallel + # or if the graph is being driven by concurrent session.run() calls. + # + # TODO(akshayka): Key the tape cache in a thread-safe way. 
+ return func(on_gpu, token, args) else: ret = func(*args) # Strings seem to lead to a memory leak here if they're not wrapped in a @@ -193,7 +241,8 @@ class CleanupFunc(object): _py_funcs.remove(self._token) -def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): +def _internal_py_func(func, inp, Tout, stateful=None, eager=False, + is_grad_func=False, name=None): """See documentation for py_func and eager_py_func.""" is_list_or_tuple = False @@ -203,7 +252,7 @@ def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): Tout = [Tout] if eager: - func = EagerFunc(func, Tout) + func = EagerFunc(func, Tout, is_grad_func) token = _py_funcs.insert(func) # We tie the registered function's lifetime with the current default graph, @@ -242,34 +291,55 @@ def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): return result if is_list_or_tuple else result[0] +# TODO(akshayka): Implement higher-order derivatives. +@ops.RegisterGradient("EagerPyFunc") +def _EagerPyFuncGrad(op, dy): + """Computes the gradient of an EagerPyFunc.""" + + token = op.get_attr("token") + + def eagerly_executed_grad(dy): + tape, eager_inputs, eager_outputs = tape_cache.pop(compat.as_bytes(token)) + return tape.gradient(eager_outputs, eager_inputs, output_gradients=dy) + + with ops.control_dependencies(op.outputs): + return _internal_py_func( + func=eagerly_executed_grad, + inp=[dy] if isinstance(dy, ops.Tensor) else dy, + Tout=[tensor.dtype for tensor in op.inputs], + eager=True, is_grad_func=True) + + def eager_py_func(func, inp, Tout, name=None): """Wraps a python function into a TensorFlow op that executes it eagerly. This function allows expressing computations in a TensorFlow graph as Python functions. In particular, it wraps a Python function `func` - in a TensorFlow operation that executes it with eager exeuction enabled. 
As a - consequence, `tf.contrib.eager.py_func` makes it possible to express control - flow using Python constructs (`if`, `while`, `for`, etc.), instead of - TensorFlow control flow constructs (@{tf.cond}, @{tf.while_loop}). For - example, you might use `tf.contrib.eager.py_func` to implement the log huber - function: + in a once-differentiable TensorFlow operation that executes it with eager + exeuction enabled. As a consequence, `tf.contrib.eager.py_func` makes it + possible to express control flow using Python constructs (`if`, `while`, + `for`, etc.), instead of TensorFlow control flow constructs (@{tf.cond}, + @{tf.while_loop}). For example, you might use `tf.contrib.eager.py_func` to + implement the log huber function: ```python def log_huber(x, m): if tf.abs(x) <= m: - return x ** 2 + return x**2 else: - return m ** 2 * (1 - 2 * tf.log(m) + tf.log(x ** 2)) + return m**2 * (1 - 2 * tf.log(m) + tf.log(x**2)) x = tf.placeholder(tf.float32) m = tf.placeholder(tf.float32) y = tf.contrib.eager.py_func(func=log_huber, inp=[x, m], Tout=tf.float32) + dy_dx = tf.gradients(y, x)[0] with tf.Session() as sess: # The session executes `log_huber` eagerly. Given the feed values below, - # it will take the second branch, so `output` evaluates to 7.24372. - output = sess.run(y, feed_dict={x: 3.0, m: 2.0}) + # it will take the first branch, so `y` evaluates to 1.0 and + # `dy_dx` evaluates to 2.0. + y, dy_dx = sess.run([y, dy_dx], feed_dict={x: 1.0, m: 2.0}) ``` You can also use `tf.contrib.eager.py_func` to debug your models at runtime @@ -288,10 +358,6 @@ def eager_py_func(func, inp, Tout, name=None): that take Tensors as inputs, execute TensorFlow operations in their bodies, and return Tensors as outputs. - `tf.contrib.eager.py_func` is not differentiable, though a gradient may be - implemented in the future; if you would like to differentiate through it, - please file an issue on Github. 
- Like @{tf.py_func}, `tf.contrib.eager.py_func` has the following limitations with respect to serialization and distribution: -- GitLab From 16c1d25110e48b8cecbf61ea8e15a7c9da26dd83 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 8 Jun 2018 02:49:33 -0700 Subject: [PATCH 047/365] Removes error message from queues in eager (leaves the one in queuerunners). There's no real reason to not support queues in eager for people using them without using queue runners. PiperOrigin-RevId: 199770626 --- .../common_runtime/eager/kernel_and_device.cc | 1 + .../common_runtime/eager/kernel_and_device.h | 6 +++ .../python/kernel_tests/fifo_queue_test.py | 20 ++++++-- tensorflow/python/ops/data_flow_ops.py | 46 +++++-------------- 4 files changed, 34 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 2a43a31c02..b410ea175b 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -79,6 +79,7 @@ Status KernelAndDevice::Run(std::vector* input_tensors, params.function_library = flib_; params.slice_reader_cache = &slice_reader_cache_; params.rendezvous = rendez_; + params.cancellation_manager = &cm_; if (stats != nullptr) { params.track_allocations = true; } diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index f78d197fd5..c41a0972b1 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -22,6 +22,7 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/cancellation.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" @@ -76,6 +77,11 @@ class KernelAndDevice { const DataTypeVector& output_dtypes() { return output_dtypes_; } private: + // TODO(apassos) Consider a shared cancellation manager. Note that this + // cancellation manager is not useful to actually cancel anything, and is + // provided here only for the few kernels which can't handle one being + // missing. + CancellationManager cm_; std::unique_ptr kernel_; Device* device_; FunctionLibraryRuntime* flib_; diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py index ce73e7ad3e..14a336c688 100644 --- a/tensorflow/python/kernel_tests/fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/fifo_queue_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops @@ -125,12 +126,21 @@ class FIFOQueueTest(test.TestCase): q.enqueue_many([[1, 2, 3, 4], [[1, 1], [2, 2], [3, 3], [4, 4]]]).run() self.assertEqual(4, q.size().eval()) + @test_util.run_in_graph_and_eager_modes() def testMultipleDequeues(self): - with self.test_session() as session: - q = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) - q.enqueue_many([[1, 2, 3]]).run() - a, b, c = session.run([q.dequeue(), q.dequeue(), q.dequeue()]) - self.assertAllEqual(set([1, 2, 3]), set([a, b, c])) + q = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) + 
self.evaluate(q.enqueue_many([[1, 2, 3]])) + a, b, c = self.evaluate([q.dequeue(), q.dequeue(), q.dequeue()]) + self.assertAllEqual(set([1, 2, 3]), set([a, b, c])) + + @test_util.run_in_graph_and_eager_modes() + def testQueuesDontShare(self): + q = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) + self.evaluate(q.enqueue(1)) + q2 = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) + self.evaluate(q2.enqueue(2)) + self.assertAllEqual(self.evaluate(q2.dequeue()), 2) + self.assertAllEqual(self.evaluate(q.dequeue()), 1) def testEnqueueDictWithoutNames(self): with self.test_session(): diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 62c5adc385..abf597ca55 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_data_flow_ops import * @@ -129,11 +130,6 @@ class QueueBase(object): @{tf.RandomShuffleQueue} for concrete implementations of this class, and instructions on how to create them. - - @compatibility(eager) - Queues are not compatible with eager execution. Instead, please - use `tf.data` to get data into your model. - @end_compatibility """ def __init__(self, dtypes, shapes, names, queue_ref): @@ -157,12 +153,7 @@ class QueueBase(object): Raises: ValueError: If one of the arguments is invalid. - RuntimeError: If eager execution is enabled. """ - if context.executing_eagerly(): - raise RuntimeError( - "Queues are not supported when eager execution is enabled. 
" - "Instead, please use tf.data to get data into your model.") self._dtypes = dtypes if shapes is not None: if len(shapes) != len(dtypes): @@ -179,6 +170,8 @@ class QueueBase(object): self._queue_ref = queue_ref if context.executing_eagerly(): self._name = context.context().scope_name + self._resource_deleter = resource_variable_ops.EagerResourceDeleter( + queue_ref, None) else: self._name = self._queue_ref.op.name.split("/")[-1] @@ -605,6 +598,11 @@ class QueueBase(object): else: return gen_data_flow_ops.queue_size(self._queue_ref, name=name) +def _shared_name(shared_name): + if context.executing_eagerly(): + return str(ops.uid()) + return shared_name + @tf_export("RandomShuffleQueue") class RandomShuffleQueue(QueueBase): @@ -612,11 +610,6 @@ class RandomShuffleQueue(QueueBase): See @{tf.QueueBase} for a description of the methods on this class. - - @compatibility(eager) - Queues are not compatible with eager execution. Instead, please - use `tf.data` to get data into your model. - @end_compatibility """ def __init__(self, @@ -690,7 +683,7 @@ class RandomShuffleQueue(QueueBase): min_after_dequeue=min_after_dequeue, seed=seed1, seed2=seed2, - shared_name=shared_name, + shared_name=_shared_name(shared_name), name=name) super(RandomShuffleQueue, self).__init__(dtypes, shapes, names, queue_ref) @@ -702,11 +695,6 @@ class FIFOQueue(QueueBase): See @{tf.QueueBase} for a description of the methods on this class. - - @compatibility(eager) - Queues are not compatible with eager execution. Instead, please - use `tf.data` to get data into your model. - @end_compatibility """ def __init__(self, @@ -752,7 +740,7 @@ class FIFOQueue(QueueBase): component_types=dtypes, shapes=shapes, capacity=capacity, - shared_name=shared_name, + shared_name=_shared_name(shared_name), name=name) super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref) @@ -767,11 +755,6 @@ class PaddingFIFOQueue(QueueBase): See @{tf.QueueBase} for a description of the methods on this class. 
- - @compatibility(eager) - Queues are not compatible with eager execution. Instead, please - use `tf.data` to get data into your model. - @end_compatibility """ def __init__(self, @@ -831,7 +814,7 @@ class PaddingFIFOQueue(QueueBase): component_types=dtypes, shapes=shapes, capacity=capacity, - shared_name=shared_name, + shared_name=_shared_name(shared_name), name=name) super(PaddingFIFOQueue, self).__init__(dtypes, shapes, names, queue_ref) @@ -843,11 +826,6 @@ class PriorityQueue(QueueBase): See @{tf.QueueBase} for a description of the methods on this class. - - @compatibility(eager) - Queues are not compatible with eager execution. Instead, please - use `tf.data` to get data into your model. - @end_compatibility """ def __init__(self, @@ -899,7 +877,7 @@ class PriorityQueue(QueueBase): component_types=types, shapes=shapes, capacity=capacity, - shared_name=shared_name, + shared_name=_shared_name(shared_name), name=name) priority_dtypes = [_dtypes.int64] + types -- GitLab From 1c241ba791f578a67c80e932cbbb06b5af5ca81a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 8 Jun 2018 04:12:07 -0700 Subject: [PATCH 048/365] Fix RemoveUnusedNodes generating invalid graphs for PlaceholderWithDefault inputs PiperOrigin-RevId: 199776409 --- .../graph_transforms/fold_constants_lib.cc | 26 +++++++++++ .../graph_transforms/fold_constants_test.cc | 46 ------------------- 2 files changed, 26 insertions(+), 46 deletions(-) diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc index 85660f94a8..f858411876 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc @@ -117,6 +117,31 @@ Status ReplaceSendRecvs(const GraphDef& original_graph_def, return Status::OK(); } +Status RewriteInputsAsPlaceholders(const TransformFuncContext& context, + GraphDef* graph_def) { + std::unordered_set input_names; + for (const string& input_name : context.input_names) { + input_names.insert(ParseTensorName(input_name).first.ToString()); + } + + for (NodeDef& node : *graph_def->mutable_node()) { + if (input_names.find(node.name()) == input_names.end()) { + continue; + } + if (node.op() == "PlaceholderWithDefault") { + node.set_op("Placeholder"); + node.clear_input(); + } else if (node.op() != "Placeholder") { + return errors::InvalidArgument( + "Input '", node.name(), + "' was expected to be a Placeholder or PlaceholderWithDefault op, " + "but was ", + node.op()); + } + } + return Status::OK(); +} + Status RemoveUnusedNodes(const GraphDef& input_graph_def, const TransformFuncContext& context, GraphDef* output_graph_def) { @@ -165,6 +190,7 @@ Status RemoveUnusedNodes(const GraphDef& input_graph_def, input_graph_def, [&](const NodeDef& node) { return used_nodes.count(node.name()) > 0; }, output_graph_def); + TF_RETURN_IF_ERROR(RewriteInputsAsPlaceholders(context, output_graph_def)); return Status::OK(); } diff --git a/tensorflow/tools/graph_transforms/fold_constants_test.cc 
b/tensorflow/tools/graph_transforms/fold_constants_test.cc index a082399a87..dcdc3c2906 100644 --- a/tensorflow/tools/graph_transforms/fold_constants_test.cc +++ b/tensorflow/tools/graph_transforms/fold_constants_test.cc @@ -330,48 +330,6 @@ class ConstantFoldingTest : public ::testing::Test { EXPECT_EQ(0, node_map.count("unused")); } - void TestRemoveUnusedNodesMultipleOutputs() { - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto root = tensorflow::Scope::NewRootScope(); - - // a b - // \ / - // shape_n - // \ / - // c - auto a = Placeholder(root.WithOpName("a"), DT_FLOAT); - auto b = Placeholder(root.WithOpName("b"), DT_FLOAT); - auto shape_n = ShapeN(root.WithOpName("shape_n"), {Output(a), Output(b)}); - auto c = Add(root.WithOpName("c"), shape_n[0], shape_n[1]); - - GraphDef graph_def; - TF_ASSERT_OK(root.ToGraphDef(&graph_def)); - GraphDef result_graph_def; - TF_ASSERT_OK(graph_transforms::RemoveUnusedNodes( - graph_def, {{shape_n[0].name()}, {"c"}}, &result_graph_def)); - - // Only one output of shape_n node is fed input. Hence the graph search - // should propagate to inputs of shape_n. Nothing to remove here. - std::map node_map; - graph_transforms::MapNamesToNodes(result_graph_def, &node_map); - EXPECT_EQ(1, node_map.count("a")); - EXPECT_EQ(1, node_map.count("b")); - EXPECT_EQ(1, node_map.count("c")); - - result_graph_def.Clear(); - TF_ASSERT_OK(graph_transforms::RemoveUnusedNodes( - graph_def, {{shape_n[0].name(), shape_n[1].name()}, {"c"}}, - &result_graph_def)); - - // Both outputs of shape_n node are fed inputs. shape_n does not function - // and inputs to shape_n should be removed. 
- node_map.clear(); - graph_transforms::MapNamesToNodes(result_graph_def, &node_map); - EXPECT_EQ(0, node_map.count("a")); - EXPECT_EQ(0, node_map.count("b")); - EXPECT_EQ(1, node_map.count("c")); - } - void TestMaxConstantSizeInBytes() { auto root = tensorflow::Scope::NewRootScope(); @@ -431,10 +389,6 @@ TEST_F(ConstantFoldingTest, TestReplaceSendRecvsPrefixNames) { TEST_F(ConstantFoldingTest, TestRemoveUnusedNodes) { TestRemoveUnusedNodes(); } -TEST_F(ConstantFoldingTest, TestRemoveUnusedNodesMultipleOutputs) { - TestRemoveUnusedNodesMultipleOutputs(); -} - TEST_F(ConstantFoldingTest, TestMaxConstantSizeInBytes) { TestMaxConstantSizeInBytes(); } -- GitLab From 6c1b8e8123bc6bd191d81ab9e095d340e31870bf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 05:13:02 -0700 Subject: [PATCH 049/365] Detect configurations that would be hitting bugs in cuDNN and report an error. PiperOrigin-RevId: 199780350 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 59 +++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index f6564df0d0..48afc06e32 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -2291,9 +2291,7 @@ class CudnnEnvVar { // algorithm through an env-var "TF_ENABLE_FFT_TILING_FORWARD=1". struct FftTilingForward { static constexpr const char* kName = "TF_ENABLE_FFT_TILING_FORWARD"; - // TODO(csigg): Enabling this algo causes XLA test failures, for example in - // platforms/xla/tests/internal:convolution_test_gpu. See b/80018418. - static constexpr bool kDefaultFlag = false; // CUDNN_VERSION >= 7000; + static constexpr bool kDefaultFlag = CUDNN_VERSION >= 7000; }; // A helper struct to decide whether to enable the WINOGRAD_NONFUSED algorithms. 
@@ -2426,6 +2424,33 @@ port::Status CudnnSupport::DoConvolveImpl( } } + // Report an error if we might be hitting a cuDNN bug that accesses illegal + // memory. See nvbugs/2138754, b/80018418. + SE_RETURN_IF_ERROR([&] { + if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) { + return port::Status::OK(); + } + if (input_descriptor.ndims() < 3) { + return port::Status::OK(); + } + // Checks that a*b is within the valid range (as provided by NVIDIA). + auto check_sizes = [](size_t a, size_t b) { + if ((a * b * 4608 - 1) >> 31 == 0) { + return port::Status::OK(); + } + return port::Status( + port::error::FAILED_PRECONDITION, + "This configuration potentially accesses illegal memory."); + }; + SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(), + output_descriptor.feature_map_count())); + SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(), + input_descriptor.feature_map_count())); + SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(), + output_descriptor.feature_map_count())); + return port::Status::OK(); + }()); + RETURN_IF_CUDNN_ERROR(cudnnConvolutionForward( cudnn.handle(), /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), @@ -3192,6 +3217,34 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl( } } + // Report an error if we might be hitting a cuDNN bug that produces incorrect + // results. See nvbugs/2072856 + SE_RETURN_IF_ERROR([&] { + if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) { + return port::Status::OK(); + } + if (output_descriptor.height() > 1 && output_descriptor.width() > 1) { + return port::Status::OK(); + } + int convolution_size = output_descriptor.height() > 1 + ? 
filter_descriptor.input_filter_height() + : filter_descriptor.input_filter_width(); + if (convolution_size <= 32) { + return port::Status::OK(); + } + cudnnConvolutionMode_t convolution_mode; + cudnnDataType_t compute_type; + RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor( + conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode, + &compute_type)); + if (convolution_mode != CUDNN_CONVOLUTION) { + return port::Status::OK(); + } + return port::Status( + port::error::FAILED_PRECONDITION, + "This configuration potentially produces incorrect results."); + }()); + RETURN_IF_CUDNN_ERROR(cudnnConvolutionBackwardFilter( cudnn.handle(), /*alpha=*/alpha, -- GitLab From cd00aa747a6e6e023910998a744c0f43e1afddbf Mon Sep 17 00:00:00 2001 From: Adria Puigdomenech Date: Fri, 8 Jun 2018 05:42:27 -0700 Subject: [PATCH 050/365] Obtain use_locking for resource variables in scatter_nd_add. PiperOrigin-RevId: 199782188 --- tensorflow/core/kernels/scatter_nd_op.cc | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index bdc268cf49..43c5b29509 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -143,14 +143,10 @@ class ScatterNdUpdateOp : public OpKernel { void Compute(OpKernelContext* c) override { if (dtype_ == DT_RESOURCE) { - if (use_exclusive_lock_) { - Var* v; - OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); - mutex_lock m(*v->mu()); - DoCompute(c); - } else { - DoCompute(c); - } + Var* v; + OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); + mutex_lock m(*v->mu()); + DoCompute(c); } else if (use_exclusive_lock_) { // If we're here, it means the input type is a ref. 
DCHECK(IsRefType(c->input_dtype(0))); @@ -176,13 +172,7 @@ class ScatterNdUpdateOp : public OpKernel { Var* v; OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); Tensor* t = v->tensor(); - if (!use_exclusive_lock_) { - // We're not holding the lock in the outer scope so need it here. - mutex_lock m(*v->mu()); - OP_REQUIRES_OK(c, PrepareToUpdateVariable(c, t)); - } else { - OP_REQUIRES_OK(c, PrepareToUpdateVariable(c, t)); - } + OP_REQUIRES_OK(c, PrepareToUpdateVariable(c, t)); params = *t; params_shape = params.shape(); } else if (IsRefType(c->input_dtype(0))) { -- GitLab From 7b5d9e86e77bb750d5b794f1673fc08d4d289ec7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 08:12:15 -0700 Subject: [PATCH 051/365] Fix a typo in toco flags description. PiperOrigin-RevId: 199795176 --- tensorflow/contrib/lite/toco/model_cmdline_flags.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc index 0f104d5e2d..4c9f1aa4b0 100644 --- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc @@ -48,7 +48,7 @@ bool ParseModelFlagsFromCommandLineFlags( "that information from the input file."), Flag("input_arrays", parsed_flags.input_arrays.bind(), parsed_flags.input_arrays.default_value(), - "Names of the output arrays, comma-separated. If not specified, " + "Names of the input arrays, comma-separated. If not specified, " "will try to read that information from the input file."), Flag("output_array", parsed_flags.output_array.bind(), parsed_flags.output_array.default_value(), -- GitLab From ef1555172d452539d749340cdb076f0a24f6c505 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 8 Jun 2018 09:00:06 -0700 Subject: [PATCH 052/365] [tf.data] Improve the error message for `Dataset.padded_batch()`. 
Previously, we accepted the `padded_shapes` argument without validating that it was compatible with the `input_dataset.output_shapes`. In many cases, we have enough static shape information to do this, and so we now raise an actionable error at the point where the mistake is committed, rather than at runtime. PiperOrigin-RevId: 199800348 --- tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/batching.py | 3 +- .../python/training/tensor_queue_dataset.py | 7 +- .../kernel_tests/batch_dataset_op_test.py | 38 ++++++++ tensorflow/python/data/ops/dataset_ops.py | 91 ++++++++++++++++--- tensorflow/python/data/util/BUILD | 1 + tensorflow/python/data/util/convert.py | 37 ++++++++ tensorflow/python/data/util/convert_test.py | 73 +++++++++++++++ 8 files changed, 236 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index fc8ec5961c..33b7a75046 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -144,6 +144,7 @@ py_library( "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", ], diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index b9393de4e9..50c2d17592 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.contrib.framework import with_shape from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes @@ -309,7 +310,7 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset): return 
gen_dataset_ops.dense_to_sparse_batch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access self._batch_size, - row_shape=dataset_ops._partial_shape_to_tensor(self._row_shape), # pylint: disable=protected-access + row_shape=convert.partial_shape_to_tensor(self._row_shape), output_shapes=nest.flatten( sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( diff --git a/tensorflow/contrib/training/python/training/tensor_queue_dataset.py b/tensorflow/contrib/training/python/training/tensor_queue_dataset.py index 409aba817c..a2444934bc 100644 --- a/tensorflow/contrib/training/python/training/tensor_queue_dataset.py +++ b/tensorflow/contrib/training/python/training/tensor_queue_dataset.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes @@ -45,14 +46,14 @@ class _PrependFromQueueAndPaddedBatchDataset(dataset_ops.Dataset): self._input_dataset = input_dataset self._batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") - # pylint: disable=protected-access if padded_shapes is None: self._padded_shapes = nest.map_structure( - dataset_ops._partial_shape_to_tensor, input_dataset.output_shapes) + convert.partial_shape_to_tensor, input_dataset.output_shapes) else: self._padded_shapes = nest.map_structure_up_to( - input_dataset.output_shapes, dataset_ops._partial_shape_to_tensor, + input_dataset.output_shapes, convert.partial_shape_to_tensor, padded_shapes) + # pylint: disable=protected-access padding_values = ( padding_values if padding_values is not None else dataset_ops._default_padding(input_dataset)) diff --git a/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py 
b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py index bd80b9dbf5..dba108a531 100644 --- a/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/batch_dataset_op_test.py @@ -371,6 +371,44 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(TypeError): _ = dataset_ops.Dataset.range(10).map(_map_fn).padded_batch(10) + def testPaddedBatchShapeError(self): + with self.assertRaisesRegexp( + ValueError, r'The padded shape \(1,\) is not compatible with the ' + r'corresponding input component shape \(\).'): + _ = dataset_ops.Dataset.range(10).padded_batch(5, padded_shapes=[1]) + + with self.assertRaisesRegexp( + ValueError, r'The padded shape \(1,\) is not compatible with the ' + r'corresponding input component shape \(3,\).'): + _ = dataset_ops.Dataset.from_tensors([1, 2, 3]).padded_batch( + 5, padded_shapes=[1]) + + with self.assertRaisesRegexp( + ValueError, r'Padded shape .* must be a 1-D tensor ' + r'of tf.int64 values, but its shape was \(2, 2\).'): + _ = dataset_ops.Dataset.from_tensors([1, 2, 3]).padded_batch( + 5, padded_shapes=[[1, 1], [1, 1]]) + + with self.assertRaisesRegexp( + TypeError, r'Padded shape .* must be a 1-D tensor ' + r'of tf.int64 values, but its element type was float32.'): + _ = dataset_ops.Dataset.from_tensors([1, 2, 3]).padded_batch( + 5, padded_shapes=constant_op.constant([1., 2., 3.])) + + with self.assertRaisesRegexp( + ValueError, r'The padded shape \(1,\) is not compatible with the ' + r'corresponding input component shape \(\).'): + shape_as_tensor = constant_op.constant([1], dtype=dtypes.int64) + _ = dataset_ops.Dataset.range(10).padded_batch( + 5, padded_shapes=shape_as_tensor) + + with self.assertRaisesRegexp( + ValueError, r'The padded shape \(\?, \?\) is not compatible with the ' + r'corresponding input component shape \(\).'): + shape_as_tensor = array_ops.placeholder(dtypes.int64, shape=[2]) + _ = dataset_ops.Dataset.range(10).padded_batch( + 5, 
padded_shapes=shape_as_tensor) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 5f17444797..8b2a2e0a32 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1687,20 +1687,77 @@ class BatchDataset(Dataset): return self._input_dataset.output_types -def _partial_shape_to_tensor(shape_like): +def _is_padded_shape_compatible_with(padded_shape, input_component_shape): + """Returns `True` if `input_component_shape` can be padded to `padded_shape`. + + Args: + padded_shape: A `tf.TensorShape`. + input_component_shape: A `tf.TensorShape`. + + Returns: + `True` if `input_component_shape` can be padded to `padded_shape`, otherwise + `False`. + """ + + if padded_shape.dims is None or input_component_shape.dims is None: + return True + if len(padded_shape.dims) != len(input_component_shape.dims): + return False + for padded_dim, input_dim in zip( + padded_shape.dims, input_component_shape.dims): + if (padded_dim.value is not None and input_dim.value is not None + and padded_dim.value < input_dim.value): + return False + return True + + +def _padded_shape_to_tensor(padded_shape, input_component_shape): + """Converts `padded_shape` to a `tf.Tensor` representing that shape. + + Args: + padded_shape: A shape-like object, which may be a `tf.TensorShape`, a Python + sequence, or a 1-D `tf.Tensor` of `tf.int64` elements. + input_component_shape: A `tf.TensorShape`, with which `padded_shape` must + be compatible. + + Returns: + A 1-D `tf.Tensor` of `tf.int64` elements, representing `padded_shape`. + + Raises: + ValueError: If `padded_shape` is not a shape or not compatible with + `input_component_shape`. + TypeError: If `padded_shape` is not convertible to a `tf.int64` tensor. + """ try: - # First attempt to convert the input to a shape, and return the - # "canonical" tensor representation, which uses `-1` in place of - # `None`. 
- shape_like = tensor_shape.as_shape(shape_like) - return ops.convert_to_tensor( - [dim if dim is not None else -1 for dim in shape_like.as_list()], - dtype=dtypes.int64) + # Try to convert the `padded_shape` to a `tf.TensorShape` + padded_shape_as_shape = tensor_shape.as_shape(padded_shape) + # We will return the "canonical" tensor representation, which uses + # `-1` in place of `None`. + ret = ops.convert_to_tensor( + [dim if dim is not None else -1 + for dim in padded_shape_as_shape.as_list()], dtype=dtypes.int64) except (TypeError, ValueError): # The argument was not trivially convertible to a # `tf.TensorShape`, so fall back on the conversion to tensor # machinery. - return ops.convert_to_tensor(shape_like, dtype=dtypes.int64) + ret = ops.convert_to_tensor(padded_shape, preferred_dtype=dtypes.int64) + if ret.shape.dims is not None and len(ret.shape.dims) != 1: + raise ValueError( + "Padded shape %s must be a 1-D tensor of tf.int64 values, but its " + "shape was %s." % (padded_shape, ret.shape)) + if ret.dtype != dtypes.int64: + raise TypeError( + "Padded shape %s must be a 1-D tensor of tf.int64 values, but its " + "element type was %s." % (padded_shape, ret.dtype.name)) + padded_shape_as_shape = tensor_util.constant_value_as_shape(ret) + + if not _is_padded_shape_compatible_with(padded_shape_as_shape, + input_component_shape): + raise ValueError("The padded shape %s is not compatible with the " + "corresponding input component shape %s." 
+ % (padded_shape_as_shape, input_component_shape)) + + return ret def _padding_value_to_tensor(value, output_type): @@ -1755,8 +1812,20 @@ class PaddedBatchDataset(Dataset): padding_values = ( padding_values if padding_values is not None else _default_padding(input_dataset)) - self._padded_shapes = nest.map_structure_up_to( - input_dataset.output_shapes, _partial_shape_to_tensor, padded_shapes) + + flat_padded_shapes = nest.flatten_up_to(input_dataset.output_shapes, + padded_shapes) + + flat_padded_shapes_as_tensors = [] + + for input_component_shape, padded_shape in zip( + nest.flatten(input_dataset.output_shapes), flat_padded_shapes): + flat_padded_shapes_as_tensors.append( + _padded_shape_to_tensor(padded_shape, input_component_shape)) + + self._padded_shapes = nest.pack_sequence_as(input_dataset.output_shapes, + flat_padded_shapes_as_tensors) + self._padding_values = nest.map_structure_up_to( input_dataset.output_shapes, _padding_value_to_tensor, padding_values, input_dataset.output_types) diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index 0fc32d51b9..5fcc62b60b 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -70,6 +70,7 @@ py_library( "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:tensor_shape", ], ) diff --git a/tensorflow/python/data/util/convert.py b/tensorflow/python/data/util/convert.py index eeb1d700f3..99b3300900 100644 --- a/tensorflow/python/data/util/convert.py +++ b/tensorflow/python/data/util/convert.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape def optional_param_to_tensor(argument_name, @@ -32,3 +33,39 @@ def optional_param_to_tensor(argument_name, else: return constant_op.constant( 
argument_default, dtype=argument_dtype, name=argument_name) + + +def partial_shape_to_tensor(shape_like): + """Returns a @{tf.Tensor} that represents the given shape. + + Args: + shape_like: A value that can be converted to a @{tf.TensorShape} or a + @{tf.Tensor}. + + Returns: + A 1-D `tf.Tensor` of `tf.int64` elements representing the given shape, where + `-1` is substituted for any unknown dimensions. + """ + try: + # First attempt to convert the input to a shape, and return the + # "canonical" tensor representation, which uses `-1` in place of + # `None`. + shape_like = tensor_shape.as_shape(shape_like) + return ops.convert_to_tensor( + [dim if dim is not None else -1 for dim in shape_like.as_list()], + dtype=dtypes.int64) + except (TypeError, ValueError): + # The argument was not trivially convertible to a + # `tf.TensorShape`, so fall back on the conversion to tensor + # machinery. + ret = ops.convert_to_tensor(shape_like, preferred_dtype=dtypes.int64) + if ret.shape.dims is not None and len(ret.shape.dims) != 1: + raise ValueError("The given shape %s must be a 1-D tensor of tf.int64 " + "values, but the shape was %s." + % (shape_like, ret.shape)) + if ret.dtype != dtypes.int64: + raise TypeError("The given shape %s must be a 1-D tensor of tf.int64 " + "values, but the element type was %s." 
+ % (shape_like, ret.dtype.name)) + + return ret diff --git a/tensorflow/python/data/util/convert_test.py b/tensorflow/python/data/util/convert_test.py index 2cb6488070..6a67093e48 100644 --- a/tensorflow/python/data/util/convert_test.py +++ b/tensorflow/python/data/util/convert_test.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.util import convert +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -48,6 +50,77 @@ class ConvertTest(test.TestCase): with self.test_session() as sess: self.assertEqual(compat.as_bytes("value"), sess.run(resp)) + def testPartialShapeToTensorKnownDimension(self): + with self.test_session() as sess: + self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([1])))) + self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor((1,)))) + self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor([1]))) + self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([1], dtype=dtypes.int64)))) + + def testPartialShapeToTensorUnknownDimension(self): + with self.test_session() as sess: + self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([None])))) + self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor( + (None,)))) + self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor( + [None]))) + self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor( + [-1]))) + self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([-1], dtype=dtypes.int64)))) + + with self.assertRaisesRegexp( + ValueError, r"The given shape .* must be a 1-D tensor of tf.int64 " + r"values, but the shape was \(2, 2\)."): + 
convert.partial_shape_to_tensor(constant_op.constant( + [[1, 1], [1, 1]], dtype=dtypes.int64)) + + with self.assertRaisesRegexp( + TypeError, r"The given shape .* must be a 1-D tensor of tf.int64 " + r"values, but the element type was float32."): + convert.partial_shape_to_tensor(constant_op.constant([1., 1.])) + + def testPartialShapeToTensorMultipleDimensions(self): + with self.test_session() as sess: + self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([3, 6])))) + self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor( + (3, 6)))) + self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor( + [3, 6]))) + self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([3, 6], dtype=dtypes.int64)))) + + self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([3, None])))) + self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor( + (3, None)))) + self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor( + [3, None]))) + self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([3, -1], dtype=dtypes.int64)))) + + self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([None, None])))) + self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor( + (None, None)))) + self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor( + [None, None]))) + self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([-1, -1], dtype=dtypes.int64)))) + + def testPartialShapeToTensorScalar(self): + with self.test_session() as sess: + self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor( + tensor_shape.TensorShape([])))) + self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor(()))) + self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor([]))) + 
self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor( + constant_op.constant([], dtype=dtypes.int64)))) + if __name__ == "__main__": test.main() -- GitLab From 1faacc23e3341645ce11a9720775cb27c0694f4d Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Fri, 8 Jun 2018 09:48:26 -0700 Subject: [PATCH 053/365] [tf.data] tf.contrib.data.CsvDataset: Add recovery for errors with quoted fields PiperOrigin-RevId: 199807061 --- .../contrib/data/kernels/csv_dataset_op.cc | 84 +++++++------------ .../kernel_tests/csv_dataset_op_test.py | 21 ++++- 2 files changed, 50 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/data/kernels/csv_dataset_op.cc b/tensorflow/contrib/data/kernels/csv_dataset_op.cc index e88ad3dc32..4657807785 100644 --- a/tensorflow/contrib/data/kernels/csv_dataset_op.cc +++ b/tensorflow/contrib/data/kernels/csv_dataset_op.cc @@ -236,7 +236,7 @@ class CSVDatasetOp : public DatasetOpKernel { size_t num_parsed = 0; size_t num_selected_parsed = 0; - Status result = Status::OK(); + Status result; while (!end_of_record) { // Read till we reach \n, \r or EOF bool include = @@ -329,6 +329,7 @@ class CSVDatasetOp : public DatasetOpKernel { size_t start = pos_; pos_++; // Starting quotation mark + Status parse_result; while (true) { // Each iter reads 1 char, filling buffer if necessary if (pos_ >= buffer_.size()) { Status s = SaveAndFillBuffer(&earlier_pieces, &start, include); @@ -351,8 +352,9 @@ class CSVDatasetOp : public DatasetOpKernel { if (errors::IsOutOfRange(s)) { // This was the last field. 
We are done *end_of_record = true; - return QuotedFieldToOutput(ctx, StringPiece(), out_tensors, - earlier_pieces, include); + parse_result.Update(QuotedFieldToOutput( + ctx, StringPiece(), out_tensors, earlier_pieces, include)); + return parse_result; } else if (!s.ok()) { return s; } @@ -361,20 +363,24 @@ class CSVDatasetOp : public DatasetOpKernel { char next = buffer_[pos_]; pos_++; if (next == dataset()->delim_) { - return QuotedFieldToOutput( + parse_result.Update(QuotedFieldToOutput( ctx, StringPiece(&buffer_[start], pos_ - 1 - start), - out_tensors, earlier_pieces, include); + out_tensors, earlier_pieces, include)); + return parse_result; } else if (next == '\n' || next == '\r') { *end_of_record = true; - Status s = QuotedFieldToOutput( + parse_result.Update(QuotedFieldToOutput( ctx, StringPiece(&buffer_[start], pos_ - 1 - start), - out_tensors, earlier_pieces, include); + out_tensors, earlier_pieces, include)); if (next == '\r') SkipNewLineIfNecessary(); - return s; + return parse_result; } else if (next != '"') { - return errors::InvalidArgument( - "Quote inside a string has to be escaped by another quote"); + // Take note of the error, but keep going to end of field. + include = false; // So we don't get funky errors when trying to + // unescape the quotes. 
+ parse_result.Update(errors::InvalidArgument( + "Quote inside a string has to be escaped by another quote")); } } else { @@ -454,6 +460,8 @@ class CSVDatasetOp : public DatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(mu_) { std::vector earlier_pieces; size_t start = pos_; + Status parse_result; + while (true) { // Each iter reads 1 char, filling buffer if necessary if (pos_ >= buffer_.size()) { Status s = SaveAndFillBuffer(&earlier_pieces, &start, include); @@ -461,9 +469,10 @@ class CSVDatasetOp : public DatasetOpKernel { if (errors::IsOutOfRange(s)) { // Whatever we have is the last field of the last record *end_of_record = true; - return UnquotedFieldToOutput( + parse_result.Update(UnquotedFieldToOutput( ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors, - earlier_pieces, include); + earlier_pieces, include)); + return parse_result; } else if (!s.ok()) { return s; // Surface all other errors to caller } @@ -472,66 +481,33 @@ class CSVDatasetOp : public DatasetOpKernel { char ch = buffer_[pos_]; if (ch == dataset()->delim_) { - Status s = UnquotedFieldToOutput( + parse_result.Update(UnquotedFieldToOutput( ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors, - earlier_pieces, include); + earlier_pieces, include)); pos_++; - return s; + return parse_result; } if (ch == '\n' || ch == '\r') { // need special case to skip over first \n of record if the line // breaks are \r\n - Status s = UnquotedFieldToOutput( + parse_result.Update(UnquotedFieldToOutput( ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors, - earlier_pieces, include); + earlier_pieces, include)); *end_of_record = true; pos_++; if (ch == '\r') SkipNewLineIfNecessary(); - return s; + return parse_result; } if (dataset()->use_quote_delim_ && ch == '"') { - // Advance pos_ to the next field anyway so that we can ignore - // errors gracefully if required. The caller of this will be able to - // call ParseOneField and continue with the rest of the record. 
- AdvanceToNextField(end_of_record); - return errors::InvalidArgument( - "Unquoted fields cannot have quotes inside"); + // Take note of the error, but keep going to end of field. + parse_result.Update(errors::InvalidArgument( + "Unquoted fields cannot have quotes inside")); } // Otherwise, go to next character pos_++; } } - // Advances pos_ to the start of the next field, as delimited by delim, - // CRLF, or EOF, ignoring errors, and not keeping track of characters in - // the current field. - void AdvanceToNextField(bool* end_of_record) - EXCLUSIVE_LOCKS_REQUIRED(mu_) { - while (true) { - if (pos_ >= buffer_.size()) { - Status s = FillBuffer(&buffer_); - pos_ = 0; - if (!s.ok()) { - *end_of_record = true; - return; - } - } - - char ch = buffer_[pos_]; - pos_++; - - if (ch == dataset()->delim_) { - return; - } - - if (ch == '\n' || ch == '\r') { - *end_of_record = true; - if (ch == '\r') SkipNewLineIfNecessary(); - return; - } - } - } - Status FillBuffer(string* result) EXCLUSIVE_LOCKS_REQUIRED(mu_) { result->clear(); Status s = input_stream_->ReadNBytes(dataset()->buffer_size_, result); diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py index 74b90ec7d1..97b5e94165 100644 --- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py @@ -162,9 +162,28 @@ class CsvDatasetOpTest(test.TestCase): expected_err_re='Unquoted fields cannot have quotes inside', record_defaults=record_defaults) + def testCsvDataset_errWithUnescapedQuotes(self): + record_defaults = [['']] * 3 + inputs = [['"a"b","c","d"']] + self._test_dataset( + inputs, + expected_err_re= + 'Quote inside a string has to be escaped by another quote', + record_defaults=record_defaults) + + def testCsvDataset_ignoreErrWithUnescapedQuotes(self): + record_defaults = [['']] * 3 + inputs = [['1,"2"3",4', '1,"2"3",4",5,5', 'a,b,"c"d"', 
'e,f,g']] + filenames = self.setup_files(inputs) + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = readers.CsvDataset(filenames, record_defaults=record_defaults) + dataset = dataset.apply(error_ops.ignore_errors()) + self._verify_output_or_err(sess, dataset, [['e', 'f', 'g']]) + def testCsvDataset_ignoreErrWithUnquotedQuotes(self): record_defaults = [['']] * 3 - inputs = [['1,2"3,4', 'a,b,c"d', 'e,f,g']] + inputs = [['1,2"3,4', 'a,b,c"d', '9,8"7,6,5', 'e,f,g']] filenames = self.setup_files(inputs) with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: -- GitLab From 8566ebe58ff5b08864ddef6fe743fdd80962465b Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Fri, 8 Jun 2018 09:52:21 -0700 Subject: [PATCH 054/365] [XLA:GPU] Add a multi-output fusion pass to fuse sibling reduce instructions. Stop creating pre-fused nodes in BatchNormExpander. PiperOrigin-RevId: 199807585 --- tensorflow/compiler/xla/service/gpu/BUILD | 29 ++++ .../compiler/xla/service/gpu/gpu_compiler.cc | 6 +- .../xla/service/gpu/multi_output_fusion.cc | 118 +++++++++++++++ .../xla/service/gpu/multi_output_fusion.h | 55 +++++++ .../service/gpu/multi_output_fusion_test.cc | 138 ++++++++++++++++++ 5 files changed, 343 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc create mode 100644 tensorflow/compiler/xla/service/gpu/multi_output_fusion.h create mode 100644 tensorflow/compiler/xla/service/gpu/multi_output_fusion_test.cc diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 5e5ca7c72c..5e02631a58 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -423,6 +423,34 @@ tf_cc_test( ], ) +cc_library( + name = "multi_output_fusion", + srcs = ["multi_output_fusion.cc"], + hdrs = ["multi_output_fusion.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", +
"//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:multi_output_fusion", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "multi_output_fusion_test", + srcs = ["multi_output_fusion_test.cc"], + deps = [ + ":multi_output_fusion", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + ], +) + cc_library( name = "gpu_copy_insertion", srcs = ["gpu_copy_insertion.cc"], @@ -523,6 +551,7 @@ cc_library( ":instruction_fusion", ":ir_emission_utils", ":ir_emitter", + ":multi_output_fusion", ":pad_insertion", ":partition_assignment", ":stream_assignment", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index b857219807..c995736af9 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -52,6 +52,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h" #include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h" +#include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" #include "tensorflow/compiler/xla/service/gpu/pad_insertion.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" @@ -159,13 +160,11 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, if (hlo_module->config().debug_options().xla_gpu_use_cudnn_batchnorm()) { pass.AddPass(); } - // TODO(kramerb): Remove use_fusion once instruction fusion can create - // multi-output fusions from the unfused expander output. pass.AddPass( /*rewrite_training_op=*/true, /*rewrite_inference_op=*/true, /*rewrite_grad_op=*/true, - /*use_fusion=*/true); + /*use_fusion=*/false); // Rewrite gather ops into smaller ones. pass.AddPass(); @@ -261,6 +260,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, fusion.AddPass(/*may_duplicate=*/false); fusion.AddPass(/*may_duplicate=*/true); fusion.AddPass(); + fusion.AddPass(); TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status()); HloPassPipeline reduce_pipeline("reduce-precision"); diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc new file mode 100644 index 0000000000..86c5c4fb6f --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc @@ -0,0 +1,118 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace gpu { + +GpuMultiOutputFusion::GpuMultiOutputFusion() : MultiOutputFusion(INT64_MAX) {} + +bool GpuMultiOutputFusion::ShapesCompatibleForFusion(HloInstruction* instr1, + HloInstruction* instr2) { + auto get_element_shape = [&](HloInstruction* instr) { + const HloInstruction* element_instr = instr; + if (instr->opcode() == HloOpcode::kFusion) { + auto fused_expression_root = instr->fused_expression_root(); + if (instr->IsMultiOutputFusion()) { + // The shapes in all tuple operands should agree. Just pick the first + // one. + element_instr = fused_expression_root->operands()[0]; + } else { + element_instr = fused_expression_root; + } + } + return element_instr->shape(); + }; + + // The elementwise output shapes must be the same (including layout) + return ShapeUtil::ShapeUtil::Equal(get_element_shape(instr1), + get_element_shape(instr2)); +} + +bool GpuMultiOutputFusion::IsProfitableOperand(HloInstruction* instr) { + // kConstant instruction will not have memory reads, so it won't be a profit + // source. Skip them. 
+ if (instr->opcode() == HloOpcode::kConstant && + ShapeUtil::IsEffectiveScalar(instr->shape())) { + return false; + } + // We don't target to fuse producer/consumer instructions -- this should + // be taken care of by the instruction_fusion pass. If instr has only + // one user, it will not have sibling instructions. We won't consider it. + if (instr->user_count() < 2) { + return false; + } + return true; +} + +namespace { +bool IsReduction(HloInstruction* instr) { + if (instr->IsMultiOutputFusion()) { + for (const HloInstruction* operand : + instr->fused_expression_root()->operands()) { + if (operand->opcode() == HloOpcode::kReduce) { + return true; + } + } + return false; + } else if (instr->opcode() == HloOpcode::kFusion) { + return instr->fused_expression_root()->opcode() == HloOpcode::kReduce; + } else { + return instr->opcode() == HloOpcode::kReduce; + } +} +} // namespace + +bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) { + return IsReduction(instr); +} + +int64 GpuMultiOutputFusion::GetProfit(HloInstruction* instr1, + HloInstruction* instr2) { + tensorflow::gtl::FlatSet in_list; + for (auto instr : instr1->operands()) { + if (!IsProfitableOperand(instr)) { + continue; + } + in_list.insert(instr); + } + int64 profit = 0; + for (auto instr : instr2->operands()) { + if (!IsProfitableOperand(instr) || in_list.count(instr) == 0) { + continue; + } + profit += ShapeUtil::ByteSizeOf(instr->shape()); + } + VLOG(2) << "Fusing instr1=" << instr1->name() << " instr2=" << instr2->name() + << ", the profit is =" << profit; + return profit; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.h b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.h new file mode 100644 index 0000000000..5451a93cec --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.h @@ -0,0 +1,55 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MULTI_OUTPUT_FUSION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MULTI_OUTPUT_FUSION_H_ + +#include "tensorflow/compiler/xla/service/multi_output_fusion.h" + +namespace xla { +namespace gpu { + +// Multi-output fusion of sibling and producer-consumer instructions for the +// GPU backend. +class GpuMultiOutputFusion : public MultiOutputFusion { + public: + GpuMultiOutputFusion(); + + protected: + // Test if instr1 and instr2 have compatible shapes that can be legally + // fused. + bool ShapesCompatibleForFusion(HloInstruction* instr1, + HloInstruction* instr2) override; + + // We currently only consider reduce and reduce fusion nodes as candidates. + bool IsFusible(HloInstruction* instr) override; + + // This function estimates the amount of memory reads saved by merging + // instr1 and instr2 into one multi-output fusion instruction. For a fusion + // instruction, all the operands need to be loaded from memory. If we merge + // instr1 and instr2, common operands will not be loaded twice. The profit is + // estimated as the size of the common operands between instr1 and instr2. + int64 GetProfit(HloInstruction* instr1, HloInstruction* instr2) override; + + // Whether fusing the instruction can reduce memory reads. + // + // TODO(tjoerg): Move this method up into the MultiOutputFusion base class.
+ bool IsProfitableOperand(HloInstruction* instr) override; +}; + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MULTI_OUTPUT_FUSION_H_ diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion_test.cc new file mode 100644 index 0000000000..d0b4c88487 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion_test.cc @@ -0,0 +1,138 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" + +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/str_util.h" + +namespace op = xla::testing::opcode_matchers; + +namespace xla { +namespace gpu { + +using InstructionFusionTest = HloTestBase; + +const char kModulePrefix[] = R"( + HloModule test_module + + scalar_add_computation { + scalar_lhs = f32[] parameter(0) + scalar_rhs = f32[] parameter(1) + ROOT add = f32[] add(scalar_lhs, scalar_rhs) + })"; + +TEST_F(InstructionFusionTest, MultiOutputFusionSiblingReduceAndReduceFusion) { + // Fusion with reduce instruction root and a sibling reduce instruction + // sharing the same input param. 
+ auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R"( + fused_computation { + p1.1 = f32[128,512,28,28]{3,2,1,0} parameter(1) + mul = f32[128,512,28,28]{3,2,1,0} multiply(p1.1, p1.1) + const.1 = f32[] parameter(0) + ROOT reduce.1 = f32[512]{0} reduce(mul, const.1), dimensions={0,2,3}, to_apply=scalar_add_computation + } + + ENTRY entry { + p0 = f32[] parameter(0) + p1 = f32[128,512,28,28]{3,2,1,0} parameter(1) + const.2 = f32[] constant(1) + fusion = f32[512] fusion(p0, p1), kind=kInput, calls=fused_computation + reduce.2 = f32[512]{0} reduce(p1, const.2), dimensions={0,2,3}, to_apply=scalar_add_computation + ROOT root = (f32[512]{0}, f32[512]{0}) tuple(fusion, reduce.2) + })")) + .ValueOrDie(); + ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); + SCOPED_TRACE(module->ToString()); + const HloInstruction* fusion = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + ASSERT_TRUE(fusion->IsMultiOutputFusion()); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Reduce(), op::Reduce())); +} + +TEST_F(InstructionFusionTest, MultiOutputFusionSiblingReduceFusions) { + // Two sibling fusions with reduce instruction roots sharing the same input + // param. 
+ auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R"( + fused_computation_1 { + p1.1 = f32[128,512,28,28]{3,2,1,0} parameter(1) + mul = f32[128,512,28,28]{3,2,1,0} multiply(p1.1, p1.1) + const.1 = f32[] parameter(0) + ROOT reduce.1 = f32[512]{0} reduce(mul, const.1), dimensions={0,2,3}, to_apply=scalar_add_computation + } + + fused_computation_2 { + p1.2 = f32[128,512,28,28]{3,2,1,0} parameter(1) + const.2 = f32[] parameter(0) + ROOT reduce.2 = f32[512]{0} reduce(p1.2, const.2), dimensions={0,2,3}, to_apply=scalar_add_computation + } + + ENTRY entry { + p0 = f32[] parameter(0) + p1 = f32[128,512,28,28]{3,2,1,0} parameter(1) + fusion.1 = f32[512] fusion(p0, p1), kind=kInput, calls=fused_computation_1 + fusion.2 = f32[512] fusion(p0, p1), kind=kInput, calls=fused_computation_2 + ROOT root = (f32[512]{0}, f32[512]{0}) tuple(fusion.1, fusion.2) + })")) + .ValueOrDie(); + ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); + SCOPED_TRACE(module->ToString()); + const HloInstruction* fusion = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + ASSERT_TRUE(fusion->IsMultiOutputFusion()); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Reduce(), op::Reduce())); +} + +TEST_F(InstructionFusionTest, + MultiOutputFusionSiblingReduceAndReduceMultiOutputFusion) { + // Multi-output fusion with two reduce instructions root and a sibling reduce + // instruction sharing the same input param. 
+ auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R"( + fused_computation (p0: f32[128,512,28,28]) -> (f32[512], f32[512]) { + const.1 = f32[] constant(1) + p0.1 = f32[128,512,28,28]{3,2,1,0} parameter(0) + mul = f32[128,512,28,28]{3,2,1,0} multiply(f32[128,512,28,28]{3,2,1,0} p0.1, f32[128,512,28,28]{3,2,1,0} p0.1) + reduce.1 = f32[512]{0} reduce(f32[128,512,28,28]{3,2,1,0} mul, f32[] const.1), dimensions={0,2,3}, to_apply=scalar_add_computation + reduce.2 = f32[512]{0} reduce(f32[128,512,28,28]{3,2,1,0} p0.1, f32[] const.1), dimensions={0,2,3}, to_apply=scalar_add_computation + ROOT tuple = (f32[512]{0}, f32[512]{0}) tuple(f32[512]{0} reduce.1, f32[512]{0} reduce.2) + } + + ENTRY entry (p0: f32[128,512,28,28]) -> (f32[512], f32[512], f32[512]) { + p0 = f32[128,512,28,28]{3,2,1,0} parameter(0) + const = f32[] constant(1) + fusion = (f32[512]{0}, f32[512]{0}) fusion(f32[128,512,28,28]{3,2,1,0} p0), kind=kInput, calls=fused_computation + get-tuple-element = f32[512]{0} get-tuple-element((f32[512]{0}, f32[512]{0}) fusion), index=0 + get-tuple-element.1 = f32[512]{0} get-tuple-element((f32[512]{0}, f32[512]{0}) fusion), index=1 + reduce.3 = f32[512]{0} reduce(p0, const), dimensions={0,2,3}, to_apply=scalar_add_computation + ROOT root = (f32[512]{0}, f32[512]{0}, f32[512]{0}) tuple(f32[512]{0} get-tuple-element, f32[512]{0} get-tuple-element.1, f32[512]{0} reduce.3) + })")) + .ValueOrDie(); + ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); + SCOPED_TRACE(module->ToString()); + const HloInstruction* fusion = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + ASSERT_TRUE(fusion->IsMultiOutputFusion()); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Reduce(), op::Reduce(), op::Reduce())); +} + +} // namespace gpu +} // namespace xla -- GitLab From 0ef76693fdab2a4d1a4923444a2593f79a6b7873 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Fri, 8 Jun 2018 10:02:44 -0700 Subject: 
[PATCH 055/365] Automated g4 rollback of changelist 199308328 PiperOrigin-RevId: 199809082 --- .../xla/service/algebraic_simplifier_test.cc | 47 +++++++++---------- tensorflow/compiler/xla/tests/hlo_test_base.h | 17 +++---- .../xla/tests/hlo_verified_test_base.cc | 20 +++++--- .../xla/tests/hlo_verified_test_base.h | 16 ++++++- 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index cda157f9fa..27eb48181e 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1714,7 +1714,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopPad) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1759,7 +1759,7 @@ TEST_F(AlgebraicSimplifierTest, NegativePadding) { EXPECT_THAT(computation->root_instruction(), op::Pad(param, zero)); EXPECT_TRUE(has_negative_padding(pad)); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Slice(op::Pad(param, zero))); EXPECT_FALSE( @@ -1781,7 +1781,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1804,7 +1804,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + 
ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1932,7 +1932,8 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { b.AddInstruction(HloInstruction::CreateConvolve(out_shape, input, filter, window, dnums)); - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); auto* computation = module->AddEntryComputation(b.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/true, @@ -2060,7 +2061,7 @@ TEST_F(AlgebraicSimplifierTest, MaxMinToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2090,7 +2091,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2121,7 +2122,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxWithBroadcastToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2151,7 +2152,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Maximum(param0, max_value), min_value)); @@ -2184,7 
+2185,7 @@ TEST_F(AlgebraicSimplifierTest, MinEquationWithMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Add(op::Maximum(param0, max_value), max_value), @@ -2200,10 +2201,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { HloInstruction::CreateParameter(0, r0f32, "scalar_param")); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6, 7}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, scalar_param, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, scalar_param, {})); Shape slice_shape = ShapeUtil::MakeShape(F32, {2, 2, 3, 3}); HloInstruction* slice = builder.AddInstruction(HloInstruction::CreateSlice( @@ -2219,10 +2218,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); // Running simplification again should not result in any further changes. 
- ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_FALSE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(scalar_param)); @@ -2237,10 +2236,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { HloInstruction::CreateConstant(Literal::CreateR0(42.0f))); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, forty_two, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, forty_two, {})); HloInstruction* transpose = builder.AddInstruction(HloInstruction::CreateTranspose( @@ -2259,7 +2256,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(forty_two)); @@ -2268,7 +2265,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { // Test that ReduceWindow(Pad(op, x), y) can simplify to ReduceWindow(op, x). TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2349,7 +2347,8 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { // Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to // ReduceWindow(Convert(op), x). TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. 
+ auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2444,7 +2443,7 @@ TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); HloInstruction* root = computation->root_instruction(); EXPECT_EQ(a, root); diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index eb3a2ea76a..249da87f48 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -66,6 +66,15 @@ namespace xla { // // For a more detailed example, see "../tests/sample_text_test.cc". class HloTestBase : public ::testing::Test { + public: + // Creates a new HLO module for a test. The module created will have + // TestName() for its name; it will also automatically populate its debug + // options from command-line flags. If you want a fresh HloModule object and + // then add HloComputations to it, it's recommended to use this method in your + // tests. + static std::unique_ptr CreateNewModule( + const string& name = TestName()); + protected: // This uses the interpreter backend as the reference backend and // automatically finds another supported backend as the test backend. If the @@ -80,14 +89,6 @@ class HloTestBase : public ::testing::Test { ~HloTestBase() override {} - // Creates a new HLO module for a test. The module created will have - // TestName() for its name; it will also automatically populate its debug - // options from command-line flags. If you want a fresh HloModule object and - // then add HloComputations to it, it's recommended to use this method in your - // tests. 
- static std::unique_ptr CreateNewModule( - const string& name = TestName()); - // Populates debug options from command-line flags and adjusts the options for // testing. It is recommended to use this when you need to pass in // DebugOptions, e.g. when creating a module from a string or a file. diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index c8a05c2e9e..22c664d142 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -41,14 +41,17 @@ void HloVerifiedTestBase::TearDown() { << "TearDown called more than once; it should be called exactly once."; tear_down_called_ = true; if (module_) { - VerifyModule(); + VerifyModule(module_.get()); + } + for (int i = 0; i < modules_.size(); ++i) { + VerifyModule(modules_.at(i).get()); } HloTestBase::TearDown(); } -void HloVerifiedTestBase::VerifyModule() { - HloVerifier verifier; - xla::StatusOr mutated = verifier.Run(module_.get()); +void HloVerifiedTestBase::VerifyModule(HloModule* module) { + HloVerifier verifier(/*allow_mixed_precision=*/true); + xla::StatusOr mutated = verifier.Run(module); if (!mutated.ok()) { ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); } else { @@ -59,15 +62,20 @@ void HloVerifiedTestBase::VerifyModule() { HloModule& HloVerifiedTestBase::module() { if (!module_) { - module_ = CreateNewModule(); + module_ = HloTestBase::CreateNewModule(); } return *module_; } +HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) { + modules_.emplace_back(HloTestBase::CreateNewModule()); + return modules_.back().get(); +} + void HloVerifiedTestBase::ParseAndVerifyModule( tensorflow::StringPiece hlo_text) { CHECK(!module_) << "Called ParseModule when test already has a module."; TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); - VerifyModule(); + VerifyModule(module_.get()); } } // namespace xla diff --git 
a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h index e5bb14a883..5b59cc77f6 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -52,11 +52,23 @@ class HloVerifiedTestBase : public HloTestBase { shape_verifier_ = std::move(shape_verifier); } + // Creates a new module for a test, and stores it in modules_ so it can be + // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent + // creation of unverified modules. + HloModule* CreateNewModule(const string& name = TestName()); + + // It is confusing to store modules created by module() and CreateNewModule() + // in different fields, but it allows us to migrate tests to + // HloVerifiedTestBase more easily, so it's a win because we can verify more + // modules. See b/80488902. private: - std::unique_ptr module_; // Lazily populated. Access via module(). + // Lazily populated. Access via module(). + std::unique_ptr module_; + // Populated by calls to CreateNewModule. + std::vector> modules_; std::unique_ptr shape_verifier_; bool tear_down_called_ = false; - void VerifyModule(); + static void VerifyModule(HloModule* module); }; } // namespace xla -- GitLab From da68f5f45b6b568fecffd53cba0ce382f0d034f9 Mon Sep 17 00:00:00 2001 From: Hsien-Yang Li Date: Sat, 9 Jun 2018 01:35:48 +0800 Subject: [PATCH 056/365] Add decode uint16 PNG images support for tf.image.decode_image. (#18628) * Add decode uint16 images support for tf.image.decode_image. * Decode to a tensor with dtype. * Add testcase for decode_image. * Add float32 testcase for decode_image. 
* Fix build error * Regenerate the tensorflow.image.pbtxt --- tensorflow/python/ops/image_ops_impl.py | 22 +++-- tensorflow/python/ops/image_ops_test.py | 83 +++++++++++++++++++ .../tools/api/golden/tensorflow.image.pbtxt | 2 +- 3 files changed, 98 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 4a32f2351b..95d05cd4d1 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1556,13 +1556,13 @@ def is_jpeg(contents, name=None): @tf_export('image.decode_image') -def decode_image(contents, channels=None, name=None): +def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the - appropriate operation to convert the input bytes `string` into a `Tensor` of - type `uint8`. + appropriate operation to convert the input bytes `string` into a `Tensor` + of type `dtype`. Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D @@ -1574,10 +1574,11 @@ def decode_image(contents, channels=None, name=None): contents: 0-D `string`. The encoded image bytes. channels: An optional `int`. Defaults to `0`. Number of color channels for the decoded image. + dtype: The desired DType of the returned `Tensor`. name: A name for the operation (optional) Returns: - `Tensor` with type `uint8` with shape `[height, width, num_channels]` for + `Tensor` with type `dtype` and shape `[height, width, num_channels]` for BMP, JPEG, and PNG images and shape `[num_frames, height, width, 3]` for GIF images. 
@@ -1601,7 +1602,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding BMP images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_decode, assert_channels]): - return gen_image_ops.decode_bmp(contents) + return convert_image_dtype(gen_image_ops.decode_bmp(contents), dtype) def _gif(): # Create assert to make sure that channels is not set to 1 @@ -1614,7 +1615,7 @@ def decode_image(contents, channels=None, name=None): channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_gif(contents) + return convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) def check_gif(): # Create assert op to check that bytes are GIF decodable @@ -1623,7 +1624,11 @@ def decode_image(contents, channels=None, name=None): def _png(): """Decodes a PNG image.""" - return gen_image_ops.decode_png(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_png(contents, channels, + dtype=dtypes.uint8 + if dtype == dtypes.uint8 + else dtypes.uint16), dtype) def check_png(): """Checks if an image is PNG.""" @@ -1639,7 +1644,8 @@ def decode_image(contents, channels=None, name=None): 'images') assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) with ops.control_dependencies([assert_channels]): - return gen_image_ops.decode_jpeg(contents, channels) + return convert_image_dtype( + gen_image_ops.decode_jpeg(contents, channels), dtype) # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). 
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index d50ff3fb60..ae45037c17 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -3888,5 +3888,88 @@ class SobelEdgesTest(test_util.TensorFlowTestCase): self.assertAllClose(expected_batch, actual_sobel) +class DecodeImageTest(test_util.TensorFlowTestCase): + + def testJpegUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpUint16(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + 
dtypes.uint16) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testJpegFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/jpeg/testdata" + jpeg0 = io_ops.read_file(os.path.join(base, "jpeg_merge_test1.jpg")) + image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testPngFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/png/testdata" + png0 = io_ops.read_file(os.path.join(base, "lena_rgba.png")) + image0 = image_ops.decode_image(png0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype( + image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testGifFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/gif/testdata" + gif0 = io_ops.read_file(os.path.join(base, "scan.gif")) + image0 = image_ops.decode_image(gif0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + def testBmpFloat32(self): + with self.test_session(use_gpu=True) as sess: + base = "tensorflow/core/lib/bmp/testdata" + bmp0 = io_ops.read_file(os.path.join(base, "lena.bmp")) + image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32) + image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0), + dtypes.float32) + image0, image1 = sess.run([image0, image1]) + self.assertAllEqual(image0, image1) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 87543e374b..32fb9183e6 100644 
--- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -54,7 +54,7 @@ tf_module { } member_method { name: "decode_image" - argspec: "args=[\'contents\', \'channels\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'contents\', \'channels\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\'], " } member_method { name: "decode_jpeg" -- GitLab From 46147d8ca303e29fd15612afdb906b5220af5d3f Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Fri, 8 Jun 2018 10:33:48 -0700 Subject: [PATCH 057/365] Increase relative error to 1e-4 on convolution_test. convolution_test had a zero relative error bound which made it overly sensitive to changes to the underlying computation. PiperOrigin-RevId: 199814523 --- tensorflow/compiler/xla/tests/convolution_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 947959beb1..346bb3a399 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -47,9 +47,9 @@ class ConvolutionTest : public ClientLibraryTestBase { #if XLA_TEST_BACKEND_GPU // XLA:GPU sometimes uses FFT convolution which isn't as precise as spatial // convolution. So relax the absolute error threshold. - ErrorSpec error_spec_ = ErrorSpec(1e-2); + ErrorSpec error_spec_ = ErrorSpec(1e-2, 1e-4); #else - ErrorSpec error_spec_ = ErrorSpec(1e-4); + ErrorSpec error_spec_ = ErrorSpec(1e-4, 1e-4); #endif }; -- GitLab From 255a1c4e5d345710a8d734c0a0dfbbf728675b95 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 8 Jun 2018 10:52:33 -0700 Subject: [PATCH 058/365] Preserve input shape information when serializing deferred-build Sequential models. 
PiperOrigin-RevId: 199817660 --- tensorflow/python/keras/engine/sequential.py | 7 +++++- .../python/keras/engine/sequential_test.py | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index 52e29b0ffa..3ca8fdd326 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -222,11 +222,16 @@ class Sequential(Model): for layer in self._layers: x = layer(x) self.outputs = [x] + # Make sure that the model's input shape will be preserved during + # serialization. + if self._layers: + self._layers[0]._batch_input_shape = batch_shape if self.inputs: self._init_graph_network(self.inputs, self.outputs, name=self.name) self.built = True - self._track_layers(self._layers) + if self._layers: + self._track_layers(self._layers) def predict_proba(self, x, batch_size=32, verbose=0): """Generates class probability predictions for the input samples. diff --git a/tensorflow/python/keras/engine/sequential_test.py b/tensorflow/python/keras/engine/sequential_test.py index 69a288e69b..cdaf9162de 100644 --- a/tensorflow/python/keras/engine/sequential_test.py +++ b/tensorflow/python/keras/engine/sequential_test.py @@ -209,6 +209,30 @@ class TestSequential(test.TestCase): x2 = model.predict(val_a) assert np.abs(np.sum(x1 - x2)) > 1e-5 + def test_sequential_deferred_build_serialization(self): + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + + model = keras.models.Sequential() + # We don't specify the input shape. 
+ model.add(keras.layers.Dense(num_hidden)) + model.add(keras.layers.Dense(num_classes)) + model.compile(loss='mse', optimizer=rmsprop.RMSPropOptimizer(1e-3)) + self.assertFalse(model.built) + + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + model.train_on_batch(x, y) + self.assertTrue(model.built) + + config = model.get_config() + new_model = keras.models.Sequential.from_config(config) + self.assertTrue(new_model.built) + self.assertEqual(len(model.layers), 2) + self.assertEqual(len(model.weights), 4) + if __name__ == '__main__': test.main() -- GitLab From d33c12188f09d49c2bf0c912702836071ffcc5ae Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 8 Jun 2018 13:59:39 -0400 Subject: [PATCH 059/365] Update RELEASE.md for tfdbg bug fix in 1.9.0 (#19846) --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 18e5dfb16e..e09e9c6190 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -22,7 +22,7 @@ * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. * Accelerated Linear Algebra (XLA): -* TensorFlow Debugger (tfdbg) CLI: +* TensorFlow Debugger (tfdbg): fix an issue in which the TensorBoard Debugger Plugin could not handle total source file size exceeding gRPC message size limit (4 MB). * `tf.contrib`: * Add `tf.contrib.data.choose_from_datasets()`. * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. -- GitLab From e8ca21f1533361aaad5acf1738239266b95dae12 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 11:15:20 -0700 Subject: [PATCH 060/365] Split out opcodes using dimensions_ as subclasses from HloInstruction. 
PiperOrigin-RevId: 199821675 --- .../compiler/xla/service/hlo_instruction.cc | 237 ++++++--------- .../compiler/xla/service/hlo_instruction.h | 42 ++- .../compiler/xla/service/hlo_instructions.cc | 272 ++++++++++++++++++ .../compiler/xla/service/hlo_instructions.h | 170 +++++++++++ 4 files changed, 553 insertions(+), 168 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b6e2056600..ae230d2740 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -66,6 +66,9 @@ StatusOr> HloInstruction::CreateFromProto( const auto operands = [&instruction_map, &proto](int index) { return instruction_map.at(proto.operand_ids(index)); }; + const auto computations = [&computation_map, &proto](int index) { + return computation_map.at(proto.called_computation_ids(index)); + }; switch (opcode) { // Ops migrated to subclasses. case HloOpcode::kBatchNormTraining: @@ -111,6 +114,57 @@ StatusOr> HloInstruction::CreateFromProto( CHECK_EQ(proto.operand_ids_size(), 1); instruction = CreateRecvDone(operands(0)); break; + case HloOpcode::kReverse: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = CreateReverse(proto.shape(), operands(0), + std::vector(proto.dimensions().begin(), + proto.dimensions().end())); + break; + case HloOpcode::kConcatenate: { + CHECK_EQ(proto.dimensions_size(), 1); + std::vector concat_operands(proto.operand_ids_size()); + std::transform(proto.operand_ids().begin(), proto.operand_ids().end(), + concat_operands.begin(), + [&instruction_map](int64 operand_id) { + return instruction_map.at(operand_id); + }); + instruction = CreateConcatenate(proto.shape(), concat_operands, + proto.dimensions(0)); + break; + } + case HloOpcode::kReduce: + CHECK_EQ(proto.operand_ids_size(), 2); + CHECK_EQ(proto.called_computation_ids_size(), 1); + instruction = CreateReduce(proto.shape(), operands(0), operands(1), + 
std::vector(proto.dimensions().begin(), + proto.dimensions().end()), + computations(0)); + break; + case HloOpcode::kTranspose: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = + CreateTranspose(proto.shape(), operands(0), + std::vector(proto.dimensions().begin(), + proto.dimensions().end())); + break; + case HloOpcode::kBroadcast: + CHECK_EQ(proto.operand_ids_size(), 1); + instruction = + CreateBroadcast(proto.shape(), operands(0), + std::vector(proto.dimensions().begin(), + proto.dimensions().end())); + break; + case HloOpcode::kMap: { + CHECK_EQ(proto.called_computation_ids_size(), 1); + std::vector map_operands(proto.operand_ids_size()); + std::transform(proto.operand_ids().begin(), proto.operand_ids().end(), + map_operands.begin(), + [&instruction_map](int64 operand_id) { + return instruction_map.at(operand_id); + }); + instruction = CreateMap(proto.shape(), map_operands, computations(0)); + break; + } default: { instruction = WrapUnique(new HloInstruction(opcode, proto.shape())); for (const int64 operand_id : proto.operand_ids()) { @@ -124,6 +178,14 @@ StatusOr> HloInstruction::CreateFromProto( TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id) ->AddControlDependencyTo(instruction.get())); } + if (instruction->opcode() != HloOpcode::kFusion) { + for (const int64 computation_id : proto.called_computation_ids()) { + TF_RET_CHECK(ContainsKey(computation_map, computation_id)) + << "No computation with id " << computation_id; + instruction->called_computations_.push_back( + computation_map.at(computation_id)); + } + } break; } } @@ -146,13 +208,6 @@ StatusOr> HloInstruction::CreateFromProto( << "No fusion computation with id " << fusion_id; fused_computation->SetFusionInstruction(instruction.get()); instruction->called_computations_.push_back(fused_computation); - } else { - for (const int64 computation_id : proto.called_computation_ids()) { - TF_RET_CHECK(ContainsKey(computation_map, computation_id)) - << "No computation with id " << computation_id; - 
instruction->called_computations_.push_back( - computation_map.at(computation_id)); - } } if (instruction->opcode() == HloOpcode::kTrace) { @@ -174,9 +229,6 @@ StatusOr> HloInstruction::CreateFromProto( instruction->parameter_number_ = proto.parameter_number(); instruction->tuple_index_ = proto.tuple_index(); - for (int64 dimension : proto.dimensions()) { - instruction->dimensions_.push_back(dimension); - } if (proto.has_window()) { instruction->window_ = MakeUnique(proto.window()); } @@ -392,18 +444,8 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, const Shape& shape, tensorflow::gtl::ArraySlice operands, HloComputation* map_computation, tensorflow::gtl::ArraySlice static_operands) { - CHECK(static_operands.empty()) << "static_operands not yet supported"; - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kMap, shape)); - for (auto operand : operands) { - instruction->AppendOperand(operand); - } - instruction->called_computations_.push_back(map_computation); - // TODO(b/65689298) Remove code below once Map is generalized to accept - // arbitrary map dimensions. 
- instruction->dimensions_.resize(ShapeUtil::Rank(shape)); - std::iota(instruction->dimensions_.begin(), instruction->dimensions_.end(), - 0); - return instruction; + return MakeUnique(shape, operands, map_computation, + static_operands); } /* static */ std::unique_ptr HloInstruction::CreateConvolve( @@ -538,10 +580,7 @@ HloInstruction::CreateCrossReplicaSum( /* static */ std::unique_ptr HloInstruction::CreateReverse( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions) { - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kReverse, shape)); - instruction->AppendOperand(operand); - instruction->dimensions_.assign(dimensions.begin(), dimensions.end()); - return instruction; + return MakeUnique(shape, operand, dimensions); } /* static */ std::unique_ptr HloInstruction::CreateWhile( @@ -619,13 +658,7 @@ HloInstruction::CreateDynamicUpdateSlice(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateConcatenate( const Shape& shape, tensorflow::gtl::ArraySlice operands, int64 dimension) { - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kConcatenate, shape)); - for (auto operand : operands) { - instruction->AppendOperand(operand); - } - instruction->dimensions_.push_back(dimension); - return instruction; + return MakeUnique(shape, operands, dimension); } /* static */ std::unique_ptr HloInstruction::CreateConvert( @@ -648,13 +681,8 @@ HloInstruction::CreateBitcastConvert(const Shape& shape, const Shape& shape, HloInstruction* arg, HloInstruction* init_value, tensorflow::gtl::ArraySlice dimensions_to_reduce, HloComputation* reduce_computation) { - auto instruction = WrapUnique(new HloInstruction(HloOpcode::kReduce, shape)); - instruction->AppendOperand(arg); - instruction->AppendOperand(init_value); - instruction->dimensions_.assign(dimensions_to_reduce.begin(), - dimensions_to_reduce.end()); - instruction->called_computations_.push_back(reduce_computation); - return instruction; + return MakeUnique( 
+ shape, arg, init_value, dimensions_to_reduce, reduce_computation); } /* static */ std::unique_ptr HloInstruction::CreateReduceWindow( @@ -719,12 +747,8 @@ HloInstruction::CreateSelectAndScatter( /* static */ std::unique_ptr HloInstruction::CreateBroadcast( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice broadcast_dimensions) { - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kBroadcast, shape)); - instruction->AppendOperand(operand); - instruction->dimensions_.assign(broadcast_dimensions.begin(), - broadcast_dimensions.end()); - return instruction; + return MakeUnique(shape, operand, + broadcast_dimensions); } /* static */ std::unique_ptr @@ -803,19 +827,7 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr HloInstruction::CreateTranspose( const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions) { - CHECK_EQ(shape.dimensions().size(), dimensions.size()); - CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size()); - CHECK(std::equal(operand->shape().dimensions().begin(), - operand->shape().dimensions().end(), - Permute(dimensions, shape.dimensions()).begin())) - << "shape: " << ShapeUtil::HumanString(shape) - << ", operand->shape(): " << ShapeUtil::HumanString(shape) - << ", dimensions: {" << Join(dimensions, ", ") << "}"; - auto instruction = - WrapUnique(new HloInstruction(HloOpcode::kTranspose, shape)); - instruction->AppendOperand(operand); - instruction->dimensions_.assign(dimensions.begin(), dimensions.end()); - return instruction; + return MakeUnique(shape, operand, dimensions); } /* static */ std::unique_ptr HloInstruction::CreateFusion( @@ -1293,6 +1305,12 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kSendDone: case HloOpcode::kRecv: case HloOpcode::kRecvDone: + case HloOpcode::kReverse: + case HloOpcode::kConcatenate: + case HloOpcode::kReduce: + case HloOpcode::kTranspose: + case HloOpcode::kBroadcast: + case 
HloOpcode::kMap: clone = CloneWithNewOperandsImpl(shape, new_operands, context); break; // Unary ops. @@ -1353,10 +1371,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( new_operands[2]); break; // Other supported ops. - case HloOpcode::kBroadcast: - CHECK_EQ(new_operands.size(), 1); - clone = CreateBroadcast(shape, new_operands[0], dimensions_); - break; case HloOpcode::kCall: clone = CreateCall(shape, new_operands, to_apply()); break; @@ -1375,9 +1389,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( clone = CreateHostCompute(shape, new_operands, channel_name_, cost_estimate_ns_); break; - case HloOpcode::kConcatenate: - clone = CreateConcatenate(shape, new_operands, dimensions(0)); - break; case HloOpcode::kConvert: CHECK_EQ(new_operands.size(), 1); clone = CreateConvert(shape, new_operands[0]); @@ -1408,19 +1419,11 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( CHECK_EQ(new_operands.size(), 1); clone = CreateGetTupleElement(shape, new_operands[0], tuple_index()); break; - case HloOpcode::kMap: - clone = CreateMap(shape, new_operands, to_apply()); - break; case HloOpcode::kPad: CHECK_EQ(new_operands.size(), 2); clone = CreatePad(shape, new_operands[0], new_operands[1], *padding_config_); break; - case HloOpcode::kReduce: - CHECK_EQ(new_operands.size(), 2); - clone = CreateReduce(shape, new_operands[0], new_operands[1], dimensions_, - to_apply()); - break; case HloOpcode::kReduceWindow: CHECK_EQ(new_operands.size(), 2); clone = CreateReduceWindow(shape, new_operands[0], new_operands[1], @@ -1432,10 +1435,6 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( CreateSelectAndScatter(shape, new_operands[0], select(), *window_, new_operands[1], new_operands[2], scatter()); break; - case HloOpcode::kReverse: - CHECK_EQ(new_operands.size(), 1); - clone = CreateReverse(shape, new_operands[0], dimensions_); - break; case HloOpcode::kRng: clone = CreateRng(shape, distribution_, new_operands); break; @@ -1457,10 +1456,6 @@ std::unique_ptr 
HloInstruction::CloneWithNewOperands( clone = CreateDynamicUpdateSlice(shape, new_operands[0], new_operands[1], new_operands[2]); break; - case HloOpcode::kTranspose: - CHECK_EQ(new_operands.size(), 1); - clone = CreateTranspose(shape, new_operands[0], dimensions_); - break; case HloOpcode::kTuple: clone = CreateTuple(new_operands); *clone->mutable_shape() = shape; @@ -1606,28 +1601,6 @@ const Literal& HloInstruction::literal() const { bool HloInstruction::HasLiteral() const { return literal_ != nullptr; } -bool HloInstruction::CanHaveDimensionsField() const { - return (opcode() == HloOpcode::kReverse || - opcode() == HloOpcode::kConcatenate || opcode() == HloOpcode::kMap || - opcode() == HloOpcode::kReduce || opcode() == HloOpcode::kBroadcast || - opcode() == HloOpcode::kTranspose); -} - -const std::vector& HloInstruction::dimensions() const { - CHECK(CanHaveDimensionsField()); - return dimensions_; -} - -int64 HloInstruction::dimensions(int64 index) const { - return dimensions()[index]; -} - -int64 HloInstruction::concatenate_dimension() const { - CHECK(opcode() == HloOpcode::kConcatenate); - CHECK_EQ(1, dimensions_.size()); - return dimensions(0); -} - int64 HloInstruction::tuple_index() const { CHECK_EQ(HloOpcode::kGetTupleElement, opcode_); return tuple_index_; @@ -1793,12 +1766,6 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kTuple: return true; - // Broadcast, Concatenate, and Transpose need the same dimensions field. - case HloOpcode::kBroadcast: - case HloOpcode::kConcatenate: - case HloOpcode::kTranspose: - return dimensions() == other.dimensions(); - case HloOpcode::kFusion: return fusion_kind() == other.fusion_kind() && eq_computations(fused_instructions_computation(), @@ -1839,11 +1806,6 @@ bool HloInstruction::IdenticalSlowPath( other.gather_dimension_numbers()) && gather_window_bounds() == other.gather_window_bounds(); - // Reduction results are determined by the reduction dimension and the - // reduction computation. 
- case HloOpcode::kReduce: - return dimensions() == other.dimensions() && - eq_computations(to_apply(), other.to_apply()); case HloOpcode::kReduceWindow: return eq_computations(to_apply(), other.to_apply()) && protobuf_util::ProtobufEquals(window(), other.window()); @@ -1867,7 +1829,6 @@ bool HloInstruction::IdenticalSlowPath( slice_strides_ == other.slice_strides_; case HloOpcode::kCall: case HloOpcode::kCrossReplicaSum: - case HloOpcode::kMap: return eq_computations(to_apply(), other.to_apply()); case HloOpcode::kCustomCall: if ((window_ == nullptr) != (other.window_ == nullptr) || @@ -1884,8 +1845,6 @@ bool HloInstruction::IdenticalSlowPath( return false; } return custom_call_target_ == other.custom_call_target_; - case HloOpcode::kReverse: - return dimensions() == other.dimensions(); case HloOpcode::kConditional: return eq_computations(true_computation(), other.true_computation()) && eq_computations(false_computation(), other.false_computation()); @@ -1907,19 +1866,17 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kSendDone: case HloOpcode::kRecv: case HloOpcode::kRecvDone: + case HloOpcode::kReverse: + case HloOpcode::kConcatenate: + case HloOpcode::kReduce: + case HloOpcode::kTranspose: + case HloOpcode::kBroadcast: + case HloOpcode::kMap: LOG(FATAL) << "Base class impl called for opcode with subclass: " << opcode(); } } -bool HloInstruction::IsRank2Transpose() const { - return (opcode_ == HloOpcode::kTranspose) && - dimensions_ == std::vector({1, 0}) && - shape_.dimensions_size() == 2 && - std::equal(shape_.dimensions().begin(), shape_.dimensions().end(), - operands_[0]->shape_.dimensions().rbegin()); -} - void HloInstruction::RemoveUser(HloInstruction* user) { auto set_it = user_set_.find(user); CHECK(set_it != user_set_.end()); @@ -2277,9 +2234,6 @@ std::vector HloInstruction::ExtraAttributesToString( if (opcode() == HloOpcode::kFusion) { extra.push_back(StrCat("kind=", xla::ToString(fusion_kind()))); } - if (CanHaveDimensionsField()) { - 
extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); - } if (window_ != nullptr && window_->dimensions_size() != 0) { extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } @@ -2477,9 +2431,6 @@ HloInstructionProto HloInstruction::ToProto() const { } proto.set_tuple_index(tuple_index_); - for (int64 dimension : dimensions_) { - proto.add_dimensions(dimension); - } if (window_ != nullptr) { *proto.mutable_window() = *window_; } @@ -3157,19 +3108,6 @@ bool HloInstruction::IsElementwise() const { // Other operations. case HloOpcode::kRng: return true; - case HloOpcode::kMap: - if (!dimensions().empty()) { - // Check that the map is executed in elementwise compatible dimensions. - if (dimensions().size() != operand(0)->shape().dimensions_size()) { - return false; - } - for (int i = 0; i < dimensions().size(); ++i) { - if (dimensions()[i] != i) { - return false; - } - } - } - return true; case HloOpcode::kFusion: if (fusion_kind() != FusionKind::kLoop) { return false; @@ -3608,4 +3546,13 @@ const std::vector& HloInstruction::fft_length() const { int64 HloInstruction::channel_id() const { return Cast(this)->channel_id(); } + +int64 HloInstruction::concatenate_dimension() const { + return Cast(this)->concatenate_dimension(); +} + +bool HloInstruction::IsRank2Transpose() const { + auto transpose = DynCast(this); + return transpose != nullptr && transpose->IsRank2Transpose(); +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index c08806b33b..cc4a8b8252 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -802,9 +802,6 @@ class HloInstruction { // Returns whether the instruction has a constant operand. bool HasConstantOperand() const; - // Returns whether this instruction does a rank-2 transposition. 
- bool IsRank2Transpose() const; - // Replaces the use of this instruction in "user" with "new_producer". Note // that there might be multiple uses of this instruction in "user"; all will // be replaced. @@ -889,17 +886,6 @@ class HloInstruction { return parameter_number_; } - // Returns the dimension sizes or numbers associated with this instruction. - // - // Precondition: opcode() is one of: concatenate, reduce, broadcast, reshape, - // and reverse. - const std::vector& dimensions() const; - int64 dimensions(int64 index) const; - - // Accessor for the dimension in which a concatenate HLO should occur. - // Precondition: opcode() == HloOpcode::kConcatenate - int64 concatenate_dimension() const; - // Returns the tuple index associated with this instruction. // // Precondition: opcode() == HloOpcode::kGetTupleElement @@ -1385,7 +1371,7 @@ class HloInstruction { bool IsElementwiseOnOperand(int64 operand_idx) const; // Returns true if this instruction is elementwise on all its operands. - bool IsElementwise() const; + virtual bool IsElementwise() const; // Returns true if this elementwise instruction implicitly broadcasts operand // `operand_idx`. @@ -1521,6 +1507,20 @@ class HloInstruction { // Delegates to HloSendRecvInstruction::channel_id. int64 channel_id() const; + + // Returns the dimension sizes or numbers associated with this instruction. + virtual const std::vector& dimensions() const { + LOG(FATAL) << "Unimplemented method."; + } + virtual int64 dimensions(int64 index) const { + LOG(FATAL) << "Unimplemented method."; + } + + // Delegates to HloConcatenateInstruction::concatenate_dimension. + int64 concatenate_dimension() const; + + // Returns whether this instruction does a rank-2 transposition. + bool IsRank2Transpose() const; // Old methods kept for smooth subclassing transition END. protected: @@ -1532,6 +1532,10 @@ class HloInstruction { // of the operand. 
void AppendOperand(HloInstruction* operand); + void AppendComputation(HloComputation* computation) { + called_computations_.push_back(computation); + } + private: // Implementation for non-common logic of CloneWithNewOperands. virtual std::unique_ptr CloneWithNewOperandsImpl( @@ -1615,10 +1619,6 @@ class HloInstruction { const Shape& shape, tensorflow::gtl::ArraySlice operands, HloCloneContext* context = nullptr) const; - // Returns true if this instruction can legally have the dimensions field - // set. Used for checking precondition of dimensions field accessors. - bool CanHaveDimensionsField() const; - // Returns how this instruction uses elements of its `i`th operand. UseKind OperandElementUse(int64 i) const; @@ -1662,10 +1662,6 @@ class HloInstruction { // Constant index, only present for kGetTupleElement. int64 tuple_index_ = -1; - // Dimensions present for some operations that require reshaping or - // broadcasting, including Reshape, Reduce, ReduceWindow, and Reverse. - std::vector dimensions_; - // Describes the window in a windowed operation such as convolution. 
std::unique_ptr window_; diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 109bf1a9bd..e987bd6d86 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -251,4 +251,276 @@ HloRecvDoneInstruction::CloneWithNewOperandsImpl( Cast(new_operands[0])); } +HloReverseInstruction::HloReverseInstruction( + const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice dimensions) + : HloInstruction(HloOpcode::kReverse, shape), + dimensions_(dimensions.begin(), dimensions.end()) { + AppendOperand(operand); +} + +HloInstructionProto HloReverseInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +std::vector HloReverseInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloReverseInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return dimensions() == casted_other.dimensions(); +} + +std::unique_ptr HloReverseInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique(shape, new_operands[0], + dimensions()); +} + +HloConcatenateInstruction::HloConcatenateInstruction( + const Shape& shape, tensorflow::gtl::ArraySlice operands, + int64 dimension) + : HloInstruction(HloOpcode::kConcatenate, shape), dimensions_({dimension}) { + for (auto operand : operands) { + AppendOperand(operand); + } +} + +HloInstructionProto HloConcatenateInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : 
dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +std::vector HloConcatenateInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloConcatenateInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = + static_cast(other); + return dimensions() == casted_other.dimensions(); +} + +std::unique_ptr +HloConcatenateInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + return MakeUnique(shape, new_operands, + dimensions(0)); +} + +HloReduceInstruction::HloReduceInstruction( + const Shape& shape, HloInstruction* arg, HloInstruction* init_value, + tensorflow::gtl::ArraySlice dimensions_to_reduce, + HloComputation* reduce_computation) + : HloInstruction(HloOpcode::kReduce, shape), + dimensions_(dimensions_to_reduce.begin(), dimensions_to_reduce.end()) { + AppendOperand(arg); + AppendOperand(init_value); + AppendComputation(reduce_computation); +} + +HloInstructionProto HloReduceInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +std::vector HloReduceInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloReduceInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + // Reduction results are determined by the reduction dimension and the + // reduction computation. 
+ return dimensions() == casted_other.dimensions() && + eq_computations(to_apply(), casted_other.to_apply()); +} + +std::unique_ptr HloReduceInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 2); + return MakeUnique( + shape, new_operands[0], new_operands[1], dimensions(), to_apply()); +} + +HloTransposeInstruction::HloTransposeInstruction( + const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice dimensions) + : HloInstruction(HloOpcode::kTranspose, shape), + dimensions_(dimensions.begin(), dimensions.end()) { + CHECK_EQ(shape.dimensions().size(), dimensions.size()); + CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size()); + CHECK(std::equal(operand->shape().dimensions().begin(), + operand->shape().dimensions().end(), + Permute(dimensions, shape.dimensions()).begin())) + << "shape: " << ShapeUtil::HumanString(shape) + << ", operand->shape(): " << ShapeUtil::HumanString(shape) + << ", dimensions: {" << Join(dimensions, ", ") << "}"; + AppendOperand(operand); +} + +bool HloTransposeInstruction::IsRank2Transpose() const { + return dimensions() == std::vector({1, 0}) && + shape().dimensions_size() == 2 && + std::equal(shape().dimensions().begin(), shape().dimensions().end(), + operand(0)->shape().dimensions().rbegin()); +} + +HloInstructionProto HloTransposeInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +std::vector HloTransposeInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloTransposeInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return 
dimensions() == casted_other.dimensions(); +} + +std::unique_ptr +HloTransposeInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique(shape, new_operands[0], + dimensions()); +} + +HloBroadcastInstruction::HloBroadcastInstruction( + const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice broadcast_dimension) + : HloInstruction(HloOpcode::kBroadcast, shape), + dimensions_(broadcast_dimension.begin(), broadcast_dimension.end()) { + AppendOperand(operand); +} + +HloInstructionProto HloBroadcastInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +std::vector HloBroadcastInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloBroadcastInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return dimensions() == casted_other.dimensions(); +} + +std::unique_ptr +HloBroadcastInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + CHECK_EQ(new_operands.size(), 1); + return MakeUnique(shape, new_operands[0], + dimensions()); +} + +HloMapInstruction::HloMapInstruction( + const Shape& shape, tensorflow::gtl::ArraySlice operands, + HloComputation* map_computation, + tensorflow::gtl::ArraySlice static_operands) + : HloInstruction(HloOpcode::kMap, shape) { + CHECK(static_operands.empty()) << "static_operands not yet supported"; + for (auto operand : operands) { + AppendOperand(operand); + } + AppendComputation(map_computation); + // TODO(b/65689298) Remove code below once Map is generalized 
to accept + // arbitrary map dimensions. + dimensions_.resize(ShapeUtil::Rank(shape)); + std::iota(dimensions_.begin(), dimensions_.end(), 0); +} + +HloInstructionProto HloMapInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + for (int64 dimension : dimensions_) { + proto.add_dimensions(dimension); + } + return proto; +} + +bool HloMapInstruction::IsElementwise() const { + if (!dimensions().empty()) { + // Check that the map is executed in elementwise compatible dimensions. + if (dimensions().size() != shape().dimensions_size()) { + return false; + } + for (int i = 0; i < dimensions().size(); ++i) { + if (dimensions()[i] != i) { + return false; + } + } + } + return true; +} + +std::vector HloMapInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + return {StrCat("dimensions={", Join(dimensions(), ","), "}")}; +} + +bool HloMapInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + return eq_computations(to_apply(), other.to_apply()); +} + +std::unique_ptr HloMapInstruction::CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const { + return MakeUnique(shape, new_operands, to_apply()); +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 22d2fe6b27..c8c34f3406 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -207,6 +207,176 @@ class HloRecvDoneInstruction : public HloSendRecvInstruction { HloCloneContext* context) const override; }; +class HloReverseInstruction : public HloInstruction { + public: + explicit HloReverseInstruction(const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice dimensions); + // Returns the dimension sizes or numbers associated with this instruction. 
+ const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + +class HloConcatenateInstruction : public HloInstruction { + public: + explicit HloConcatenateInstruction( + const Shape& shape, tensorflow::gtl::ArraySlice operands, + int64 dimension); + // Returns the dimension sizes or numbers associated with this instruction. + const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Accessor for the dimension in which a concatenate HLO should occur. + int64 concatenate_dimension() const { return dimensions(0); } + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. 
+ std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + +class HloReduceInstruction : public HloInstruction { + public: + explicit HloReduceInstruction( + const Shape& shape, HloInstruction* arg, HloInstruction* init_value, + tensorflow::gtl::ArraySlice dimensions_to_reduce, + HloComputation* reduce_computation); + // Returns the dimension sizes or numbers associated with this instruction. + const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + +class HloTransposeInstruction : public HloInstruction { + public: + explicit HloTransposeInstruction( + const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice dimensions); + // Returns the dimension sizes or numbers associated with this instruction. + const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Returns whether this instruction does a rank-2 transposition. + bool IsRank2Transpose() const; + // Returns a serialized representation of this instruction. 
+ HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + +class HloBroadcastInstruction : public HloInstruction { + public: + explicit HloBroadcastInstruction( + const Shape& shape, HloInstruction* operand, + tensorflow::gtl::ArraySlice broadcast_dimension); + // Returns the dimension sizes or numbers associated with this instruction. + const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + +class HloMapInstruction : public HloInstruction { + public: + explicit HloMapInstruction( + const Shape& shape, tensorflow::gtl::ArraySlice operands, + HloComputation* map_computation, + tensorflow::gtl::ArraySlice static_operands = {}); + // Returns the dimension sizes or numbers associated with this instruction. 
+ const std::vector& dimensions() const override { return dimensions_; } + int64 dimensions(int64 index) const override { return dimensions()[index]; } + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + + // Returns true if this instruction is binary and elementwise. + bool IsElementwise() const override; + + private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, + tensorflow::gtl::ArraySlice new_operands, + HloCloneContext* context) const override; + + std::vector dimensions_; +}; + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INSTRUCTIONS_H_ -- GitLab From ebb67e0d7da53b3b848630e63aaa80f1283d83bd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 11:18:23 -0700 Subject: [PATCH 061/365] Delete deprecated protos. PiperOrigin-RevId: 199822232 --- tensorflow/compiler/xla/rpc/xla_service.proto | 16 - tensorflow/compiler/xla/xla.proto | 94 +---- tensorflow/compiler/xla/xla_data.proto | 390 ------------------ 3 files changed, 1 insertion(+), 499 deletions(-) diff --git a/tensorflow/compiler/xla/rpc/xla_service.proto b/tensorflow/compiler/xla/rpc/xla_service.proto index 92eb19ec0f..551ae895e0 100644 --- a/tensorflow/compiler/xla/rpc/xla_service.proto +++ b/tensorflow/compiler/xla/rpc/xla_service.proto @@ -115,10 +115,6 @@ service XlaService { returns (ComputeConstantResponse) { } - // Retrieves the inferred shape for a value within a computation. - rpc GetLocalShape(GetLocalShapeRequest) returns (GetLocalShapeResponse) { - } - // Requests one or more device handles from the target. 
The returned device // handles can be used to specify the device on which to execute computations // or transfer data. @@ -132,18 +128,6 @@ service XlaService { returns (CreateChannelHandleResponse) { } - // Requests that the referenced computation be specialized for the provided - // arguments for subsequent execution. This permits things such as value - // specialization. - rpc Specialize(SpecializeRequest) returns (SpecializeResponse) { - } - - // Modifies the provided computation so that subsequent executions - // will compute the provided ComputationDataHandle, rather than the - // last expression enqueued on that Computation. - rpc SetReturnValue(SetReturnValueRequest) returns (SetReturnValueResponse) { - } - // Invokes the provided computation with the provided global data passed as // immutable arguments. The request contains the whole computation graph. // Returns global data output and execution timing. diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 53ba120d21..6f07e4606b 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -225,14 +225,6 @@ message ExecutionOptions { repeated DeviceHandle device_handles = 5; } -message SnapshotComputationRequest { - ComputationHandle computation = 1; -} - -message LoadComputationSnapshotResponse { - ComputationHandle computation = 1; -} - message GetDeviceHandlesRequest { int64 device_count = 1; } @@ -291,11 +283,6 @@ message ResetDeviceRequest { message ResetDeviceResponse { } -message ComputationStatsRequest { - ComputationHandle computation = 1; - DebugOptions debug_options = 2; -} - message ComputationGraphStatsRequest { HloModuleProto computation = 1; DebugOptions debug_options = 2; @@ -305,14 +292,6 @@ message ComputationStatsResponse { ComputationStats stats = 1; } -message ComputationRequest { - string name = 1; -} - -message ComputationResponse { - ComputationHandle computation = 1; -} - message CreateChannelHandleRequest { } @@ 
-327,24 +306,6 @@ message UnregisterRequest { message UnregisterResponse { } -message SetReturnValueRequest { - ComputationHandle computation = 1; - ComputationDataHandle operand = 2; -} - -message SetReturnValueResponse { -} - -message ExecuteRequest { - reserved 3, 4; - - ComputationHandle computation = 1; - repeated GlobalDataHandle arguments = 2; - - // Options that affect how XLA compiles and runs code to service this request. - ExecutionOptions execution_options = 5; -} - message ExecuteGraphRequest { HloModuleProto computation = 1; repeated GlobalDataHandle arguments = 2; @@ -353,10 +314,6 @@ message ExecuteGraphRequest { ExecutionOptions execution_options = 3; } -message ExecuteParallelRequest { - repeated ExecuteRequest requests = 1; -} - message ExecuteGraphParallelRequest { repeated ExecuteGraphRequest requests = 1; } @@ -370,21 +327,6 @@ message ExecuteParallelResponse { repeated ExecuteResponse responses = 1; } -message ExecuteAsyncRequest { - reserved 3, 4; - - ComputationHandle computation = 1; - repeated GlobalDataHandle arguments = 2; - - // Options that affect how XLA compiles and runs code to service this request. - ExecutionOptions execution_options = 6; -} - -message ExecuteAsyncResponse { - // A handle to the execution launched asynchronously. 
- ExecutionHandle execution = 1; -} - message WaitForExecutionRequest { ExecutionHandle execution = 1; } @@ -394,31 +336,13 @@ message WaitForExecutionResponse { ExecutionProfile profile = 2; } -message IsConstantRequest { - ComputationHandle computation = 1; - ComputationDataHandle operand = 2; - int64 num_parameters = 3; -} - -message IsConstantResponse { - bool is_constant = 1; -} - -message ComputeConstantRequest { - ComputationHandle computation = 1; - ComputationDataHandle operand = 2; - Layout output_layout = 3; - repeated LiteralProto parameters = 4; -} - message ComputeConstantGraphRequest { HloModuleProto computation = 1; Layout output_layout = 2; } message ComputeConstantResponse { - // A LiteralProto is returned directly for this request, instead of a - // ComputationDataHandle. + // A LiteralProto is returned directly for this request. LiteralProto literal = 1; } @@ -460,14 +384,6 @@ message LoadDataResponse { int64 nanoseconds = 5; } -message SpecializeRequest { - ComputationHandle computation = 1; - repeated GlobalDataHandle arguments = 2; -} - -message SpecializeResponse { -} - message GetShapeRequest { GlobalDataHandle data = 1; } @@ -476,14 +392,6 @@ message GetShapeResponse { Shape shape = 1; } -message GetComputationShapeRequest { - ComputationHandle computation = 1; -} - -message GetComputationShapeResponse { - ProgramShape program_shape = 1; -} - message UnpackRequest { GlobalDataHandle data = 1; } diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 6bdfb0179c..963d3836ed 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -276,12 +276,6 @@ message ExecutionProfile { int64 compute_and_transfer_time_ns = 5; } -// Handle given to a user that represents a computation that the user builds up -// before execution. 
-message ComputationHandle { - int64 handle = 1; -} - // Handle given to a user that represents an execution that the user launched // asynchronously on the device. message ExecutionHandle { @@ -295,13 +289,6 @@ message GlobalDataHandle { int64 handle = 1; } -// Handle given to a user that represents a data result in a computation. -// This is used to pass to subsequent computations that depends upon the data as -// an operand. -message ComputationDataHandle { - int64 handle = 1; -} - // Handle given to a user that represents a replicated virtual device. Each // replicated device represents N physical devices for execution where N is the // number of replicas. @@ -441,44 +428,6 @@ message GatherDimensionNumbers { int64 index_vector_dim = 4; } -// Operation requests that are all collected as a tagged union with a oneof -// field in OpRequest. - -message ConstantRequest { - LiteralProto literal = 2; -} - -message GetTupleElementRequest { - ComputationDataHandle operand = 2; - int64 index = 3; -} - -message SliceRequest { - ComputationDataHandle operand = 2; - repeated int64 start_indices = 3; - repeated int64 limit_indices = 4; - repeated int64 strides = 5; -} - -message DynamicSliceRequest { - // Operand from which to slice at dynamic 'start_indices'. - ComputationDataHandle operand = 2; - // Dynamically computed 'start_indices' for slice operation. - ComputationDataHandle start_indices = 3; - // Slice sizes for each dimension (note that indices calculations are computed - // modulo dimension sizes to avoid out-of-bound array accesses). - repeated int64 slice_sizes = 4; -} - -message DynamicUpdateSliceRequest { - // Operand on which slice 'update' is to be applied. - ComputationDataHandle operand = 2; - // The slice update to apply to 'operand'. - ComputationDataHandle update = 3; - // Dynamically computed start indices for the update slice operation. 
- ComputationDataHandle start_indices = 4; -} - message ConvolutionDimensionNumbers { // The number of the dimension that represents batch in the input. int64 input_batch_dimension = 7; @@ -516,13 +465,6 @@ message ConvolutionDimensionNumbers { // Next = 13 }; -message ConvolveRequest { - ComputationDataHandle lhs = 2; - ComputationDataHandle rhs = 3; // This is the filter/kernel. - Window window = 4; // Describes the filter/kernel. - ConvolutionDimensionNumbers dimension_numbers = 5; -} - enum FftType { FFT = 0; // Forward FFT; complex in, complex out. IFFT = 1; // Inverse FFT; complex in, complex out. @@ -531,56 +473,6 @@ enum FftType { // fft_length real out } -message FftRequest { - FftType fft_type = 1; - repeated int64 fft_length = 2; // Multivalent for higher-order FFT. - ComputationDataHandle operand = 3; -} - -message InfeedRequest { - // The shape of the data returned by reading the device's infeed buffer. - Shape shape = 2; - - // Additional infeed configuration for the backend. - bytes config = 3; -} - -message OutfeedRequest { - // The shape of the data returned by reading the device's outfeed buffer. - Shape shape = 1; - - // Operand to the Outfeed. Supports tuple. - ComputationDataHandle operand = 2; - - // Backend-specific information for how to perform the outfeed. - bytes outfeed_config = 3; -} - -message CallRequest { - ComputationHandle to_apply = 2; - repeated ComputationDataHandle operands = 3; -} - -message CustomCallRequest { - string call_target_name = 2; - repeated ComputationDataHandle operands = 3; - Shape shape = 4; -} - -message HostComputeRequest { - // Operand to the HostCompute. Supports tuple. - repeated ComputationDataHandle operands = 1; - - // Name used to identify HostSend/Recv channels. - string channel_name = 2; - - // Cost estimate in nanoseconds. - int64 cost_estimate_ns = 3; - - // The shape of any data returned by host. 
- Shape shape = 4; -} - message DotDimensionNumbers { // The dimension numbers that represent the 'lhs' contracting dimensions. repeated int64 lhs_contracting_dimensions = 1; @@ -592,179 +484,6 @@ message DotDimensionNumbers { repeated int64 rhs_batch_dimensions = 4; }; -message DotRequest { - ComputationDataHandle lhs = 2; - ComputationDataHandle rhs = 3; - DotDimensionNumbers dimension_numbers = 4; -} - -message MapRequest { - repeated ComputationDataHandle operands = 2; - ComputationHandle to_apply = 3; - repeated ComputationDataHandle static_operands = 4; - // The dimensions over which to map. - // Example mapping a Dot operation along the batch dimension 0: - // operand0.shape = [2, 2, 2], operand1.shape = [2,2,3] - // Map({operand0, operand1}, Dot, {0}) - repeated int64 dimensions = 5; -} - -message ReduceRequest { - // Operand to the reduction. - ComputationDataHandle operand = 2; - - // Initial value for the reduction. This must be consistent with the result - // shape of to_apply. - ComputationDataHandle init_value = 3; - - // The dimensions to reduce over. - repeated int64 dimensions = 4; - - // The computation to apply in the reduction. 
- ComputationHandle to_apply = 5; -} - -message ReduceWindowRequest { - ComputationDataHandle operand = 2; - ComputationDataHandle init_value = 3; - Window window = 4; - ComputationHandle to_apply = 5; -} - -message BatchNormTrainingRequest { - ComputationDataHandle operand = 1; - ComputationDataHandle scale = 2; - ComputationDataHandle offset = 3; - float epsilon = 4; - int64 feature_index = 5; -} - -message BatchNormInferenceRequest { - ComputationDataHandle operand = 1; - ComputationDataHandle scale = 2; - ComputationDataHandle offset = 3; - ComputationDataHandle mean = 4; - ComputationDataHandle variance = 5; - float epsilon = 6; - int64 feature_index = 7; -} - -message BatchNormGradRequest { - ComputationDataHandle operand = 1; - ComputationDataHandle scale = 2; - ComputationDataHandle mean = 3; - ComputationDataHandle variance = 4; - ComputationDataHandle grad_output = 5; - float epsilon = 6; - int64 feature_index = 7; -} - -message CrossReplicaSumRequest { - ComputationDataHandle operand = 2; -} - -message SelectAndScatterRequest { - // Operand array on which the windows slide. - ComputationDataHandle operand = 2; - - // Source array for the data to scatter. - ComputationDataHandle source = 3; - - // Initial scalar value for each element in the output. - ComputationDataHandle init_value = 4; - - // Window configuration. - Window window = 5; - - // Binary function used to select an element from each window. - ComputationHandle select = 6; - - // Binary function used to combine each scattered value from source with the - // current output value at the selected location. 
- ComputationHandle scatter = 7; -} - -message ReverseRequest { - ComputationDataHandle operand = 2; - repeated int64 dimensions = 3; -} - -message BroadcastRequest { - ComputationDataHandle operand = 2; - repeated int64 broadcast_sizes = 3; -} - -message PadRequest { - ComputationDataHandle operand = 2; - ComputationDataHandle padding_value = 3; - PaddingConfig padding_config = 4; -} - -message ReshapeRequest { - ComputationDataHandle operand = 2; - - // The dimension order for collapse (from fastest-changing to slowest). - repeated int64 dimensions = 3; - - // The new dimension sizes (from dimension 0 to n-1). - repeated int64 new_sizes = 4; -} - -message TransposeRequest { - ComputationDataHandle operand = 2; - - // The permutation of the operand's dimensions (in the range 0 to n-1). - repeated int64 dimensions = 3; -} - -message ParameterRequest { - Shape shape = 2; - int64 parameter = 3; - string name = 4; -} - -message GetLocalShapeRequest { - ComputationHandle computation = 1; - ComputationDataHandle operand = 2; -} - -message GetLocalShapeResponse { - Shape shape = 1; -} - -message TraceRequest { - string tag = 2; - ComputationDataHandle operand = 3; -} - -message ConvertRequest { - ComputationDataHandle operand = 2; - PrimitiveType new_element_type = 3; -} - -message ConcatenateRequest { - repeated ComputationDataHandle operands = 2; - // The dimension in which we concatenate; e.g. if you had dimension arrays of - // [4, 1] and [5, 1], you'd concatenate in dimension 0 to produce a [9, 1]. - // Attempting to concatenate those in dimension 1 would produce an error, as - // 4 != 5 (and there is no ragged array support). 
- int64 dimension = 3; -} - -message ConditionalRequest { - ComputationDataHandle predicate = 2; - ComputationDataHandle true_operand = 3; - ComputationHandle true_computation = 4; - ComputationDataHandle false_operand = 5; - ComputationHandle false_computation = 6; -} - -message WhileRequest { - ComputationHandle condition = 2; - ComputationHandle body = 3; - ComputationDataHandle init = 4; -} - enum UnaryOperation { UNOP_INVALID = 0; @@ -827,11 +546,6 @@ enum UnaryOperation { UNOP_LOG1P = 19; } -message UnaryOpRequest { - UnaryOperation unop = 2; - ComputationDataHandle operand = 3; -} - enum BinaryOperation { BINOP_INVALID = 0; @@ -876,13 +590,6 @@ enum BinaryOperation { BINOP_ATAN2 = 24; } -message BinaryOpRequest { - BinaryOperation binop = 2; - ComputationDataHandle lhs = 3; - ComputationDataHandle rhs = 4; - repeated int64 broadcast_dimensions = 5; -} - enum RandomDistribution { RNG_INVALID = 0; @@ -897,12 +604,6 @@ enum RandomDistribution { // Next: 4 } -message RngRequest { - RandomDistribution distribution = 2; - repeated ComputationDataHandle parameter = 3; - Shape shape = 4; -} - enum TernaryOperation { TRIOP_INVALID = 0; @@ -916,13 +617,6 @@ enum TernaryOperation { TRIOP_CLAMP = 3; } -message TernaryOpRequest { - TernaryOperation triop = 2; - ComputationDataHandle lhs = 3; - ComputationDataHandle rhs = 4; - ComputationDataHandle ehs = 5; -} - enum VariadicOperation { VAROP_INVALID = 0; @@ -930,34 +624,6 @@ enum VariadicOperation { VAROP_TUPLE = 1; } -message VariadicOpRequest { - VariadicOperation varop = 2; - repeated ComputationDataHandle operands = 3; -} - -message ReducePrecisionRequest { - ComputationDataHandle operand = 1; - int32 exponent_bits = 2; - int32 mantissa_bits = 3; -} - -message SendRequest { - ComputationDataHandle operand = 1; - ChannelHandle channel_handle = 2; -} - -message RecvRequest { - Shape shape = 1; - ChannelHandle channel_handle = 2; -} - -message GatherRequest { - ComputationDataHandle input = 1; - ComputationDataHandle 
gather_indices = 2; - GatherDimensionNumbers dimension_numbers = 3; - repeated int64 window_bounds = 4; -} - message OpSharding { enum Type { // This sharding is replicated across all devices (implies maximal, @@ -988,59 +654,3 @@ message OpSharding { // to. repeated OpSharding tuple_shardings = 5; } - -message OpRequest { - ComputationHandle computation = 1; - OpMetadata metadata = 33; - OpSharding sharding = 40; - - oneof op { - BinaryOpRequest binary_op_request = 2; - BroadcastRequest broadcast_request = 3; - CallRequest call_request = 4; - ConcatenateRequest concatenate_request = 5; - ConstantRequest constant_request = 6; - ConvertRequest convert_request = 7; - ConvolveRequest convolve_request = 8; - CrossReplicaSumRequest cross_replica_sum_request = 9; - CustomCallRequest custom_call_request = 10; - DotRequest dot_request = 43; - DynamicSliceRequest dynamic_slice_request = 11; - DynamicUpdateSliceRequest dynamic_update_slice_request = 12; - GetTupleElementRequest get_tuple_element_request = 13; - InfeedRequest infeed_request = 14; - MapRequest map_request = 15; - PadRequest pad_request = 16; - ParameterRequest parameter_request = 17; - ReducePrecisionRequest reduce_precision_request = 36; - ReduceRequest reduce_request = 18; - ReduceWindowRequest reduce_window_request = 19; - ReshapeRequest reshape_request = 20; - ReverseRequest reverse_request = 21; - RngRequest rng_request = 22; - SelectAndScatterRequest select_and_scatter_request = 23; - SliceRequest slice_request = 24; - TernaryOpRequest ternary_op_request = 25; - TraceRequest trace_request = 26; - TransposeRequest transpose_request = 34; - UnaryOpRequest unary_op_request = 27; - VariadicOpRequest variadic_op_request = 28; - WhileRequest while_request = 29; - SendRequest send_request = 30; - RecvRequest recv_request = 31; - OutfeedRequest outfeed_request = 32; - BatchNormTrainingRequest batch_norm_training_request = 35; - BatchNormGradRequest batch_norm_grad_request = 37; - BatchNormInferenceRequest 
batch_norm_inference_request = 38; - FftRequest fft_request = 41; - ConvertRequest bitcast_convert_request = 42; - ConditionalRequest conditional_request = 44; - HostComputeRequest host_compute_request = 45; - GatherRequest gather_request = 46; - // Next: 47 - } -} - -message OpResponse { - ComputationDataHandle output = 1; -} -- GitLab From 7eaf8941930c8b1a099b7ec626134b67179c07e3 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 8 Jun 2018 11:20:56 -0700 Subject: [PATCH 062/365] Use the new operators for list conversion. Includes list creation, append, pop, stack. Simplify the type annotation mechanism by having it literally copy its arguments, instead of attempting to resolve them. PiperOrigin-RevId: 199822771 --- .../contrib/autograph/converters/lists.py | 233 +++++++++++++----- .../autograph/converters/lists_test.py | 130 +++++++--- .../pyct/static_analysis/type_info.py | 40 ++- .../pyct/static_analysis/type_info_test.py | 18 +- 4 files changed, 291 insertions(+), 130 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/lists.py b/tensorflow/contrib/autograph/converters/lists.py index b49521b2c3..c15dfff9e8 100644 --- a/tensorflow/contrib/autograph/converters/lists.py +++ b/tensorflow/contrib/autograph/converters/lists.py @@ -33,82 +33,193 @@ from __future__ import print_function import gast from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import parser from tensorflow.contrib.autograph.pyct import templates from tensorflow.contrib.autograph.pyct import transformer -from tensorflow.python.framework import dtypes +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno + + +# Tags for local state. 
+POP_USES = 'pop_uses' class ListTransformer(transformer.Base): """Converts lists and related operations to their TF counterpart.""" - def _empty_list(self, node): - if not anno.hasanno(node, 'element_type'): - raise NotImplementedError( - 'type inference for empty lists is not yet supported; ' - 'use set_element_type(, ) to continue') - dtype = anno.getanno(node, 'element_type') - if not isinstance(dtype, dtypes.DType): - # TODO(mdan): Allow non-TF dtypes? - # That would be consistent with the dynamic dispatch pattern, but - # we must make sure that doesn't become confusing. - raise NotImplementedError('element type "%s" not yet supported' % dtype) - - dtype_name = dtype.name - # TODO(mdan): Does it ever make sense not to use tensor lists? + def visit_List(self, node): + node = self.generic_visit(node) template = """ - tf.TensorArray(tf.dtype_name, size=0, dynamic_size=True) + ag__.new_list(elements) """ - return templates.replace_as_expression(template, dtype_name=dtype_name) + return templates.replace_as_expression(template, elements=node) - def _pre_populated_list(self, node): - raise NotImplementedError('pre-populated lists') + def _replace_append_call(self, node): + assert len(node.args) == 1 + assert isinstance(node.func, gast.Attribute) + template = """ + target = ag__.list_append(target, element) + """ + return templates.replace( + template, + target=node.func.value, + element=node.args[0]) + + def _replace_pop_call(self, node): + # Expressions that use pop() are converted to a statement + expression. + # + # For example: + # + # print(target.pop()) + # + # ... is converted to: + # + # target, target_pop = ag__.list_pop(target) + # print(target_pop) + # + # Here, we just generate the variable name and swap it in, + # and _generate_pop_operation will handle the rest. 
+ # + # Multiple uses of pop() are allowed: + # + # print(target.pop(), target.pop()) + # print(target.pop().pop()) + # + assert isinstance(node.func, gast.Attribute) + scope = anno.getanno(node, NodeAnno.ARGS_SCOPE) + target_node = node.func.value + + # Attempt to use a related name if we can get one. Otherwise use something + # generic. + if anno.hasanno(target_node, anno.Basic.QN): + target_name = anno.getanno(target_node, anno.Basic.QN).ssf() + else: + target_name = 'list' + pop_var_name = self.context.namer.new_symbol(target_name, scope.referenced) + + pop_uses = self.get_local(POP_USES, []) + pop_uses.append((node, pop_var_name)) + self.set_local(POP_USES, pop_uses) + + return templates.replace_as_expression('var_name', var_name=pop_var_name) + + def _replace_stack_call(self, node): + assert len(node.args) == 1 + dtype = anno.getanno( + node.args[0], + 'element_type', + default=templates.replace_as_expression('None')) + template = """ + ag__.list_stack( + target, + opts=ag__.ListStackOpts( + element_dtype=dtype, + original_call=orig_call)) + """ + return templates.replace_as_expression( + template, + dtype=dtype, + target=node.args[0], + orig_call=node.func) - def visit_Expr(self, node): + def visit_Call(self, node): node = self.generic_visit(node) - if isinstance(node.value, gast.Call): - call_node = node.value - - if not anno.hasanno(call_node.func, anno.Basic.QN): - return node - qn = anno.getanno(call_node.func, anno.Basic.QN) - - if qn.qn[-1] == 'append' and (len(call_node.args) == 1): - template = """ - target = ag__.utils.dynamic_list_append(target, element) - """ - node = templates.replace( - template, - target=qn.parent.ast(), - element=call_node.args[0]) + + # TODO(mdan): This is insufficient if target is a function argument. + # In the case of function arguments, we need to add the list to the + # function's return value, because it is being modified. + # TODO(mdan): Checking just the name is brittle, can it be improved? 
+ if isinstance(node.func, gast.Attribute): + func_name = node.func.attr + if func_name == 'append' and (len(node.args) == 1): + node = self._replace_append_call(node) + elif func_name == 'pop' and (len(node.args) <= 1): + node = self._replace_pop_call(node) + elif func_name == 'stack' and (len(node.args) == 1): + node = self._replace_stack_call(node) + return node - def _replace_list_constructors(self, targets, values): - for target in targets: - if (isinstance(target, (gast.Tuple, gast.List)) and - isinstance(values, (gast.Tuple, gast.List))): - n_targets = len(target.elts) - for i in range(n_targets): - target_el, value_el = target.elts[i], values.elts[i] - values.elts[i] = self._replace_list_constructors( - (target_el,), value_el) - return values - if isinstance(values, gast.List): - if values.elts: - return self._pre_populated_list(values) - else: - return self._empty_list(values) - return values - - def visit_Assign(self, node): - node = self.generic_visit(node) + def _generate_pop_operation(self, original_call_node, pop_var_name): + assert isinstance(original_call_node.func, gast.Attribute) + + if original_call_node.args: + pop_element = original_call_node.args[0] + else: + pop_element = parser.parse_expression('None') + # The call will be something like "target.pop()", and the dtype is hooked to + # target, hence the func.value. 
+ dtype = anno.getanno( + original_call_node.func.value, + 'element_type', + default=templates.replace_as_expression('None')) + shape = anno.getanno( + original_call_node.func.value, + 'element_shape', + default=templates.replace_as_expression('None')) + + template = """ + target, pop_var_name = ag__.list_pop( + target, element, + opts=ag__.ListPopOpts(element_dtype=dtype, element_shape=shape)) + """ + return templates.replace( + template, + target=original_call_node.func.value, + pop_var_name=pop_var_name, + element=pop_element, + dtype=dtype, + shape=shape) + + def _postprocess_statement(self, node): + """Inserts any separate pop() calls that node may use.""" + pop_uses = self.get_local(POP_USES, None) + if pop_uses: + replacements = [] + for original_call_node, pop_var_name in pop_uses: + replacements.extend( + self._generate_pop_operation(original_call_node, pop_var_name)) + replacements.append(node) + node = replacements + self.exit_local_scope() + return node, None + + # TODO(mdan): Should we have a generic visit_block instead? + # Right now it feels that a visit_block would add too much magic that's + # hard to follow. 
+ + def _visit_and_process_block(self, block): + return self.visit_block( + block, + before_visit=self.enter_local_scope, + after_visit=self._postprocess_statement) + + def visit_FunctionDef(self, node): + node.args = self.generic_visit(node.args) + node.decorator_list = self.visit_block(node.decorator_list) + node.body = self._visit_and_process_block(node.body) + return node + + def visit_For(self, node): + node.target = self.visit(node.target) + node.body = self._visit_and_process_block(node.body) + node.orelse = self._visit_and_process_block(node.orelse) + return node + + def visit_While(self, node): + node.test = self.visit(node.test) + node.body = self._visit_and_process_block(node.body) + node.orelse = self._visit_and_process_block(node.orelse) + return node + + def visit_If(self, node): + node.test = self.visit(node.test) + node.body = self._visit_and_process_block(node.body) + node.orelse = self._visit_and_process_block(node.orelse) + return node - # Only convert lists when they are assigned to a variable, e.g.: - # l = [] - # TODO(mdan): A similar pattern exists in type_info.py - # We should add a generic "unpack_assignment" function to the base - # transformer, that has the same effect as applying some logic to the SSA - # form. 
- node.value = self._replace_list_constructors(node.targets, node.value) + def visit_With(self, node): + node.items = self.visit_block(node.items) + node.body = self._visit_and_process_block(node.body) return node diff --git a/tensorflow/contrib/autograph/converters/lists_test.py b/tensorflow/contrib/autograph/converters/lists_test.py index 74c6dc64f1..9f18ab9f44 100644 --- a/tensorflow/contrib/autograph/converters/lists_test.py +++ b/tensorflow/contrib/autograph/converters/lists_test.py @@ -22,74 +22,126 @@ from tensorflow.contrib.autograph import utils from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.contrib.autograph.converters import lists from tensorflow.python.framework import dtypes -from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import list_ops from tensorflow.python.platform import test class ListTest(converter_test_base.TestCase): - def test_empty_annotated_list(self): + def test_empty_list(self): def test_fn(): - l = [] - utils.set_element_type(l, dtypes.int32) - l.append(1) - return l + return [] - node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + node = self.parse_and_analyze(test_fn, {}) node = lists.transform(node, self.ctx) - with self.compiled(node, tensor_array_ops.TensorArray, - dtypes.int32) as result: - # TODO(mdan): Attach these additional modules automatically. - result.utils = utils - result.dtypes = dtypes + with self.compiled(node) as result: + tl = result.test_fn() + # Empty tensor lists cannot be evaluated or stacked. 
+ self.assertTrue(isinstance(tl, ops.Tensor)) + self.assertEqual(tl.dtype, dtypes.variant) + + def test_initialized_list(self): + + def test_fn(): + return [1, 2, 3] + + node = self.parse_and_analyze(test_fn, {}) + node = lists.transform(node, self.ctx) + + with self.compiled(node) as result: with self.test_session() as sess: - self.assertAllEqual([1], sess.run(result.test_fn().stack())) + tl = result.test_fn() + r = list_ops.tensor_list_stack(tl, dtypes.int32) + self.assertAllEqual(sess.run(r), [1, 2, 3]) - def test_empty_annotated_lists_unpacked(self): + def test_list_append(self): def test_fn(): - l, m = [], [] - utils.set_element_type(l, dtypes.int32) - utils.set_element_type(m, dtypes.int32) - l.append(1) - m.append(2) - return l, m + l = [1] + l.append(2) + l.append(3) + return l - node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + node = self.parse_and_analyze(test_fn, {}) node = lists.transform(node, self.ctx) - with self.compiled(node, tensor_array_ops.TensorArray, - dtypes.int32) as result: + with self.compiled(node) as result: + with self.test_session() as sess: + tl = result.test_fn() + r = list_ops.tensor_list_stack(tl, dtypes.int32) + self.assertAllEqual(sess.run(r), [1, 2, 3]) + + def test_list_pop(self): + + def test_fn(): + l = [1, 2, 3] + utils.set_element_type(l, dtypes.int32, ()) + s = l.pop() + return s, l + + node = self.parse_and_analyze( + test_fn, + { + 'utils': utils, + 'dtypes': dtypes + }, + include_type_analysis=True, + ) + node = lists.transform(node, self.ctx) + + with self.compiled(node) as result: result.utils = utils result.dtypes = dtypes with self.test_session() as sess: - res_l, res_m = result.test_fn() - self.assertEqual([1], sess.run(res_l.stack())) - self.assertEqual([2], sess.run(res_m.stack())) + ts, tl = result.test_fn() + r = list_ops.tensor_list_stack(tl, dtypes.int32) + self.assertAllEqual(sess.run(r), [1, 2]) + self.assertAllEqual(sess.run(ts), 3) + + def test_double_list_pop(self): - def 
test_empty_annotated_lists_list_unpacked(self): + def test_fn(l): + s = l.pop().pop() + return s + + node = self.parse_and_analyze(test_fn, {}) + node = lists.transform(node, self.ctx) + + with self.compiled(node) as result: + test_input = [1, 2, [1, 2, 3]] + # TODO(mdan): Pass a list of lists of tensor when we fully support that. + # For now, we just pass a regular Python list of lists just to verify that + # the two pop calls are sequenced properly. + self.assertAllEqual(result.test_fn(test_input), 3) + + def test_list_stack(self): + + tf = None # Will be replaced with a mock. def test_fn(): - [l, m] = [], [] + l = [1, 2, 3] utils.set_element_type(l, dtypes.int32) - utils.set_element_type(m, dtypes.int32) - l.append(1) - m.append(2) - return l, m - - node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + return tf.stack(l) + + node = self.parse_and_analyze( + test_fn, + { + 'utils': utils, + 'dtypes': dtypes + }, + include_type_analysis=True, + ) node = lists.transform(node, self.ctx) - with self.compiled(node, tensor_array_ops.TensorArray, - dtypes.int32) as result: + with self.compiled(node, array_ops.stack, dtypes.int32) as result: result.utils = utils result.dtypes = dtypes with self.test_session() as sess: - res_l, res_m = result.test_fn() - self.assertEqual([1], sess.run(res_l.stack())) - self.assertEqual([2], sess.run(res_m.stack())) + self.assertAllEqual(sess.run(result.test_fn()), [1, 2, 3]) if __name__ == '__main__': diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py index d6555dc7e0..7d1e65c958 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py @@ -17,8 +17,8 @@ This analyzer uses known live values to further infer object types. This may include for instance constructed objects and object member functions. 
-In addition, the analyzer will also process annotations for TF (staged) type -annotations. +In addition, the analyzer also handles user annotations made in the code (for +example, the autograph.set_element_type function). Requires annotations generated by LiveValuesResolver. """ @@ -44,6 +44,7 @@ from __future__ import print_function import gast from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import parser from tensorflow.contrib.autograph.pyct import transformer from tensorflow.python.util import tf_inspect @@ -159,12 +160,10 @@ class TypeInfoResolver(transformer.Base): # a = b # then for future references to `a` we should have definition = `b` definition = self.scope.getval(qn) - if anno.hasanno(definition, 'type'): - anno.setanno(node, 'type', anno.getanno(definition, 'type')) - anno.setanno(node, 'type_fqn', anno.getanno(definition, 'type_fqn')) - if anno.hasanno(definition, 'element_type'): - anno.setanno(node, 'element_type', - anno.getanno(definition, 'element_type')) + anno.copyanno(definition, node, 'type') + anno.copyanno(definition, node, 'type_fqn') + anno.copyanno(definition, node, 'element_type') + anno.copyanno(definition, node, 'element_shape') return node def _process_variable_assignment(self, target, value): @@ -211,23 +210,20 @@ class TypeInfoResolver(transformer.Base): if (anno.getanno(node.func, 'live_val') is self.context.type_annotation_func): - if len(node.args) != 2: - raise ValueError('"%s" must have exactly two parameters' + if len(node.args) < 2 or len(node.args) > 3: + raise ValueError('"%s" must have either two or three parameters' % self.context.type_annotation_func) - target_arg, type_arg = node.args + if len(node.args) == 2: + target_arg, type_arg = node.args + shape_arg = parser.parse_expression('None') + else: + target_arg, type_arg, shape_arg = node.args if not anno.hasanno(target_arg, anno.Basic.QN): raise ValueError('the first argument of "%s" must by a symbol' % 
self.context.type_annotation_func) - if isinstance(type_arg, gast.Str): - element_type = type_arg.s - elif isinstance(type_arg, gast.Num): - element_type = type_arg.n - else: - if not anno.hasanno(type_arg, 'live_val'): - raise ValueError( - 'the second argument of "%s" must be statically resolvable' % - self.context.type_annotation_func) - element_type = anno.getanno(type_arg, 'live_val') + # TODO(mdan): This is vulnerable to symbol renaming. + element_type = type_arg + element_shape = shape_arg target_symbol = anno.getanno(target_arg, anno.Basic.QN) # Find the definition of this symbol and annotate it with the given @@ -235,7 +231,9 @@ class TypeInfoResolver(transformer.Base): # to receive the same type annotation. definition = self.scope.getval(target_symbol) anno.setanno(node, 'element_type', element_type) + anno.setanno(node, 'element_shape', element_shape) anno.setanno(definition, 'element_type', element_type) + anno.setanno(definition, 'element_shape', element_shape) # TODO(mdan): Should we update references between definition and here? 
return self.generic_visit(node) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py index 95cbf5ca79..484562f294 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py @@ -187,14 +187,14 @@ class TypeInfoResolverTest(test.TestCase): def test_fn(): f = [] - f = utils.set_element_type(f, Foo) + f = utils.set_element_type(f, Foo, (1, 2, 3)) return f node = self._parse_and_analyze(test_fn, {'Foo': Foo, 'utils': utils}) f_def = node.body[0].body[0].value - self.assertEqual(anno.getanno(f_def, 'element_type'), Foo) + self.assertEqual(anno.getanno(f_def, 'element_type').id, 'Foo') f_ref = node.body[0].body[1].value - self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + self.assertEqual(anno.getanno(f_ref, 'element_type').id, 'Foo') def test_type_annotation_args(self): @@ -207,7 +207,7 @@ class TypeInfoResolverTest(test.TestCase): node = self._parse_and_analyze(test_fn, {'Foo': Foo, 'utils': utils}) f_ref = node.body[0].body[1].value - self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + self.assertEqual(anno.getanno(f_ref, 'element_type').id, 'Foo') def test_nested_unpacking(self): @@ -223,9 +223,9 @@ class TypeInfoResolverTest(test.TestCase): node = self._parse_and_analyze(test_fn, {'Foo': Foo, 'Bar': Bar}) a, b, c = node.body[0].body[1].value.elts - self.assertEquals(Foo, anno.getanno(a, 'type')) - self.assertEquals(Bar, anno.getanno(b, 'type')) - self.assertEquals(Foo, anno.getanno(c, 'type')) + self.assertEquals(anno.getanno(a, 'type'), Foo) + self.assertEquals(anno.getanno(b, 'type'), Bar) + self.assertEquals(anno.getanno(c, 'type'), Foo) self.assertFalse(anno.hasanno(a, 'live_val')) self.assertFalse(anno.hasanno(b, 'live_val')) self.assertFalse(anno.hasanno(c, 'live_val')) @@ -242,8 +242,8 @@ class TypeInfoResolverTest(test.TestCase): node = 
self._parse_and_analyze(test_fn, {'utils': utils}) a, b = node.body[0].body[2].body[2].value.elts - self.assertEquals(1, anno.getanno(a, 'element_type')) - self.assertEquals(2, anno.getanno(b, 'element_type')) + self.assertEquals(anno.getanno(a, 'element_type').n, 1) + self.assertEquals(anno.getanno(b, 'element_type').n, 2) self.assertFalse(anno.hasanno(a, 'type')) self.assertFalse(anno.hasanno(b, 'type')) self.assertFalse(anno.hasanno(a, 'live_val')) -- GitLab From 0d4274943a6bf6d461f5468b05162118934df6b3 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 8 Jun 2018 11:44:17 -0700 Subject: [PATCH 063/365] [TF:XLA] Bump open source llvm revision to r334273 PiperOrigin-RevId: 199826723 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ce4a009974..4e2f26e097 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -451,11 +451,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/7488dbc1218de926f3de0e9bb3d465f3bbe5b80e.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/7488dbc1218de926f3de0e9bb3d465f3bbe5b80e.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/42f7ad099aa73695ea633c585da0a9848d6a730d.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/42f7ad099aa73695ea633c585da0a9848d6a730d.tar.gz", ], - sha256 = "dd4a2e2a4f21ab69cf99534bcb2739c04fc12d12b63e5e3d8f2b85a2eb55d5d1", - strip_prefix = "llvm-7488dbc1218de926f3de0e9bb3d465f3bbe5b80e", + sha256 = "3a7f1f9c54b51640ba30e40e7e7698bca152e18510001b5a1ad70e8df45e1b05", + strip_prefix = "llvm-42f7ad099aa73695ea633c585da0a9848d6a730d", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From f21129b8afc083afbd53b4392762ed7b83205b47 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 8 Jun 2018 12:07:36 -0700 Subject: [PATCH 
064/365] Improve tfdbg documentation regarding high-level APIs * Mention both keras and tf.keras * In one of the early paragraphs, list all three high-level APIs supported (tf.estimator, keras and tf.contrib.slim). PiperOrigin-RevId: 199830255 --- .../docs_src/programmers_guide/debugger.md | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 6bd941886d..fc845c68f4 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -33,8 +33,9 @@ and [`inf`s](https://en.wikipedia.org/wiki/Infinity), a frequently-encountered type of bug in TensorFlow model development. The following example is for users who use the low-level [`Session`](https://www.tensorflow.org/api_docs/python/tf/Session) API of -TensorFlow. A later section of this document describes how to use **tfdbg** -with a higher-level API, namely `Estimator`s. +TensorFlow. Later sections of this document describe how to use **tfdbg** +with higher-level APIs of TensorFlow, including `tf.estimator`, +`tf.keras` / `keras` and `tf.contrib.slim`. To *observe* such an issue, run the following command without the debugger (the source code can be found [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/debug_mnist.py)): @@ -477,20 +478,31 @@ for more details. ## Debugging Keras Models with TFDBG -To use TFDBG with [Keras](https://keras.io/), let the Keras backend use -a TFDBG-wrapped Session object. For example, to use the CLI wrapper: +To use TFDBG with +[tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), +let the Keras backend use a TFDBG-wrapped Session object. 
For example, to use +the CLI wrapper: ``` python import tensorflow as tf -from keras import backend as keras_backend from tensorflow.python import debug as tf_debug -keras_backend.set_session(tf_debug.LocalCLIDebugWrapperSession(tf.Session())) +tf.keras.backend.set_session(tf_debug.LocalCLIDebugWrapperSession(tf.Session())) # Define your keras model, called "model". -model.fit(...) # This will break into the TFDBG CLI. + +# Calls to `fit()`, `evaluate()` and `predict()` methods will break into the +# TFDBG CLI. +model.fit(...) +model.evaluate(...) +model.predict(...) ``` +With minor modification, the preceding code example also works for the +[non-TensorFlow version of Keras](https://keras.io/) running against a +TensorFlow backend. You just need to replace `tf.keras.backend` with +`keras.backend`. + ## Debugging tf-slim with TFDBG TFDBG supports debugging of training and evaluation with -- GitLab From 9f29e81349e15118847cdaf4029bb76760cf3543 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Fri, 8 Jun 2018 12:31:49 -0700 Subject: [PATCH 065/365] Fix: Keras models using datasets in eager mode fail on float64 data PiperOrigin-RevId: 199833632 --- tensorflow/python/keras/engine/training.py | 11 ++- .../python/keras/engine/training_eager.py | 15 +++- .../python/keras/engine/training_test.py | 70 +++++++++++-------- .../python/keras/engine/training_utils.py | 30 ++++++++ 4 files changed, 93 insertions(+), 33 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 04a2aa7664..89c1f1a40f 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -1008,14 +1008,16 @@ class Model(Network): # to keep track of number of inputs and outputs and their ndim.
if isinstance(inputs, (list, tuple)): if tensor_util.is_tensor(inputs[0]): - dummy_output_values = self.call(inputs) + dummy_output_values = self.call( + training_utils.cast_if_floating_dtype(inputs)) else: dummy_output_values = self.call( [ops.convert_to_tensor(v, dtype=K.floatx()) for v in inputs]) dummy_input_values = list(inputs) else: if tensor_util.is_tensor(inputs): - dummy_output_values = self.call(inputs) + dummy_output_values = self.call( + training_utils.cast_if_floating_dtype(inputs)) else: dummy_output_values = self.call( ops.convert_to_tensor(inputs, dtype=K.floatx())) @@ -1616,7 +1618,10 @@ class Model(Network): # Validate and standardize user data. inputs, _, _ = self._standardize_user_data(x) if context.executing_eagerly(): - if not isinstance(inputs, iterator_ops.EagerIterator): + if (isinstance(x, iterator_ops.EagerIterator) or + (isinstance(x, dataset_ops.Dataset) and context.executing_eagerly())): + inputs = training_utils.cast_if_floating_dtype(inputs) + else: inputs = [ ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs ] diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 15a7b0c0f2..2ecbff3a1c 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -255,6 +255,8 @@ def iterator_fit_loop(model, # Validate and standardize data. x, y, sample_weights = model._standardize_user_data( x, y, class_weight=class_weight) + x = training_utils.cast_if_floating_dtype(x) + y = training_utils.cast_if_floating_dtype(y) if sample_weights: sample_weights = [ ops.convert_to_tensor(val, dtype=backend.floatx()) @@ -471,6 +473,8 @@ def iterator_test_loop(model, inputs, steps, verbose=0): # Validate and standardize data. x, y, sample_weights = model._standardize_user_data(x, y) + x = training_utils.cast_if_floating_dtype(x) + y = training_utils.cast_if_floating_dtype(y) # Calculate model output, loss values. 
loss_outs, loss, loss_metrics = _model_loss( @@ -639,6 +643,7 @@ def iterator_predict_loop(model, inputs, steps, verbose=0): # Validate and standardize data. x, _, _ = model._standardize_user_data(x) + x = training_utils.cast_if_floating_dtype(x) if model._expects_training_arg: batch_outs = model.call(x[0] if len(x) == 1 else x, training=False) @@ -814,7 +819,10 @@ def train_on_batch(model, inputs, targets, sample_weights=None): Returns: total loss and the loss associated with each output. """ - if len(inputs) and not tensor_util.is_tensor(inputs[0]): + if len(inputs) and tensor_util.is_tensor(inputs[0]): + inputs = training_utils.cast_if_floating_dtype(inputs) + targets = training_utils.cast_if_floating_dtype(targets) + else: inputs = [ ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs ] @@ -849,7 +857,10 @@ def test_on_batch(model, inputs, targets, sample_weights=None): Returns: total loss, loss and metrics associated with each output. """ - if len(inputs) and not tensor_util.is_tensor(inputs[0]): + if len(inputs) and tensor_util.is_tensor(inputs[0]): + inputs = training_utils.cast_if_floating_dtype(inputs) + targets = training_utils.cast_if_floating_dtype(targets) + else: inputs = [ ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs ] diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 5c02d36382..a1ab720189 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -129,8 +129,10 @@ class TrainingTest(test.TestCase): { 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}, + }, { + 'dense': output_d_np, + 'dropout': output_e_np + }, epochs=1, batch_size=5, verbose=0) @@ -138,8 +140,10 @@ class TrainingTest(test.TestCase): { 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}, + }, { + 'dense': output_d_np, + 
'dropout': output_e_np + }, epochs=1, batch_size=5, verbose=1) @@ -147,8 +151,10 @@ class TrainingTest(test.TestCase): { 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}, + }, { + 'dense': output_d_np, + 'dropout': output_e_np + }, validation_data=({ 'input_a': input_a_np, 'input_b': input_b_np @@ -162,8 +168,10 @@ class TrainingTest(test.TestCase): model.train_on_batch({ 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}) + }, { + 'dense': output_d_np, + 'dropout': output_e_np + }) # Test with lists for loss, metrics loss = ['mae', 'mse'] @@ -285,16 +293,20 @@ class TrainingTest(test.TestCase): { 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}, + }, { + 'dense': output_d_np, + 'dropout': output_e_np + }, batch_size=5, verbose=0) model.evaluate( { 'input_a': input_a_np, 'input_b': input_b_np - }, {'dense': output_d_np, - 'dropout': output_e_np}, + }, { + 'dense': output_d_np, + 'dropout': output_e_np + }, batch_size=5, verbose=1) @@ -349,9 +361,11 @@ class TrainingTest(test.TestCase): with self.test_session(): test_inputs = [ - scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] + scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2) + ] test_outputs = [ - scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)] + scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5) + ] in1 = keras.layers.Input(shape=(3,)) in2 = keras.layers.Input(shape=(3,)) out1 = keras.layers.Dropout(0.5, name='dropout')(in1) @@ -1721,8 +1735,8 @@ class TestTrainingWithDatasetIterators(test.TestCase): metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = 
dataset.repeat(100) dataset = dataset.batch(10) @@ -1786,8 +1800,8 @@ class TestTrainingWithDatasetIterators(test.TestCase): metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) @@ -1811,8 +1825,8 @@ class TestTrainingWithDatasetIterators(test.TestCase): metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(2) dataset = dataset.batch(10) @@ -1838,8 +1852,8 @@ class TestTrainingWithDataset(test.TestCase): metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) @@ -1865,8 +1879,8 @@ class TestTrainingWithDataset(test.TestCase): metrics = ['mae'] model.compile(optimizer, loss, metrics=metrics) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) @@ -1928,8 +1942,8 @@ class TestTrainingWithDataset(test.TestCase): model.compile(optimizer, loss) # User forgets to batch the dataset - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 3)) + targets = 
np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) @@ -1938,8 +1952,8 @@ class TestTrainingWithDataset(test.TestCase): model.train_on_batch(dataset) # Wrong input shape - inputs = np.zeros((10, 5), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) + inputs = np.zeros((10, 5)) + targets = np.zeros((10, 4)) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) dataset = dataset.batch(10) diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index b93f999444..728a2b493b 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -553,6 +553,10 @@ def standardize_weights(y, def has_symbolic_tensors(ls): if context.executing_eagerly(): return False + return has_tensors(ls) + + +def has_tensors(ls): if isinstance(ls, (list, tuple)): return any(tensor_util.is_tensor(v) for v in ls) return tensor_util.is_tensor(ls) @@ -692,3 +696,29 @@ def check_steps_argument(input_data, steps, steps_name): input_type=input_type_str, steps_name=steps_name)) return True return False + + +def cast_if_floating_dtype(x): + """Casts the given data tensors to the default floating point type. + + Casts only if the input is already a floating point type. + Args: + x: tensor or list/tuple of tensors. + + Returns: + Converted input. + + Raises: + RuntimeError: if data isn't tensors. 
+ """ + if not has_tensors(x): + raise RuntimeError( + 'Please provide tensors for casting, got: {x}'.format(x=x)) + + if isinstance(x, (list, tuple)): + return [ + math_ops.cast(val, dtype=K.floatx()) + if tensor_util.is_tensor(val) and val.dtype.is_floating else val + for val in x + ] + return math_ops.cast(x, dtype=K.floatx()) if x.dtype.is_floating else x -- GitLab From 503b7c11b44ee8b238946b345efea503058652c0 Mon Sep 17 00:00:00 2001 From: Nishidha Date: Sat, 9 Jun 2018 01:07:06 +0530 Subject: [PATCH 066/365] Skipped the check that fails due to overflow error as float128 datatype is same as float64 instead of longdouble on platforms like Power - Issue# 19694 (#19860) --- .../bijectors/sinh_arcsinh_bijector_test.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 45760a29ee..795f1993ba 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -151,16 +151,24 @@ class SinhArcsinhBijectorTest(test.TestCase): self.assertAllClose(y, bijector.forward(x).eval(), rtol=1e-4, atol=0.) self.assertAllClose(x, bijector.inverse(y).eval(), rtol=1e-4, atol=0.) - # Do the numpy calculation in float128 to avoid inf/nan. - y_float128 = np.float128(y) - self.assertAllClose( - np.log(np.cosh( - np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( - y_float128**2 + 1)) - - np.log(tailweight), - bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), - rtol=1e-4, - atol=0.) + # On IBM PPC systems, longdouble (np.float128) is same as double except that it can have more precision. 
+ # Type double being of 8 bytes, can't hold square of max of float64 (which is also 8 bytes) and + # below test fails due to overflow error giving inf. So this check avoids that error by skipping square + # calculation and corresponding assert. + + if np.amax(y) <= np.sqrt(np.finfo(np.float128).max) and \ + np.fabs(np.amin(y)) <= np.sqrt(np.fabs(np.finfo(np.float128).min)): + + # Do the numpy calculation in float128 to avoid inf/nan. + y_float128 = np.float128(y) + self.assertAllClose( + np.log(np.cosh( + np.arcsinh(y_float128) / tailweight - skewness) / np.sqrt( + y_float128**2 + 1)) - + np.log(tailweight), + bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), + rtol=1e-4, + atol=0.) self.assertAllClose( -bijector.inverse_log_det_jacobian(y, event_ndims=0).eval(), bijector.forward_log_det_jacobian(x, event_ndims=0).eval(), -- GitLab From 055a0af39189924c52b12e875e7694e6c99a25d0 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Fri, 8 Jun 2018 12:34:43 -0700 Subject: [PATCH 067/365] Fix: Add back test case to test generator methods. 
PiperOrigin-RevId: 199834091 --- .../python/keras/engine/training_eager_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py index 7906d208eb..1571a7782a 100644 --- a/tensorflow/python/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/engine/training_eager_test.py @@ -403,6 +403,24 @@ class TrainingTest(test.TestCase): model.train_on_batch(inputs, targets) model.test_on_batch(inputs, targets) + def test_generator_methods(self): + model = keras.Sequential() + model.add(keras.layers.Dense(4, input_shape=(3,))) + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile(optimizer, 'mse', metrics=['mae']) + + x = np.random.random((10, 3)) + y = np.random.random((10, 4)) + + def iterator(): + while True: + yield x, y + + model.fit_generator(iterator(), steps_per_epoch=3, epochs=1) + model.evaluate_generator(iterator(), steps=3) + out = model.predict_generator(iterator(), steps=3) + self.assertEqual(out.shape, (30, 4)) + class LossWeightingTest(test.TestCase): -- GitLab From a6a265b61a9ad9510f45cf4c9032778bf2e042b9 Mon Sep 17 00:00:00 2001 From: SRIRAM VETURI Date: Fri, 8 Jun 2018 14:38:48 -0500 Subject: [PATCH 068/365] Added the tutorials link (#19844) The very first time users would like to have a clear navigation where they can find the tutorials regarding the additional resources where they can learn the specific tasks in TensorFlow. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6fb4486d0d..63853137cf 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ $ python 42 >>> sess.close() ``` +For more examples of how to do specific tasks in TensorFlow, see the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/).
## Contribution guidelines -- GitLab From 5b540fe049fbb675eb1b5ea7d03fb4cb96a642c4 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 8 Jun 2018 12:36:55 -0700 Subject: [PATCH 069/365] [tf.data] Adding optimization for rewriting `shuffle(...).repeat(...)` to `shuffle_and_repeat(...)`. PiperOrigin-RevId: 199834400 --- .../core/grappler/optimizers/data/BUILD | 35 ++++ .../grappler/optimizers/data/graph_utils.cc | 17 +- .../grappler/optimizers/data/graph_utils.h | 4 + .../optimizers/data/graph_utils_test.cc | 15 ++ .../optimizers/data/map_and_batch_fusion.cc | 20 ++- .../optimizers/data/map_and_batch_fusion.h | 8 +- .../data/map_and_batch_fusion_test.cc | 23 ++- .../data/shuffle_and_repeat_fusion.cc | 112 +++++++++++++ .../data/shuffle_and_repeat_fusion.h | 46 ++++++ .../data/shuffle_and_repeat_fusion_test.cc | 149 ++++++++++++++++++ 10 files changed, 410 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc create mode 100644 tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h create mode 100644 tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion_test.cc diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 121de1e089..08fc9d84da 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -67,11 +67,46 @@ tf_cc_test( ], ) +cc_library( + name = "shuffle_and_repeat_fusion", + srcs = ["shuffle_and_repeat_fusion.cc"], + hdrs = [ + "shuffle_and_repeat_fusion.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:graph_view", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + 
"//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + ] + tf_protos_all(), +) + +tf_cc_test( + name = "shuffle_and_repeat_fusion_test", + srcs = ["shuffle_and_repeat_fusion_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + ":shuffle_and_repeat_fusion", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + ], +) + cc_library( name = "data", visibility = ["//visibility:public"], deps = [ ":map_and_batch_fusion", + ":shuffle_and_repeat_fusion", ], alwayslink = 1, ) diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc index df12de37da..aece142f7a 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc @@ -28,6 +28,8 @@ namespace grappler { namespace graph_utils { namespace { +constexpr char kConstOpName[] = "Const"; + int FindNodeWithPredicate(const std::function& predicate, const GraphDef& graph) { for (int i = 0; i < graph.node_size(); ++i) { @@ -68,9 +70,8 @@ Status AddScalarConstNodeHelper( DataType dtype, const std::function& add_value, GraphDef* graph, NodeDef** result) { NodeDef* node = graph->add_node(); - const string& name = strings::StrCat("Const/_", graph->node_size()); - node->set_name(name); - node->set_op("Const"); + node->set_op(kConstOpName); + SetUniqueName(kConstOpName, graph, node); (*node->mutable_attr())["dtype"].set_type(dtype); std::unique_ptr tensor = tensorflow::MakeUnique(); @@ -94,7 +95,7 @@ Status AddNode(const string& name, const string& op, if (!name.empty()) { node->set_name(name); } else { - node->set_name(strings::StrCat(op, "/_", graph->node_size())); + SetUniqueName(op, graph, node); } node->set_op(op); for (const string& input : inputs) { @@ -212,6 +213,14 @@ int FindNodeWithOp(const string& op, const GraphDef& graph) { [op](const NodeDef& node) { 
return node.op() == op; }, graph); } +void SetUniqueName(const string& op, GraphDef* graph, NodeDef* node) { + int id = graph->node_size(); + while (ContainsNodeWithName(strings::StrCat(op, "/_", id), *graph)) { + ++id; + } + node->set_name(strings::StrCat(op, "/_", id)); +} + } // end namespace graph_utils } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h index b40ca44d78..3d2467031f 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_utils.h +++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h @@ -74,6 +74,10 @@ int FindNodeWithName(const string& name, const GraphDef& graph); // exists. int FindNodeWithOp(const string& op, const GraphDef& graph); +// Sets the node name using the op name as a prefix while guaranteeing the name +// is unique across the graph. +void SetUniqueName(const string& op, GraphDef* graph, NodeDef* node); + } // end namespace graph_utils } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc index b34726044e..00f66c9bc1 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc +++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc @@ -136,6 +136,21 @@ TEST_F(GraphUtilsTest, FindNodeWithOp) { EXPECT_EQ(FindNodeWithOp("OpA", graph), -1); } +TEST_F(GraphUtilsTest, SetUniqueName) { + GraphDef graph; + + NodeDef* node1; + TF_EXPECT_OK(AddNode("", "A", {}, {}, &graph, &node1)); + NodeDef* node2; + TF_EXPECT_OK(AddNode("", "A", {}, {}, &graph, &node2)); + EXPECT_NE(node1->name(), node2->name()); + + TF_EXPECT_OK(DeleteNodes({node1->name()}, &graph)); + NodeDef* node3; + TF_EXPECT_OK(AddNode("", "A", {}, {}, &graph, &node3)); + EXPECT_NE(node2->name(), node3->name()); +} + } // namespace } // namespace graph_utils } // namespace grappler 
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc index 290326ab75..a28b21224e 100644 --- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc @@ -28,6 +28,11 @@ limitations under the License. namespace tensorflow { namespace grappler { +namespace { + +constexpr char kFusedOpName[] = "MapAndBatchDatasetV2"; + +} // namespace Status MapAndBatchFusion::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { @@ -39,21 +44,20 @@ Status MapAndBatchFusion::Optimize(Cluster* cluster, const GrapplerItem& item, continue; } - // Use a more descriptive variable name now that we now the node type. - NodeDef batch_node(node); + // Use a more descriptive variable name now that we know the node type. + const NodeDef batch_node(node); GraphView::InputPort input_port = graph.GetInputPort(batch_node.name(), 0); NodeDef* node2 = graph.GetRegularFanin(input_port).node; if (node2->op() != "MapDataset" && node2->op() != "ParallelMapDataset") { continue; } - // Use a more descriptive variable name now that we now the node type. - NodeDef* map_node = node2; - NodeDef* new_node = output->mutable_node()->Add(); - new_node->set_op("MapAndBatchDatasetV2"); - new_node->set_name( - strings::StrCat("MapAndBatchDatasetV2/_", output->node_size())); + NodeDef* new_node = output->add_node(); + new_node->set_op(kFusedOpName); + graph_utils::SetUniqueName(kFusedOpName, output, new_node); + // Use a more descriptive variable name now that we know the node type. + NodeDef* map_node = node2; // Set the `input` input argument. 
new_node->add_input(map_node->input(0)); diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h index a5a4d91df6..2c64831105 100644 --- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h @@ -23,13 +23,13 @@ namespace grappler { class MapAndBatchFusion : public CustomGraphOptimizer { public: - MapAndBatchFusion() {} - ~MapAndBatchFusion() override {} + MapAndBatchFusion() = default; + ~MapAndBatchFusion() override = default; string name() const override { return "map_and_batch_fusion"; }; - Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer* config = - nullptr) override { + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc index 8c7498dc5d..76d2f5d537 100644 --- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc @@ -204,10 +204,9 @@ TEST(MapAndBatchFusionTest, FuseParallelMapAndBatchNodesIntoOne) { } TEST(MapAndBatchFusionTest, NoChange) { - std::vector> empty_attributes; - GrapplerItem item; GraphDef *graph = &item.graph; + NodeDef *start_node; TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); NodeDef *stop_node; @@ -219,9 +218,27 @@ TEST(MapAndBatchFusionTest, NoChange) { range_inputs[0] = start_node->name(); range_inputs[1] = stop_node->name(); range_inputs[2] = step_node->name(); + std::vector> range_attrs; NodeDef *range_node; TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, - empty_attributes, graph, &range_node)); + range_attrs, graph, &range_node)); + + NodeDef *batch_size_node; + TF_ASSERT_OK( + 
graph_utils::AddScalarConstNode(5, graph, &batch_size_node)); + std::vector batch_inputs(2); + batch_inputs[0] = range_node->name(); + batch_inputs[1] = batch_size_node->name(); + std::vector> batch_attrs(2); + AttrValue shapes_attr; + SetAttrValue("output_shapes", &shapes_attr); + batch_attrs[0] = std::make_pair("output_shapes", shapes_attr); + AttrValue types_attr; + SetAttrValue("output_types", &types_attr); + batch_attrs[1] = std::make_pair("output_types", types_attr); + NodeDef *batch_node; + TF_ASSERT_OK(graph_utils::AddNode("", "BatchDataset", batch_inputs, + batch_attrs, graph, &batch_node)); MapAndBatchFusion optimizer; GraphDef output; diff --git a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc new file mode 100644 index 0000000000..0df73b33ed --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc @@ -0,0 +1,112 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h"
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/platform/protobuf.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+constexpr char kFusedOpName[] = "ShuffleAndRepeatDataset";
+
+}  // namespace
+
+// Rewrites each `ShuffleDataset` -> `RepeatDataset` chain of `item.graph` into
+// one `ShuffleAndRepeatDataset` node; the rewritten graph goes to `*output`.
+Status ShuffleAndRepeatFusion::Optimize(Cluster* cluster,
+                                        const GrapplerItem& item,
+                                        GraphDef* output) {
+  *output = item.graph;
+  GraphView graph(output);
+  std::set<string> nodes_to_delete;
+  for (const NodeDef& node : item.graph.node()) {
+    if (node.op() != "RepeatDataset") {
+      continue;
+    }
+
+    // Use a more descriptive variable name now that we know the node type.
+    const NodeDef& repeat_node = node;
+    GraphView::InputPort input_port = graph.GetInputPort(repeat_node.name(), 0);
+    NodeDef* node2 = graph.GetRegularFanin(input_port).node;
+    if (node2->op() != "ShuffleDataset") {
+      continue;
+    }
+
+    NodeDef* new_node = output->add_node();
+    new_node->set_op(kFusedOpName);
+    graph_utils::SetUniqueName(kFusedOpName, output, new_node);
+
+    // Use a more descriptive variable name now that we know the node type.
+    NodeDef* shuffle_node = node2;
+
+    // Set the `input` input argument.
+    new_node->add_input(shuffle_node->input(0));
+
+    // Set the `buffer_size` input argument.
+    new_node->add_input(shuffle_node->input(1));
+
+    // Set the `seed` input argument.
+    new_node->add_input(shuffle_node->input(2));
+
+    // Set the `seed2` input argument.
+    new_node->add_input(shuffle_node->input(3));
+
+    // Set the `count` input argument.
+    new_node->add_input(repeat_node.input(1));
+
+    // Set `output_types` and `output_shapes` attributes.
+    for (auto key : {"output_shapes", "output_types"}) {
+      (*new_node->mutable_attr())[key] = repeat_node.attr().at(key);
+    }
+
+    // Mark the `Shuffle` and `Repeat` nodes for removal.
+    nodes_to_delete.insert(shuffle_node->name());
+    nodes_to_delete.insert(repeat_node.name());
+
+    // Redirect each consumer of the `Repeat` node to `ShuffleAndRepeat`,
+    // rewriting the input slot that referenced `Repeat` (not always slot 0).
+    GraphView::OutputPort output_port =
+        graph.GetOutputPort(repeat_node.name(), 0);
+    for (const auto& consumer : graph.GetFanout(output_port)) {
+      consumer.node->set_input(consumer.port_id, new_node->name());
+    }
+  }
+  TF_RETURN_IF_ERROR(graph_utils::DeleteNodes(nodes_to_delete, output));
+  return Status::OK();
+}
+
+void ShuffleAndRepeatFusion::Feedback(Cluster* cluster,
+                                      const GrapplerItem& item,
+                                      const GraphDef& optimize_output,
+                                      double result) {
+  // no-op
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(ShuffleAndRepeatFusion,
+                            "shuffle_and_repeat_fusion");
+
+}  // end namespace grappler
+}  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h
new file mode 100644
index 0000000000..c8fa53edce
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_SHUFFLE_AND_REPEAT_FUSION_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_SHUFFLE_AND_REPEAT_FUSION_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +class ShuffleAndRepeatFusion : public CustomGraphOptimizer { + public: + ShuffleAndRepeatFusion() = default; + ~ShuffleAndRepeatFusion() override = default; + + string name() const override { return "shuffle_and_repeat_fusion"; }; + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_SHUFFLE_AND_REPEAT_FUSION_H_ diff --git a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion_test.cc new file mode 100644 index 0000000000..e89675efb7 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion_test.cc @@ -0,0 +1,149 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.h" + +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(ShuffleAndRepeatFusionTest, FuseShuffleAndRepeatNodesIntoOne) { + GrapplerItem item; + GraphDef *graph = &item.graph; + + std::vector> common_attrs(2); + AttrValue shapes_attr; + SetAttrValue("output_shapes", &shapes_attr); + common_attrs[0] = std::make_pair("output_shapes", shapes_attr); + AttrValue types_attr; + SetAttrValue("output_types", &types_attr); + common_attrs[1] = std::make_pair("output_types", types_attr); + + NodeDef *start_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); + NodeDef *stop_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(10, graph, &stop_node)); + NodeDef *step_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(1, graph, &step_node)); + + std::vector range_inputs(3); + range_inputs[0] = start_node->name(); + range_inputs[1] = stop_node->name(); + range_inputs[2] = step_node->name(); + NodeDef *range_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, + 
common_attrs, graph, &range_node)); + + NodeDef *buffer_size_node; + TF_ASSERT_OK( + graph_utils::AddScalarConstNode(128, graph, &buffer_size_node)); + NodeDef *seed_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(-1, graph, &seed_node)); + NodeDef *seed2_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(-1, graph, &seed2_node)); + std::vector shuffle_inputs(4); + shuffle_inputs[0] = range_node->name(); + shuffle_inputs[1] = buffer_size_node->name(); + shuffle_inputs[2] = seed_node->name(); + shuffle_inputs[3] = seed2_node->name(); + NodeDef *shuffle_node; + TF_ASSERT_OK(graph_utils::AddNode("", "ShuffleDataset", shuffle_inputs, + common_attrs, graph, &shuffle_node)); + + NodeDef *count_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(-1, graph, &count_node)); + std::vector repeat_inputs(2); + repeat_inputs[0] = shuffle_node->name(); + repeat_inputs[1] = count_node->name(); + NodeDef *repeat_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RepeatDataset", repeat_inputs, + common_attrs, graph, &repeat_node)); + + ShuffleAndRepeatFusion optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_FALSE(graph_utils::ContainsNodeWithName(shuffle_node->name(), output)); + EXPECT_FALSE(graph_utils::ContainsNodeWithName(repeat_node->name(), output)); + EXPECT_TRUE( + graph_utils::ContainsNodeWithOp("ShuffleAndRepeatDataset", output)); + NodeDef shuffle_and_repeat_node = output.node( + graph_utils::FindNodeWithOp("ShuffleAndRepeatDataset", output)); + EXPECT_EQ(shuffle_and_repeat_node.input_size(), 5); + EXPECT_EQ(shuffle_and_repeat_node.input(0), shuffle_node->input(0)); + EXPECT_EQ(shuffle_and_repeat_node.input(1), shuffle_node->input(1)); + EXPECT_EQ(shuffle_and_repeat_node.input(2), shuffle_node->input(2)); + EXPECT_EQ(shuffle_and_repeat_node.input(3), shuffle_node->input(3)); + EXPECT_EQ(shuffle_and_repeat_node.input(4), repeat_node->input(1)); + EXPECT_TRUE( + 
AreAttrValuesEqual(shuffle_and_repeat_node.attr().at("output_shapes"), + repeat_node->attr().at("output_shapes"))); + EXPECT_TRUE( + AreAttrValuesEqual(shuffle_and_repeat_node.attr().at("output_types"), + repeat_node->attr().at("output_types"))); +} + +TEST(ShuffleAndRepeatFusionTest, NoChange) { + GrapplerItem item; + GraphDef *graph = &item.graph; + + std::vector> common_attrs(2); + AttrValue shapes_attr; + SetAttrValue("output_shapes", &shapes_attr); + common_attrs[0] = std::make_pair("output_shapes", shapes_attr); + AttrValue types_attr; + SetAttrValue("output_types", &types_attr); + common_attrs[1] = std::make_pair("output_types", types_attr); + + NodeDef *start_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); + NodeDef *stop_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(10, graph, &stop_node)); + NodeDef *step_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(1, graph, &step_node)); + + std::vector range_inputs(3); + range_inputs[0] = start_node->name(); + range_inputs[1] = stop_node->name(); + range_inputs[2] = step_node->name(); + NodeDef *range_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, + common_attrs, graph, &range_node)); + + NodeDef *count_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(-1, graph, &count_node)); + std::vector repeat_inputs(2); + repeat_inputs[0] = range_node->name(); + repeat_inputs[1] = count_node->name(); + NodeDef *repeat_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RepeatDataset", repeat_inputs, + common_attrs, graph, &repeat_node)); + + ShuffleAndRepeatFusion optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_TRUE(graph_utils::Compare(*graph, output)); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow -- GitLab From 7bb79ee219d4efbd92d1ef4e0dbe45f4aee26654 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 8 Jun 2018 12:46:39 -0700 Subject: [PATCH 070/365] Ask NumPy for read 
only array when converting it to Tensor. Fix for: #17315 If numpy array is read-only, calling PyArray_FromAny with NPY_ARRAY_CARRAY flags introduces an extra memory copy. Before: feed_cpu_variable_read_only: 5.6 GB/sec, min: 17.99, median: 19.54, mean: 19.76 After: feed_cpu_variable_read_only: 13.2 GB/sec, min: 7.60, median: 7.78, mean: 8.13 PiperOrigin-RevId: 199835695 --- tensorflow/python/lib/core/ndarray_tensor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index 2acab92764..ec1ba7b8f7 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -411,7 +411,7 @@ Status PyArrayToTF_Tensor(PyObject* ndarray, Safe_TF_TensorPtr* out_tensor) { // Make sure we dereference this array object in case of error, etc. Safe_PyObjectPtr array_safe(make_safe( - PyArray_FromAny(ndarray, nullptr, 0, 0, NPY_ARRAY_CARRAY, nullptr))); + PyArray_FromAny(ndarray, nullptr, 0, 0, NPY_ARRAY_CARRAY_RO, nullptr))); if (!array_safe) return errors::InvalidArgument("Not a ndarray."); PyArrayObject* array = reinterpret_cast(array_safe.get()); -- GitLab From 278fbe4146b160980fec318187546d9d8870d244 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Fri, 8 Jun 2018 12:50:16 -0700 Subject: [PATCH 071/365] Add kGenerateToken HLO instruction. The new HLO instruction serves two purposes. (1) It generates a new token value. This is the only way to create tokens. (2) The operation is variadic, taking zero or more token operands. The operation acts as a join of its operands. I considered initially using a kConstant constant as a method to create new tokens, but this ran into problems because of expectations in backends regarding constants and their materialization. This CL enables creation of generate-token instructions, but the new instruction is not supported yet in any backend.
PiperOrigin-RevId: 199836205 --- .../compiler/xla/service/dfs_hlo_visitor.h | 2 + .../service/dfs_hlo_visitor_with_default.h | 3 + .../compiler/xla/service/hlo_cost_analysis.cc | 4 + .../compiler/xla/service/hlo_cost_analysis.h | 1 + .../compiler/xla/service/hlo_evaluator.cc | 8 ++ .../compiler/xla/service/hlo_evaluator.h | 2 + .../compiler/xla/service/hlo_graph_dumper.cc | 1 + .../compiler/xla/service/hlo_instruction.cc | 17 +++ .../compiler/xla/service/hlo_instruction.h | 5 + tensorflow/compiler/xla/service/hlo_opcode.h | 1 + .../compiler/xla/service/hlo_opcode_test.cc | 1 + tensorflow/compiler/xla/service/hlo_parser.cc | 8 ++ .../compiler/xla/service/hlo_verifier.cc | 50 +++++++ .../compiler/xla/service/hlo_verifier.h | 1 + .../xla/service/instruction_fusion.cc | 1 + .../compiler/xla/service/shape_inference.cc | 11 ++ .../compiler/xla/service/shape_inference.h | 7 + tensorflow/compiler/xla/tests/BUILD | 16 +++ .../compiler/xla/tests/token_hlo_test.cc | 124 ++++++++++++++++++ 19 files changed, 263 insertions(+) create mode 100644 tensorflow/compiler/xla/tests/token_hlo_test.cc diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 64678d9d74..ee2b455730 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -243,6 +243,8 @@ class DfsHloVisitorBase { virtual Status HandleBatchNormGrad(HloInstructionPtr hlo) = 0; + virtual Status HandleGenerateToken(HloInstructionPtr token) = 0; + // Invoked to inform the visitor that the traversal has completed, and that // the root was "root". 
virtual Status FinishVisit(HloInstructionPtr root) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 240faebe62..6934e00a4b 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -188,6 +188,9 @@ class DfsHloVisitorWithDefaultBase Status HandleGather(HloInstructionPtr gather) override { return DefaultAction(gather); } + Status HandleGenerateToken(HloInstructionPtr token) override { + return DefaultAction(token); + } // Invoked to inform the visitor that the traversal has completed, and that // the root was "root". diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index b9d30ee802..92a66681a9 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -387,6 +387,10 @@ Status HloCostAnalysis::HandleTranspose(const HloInstruction*) { return Status::OK(); } +Status HloCostAnalysis::HandleGenerateToken(const HloInstruction*) { + return Status::OK(); +} + Status HloCostAnalysis::HandleConvolution(const HloInstruction* convolution) { auto lhs = convolution->operand(0); auto rhs = convolution->operand(1); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index d17678d20f..0d66736fe1 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -97,6 +97,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleBroadcast(const HloInstruction* broadcast) override; Status HandlePad(const HloInstruction* pad) override; Status HandleReshape(const HloInstruction* reshape) override; + Status HandleGenerateToken(const HloInstruction* token) override; Status HandleTranspose(const HloInstruction* transpose) 
override; Status HandleWhile(const HloInstruction* xla_while) override; Status HandleConditional(const HloInstruction* conditional) override; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 1e78d775c8..e0648e1467 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -910,6 +910,14 @@ Status HloEvaluator::HandleBroadcast(HloInstruction* broadcast) { return Status::OK(); } +Status HloEvaluator::HandleGenerateToken(HloInstruction* token) { + // Literals cannot represent a TOKEN shape so just create an empty tuple as + // the "result" of the kGenerateToken operation. + // TODO(b/109929053): Add support for TOKENs in Literals. + evaluated_[token] = Literal::MakeTuple({}); + return Status::OK(); +} + Status HloEvaluator::HandleGetTupleElement(HloInstruction* get_tuple_element) { const auto result_shape = get_tuple_element->shape(); const int64 index = get_tuple_element->tuple_index(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index b53d5644de..fc2fc9437b 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -174,6 +174,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleBroadcast(HloInstruction* broadcast) override; + Status HandleGenerateToken(HloInstruction* token) override; + // Returns the already-evaluated literal result for the instruction. // A Constant instruction is considered evaluated and its literal will be // returned directly without looking up the cache. 
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index a6750460e5..cf954001c6 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -964,6 +964,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kBitcast: case HloOpcode::kGetTupleElement: case HloOpcode::kTrace: + case HloOpcode::kGenerateToken: case HloOpcode::kTuple: return kWhite; case HloOpcode::kBroadcast: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index ae230d2740..a778a6a965 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -583,6 +583,17 @@ HloInstruction::CreateCrossReplicaSum( return MakeUnique(shape, operand, dimensions); } +/* static */ std::unique_ptr +HloInstruction::CreateGenerateToken( + tensorflow::gtl::ArraySlice operands) { + auto instruction = WrapUnique(new HloInstruction( + HloOpcode::kGenerateToken, ShapeUtil::MakeTokenShape())); + for (auto operand : operands) { + instruction->AppendOperand(operand); + } + return instruction; +} + /* static */ std::unique_ptr HloInstruction::CreateWhile( const Shape& shape, HloComputation* condition, HloComputation* body, HloInstruction* init) { @@ -1512,6 +1523,9 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( CreateDomain(shape, new_operands[0], operand_side_metadata_->Clone(), user_side_metadata_->Clone()); break; + case HloOpcode::kGenerateToken: + clone = CreateGenerateToken(new_operands); + break; case HloOpcode::kTrace: LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode_); } @@ -1776,6 +1790,7 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kRng: case HloOpcode::kTrace: case HloOpcode::kWhile: + case HloOpcode::kGenerateToken: return false; case HloOpcode::kParameter: @@ -2776,6 
+2791,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleGather(this); case HloOpcode::kDomain: return visitor->HandleDomain(this); + case HloOpcode::kGenerateToken: + return visitor->HandleGenerateToken(this); // These opcodes are not handled here. case HloOpcode::kTrace: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index cc4a8b8252..d252533eb2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -664,6 +664,11 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions); + // Creates a token instruction used for joining or creating token types which + // thread through side-effecting operations. + static std::unique_ptr CreateGenerateToken( + tensorflow::gtl::ArraySlice operands); + // Creates an instance of GatherDimensionNumbers. static GatherDimensionNumbers MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index 1fe06ee0c0..a35546f5f4 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -81,6 +81,7 @@ namespace xla { V(kFusion, "fusion", kHloOpcodeIsVariadic) \ V(kGather, "gather") \ V(kGe, "greater-than-or-equal-to", kHloOpcodeIsComparison) \ + V(kGenerateToken, "generate-token", kHloOpcodeIsVariadic) \ V(kGetTupleElement, "get-tuple-element") \ V(kGt, "greater-than", kHloOpcodeIsComparison) \ V(kHostCompute, "host-compute") \ diff --git a/tensorflow/compiler/xla/service/hlo_opcode_test.cc b/tensorflow/compiler/xla/service/hlo_opcode_test.cc index cd2ce5c69f..774345124b 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode_test.cc +++ b/tensorflow/compiler/xla/service/hlo_opcode_test.cc @@ -58,6 +58,7 @@ TEST(HloOpcodeTest, OpcodeProperties) { case 
HloOpcode::kConcatenate: case HloOpcode::kFusion: case HloOpcode::kMap: + case HloOpcode::kGenerateToken: case HloOpcode::kTuple: EXPECT_TRUE(HloOpcodeIsVariadic(opcode)); break; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index a1bc269400..bf1c7b9323 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -606,6 +606,14 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateReshape(shape, operands[0])); break; } + case HloOpcode::kGenerateToken: { + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateGenerateToken(operands)); + break; + } case HloOpcode::kTuple: { if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 9cfd8a9bf7..9034073cc8 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -426,6 +426,14 @@ Status ShapeVerifier::HandleGather(HloInstruction* gather) { gather->gather_dimension_numbers(), gather->gather_window_bounds())); } +Status ShapeVerifier::HandleGenerateToken(HloInstruction* token) { + std::vector operand_shapes; + for (const HloInstruction* operand : token->operands()) { + operand_shapes.push_back(&operand->shape()); + } + return CheckShape(token, ShapeInference::InferTokenShape(operand_shapes)); +} + Status ShapeVerifier::CheckShape(const HloInstruction* instruction, const Shape& inferred_shape) { // If allow_mixed_precision_ is false, check if there are operands with @@ -791,6 +799,46 @@ Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) { return Status::OK(); } +namespace { + +// Returns true if the given Shape has a TOKEN shape as any subshape. 
+bool ShapeContainsToken(const Shape& shape) { + bool contains_token = false; + ShapeUtil::ForEachSubshape( + shape, [&contains_token](const Shape& subshape, const ShapeIndex&) { + if (ShapeUtil::IsToken(subshape)) { + contains_token = true; + } + }); + return contains_token; +} + +// Verifies that all types entering and exiting the entry computation are +// legal. For example, TOKEN types have no Literal representation and cannot be +// on the interface of the entry computation (parameters and root instruction). +Status VerifyEntryAndExitShapes(const HloModule& module) { + for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) { + HloInstruction* param = + module.entry_computation()->parameter_instruction(i); + if (ShapeContainsToken(param->shape())) { + return InternalError( + "Entry parameter %d is or contains a token shape: %s", i, + ShapeUtil::HumanString(param->shape()).c_str()); + } + } + if (ShapeContainsToken( + module.entry_computation()->root_instruction()->shape())) { + return InternalError( + "Entry root is or contains a token shape: %s", + ShapeUtil::HumanString( + module.entry_computation()->root_instruction()->shape()) + .c_str()); + } + return Status::OK(); +} + +} // namespace + StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); @@ -851,6 +899,8 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get())); } + TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module)); + return false; } diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 1392a78097..7283b3e7dc 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -81,6 +81,7 @@ class ShapeVerifier : public DfsHloVisitor { HloInstruction* batch_norm_inference) override; Status HandleBatchNormGrad(HloInstruction* batch_norm_grad) override; Status 
HandleGather(HloInstruction* gather) override; + Status HandleGenerateToken(HloInstruction* token) override; Status FinishVisit(HloInstruction*) override { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 429c850343..abedb4063d 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -96,6 +96,7 @@ bool IsAlwaysDuplicable(const HloInstruction& instruction) { case HloOpcode::kShiftRightLogical: case HloOpcode::kSlice: case HloOpcode::kSubtract: + case HloOpcode::kGenerateToken: case HloOpcode::kTranspose: case HloOpcode::kTuple: return false; diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index d624f548b1..fdc7f41759 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -463,6 +463,17 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, return ShapeUtil::MakeShape(element_type, new_dimensions); } +/* static */ StatusOr ShapeInference::InferTokenShape( + tensorflow::gtl::ArraySlice arg_shapes) { + for (const Shape* arg_shape : arg_shapes) { + if (arg_shape->element_type() != TOKEN) { + return InvalidArgument( + "Operands of token instructions must be TOKEN types."); + } + } + return ShapeUtil::MakeTokenShape(); +} + /* static */ StatusOr ShapeInference::InferConvertShape( const Shape& operand_shape, PrimitiveType new_element_type) { auto old_element_type = operand_shape.element_type(); diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 9da2c99b41..6100e2cd33 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -227,6 +227,13 @@ class ShapeInference { static StatusOr InferConcatOpShape( tensorflow::gtl::ArraySlice 
arg_shapes, int64 dimension); + // Infers the shape produced by a kGenerateToken operation. Trivially this + // shape is always a TOKEN shape. However, ShapeInference serves two purposes: + // inferring shapes and checking operand shapes. This method verifies that the + // operand shapes are all TOKENs. + static StatusOr InferTokenShape( + tensorflow::gtl::ArraySlice arg_shapes); + // Helper that validates the given operand shape can be converted to the // target output_shape via a convert instruction -- the requirement is that // the shape is identical except for the element type. diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 7f6bbe6f87..e7e0a19db0 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1203,6 +1203,22 @@ xla_test( ], ) +xla_test( + name = "token_hlo_test", + srcs = ["token_hlo_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], + deps = [ + ":client_library_test_base", + "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + xla_test( name = "call_test", srcs = ["call_test.cc"], diff --git a/tensorflow/compiler/xla/tests/token_hlo_test.cc b/tensorflow/compiler/xla/tests/token_hlo_test.cc new file mode 100644 index 0000000000..4585244ce8 --- /dev/null +++ b/tensorflow/compiler/xla/tests/token_hlo_test.cc @@ -0,0 +1,124 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class TokenHloTest : public HloTestBase {}; + +// TODO(b/79770375): Compile, not just verify the HLO module when the backends +// support kGenerateToken. 
+XLA_TEST_F(TokenHloTest, SingleTokenInstruction) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction(HloInstruction::CreateGenerateToken({})); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42))); + + module->AddEntryComputation(builder.Build()); + EXPECT_IS_OK(HloVerifier().Run(module.get()).status()); +} + +XLA_TEST_F(TokenHloTest, TokenTree) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto token0 = builder.AddInstruction(HloInstruction::CreateGenerateToken({})); + auto token1 = builder.AddInstruction(HloInstruction::CreateGenerateToken({})); + auto token2 = builder.AddInstruction(HloInstruction::CreateGenerateToken({})); + builder.AddInstruction( + HloInstruction::CreateGenerateToken({token0, token0, token1, token2})); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42))); + + module->AddEntryComputation(builder.Build()); + EXPECT_IS_OK(HloVerifier().Run(module.get()).status()); +} + +XLA_TEST_F(TokenHloTest, InvalidTokenShapedEntryParameter) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "p0")); + builder.AddInstruction( + HloInstruction::CreateParameter(1, ShapeUtil::MakeTokenShape(), "p1")); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42))); + module->AddEntryComputation(builder.Build()); + + Status status = HloVerifier().Run(module.get()).status(); + ASSERT_IS_NOT_OK(status); + EXPECT_THAT( + status.error_message(), + ::testing::HasSubstr("Entry parameter 1 is or contains a token shape")); +} + +XLA_TEST_F(TokenHloTest, InvalidTupleTokenShapedEntryParameter) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + 
builder.AddInstruction(HloInstruction::CreateParameter( + 0, + ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {1, 2, 3}), ShapeUtil::MakeTokenShape()}), + "param")); + module->AddEntryComputation(builder.Build()); + + Status status = HloVerifier().Run(module.get()).status(); + ASSERT_IS_NOT_OK(status); + EXPECT_THAT( + status.error_message(), + ::testing::HasSubstr("Entry parameter 0 is or contains a token shape")); +} + +XLA_TEST_F(TokenHloTest, InvalidTokenRoot) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + builder.AddInstruction(HloInstruction::CreateGenerateToken({})); + module->AddEntryComputation(builder.Build()); + + Status status = HloVerifier().Run(module.get()).status(); + ASSERT_IS_NOT_OK(status); + EXPECT_THAT(status.error_message(), + ::testing::HasSubstr("Entry root is or contains a token shape")); +} + +XLA_TEST_F(TokenHloTest, InvalidOperandToTokenInstruction) { + std::unique_ptr module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(F32, {}), "p0")); + builder.AddInstruction(HloInstruction::CreateGenerateToken({param})); + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(123))); + module->AddEntryComputation(builder.Build()); + + Status status = HloVerifier().Run(module.get()).status(); + ASSERT_IS_NOT_OK(status); + EXPECT_THAT(status.error_message(), + ::testing::HasSubstr( + "Operands of token instructions must be TOKEN types")); +} + +} // namespace +} // namespace xla -- GitLab From 9affc2080bf9840f4c7da2990ba528114e25d3b1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 8 Jun 2018 12:51:11 -0700 Subject: [PATCH 072/365] Change gRPC include directory from "grpc++" to "grpcpp" PiperOrigin-RevId: 199836336 --- .../compiler/xla/rpc/grpc_client_test.cc | 4 ++-- tensorflow/compiler/xla/rpc/grpc_service.h | 2 +- .../compiler/xla/rpc/grpc_service_main.cc | 6 +++--- .../tpu/profiler/capture_tpu_profile.cc | 2 +- tensorflow/contrib/verbs/grpc_verbs_service.cc | 6 +++--- .../contrib/verbs/grpc_verbs_service_impl.cc | 16 ++++++++-------- .../contrib/verbs/grpc_verbs_service_impl.h | 16 ++++++++-------- tensorflow/core/debug/debug_grpc_testlib.h | 2 +- tensorflow/core/debug/debug_io_utils.cc | 2 +- .../core/distributed_runtime/master_test.cc | 2 +- .../rpc/eager/grpc_eager_client.cc | 2 +- .../rpc/eager/grpc_eager_service.cc | 16 ++++++++-------- .../rpc/eager/grpc_eager_service.h | 16 ++++++++-------- .../rpc/eager/grpc_eager_service_impl.h | 6 +++--- .../core/distributed_runtime/rpc/grpc_call.h | 6 +++--- .../distributed_runtime/rpc/grpc_channel.cc | 2 +- .../distributed_runtime/rpc/grpc_channel.h | 2 +- .../rpc/grpc_client_cq_tag.h | 2 +- .../rpc/grpc_master_service.cc | 4 ++-- .../rpc/grpc_master_service_impl.cc | 16 ++++++++-------- .../rpc/grpc_master_service_impl.h | 16 ++++++++-------- .../rpc/grpc_remote_worker.cc | 4 ++-- .../distributed_runtime/rpc/grpc_server_lib.cc | 6 +++--- .../distributed_runtime/rpc/grpc_server_lib.h | 4 ++-- .../core/distributed_runtime/rpc/grpc_state.h | 4 ++-- .../rpc/grpc_tensor_coding.cc | 4 ++-- .../rpc/grpc_tensor_coding_test.cc | 4 ++-- .../rpc/grpc_tensorflow_server.cc | 6 +++--- .../rpc/grpc_testlib_server.cc | 6 +++--- .../core/distributed_runtime/rpc/grpc_util.h | 6 +++--- .../rpc/grpc_worker_service.cc | 4 ++-- .../rpc/grpc_worker_service_impl.cc | 16 ++++++++-------- .../rpc/grpc_worker_service_impl.h | 18 +++++++++--------- 33 files changed, 114 insertions(+), 114 deletions(-) diff --git a/tensorflow/compiler/xla/rpc/grpc_client_test.cc 
b/tensorflow/compiler/xla/rpc/grpc_client_test.cc index 313f11a9a9..d7dd9786a2 100644 --- a/tensorflow/compiler/xla/rpc/grpc_client_test.cc +++ b/tensorflow/compiler/xla/rpc/grpc_client_test.cc @@ -20,8 +20,8 @@ limitations under the License. #include #include -#include "grpc++/create_channel.h" -#include "grpc++/security/credentials.h" +#include "grpcpp/create_channel.h" +#include "grpcpp/security/credentials.h" #include "tensorflow/compiler/xla/client/client.h" #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" diff --git a/tensorflow/compiler/xla/rpc/grpc_service.h b/tensorflow/compiler/xla/rpc/grpc_service.h index 5cd573167a..ca1b09b648 100644 --- a/tensorflow/compiler/xla/rpc/grpc_service.h +++ b/tensorflow/compiler/xla/rpc/grpc_service.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_RPC_GRPC_SERVICE_H_ #define TENSORFLOW_COMPILER_XLA_RPC_GRPC_SERVICE_H_ -#include "grpc++/server_context.h" +#include "grpcpp/server_context.h" #include "tensorflow/compiler/xla/rpc/xla_service.grpc.pb.h" #include "tensorflow/compiler/xla/service/service.h" diff --git a/tensorflow/compiler/xla/rpc/grpc_service_main.cc b/tensorflow/compiler/xla/rpc/grpc_service_main.cc index e29908ccec..c68c857c30 100644 --- a/tensorflow/compiler/xla/rpc/grpc_service_main.cc +++ b/tensorflow/compiler/xla/rpc/grpc_service_main.cc @@ -15,9 +15,9 @@ limitations under the License. // Basic server binary that exposes a xla::Service through a GRPC interface // on a configurable port. 
-#include "grpc++/security/server_credentials.h" -#include "grpc++/server.h" -#include "grpc++/server_builder.h" +#include "grpcpp/security/server_credentials.h" +#include "grpcpp/server.h" +#include "grpcpp/server_builder.h" #include "tensorflow/compiler/xla/rpc/grpc_service.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/init_main.h" diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index 99485322c6..f80f5652af 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -18,7 +18,7 @@ limitations under the License. // Initiates a TPU profiling on the TPUProfiler service at service_addr, // receives and dumps the profile data to a tensorboard log directory. -#include "grpc++/grpc++.h" +#include "grpcpp/grpcpp.h" #include #include diff --git a/tensorflow/contrib/verbs/grpc_verbs_service.cc b/tensorflow/contrib/verbs/grpc_verbs_service.cc index 742f946c95..af29abd91f 100644 --- a/tensorflow/contrib/verbs/grpc_verbs_service.cc +++ b/tensorflow/contrib/verbs/grpc_verbs_service.cc @@ -15,9 +15,9 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS -#include "grpc++/alarm.h" -#include "grpc++/grpc++.h" -#include "grpc++/server_builder.h" +#include "grpcpp/alarm.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/server_builder.h" #include "tensorflow/contrib/verbs/grpc_verbs_service.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" diff --git a/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc b/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc index 991f9a9d8b..4da7b59c69 100644 --- a/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc +++ b/tensorflow/contrib/verbs/grpc_verbs_service_impl.cc @@ -15,14 +15,14 @@ limitations under the License. 
#include "tensorflow/contrib/verbs/grpc_verbs_service_impl.h" -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/channel_interface.h" -#include "grpc++/impl/codegen/client_unary_call.h" -#include "grpc++/impl/codegen/method_handler_impl.h" -#include "grpc++/impl/codegen/rpc_service_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/channel_interface.h" +#include "grpcpp/impl/codegen/client_unary_call.h" +#include "grpcpp/impl/codegen/method_handler_impl.h" +#include "grpcpp/impl/codegen/rpc_service_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/sync_stream.h" namespace tensorflow { diff --git a/tensorflow/contrib/verbs/grpc_verbs_service_impl.h b/tensorflow/contrib/verbs/grpc_verbs_service_impl.h index 1f0f10517e..abe5e08b07 100644 --- a/tensorflow/contrib/verbs/grpc_verbs_service_impl.h +++ b/tensorflow/contrib/verbs/grpc_verbs_service_impl.h @@ -16,14 +16,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CONTRIB_GRPC_VERBS_SERVICE_IMPL_H_ #define TENSORFLOW_CONTRIB_GRPC_VERBS_SERVICE_IMPL_H_ -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/proto_utils.h" -#include "grpc++/impl/codegen/rpc_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/status.h" -#include "grpc++/impl/codegen/stub_options.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/proto_utils.h" +#include "grpcpp/impl/codegen/rpc_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/status.h" +#include "grpcpp/impl/codegen/stub_options.h" +#include "grpcpp/impl/codegen/sync_stream.h" #include "tensorflow/contrib/verbs/verbs_service.pb.h" diff --git a/tensorflow/core/debug/debug_grpc_testlib.h b/tensorflow/core/debug/debug_grpc_testlib.h index 58361bf78f..8d3c9ff575 100644 --- a/tensorflow/core/debug/debug_grpc_testlib.h +++ b/tensorflow/core/debug/debug_grpc_testlib.h @@ -19,7 +19,7 @@ limitations under the License. #include #include -#include "grpc++/grpc++.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/debug/debug_io_utils.h" #include "tensorflow/core/debug/debug_service.grpc.pb.h" #include "tensorflow/core/framework/tensor.h" diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc index 03a011f79e..9e8002d490 100644 --- a/tensorflow/core/debug/debug_io_utils.cc +++ b/tensorflow/core/debug/debug_io_utils.cc @@ -23,7 +23,7 @@ limitations under the License. 
#include #ifndef PLATFORM_WINDOWS -#include "grpc++/create_channel.h" +#include "grpcpp/create_channel.h" #else // winsock2.h is used in grpc, so Ws2_32.lib is needed #pragma comment(lib, "Ws2_32.lib") diff --git a/tensorflow/core/distributed_runtime/master_test.cc b/tensorflow/core/distributed_runtime/master_test.cc index 0826a90860..62b18a45b1 100644 --- a/tensorflow/core/distributed_runtime/master_test.cc +++ b/tensorflow/core/distributed_runtime/master_test.cc @@ -18,7 +18,7 @@ limitations under the License. #include #include -#include "grpc++/grpc++.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h" diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc index 4786c43ee2..b23466037f 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.h" -#include "grpc++/generic/generic_stub.h" +#include "grpcpp/generic/generic_stub.h" #include "tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_state.h" diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.cc index 3fd7deaa86..39ab6856c5 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.cc @@ -15,14 +15,14 @@ limitations under the License. 
#include "tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h" -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/channel_interface.h" -#include "grpc++/impl/codegen/client_unary_call.h" -#include "grpc++/impl/codegen/method_handler_impl.h" -#include "grpc++/impl/codegen/rpc_service_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/channel_interface.h" +#include "grpcpp/impl/codegen/client_unary_call.h" +#include "grpcpp/impl/codegen/method_handler_impl.h" +#include "grpcpp/impl/codegen/rpc_service_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/sync_stream.h" namespace tensorflow { namespace eager { diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h index d7b192ac85..66458186ad 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h @@ -16,14 +16,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_EAGER_GRPC_EAGER_SERVICE_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_EAGER_GRPC_EAGER_SERVICE_H_ -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/proto_utils.h" -#include "grpc++/impl/codegen/rpc_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/status.h" -#include "grpc++/impl/codegen/stub_options.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/proto_utils.h" +#include "grpcpp/impl/codegen/rpc_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/status.h" +#include "grpcpp/impl/codegen/stub_options.h" +#include "grpcpp/impl/codegen/sync_stream.h" #include "tensorflow/core/protobuf/eager_service.pb.h" diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h index 65550caf64..e94aedf535 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h @@ -16,9 +16,9 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_EAGER_GRPC_EAGER_SERVICE_IMPL_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_EAGER_GRPC_EAGER_SERVICE_IMPL_H_ -#include "grpc++/alarm.h" -#include "grpc++/completion_queue.h" -#include "grpc++/server_builder.h" +#include "grpcpp/alarm.h" +#include "grpcpp/completion_queue.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/distributed_runtime/eager/eager_service_impl.h" #include "tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_call.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_call.h b/tensorflow/core/distributed_runtime/rpc/grpc_call.h index ecad1274cc..90666def60 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_call.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_call.h @@ -20,9 +20,9 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" -#include "grpc++/grpc++.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/server_builder.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/server_builder.h" namespace tensorflow { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc index 613188244f..0ebc084cb6 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include #include -#include "grpc++/create_channel.h" +#include "grpcpp/create_channel.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.h b/tensorflow/core/distributed_runtime/rpc/grpc_channel.h index 48b9d958aa..4861cdb691 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.h @@ -22,7 +22,7 @@ limitations under the License. #include #include -#include "grpc++/grpc++.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h b/tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h index d367b83ee7..6e7f5dbd13 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_CLIENT_CQ_TAG_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_CLIENT_CQ_TAG_H_ -#include "grpc++/grpc++.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc index e025e555dd..127dea2882 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc @@ -30,8 +30,8 @@ limitations under the License. // RunGraph on workers. 
#include "tensorflow/core/distributed_runtime/rpc/grpc_master_service.h" -#include "grpc++/alarm.h" -#include "grpc++/server_builder.h" +#include "grpcpp/alarm.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/distributed_runtime/master.h" #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc index c832adbbbf..1cea1b1462 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.cc @@ -15,14 +15,14 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h" -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/channel_interface.h" -#include "grpc++/impl/codegen/client_unary_call.h" -#include "grpc++/impl/codegen/method_handler_impl.h" -#include "grpc++/impl/codegen/rpc_service_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/channel_interface.h" +#include "grpcpp/impl/codegen/client_unary_call.h" +#include "grpcpp/impl/codegen/method_handler_impl.h" +#include "grpcpp/impl/codegen/rpc_service_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/sync_stream.h" namespace tensorflow { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h index 8f1b589698..751f2633e7 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h @@ -16,14 +16,14 @@ limitations under the 
License. #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_MASTER_SERVICE_IMPL_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_MASTER_SERVICE_IMPL_H_ -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/proto_utils.h" -#include "grpc++/impl/codegen/rpc_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/status.h" -#include "grpc++/impl/codegen/stub_options.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/proto_utils.h" +#include "grpcpp/impl/codegen/rpc_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/status.h" +#include "grpcpp/impl/codegen/stub_options.h" +#include "grpcpp/impl/codegen/sync_stream.h" #include "tensorflow/core/protobuf/master.pb.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc index 1acf1fb4fc..6008462d04 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc @@ -17,8 +17,8 @@ limitations under the License. #include -#include "grpc++/generic/generic_stub.h" -#include "grpc++/grpc++.h" +#include "grpcpp/generic/generic_stub.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/common_runtime/process_util.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index e5ffb4ed2f..c0a9b43bf4 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -19,10 +19,10 @@ limitations under the License. 
#include #include -#include "grpc++/grpc++.h" -#include "grpc++/security/credentials.h" -#include "grpc++/server_builder.h" #include "grpc/support/alloc.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/security/credentials.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h index 0122df178a..b1c2eda0cf 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "grpc++/grpc++.h" -#include "grpc++/security/credentials.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/security/credentials.h" #include "tensorflow/core/common_runtime/process_util.h" #include "tensorflow/core/common_runtime/stats_publisher_interface.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_state.h b/tensorflow/core/distributed_runtime/rpc/grpc_state.h index 59dbb7ae04..61c5bc285f 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_state.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_state.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "grpc++/generic/generic_stub.h" -#include "grpc++/grpc++.h" +#include "grpcpp/generic/generic_stub.h" +#include "grpcpp/grpcpp.h" #include "tensorflow/core/distributed_runtime/call_options.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_client_cq_tag.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc index e51894b4c7..d0684f1833 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc @@ -14,8 +14,8 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h" -#include "grpc++/support/byte_buffer.h" -#include "grpc++/support/slice.h" +#include "grpcpp/support/byte_buffer.h" +#include "grpcpp/support/slice.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding_test.cc index 71f69e9024..7cace573e8 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding_test.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding_test.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h" -#include "grpc++/support/byte_buffer.h" -#include "grpc++/support/slice.h" +#include "grpcpp/support/byte_buffer.h" +#include "grpcpp/support/slice.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc index f247322bc4..e52b257411 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc @@ -16,9 +16,9 @@ limitations under the License. 
#include #include -#include "grpc++/grpc++.h" -#include "grpc++/security/credentials.h" -#include "grpc++/server_builder.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/security/credentials.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/distributed_runtime/server_lib.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc index e718db251c..33cbadda0a 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc @@ -15,9 +15,9 @@ limitations under the License. #include -#include "grpc++/grpc++.h" -#include "grpc++/security/credentials.h" -#include "grpc++/server_builder.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/security/credentials.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/distributed_runtime/server_lib.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_util.h b/tensorflow/core/distributed_runtime/rpc/grpc_util.h index 4b58781b54..45259aa2ec 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_util.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_util.h @@ -18,9 +18,9 @@ limitations under the License. 
#include -#include "grpc++/grpc++.h" -#include "grpc++/impl/codegen/proto_utils.h" -#include "grpc++/support/byte_buffer.h" +#include "grpcpp/grpcpp.h" +#include "grpcpp/impl/codegen/proto_utils.h" +#include "grpcpp/support/byte_buffer.h" #include "tensorflow/core/distributed_runtime/tensor_coding.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/stringprintf.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index aa9304a033..61f5369617 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -17,8 +17,8 @@ limitations under the License. #include -#include "grpc++/alarm.h" -#include "grpc++/server_builder.h" +#include "grpcpp/alarm.h" +#include "grpcpp/server_builder.h" #include "tensorflow/core/common_runtime/buf_rendezvous.h" #include "tensorflow/core/common_runtime/device.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc index 38cc2b81d3..72b5e77f1c 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.cc @@ -15,14 +15,14 @@ limitations under the License. 
#include "tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h" -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/channel_interface.h" -#include "grpc++/impl/codegen/client_unary_call.h" -#include "grpc++/impl/codegen/method_handler_impl.h" -#include "grpc++/impl/codegen/rpc_service_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/sync_stream.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/channel_interface.h" +#include "grpcpp/impl/codegen/client_unary_call.h" +#include "grpcpp/impl/codegen/method_handler_impl.h" +#include "grpcpp/impl/codegen/rpc_service_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/sync_stream.h" namespace tensorflow { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h index da270835bd..7915c3aafd 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h @@ -16,15 +16,15 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_WORKER_SERVICE_IMPL_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_GRPC_WORKER_SERVICE_IMPL_H_ -#include "grpc++/impl/codegen/async_stream.h" -#include "grpc++/impl/codegen/async_unary_call.h" -#include "grpc++/impl/codegen/proto_utils.h" -#include "grpc++/impl/codegen/rpc_method.h" -#include "grpc++/impl/codegen/service_type.h" -#include "grpc++/impl/codegen/status.h" -#include "grpc++/impl/codegen/stub_options.h" -#include "grpc++/impl/codegen/sync_stream.h" -#include "grpc++/support/byte_buffer.h" +#include "grpcpp/impl/codegen/async_stream.h" +#include "grpcpp/impl/codegen/async_unary_call.h" +#include "grpcpp/impl/codegen/proto_utils.h" +#include "grpcpp/impl/codegen/rpc_method.h" +#include "grpcpp/impl/codegen/service_type.h" +#include "grpcpp/impl/codegen/status.h" +#include "grpcpp/impl/codegen/stub_options.h" +#include "grpcpp/impl/codegen/sync_stream.h" +#include "grpcpp/support/byte_buffer.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/distributed_runtime/tensor_coding.h" -- GitLab From 3b81d6e6055c529c00a165fd8e3431a6ba704e8e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 13:14:59 -0700 Subject: [PATCH 073/365] Optimizing transpose_conv. 
PiperOrigin-RevId: 199839745 --- .../kernels/internal/optimized/optimized_ops.h | 8 ++++---- .../kernels/internal/reference/reference_ops.h | 8 ++++---- .../contrib/lite/kernels/transpose_conv.cc | 2 +- .../lite/kernels/transpose_conv_test.cc | 18 +++++++++--------- .../contrib/lite/toco/export_tensorflow.cc | 2 +- .../propagate_fixed_sizes.cc | 2 +- .../contrib/lite/toco/import_tensorflow.cc | 8 +++++--- tensorflow/contrib/lite/toco/model.h | 1 + tensorflow/contrib/lite/toco/tooling_util.cc | 15 +++++++-------- 9 files changed, 33 insertions(+), 31 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 0ce781db59..d2bee2cd70 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -6289,8 +6289,8 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, // To optimize, start by using the conv code with transposed weights for the // case of stride_height = stride_width = 1. 
const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); const int input_height = ArraySize(input_dims, 2); const int input_width = ArraySize(input_dims, 1); const int filter_height = ArraySize(filter_dims, 2); @@ -6337,8 +6337,8 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, float input_value = input_data[Offset(input_dims, in_channel, in_x, in_y, batch)]; float filter_value = - filter_data[Offset(filter_dims, out_channel, filter_x, - filter_y, in_channel)]; + filter_data[Offset(filter_dims, in_channel, filter_x, + filter_y, out_channel)]; output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] += input_value * filter_value; } diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 0b644a1fa6..c3f645bdf1 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -3810,8 +3810,8 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, int pad_height, float* output_data, const Dims<4>& output_dims) { const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); const int input_height = ArraySize(input_dims, 2); const int input_width = ArraySize(input_dims, 1); const int 
filter_height = ArraySize(filter_dims, 2); @@ -3851,8 +3851,8 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, float input_value = input_data[Offset(input_dims, in_channel, in_x, in_y, batch)]; float filter_value = - filter_data[Offset(filter_dims, out_channel, filter_x, - filter_y, in_channel)]; + filter_data[Offset(filter_dims, in_channel, filter_x, + filter_y, out_channel)]; output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] += input_value * filter_value; } diff --git a/tensorflow/contrib/lite/kernels/transpose_conv.cc b/tensorflow/contrib/lite/kernels/transpose_conv.cc index 3c99661029..e83b1ec987 100644 --- a/tensorflow/contrib/lite/kernels/transpose_conv.cc +++ b/tensorflow/contrib/lite/kernels/transpose_conv.cc @@ -79,7 +79,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Ensure that weights and inputs have the same channel dimension. // Note: TOCO will reorder weights in the following format: OHWI. TF_LITE_ENSURE_EQ(context, SizeOfDimension(input, 3), - SizeOfDimension(weights, 0)); + SizeOfDimension(weights, 3)); if (!IsConstantTensor(output_shape)) { SetTensorToDynamic(output); diff --git a/tensorflow/contrib/lite/kernels/transpose_conv_test.cc b/tensorflow/contrib/lite/kernels/transpose_conv_test.cc index 52be089349..55df897180 100644 --- a/tensorflow/contrib/lite/kernels/transpose_conv_test.cc +++ b/tensorflow/contrib/lite/kernels/transpose_conv_test.cc @@ -88,10 +88,10 @@ TEST(TransposeConvOpModelTest, SimpleTest) { // And filter value is derived by: // filter = tf.reshape(tf.transpose(filter, perm=[3, 0, 1, 2]), shape=[18, 1]) TEST(TransposeConvOpModelTest, TwoFiltersTest) { - TransposeConvOpModel m({1, 4, 4, 2}, {2, 3, 3, 1}, Padding_SAME, 1, 1); + TransposeConvOpModel m({1, 4, 4, 2}, {1, 3, 3, 2}, Padding_SAME, 1, 1); m.PopulateTensor(m.output_shape(), {1, 4, 4, 1}); - m.PopulateTensor(m.filter(), {1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, - 8, 10, 12, 14, 16, 18}); + 
m.PopulateTensor(m.filter(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18}); m.PopulateTensor( m.input(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -117,10 +117,10 @@ TEST(TransposeConvOpModelTest, TwoFiltersTest) { // And filter value is derived by: // filter = tf.reshape(tf.transpose(filter, perm=[3, 0, 1, 2]), shape=[1, 18]) TEST(TransposeConvOpModelTest, PaddingValidTest) { - TransposeConvOpModel m({1, 4, 4, 2}, {2, 3, 3, 1}, Padding_VALID, 1, 1); + TransposeConvOpModel m({1, 4, 4, 2}, {1, 3, 3, 2}, Padding_VALID, 1, 1); m.PopulateTensor(m.output_shape(), {1, 6, 6, 1}); - m.PopulateTensor(m.filter(), {1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, - 8, 10, 12, 14, 16, 18}); + m.PopulateTensor(m.filter(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18}); m.PopulateTensor( m.input(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, @@ -171,10 +171,10 @@ TEST(TransposeConvOpModelTest, StrideValidTest) { // [1, 2, 2, 1 ], // "VALID") TEST(TransposeConvOpModelTest, MultiChannelTest) { - TransposeConvOpModel m({1, 2, 2, 1}, {1, 3, 3, 2}, Padding_VALID, 2, 2); + TransposeConvOpModel m({1, 2, 2, 1}, {2, 3, 3, 1}, Padding_VALID, 2, 2); m.PopulateTensor(m.output_shape(), {1, 5, 5, 2}); - m.PopulateTensor(m.filter(), {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18}); + m.PopulateTensor(m.filter(), {1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, + 8, 10, 12, 14, 16, 18}); m.PopulateTensor(m.input(), {1, 2, 3, 4}); m.Invoke(); diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 76ce1c5802..c7c80ab21c 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -494,7 +494,7 @@ void ConvertTransposeConvOperator(const Model& model, const auto& weights_array = model.GetArray(weights_array_name); CHECK(weights_array.buffer->type == ArrayDataType::kFloat); ConvertFloatTensorConst(model, 
weights_array_name, AxesOrder::kOHWI, - AxesOrder::kHWIO, tensorflow_graph); + AxesOrder::kHWOI, tensorflow_graph); auto& strides = (*conv2d_op->mutable_attr())["strides"]; strides.mutable_list()->add_i(1); strides.mutable_list()->add_i(src_op.stride_height); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 9e4262223e..170a499d4e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -278,7 +278,7 @@ void ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { << "TransposeConv input shape must have 4 dimensions. Input \"" << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " << toco::ShapeToString(weights_shape) << "."; - CHECK_EQ(input_shape.dims(3), weights_shape.dims(0)) + CHECK_EQ(input_shape.dims(3), weights_shape.dims(3)) << "Input shape depth and weight depth do not agree"; // Set the output shape according to the specified output shape. diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 8dd43dda3e..a2241c85a7 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1445,11 +1445,13 @@ void ConvertTransposeConvOperator(const NodeDef& node, if (existing_transpose) { CHECK(existing_transpose->type == OperatorType::kTranspose); } else { - // Transpose weights from HWIO order to OHWI order, which is more efficient - // for computation + // Transpose weights from HWOI order to OHWI order, which is more efficient + // for computation. (Note that TensorFlow considers the order as HWIO + // because they consider this a backward conv, inverting the sense of + // input/output.) 
TransposeOperator* transpose = new TransposeOperator; string perm_array = CreateConstArray( - model, node.name() + "_transpose_perm", {3, 0, 1, 2}); + model, node.name() + "_transpose_perm", {2, 0, 1, 3}); transpose->inputs = {weights_name, perm_array}; transpose->outputs = {transposed_weights_name}; model->operators.emplace_back(transpose); diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 81beb29372..2ec36d27ef 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -155,6 +155,7 @@ enum class AxesOrder { k1HWO, // Our standard for DepthwiseConv weights kHWIM, // TensorFlow DepthwiseConv weights kNHWC, // TensorFlow activations + kHWOI, // TensorFlow back-prop conv weights }; // The type of the scalars in an array. diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 5a82be3939..810718f610 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1865,18 +1865,15 @@ void GetShuffleShape(AxesOrder input_axes_order, AxesOrder output_axes_order, output_axes_order == AxesOrder::kHWIO) { // 3210 <- 3210 // HWIO <- OHWI - (*shuffle)[0] = 1; - (*shuffle)[1] = 2; - (*shuffle)[2] = 3; - (*shuffle)[3] = 0; + *shuffle = {1, 2, 3, 0}; } else if (input_axes_order == AxesOrder::kHWIO && output_axes_order == AxesOrder::kOHWI) { // 3210 <- 3210 // OHWI <- HWIO - (*shuffle)[0] = 3; - (*shuffle)[1] = 0; - (*shuffle)[2] = 1; - (*shuffle)[3] = 2; + *shuffle = {3, 0, 1, 2}; + } else if (input_axes_order == AxesOrder::kOHWI && + output_axes_order == AxesOrder::kHWOI) { + *shuffle = {1, 2, 0, 3}; } else { LOG(FATAL) << "Bad shuffle"; } @@ -2022,6 +2019,8 @@ int AxesCount(AxesOrder axes_order) { return 4; case AxesOrder::kNHWC: return 4; + case AxesOrder::kHWOI: + return 4; default: LOG(FATAL) << "Bad AxesOrder"; return 0; -- GitLab From 70266a65f7fb1d58196eff5355f16d62aba64310 Mon Sep 17 
00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 13:58:59 -0700 Subject: [PATCH 074/365] Avoid compilation of nodes that forward tensor refs. PiperOrigin-RevId: 199846447 --- tensorflow/compiler/jit/BUILD | 1 + .../compiler/jit/mark_for_compilation_pass.cc | 22 ++----------------- tensorflow/compiler/jit/xla_cluster_util.cc | 22 +++++++++++++++++++ tensorflow/compiler/jit/xla_cluster_util.h | 3 +++ .../compiler/jit/xla_fusion_optimizer.cc | 7 ++++++ 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index ab8cd8f4bc..e2b614d91b 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -342,6 +342,7 @@ cc_library( "//tensorflow/compiler/jit/graphcycles", "//tensorflow/core:framework", "//tensorflow/core:graph", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:bounds_check", ], ) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 74468266b9..8c3882116d 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -44,12 +44,6 @@ namespace tensorflow { namespace { -// Returns true if, when executed in TensorFlow, `node` is guaranteed to forward -// a ref tensor input to its output. -static bool AlwaysForwardsRefInput(const Node& node) { - return node.IsIdentity(); -} - bool HasXLAKernel(const Node& node, const DeviceType& jit_device_type) { // There is a SymbolicGradient kernel on the XLA_JIT device, but the gradient // is really a kind of function call and will be handled by @@ -68,20 +62,8 @@ bool HasXLAKernel(const Node& node, const DeviceType& jit_device_type) { // XLA does not offer guaranteed aliasing between the input and output of the // XLA cluster so it can't implement the forward-tensor-ref semantic. Leave // such nodes out of XLA clusters. 
- if (AlwaysForwardsRefInput(node)) { - for (const Edge* incoming_edge : node.in_edges()) { - if (incoming_edge->IsControlEdge()) { - continue; - } - - Node* incoming_node = incoming_edge->src(); - if (IsRefType(incoming_node->output_type(incoming_edge->src_output()))) { - VLOG(2) << "Not clustering " << node.def().ShortDebugString() - << " because of ref input " << incoming_node->name() << " " - << incoming_node->type_string(); - return false; - } - } + if (HasForwardedRefInput(node)) { + return false; } return FindKernelDef(jit_device_type, node.def(), nullptr, nullptr).ok(); diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 70bd10336b..05b7821b88 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/graph/control_flow.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/util/device_name_utils.h" @@ -66,6 +67,9 @@ string DescribeCycle(const GraphCycles* cycles, const Graph& graph, int src, } return description; } + +bool AlwaysForwardsRefInput(const Node& node) { return node.IsIdentity(); } + } // namespace Status DeviceToDeviceType(const string& device, DeviceType* device_type) { @@ -77,6 +81,24 @@ Status DeviceToDeviceType(const string& device, DeviceType* device_type) { return Status::OK(); } +bool HasForwardedRefInput(const Node& node) { + if (AlwaysForwardsRefInput(node)) { + for (const Edge* incoming_edge : node.in_edges()) { + if (incoming_edge->IsControlEdge()) { + continue; + } + + Node* incoming_node = incoming_edge->src(); + if (IsRefType(incoming_node->output_type(incoming_edge->src_output()))) { + VLOG(2) << "Node " << node.def().ShortDebugString() << " has ref input " + << incoming_node->name() << " " << incoming_node->type_string(); + return true; + } + } + } + return false; +} + 
Status CreateCycleDetectionGraph(const Graph* graph, GraphCycles* cycles) { for (int i = 0; i < graph->num_node_ids(); ++i) { // We rely on the node IDs in the cycle detection graph being consecutive diff --git a/tensorflow/compiler/jit/xla_cluster_util.h b/tensorflow/compiler/jit/xla_cluster_util.h index 5b673bdc27..bcce082aaf 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.h +++ b/tensorflow/compiler/jit/xla_cluster_util.h @@ -36,6 +36,9 @@ using OrderedNodeSet = std::set; // Returns the DeviceType corresponding to 'device'. Status DeviceToDeviceType(const string& device, DeviceType* device_type); +// Returns true if `node` has a ref tensor input that it forwards to its output. +bool HasForwardedRefInput(const Node& node); + // Creates a graph representation to enable cycle detection when clustering. // This representation handles loops in graph by disconnecting each loop from // the enclosing graph. diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer.cc b/tensorflow/compiler/jit/xla_fusion_optimizer.cc index 96016521ea..74257b09a8 100644 --- a/tensorflow/compiler/jit/xla_fusion_optimizer.cc +++ b/tensorflow/compiler/jit/xla_fusion_optimizer.cc @@ -178,6 +178,13 @@ Status XlaFusionOptimizer::Optimize(grappler::Cluster* cluster, continue; } + // XLA does not offer guaranteed aliasing between the input and output of + // the XLA cluster so it can't implement the forward-tensor-ref semantic. + // Leave such nodes out of XLA clusters. + if (HasForwardedRefInput(*node)) { + continue; + } + compilation_candidates.insert(node); } -- GitLab From 77f0772c0ead3e1402615022649aad2a721265fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 14:14:49 -0700 Subject: [PATCH 075/365] Bugfix for dilated_conv optimizations. We were failing to create im2col arrays for dilated unstrided 1x1 cases. 
PiperOrigin-RevId: 199849200 --- tensorflow/contrib/lite/build_def.bzl | 2 +- tensorflow/contrib/lite/kernels/conv.cc | 4 +++- .../lite/toco/graph_transformations/create_im2col_arrays.cc | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 30bb604d17..612813caee 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -201,7 +201,7 @@ def generated_test_models(): "concat", "constant", "control_dep", - # "conv", + "conv", "depthwiseconv", "div", "equal", diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index ee42e5cdc8..747c8a62c0 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -134,7 +134,9 @@ static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context, // optimized_ops.h, in order to avoid a DCHECK(!im2col_data). data->need_im2col = (params->stride_width != 1 || params->stride_height != 1 || - filter_width != 1 || filter_height != 1); + params->dilation_width_factor != 1 || + params->dilation_height_factor != 1 || filter_width != 1 || + filter_height != 1); // If we're using the optimized multithreaded EigenTensor implementation of // convolution, it expects the filter weights to be transposed compared to // the normal TF Lite buffer format. 
Typical TF Lite weights are diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc index 076415ece8..8ca2cd66ac 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc @@ -46,8 +46,9 @@ bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) { const int kheight = weights_shape.dims(1); const int kwidth = weights_shape.dims(2); if (kwidth == 1 && kheight == 1 && conv_op->stride_width == 1 && - conv_op->stride_height == 1) { - // 1x1 unstrided conv does not need an im2col array. + conv_op->stride_height == 1 && conv_op->dilation_width_factor == 1 && + conv_op->dilation_height_factor == 1) { + // 1x1 unstrided undilated conv does not need an im2col array. return false; } -- GitLab From bc65583b2b4e3f48b6a724832ef96ab176666d33 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 14:58:43 -0700 Subject: [PATCH 076/365] Allow large allocations in toco. PiperOrigin-RevId: 199855838 --- tensorflow/contrib/lite/toco/model.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 2ec36d27ef..2f43adb07b 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -1644,8 +1644,8 @@ struct SparseToDenseOperator : Operator { // be used for the transient array at hand. The 'start' and 'end' values are // offsets from the start of the workspace buffer, expressed in bytes. 
struct Alloc { - int start = 0; - int end = 0; + int64 start = 0; + int64 end = 0; }; inline bool operator<(const Alloc& a, const Alloc& b) { -- GitLab From db717a72c20ab37974ec9076c8e406345c8776be Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Fri, 8 Jun 2018 15:06:47 -0700 Subject: [PATCH 077/365] [INTEL MKL] Enable compilation of TF without MKL ML dependency Closes #19808. PiperOrigin-RevId: 199857219 --- .../xla/service/cpu/runtime_matmul_mkl.cc | 2 +- .../core/common_runtime/mkl_cpu_allocator.h | 6 ++- .../core/kernels/batch_matmul_op_complex.cc | 2 +- .../core/kernels/batch_matmul_op_real.cc | 2 +- tensorflow/core/kernels/matmul_op.cc | 3 +- tensorflow/core/kernels/mkl_aggregate_ops.cc | 11 +++-- .../core/kernels/mkl_batch_matmul_op.cc | 2 +- tensorflow/core/kernels/mkl_concat_op.cc | 7 +-- .../core/kernels/mkl_conv_grad_bias_ops.cc | 2 + .../core/kernels/mkl_conv_grad_filter_ops.cc | 8 ++-- .../core/kernels/mkl_conv_grad_input_ops.cc | 2 + .../core/kernels/mkl_fused_batch_norm_op.cc | 8 ++-- tensorflow/core/kernels/mkl_identity_op.cc | 2 + .../core/kernels/mkl_input_conversion_op.cc | 6 +-- tensorflow/core/kernels/mkl_lrn_op.cc | 10 ++-- tensorflow/core/kernels/mkl_matmul_op.cc | 2 +- tensorflow/core/kernels/mkl_relu_op.cc | 7 +-- tensorflow/core/kernels/mkl_reshape_op.cc | 10 ++-- tensorflow/core/kernels/mkl_softmax_op.cc | 2 - tensorflow/core/kernels/mkl_tfconv_op.h | 2 + tensorflow/core/kernels/mkl_transpose_op.cc | 2 +- tensorflow/core/kernels/transpose_op.cc | 2 +- tensorflow/core/kernels/transpose_op.h | 4 +- tensorflow/core/util/mkl_util.h | 47 +++++++++++++------ 24 files changed, 94 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc index 92da5f71c2..f8c8dd5e93 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.cc @@ -13,7 +13,7 @@ See the License for the 
specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" #include "third_party/intel_mkl_ml/include/mkl_cblas.h" #include "third_party/intel_mkl_ml/include/mkl_service.h" diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 245320c896..29f702699f 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -29,7 +29,9 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" +#ifndef DO_NOT_USE_ML #include "i_malloc.h" +#endif #ifdef _WIN32 typedef unsigned int uint; @@ -97,14 +99,14 @@ class MklCPUAllocator : public VisitableAllocator { VLOG(1) << "MklCPUAllocator: Setting max_mem_bytes: " << max_mem_bytes; allocator_ = new BFCAllocator(new MklSubAllocator, max_mem_bytes, kAllowGrowth, kName); - +#ifndef DO_NOT_USE_ML // For redirecting all allocations from MKL to this allocator // From: http://software.intel.com/en-us/node/528565 i_malloc = MallocHook; i_calloc = CallocHook; i_realloc = ReallocHook; i_free = FreeHook; - +#endif return Status::OK(); } diff --git a/tensorflow/core/kernels/batch_matmul_op_complex.cc b/tensorflow/core/kernels/batch_matmul_op_complex.cc index 96216764fd..b77c80c01f 100644 --- a/tensorflow/core/kernels/batch_matmul_op_complex.cc +++ b/tensorflow/core/kernels/batch_matmul_op_complex.cc @@ -17,7 +17,7 @@ limitations under the License. 
namespace tensorflow { -#if !defined(INTEL_MKL) +#if !defined(INTEL_MKL) || defined(DO_NOT_USE_ML) TF_CALL_complex64(REGISTER_BATCH_MATMUL_CPU); TF_CALL_complex128(REGISTER_BATCH_MATMUL_CPU); #endif diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc index 87a0795f2f..fe259c1634 100644 --- a/tensorflow/core/kernels/batch_matmul_op_real.cc +++ b/tensorflow/core/kernels/batch_matmul_op_real.cc @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { -#if !defined(INTEL_MKL) +#if !defined(INTEL_MKL) || defined(DO_NOT_USE_ML) TF_CALL_float(REGISTER_BATCH_MATMUL_CPU); TF_CALL_double(REGISTER_BATCH_MATMUL_CPU); #endif diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc index f9c15ce6d7..fc3b3d3445 100644 --- a/tensorflow/core/kernels/matmul_op.cc +++ b/tensorflow/core/kernels/matmul_op.cc @@ -551,7 +551,8 @@ struct MatMulFunctor { .Label("cublas"), \ MatMulOp) -#if defined(INTEL_MKL) +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) + // MKL does not support half and int32 types for matrix-multiplication, so // register the kernel to use default Eigen based implementations for these // types. Registration for NO-LABEL version is in mkl_matmul_op.cc diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc index b539b00009..4ad858e4a9 100644 --- a/tensorflow/core/kernels/mkl_aggregate_ops.cc +++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc @@ -24,15 +24,16 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/platform/logging.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" using mkldnn::stream; using mkldnn::sum; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -333,7 +334,7 @@ class MklAddNOp : public OpKernel { if (!input1_in_mkl_format && src1_dims_size == 0) { Tensor* dst_tensor = nullptr; - MklShape mkl_shape_dst; + MklDnnShape mkl_shape_dst; mkl_shape_dst.SetMklTensor(false); AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor, src1_tensor.shape(), mkl_shape_dst); @@ -347,7 +348,7 @@ class MklAddNOp : public OpKernel { if (!input1_in_mkl_format && !input2_in_mkl_format) { if (src1_tensor.shape().num_elements() == 0) { Tensor* dst_tensor = nullptr; - MklShape mkl_shape_dst; + MklDnnShape mkl_shape_dst; mkl_shape_dst.SetMklTensor(false); AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor, src1_tensor.shape(), mkl_shape_dst); diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 723b445a75..45328b03d6 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -25,7 +25,7 @@ limitations under the License. #define EIGEN_USE_THREADS -#if defined(INTEL_MKL) +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) #include #include "mkl_cblas.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 9ab95d765c..5eeb23d810 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -26,16 +26,17 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" using mkldnn::concat; using mkldnn::stream; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index d23027a54d..c1da0ded1d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -38,8 +38,10 @@ limitations under the License. #include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" +#ifdef INTEL_MKL_ML #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#endif #include "tensorflow/core/util/mkl_util.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index e0706568b1..356eed8b67 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -38,9 +38,6 @@ limitations under the License. 
#include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" @@ -49,8 +46,13 @@ using mkldnn::convolution_backward_weights; using mkldnn::memory; using mkldnn::prop_kind; using mkldnn::stream; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index d203c04934..21b18f9119 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -23,8 +23,10 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include +#ifdef INTEL_MKL_ML #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#endif #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 62aafa7930..3fe660cf96 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -21,21 +21,21 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/util/tensor_format.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" - using mkldnn::batch_normalization_backward; using mkldnn::batch_normalization_forward; using mkldnn::prop_kind; using mkldnn::stream; using mkldnn::use_global_stats; using mkldnn::use_scale_shift; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" // TODO(inteltf) Address comments from PR 8968. namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc index 6c027f8e72..b02cc5384c 100644 --- a/tensorflow/core/kernels/mkl_identity_op.cc +++ b/tensorflow/core/kernels/mkl_identity_op.cc @@ -24,8 +24,10 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#ifdef INTEL_MKL_ML #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#endif #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index 663228722b..dc4da33a06 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -369,8 +369,8 @@ class MklInputConversionOp : public OpKernel { MklToTfOp::ConvertMklToTf(this, context, data_format_str, op_data_type, has_avx512f_, kInputIndex_1); - SetDummyMklShapeOutput(context, kInputIndex_0); - SetDummyMklShapeOutput(context, kInputIndex_1); + SetDummyMklDnnShapeOutput(context, kInputIndex_0); + SetDummyMklDnnShapeOutput(context, kInputIndex_1); return; } @@ -458,7 +458,7 @@ class MklInputConversionOp : public OpKernel { MklToTfOp::ConvertMklToTf(this, context, data_format_str, op_data_type, has_avx512f_, mkl_tensor_index); - SetDummyMklShapeOutput(context, 
mkl_tensor_index); + SetDummyMklDnnShapeOutput(context, mkl_tensor_index); // The tensor in TF format passes through ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index); diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index eef254cdad..dfe50e6a7f 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -22,8 +22,6 @@ limitations under the License. #define EIGEN_USE_THREADS #include -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -31,7 +29,6 @@ limitations under the License. #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" #if !defined(IS_MOBILE_PLATFORM) @@ -45,8 +42,13 @@ using mkldnn::lrn_backward; using mkldnn::lrn_forward; using mkldnn::prop_kind; using mkldnn::stream; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" + namespace tensorflow { namespace { @@ -1236,7 +1238,7 @@ class MklLRNGradOp : public OpKernel { auto activations = orig_output_tensor.shaped({nodes * batch, depth}); Tensor* output_dnn_data; - MklShape mkl_output_mkl_shape; + MklDnnShape mkl_output_mkl_shape; mkl_output_mkl_shape.SetMklTensor(false); mkl_output_mkl_shape.SetDimensions(4); AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data, diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl_matmul_op.cc index dfa6cecc9b..62c0404891 100644 --- a/tensorflow/core/kernels/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_matmul_op.cc @@ -23,7 +23,7 @@ limitations under the License. 
// and when it is undefined at build time, this file becomes an empty // compilation unit -#if defined(INTEL_MKL) +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) #include "mkl_cblas.h" #include "tensorflow/core/framework/op.h" diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 1ed43834dd..78abbdb730 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -23,9 +23,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" @@ -38,7 +35,11 @@ using mkldnn::prop_kind; using mkldnn::relu_backward; using mkldnn::relu_forward; using mkldnn::stream; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc index 2cfde1f6fd..c44a6f3477 100644 --- a/tensorflow/core/kernels/mkl_reshape_op.cc +++ b/tensorflow/core/kernels/mkl_reshape_op.cc @@ -24,15 +24,17 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" using mkldnn::stream; +#else +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #endif +#include "tensorflow/core/util/mkl_util.h" + namespace tensorflow { using CPUDevice = Eigen::ThreadPoolDevice; template @@ -250,7 +252,7 @@ class MklReshapeOp : public OpKernel { memory::primitive_desc(output_tf_md, cpu_engine); Tensor* output_tensor = nullptr; - MklShape mkl_shape_output; + MklDnnShape mkl_shape_output; mkl_shape_output.SetMklTensor(false); // We allocate output tensor in the shape expected by Reshape. AllocateOutputSetMklShape(context, kOutputSlotIdx, &output_tensor, diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index f79e18cff2..638392954e 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -25,8 +25,6 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/tensor_format.h" -#include "mkldnn.h" -#include "mkldnn_types.h" #include "tensorflow/core/util/mkl_util.h" #include "mkldnn.hpp" diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index 4120f013ac..7e8ed1b1d6 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -32,8 +32,10 @@ limitations under the License. 
#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/tensor_format.h" +#ifdef INTEL_MKL_ML #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#endif #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML diff --git a/tensorflow/core/kernels/mkl_transpose_op.cc b/tensorflow/core/kernels/mkl_transpose_op.cc index 3f07b317c4..b180c2ff20 100644 --- a/tensorflow/core/kernels/mkl_transpose_op.cc +++ b/tensorflow/core/kernels/mkl_transpose_op.cc @@ -15,7 +15,7 @@ limitations under the License. // See docs in ../ops/array_ops.cc. -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) #define EIGEN_USE_THREADS #include "mkl_trans.h" diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc index 7177ad7888..886b3e7492 100644 --- a/tensorflow/core/kernels/transpose_op.cc +++ b/tensorflow/core/kernels/transpose_op.cc @@ -218,7 +218,7 @@ Status ConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx, perm, out); } -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) #define REGISTER(T) \ REGISTER_KERNEL_BUILDER(Name("Transpose") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/transpose_op.h b/tensorflow/core/kernels/transpose_op.h index ae67592d04..709b0a92e9 100644 --- a/tensorflow/core/kernels/transpose_op.h +++ b/tensorflow/core/kernels/transpose_op.h @@ -42,7 +42,7 @@ class TransposeCpuOp : public TransposeOp { gtl::ArraySlice perm, Tensor* out) override; }; -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) class MklTransposeCpuOp : public TransposeOp { public: explicit MklTransposeCpuOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {} @@ -85,7 +85,7 @@ class ConjugateTransposeCpuOp : public TransposeOp { bool IsConjugate() const override { return true; } }; -#ifdef INTEL_MKL +#if defined(INTEL_MKL) && !defined(DO_NOT_USE_ML) class MklConjugateTransposeCpuOp : public TransposeOp { public: explicit 
MklConjugateTransposeCpuOp(OpKernelConstruction* ctx) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 8a3ece7b8c..dffc965b14 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -22,10 +22,13 @@ limitations under the License. #include #include +#ifdef INTEL_MKL_ML #include "mkl_dnn.h" #include "mkl_dnn_types.h" #include "mkl_service.h" #include "mkl_trans.h" +#endif + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" @@ -51,11 +54,12 @@ using mkldnn::reorder; typedef unsigned int uint; #endif -// The file contains a number of utility classes and functions used by MKL -// enabled kernels namespace tensorflow { +// The file contains a number of utility classes and functions used by MKL +// enabled kernels + // This class encapsulates all the meta data that is associated with an MKL // tensor. A tensor is an MKL tensor if it was created as the result of an // MKL operation, and did not go through a conversion to a standard @@ -71,6 +75,7 @@ typedef enum { Dim_I = 1 } MklDnnDims; +#ifdef INTEL_MKL_ML class MklShape { public: MklShape() {} @@ -331,7 +336,7 @@ class MklShape { nullptr; // TF dimension corresponding to this MKL dimension }; -#ifndef INTEL_MKL_ML +#else // Forward decl TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format); @@ -664,12 +669,14 @@ class MklDnnShape { // List of MklShape objects. Used in Concat/Split layers. -typedef std::vector MklShapeList; #ifndef INTEL_MKL_ML typedef std::vector MklDnnShapeList; +#else +typedef std::vector MklShapeList; #endif +#ifdef INTEL_MKL_ML // Check if all tensors specified by MklShapes are MKL tensors. 
inline bool AreAllMklTensors(const MklShapeList& shapes) { for (auto& s : shapes) { @@ -680,7 +687,6 @@ inline bool AreAllMklTensors(const MklShapeList& shapes) { return true; } -#ifdef INTEL_MKL_ML template inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, const MklShape& mkl_shape) { @@ -720,6 +726,7 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, #endif // Get the MKL shape from the second string tensor +#ifdef INTEL_MKL_ML inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) @@ -730,8 +737,7 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { .size() * sizeof(uint8)); } - -#ifndef INTEL_MKL_ML +#else inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) { mklshape->DeSerializeMklDnnShape( ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) @@ -805,6 +811,7 @@ inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) { } #endif +#ifdef INTEL_MKL_ML // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -820,7 +827,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, second_tensor->flat().size() * sizeof(uint8)); } -#ifndef INTEL_MKL_ML +#else // Allocate the second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -837,6 +844,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, } #endif +#ifdef INTEL_MKL_ML // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -857,7 +865,7 @@ inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, 
second_tensor->flat().size() * sizeof(uint8)); } -#ifndef INTEL_MKL_ML +#else // Allocate the output tensor, create a second output tensor that will contain // the MKL shape serialized inline void AllocateOutputSetMklShape(OpKernelContext* ctext, int n, @@ -892,8 +900,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, tf_shape, tensor_out)); *buf_out = static_cast(tensor_out->flat().data()); } -#endif - +#else inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, dnnLayout_t lt_buff, void** buf_out) { TensorShape tf_shape; @@ -907,6 +914,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, *buf_out = static_cast(tensor_out->flat().data()); } +#endif template inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, TensorShape tf_shape) { @@ -930,6 +938,7 @@ inline void GetStridesFromSizes(TensorFormat data_format, size_t* strides, } } +#ifdef INTEL_MKL_ML inline void MklSizesToTFSizes(OpKernelContext* context, TensorFormat data_format_, const MklShape& mkl_shape, @@ -955,6 +964,7 @@ inline void MklSizesToTFSizes(OpKernelContext* context, OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(sizes, tf_shape)); } +#endif inline int32 GetMklTensorDimIndex(char dimension) { switch (dimension) { @@ -972,12 +982,14 @@ inline int32 GetMklTensorDimIndex(char dimension) { } } +#ifdef INTEL_MKL_ML inline int64 GetMklTensorDim(const MklShape& mkl_shape, char dimension) { int index = GetMklTensorDimIndex(dimension); CHECK(index >= 0 && index < mkl_shape.GetDimension()) << "Invalid index from the dimension: " << index << ", " << dimension; return mkl_shape.dim_size(index); } +#endif inline void CopyMklTensorInToOut(OpKernelContext* context, int idx_in, int idx_out) { @@ -1097,6 +1109,14 @@ inline void ForwardMklTensorInToOut(OpKernelContext* context, int idx_in, } #ifndef INTEL_MKL_ML +// Set a dummy MKLDNN shape (called when the output is in TF format) +inline void 
SetDummyMklDnnShapeOutput(OpKernelContext* context, + uint32 idx_data_out) { + MklDnnShape mkl_shape_output; + mkl_shape_output.SetMklTensor(false); + AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output); +} + inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context, int idx_in, int idx_out, const MklDnnShape& mkl_shape) { @@ -1132,6 +1152,7 @@ inline void ForwardMklMetaDataInToOut(OpKernelContext* context, } } +#ifdef INTEL_MKL_ML // Set a dummy MKL shape (called when the output is in TF format) inline void SetDummyMklShapeOutput(OpKernelContext* context, uint32 idx_data_out) { @@ -1139,8 +1160,6 @@ inline void SetDummyMklShapeOutput(OpKernelContext* context, mkl_shape_output.SetMklTensor(false); AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output); } - -#ifdef INTEL_MKL_ML // We don't need these functions in MKLDNN. We have defined equality operator // on MklDnnShape class directly. @@ -1210,7 +1229,6 @@ inline bool MklCompareShapes(const TensorShape* input_shape_0, return true; } -#endif // These functions do not compile with MKL-DNN since mkl.h is missing. // We may need to remove them later. @@ -1248,6 +1266,7 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { } } +#endif // ------------------------------------------------------------------- #ifndef INTEL_MKL_ML -- GitLab From 60dccab365de5089dbf3a680b7234e5b158362cd Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 8 Jun 2018 15:18:23 -0700 Subject: [PATCH 078/365] [tf.data] Print an actionable warning when a lookup table is created in a function. 
PiperOrigin-RevId: 199859228 --- .../contrib/data/python/ops/grouping.py | 10 ++++++ .../contrib/data/python/ops/scan_ops.py | 2 ++ .../data/kernel_tests/map_dataset_op_test.py | 21 +++++++++++++ tensorflow/python/data/ops/dataset_ops.py | 31 +++++++++++++++++++ tensorflow/python/data/ops/readers.py | 3 ++ 5 files changed, 67 insertions(+) diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index ea229b5b27..520f784228 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -300,6 +300,7 @@ class GroupByReducerDataset(dataset_ops.Dataset): raise ValueError( "`key_func` must return a single tf.int64 tensor. " "Got type=%s and shape=%s" % (ret.dtype, ret.get_shape())) + dataset_ops._warn_if_collections("tf.contrib.data.group_by_reducer()") # pylint: disable=protected-access return ret self._key_func = tf_key_func @@ -327,6 +328,8 @@ class GroupByReducerDataset(dataset_ops.Dataset): self._state_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) + dataset_ops._warn_if_collections("tf.contrib.data.group_by_reducer()") # pylint: disable=protected-access + # Serialize any sparse tensors. ret = nest.pack_sequence_as( ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) @@ -398,6 +401,8 @@ class GroupByReducerDataset(dataset_ops.Dataset): nest.pack_sequence_as(self._state_types, [t.dtype for t in flat_new_state]))) + dataset_ops._warn_if_collections("tf.contrib.data.group_by_reducer()") # pylint: disable=protected-access + # Serialize any sparse tensors. ret = nest.pack_sequence_as( ret, @@ -464,6 +469,8 @@ class GroupByReducerDataset(dataset_ops.Dataset): self._output_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) + dataset_ops._warn_if_collections("tf.contrib.data.group_by_reducer()") # pylint: disable=protected-access + # Serialize any sparse tensors. 
ret = nest.pack_sequence_as( ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) @@ -525,6 +532,7 @@ class GroupByWindowDataset(dataset_ops.Dataset): if window_size.dtype != dtypes.int64: raise ValueError( "`window_size_func` must return a single tf.int64 tensor.") + dataset_ops._warn_if_collections("tf.contrib.data.group_by_window()") # pylint: disable=protected-access return window_size self._window_size_func = tf_window_size_func @@ -557,6 +565,7 @@ class GroupByWindowDataset(dataset_ops.Dataset): ret = ops.convert_to_tensor(ret, dtype=dtypes.int64) if ret.dtype != dtypes.int64: raise ValueError("`key_func` must return a single tf.int64 tensor.") + dataset_ops._warn_if_collections("tf.contrib.data.group_by_window()") # pylint: disable=protected-access return ret self._key_func = tf_key_func @@ -580,6 +589,7 @@ class GroupByWindowDataset(dataset_ops.Dataset): self._output_classes = output_dataset.output_classes self._output_types = output_dataset.output_types self._output_shapes = output_dataset.output_shapes + dataset_ops._warn_if_collections("tf.contrib.data.group_by_window()") # pylint: disable=protected-access return output_dataset._as_variant_tensor() # pylint: disable=protected-access self._reduce_func = tf_reduce_func diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index e911ad0fa0..9909ca8d9d 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -148,6 +148,8 @@ class _ScanDataset(dataset_ops.Dataset): self._output_types = nest.pack_sequence_as( output_value, [t.dtype for t in nest.flatten(output_value)]) + dataset_ops._warn_if_collections("tf.contrib.data.scan()") # pylint: disable=protected-access + # Serialize any sparse tensors. 
new_state = nest.pack_sequence_as(new_state, [ t for t in nest.flatten(sparse.serialize_sparse_tensors(new_state)) diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 1ad0b9de5e..768d4ac82c 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from collections import namedtuple import threading import time +import warnings import numpy as np @@ -638,6 +639,26 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testWarnOnLookupTable(self): + def collecting_function(x): + _ = lookup_ops.HashTable( + lookup_ops.KeyValueTensorInitializer([], []), 0.0, name="t1") + return x + + warnings.simplefilter("always") + with warnings.catch_warnings(record=True) as w: + _ = dataset_ops.Dataset.range(10).map(collecting_function) + # NOTE(mrry): Python 3 prints other warnings in addition to the one we are + # testing, so we search for the expected warning. + self.assertGreaterEqual(len(w), 1) + found_warning = False + for warning in w: + if ("Creating lookup tables inside a function passed to Dataset.map() is " + "not supported." 
in str(warning)): + found_warning = True + break + self.assertTrue(found_warning) + class MapDatasetBenchmark(test.Benchmark): diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 8b2a2e0a32..2ec6c6f154 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -19,6 +19,7 @@ from __future__ import print_function import abc import threading +import warnings import numpy as np import six @@ -1865,6 +1866,24 @@ def _should_unpack_args(args): return type(args) is tuple # pylint: disable=unidiomatic-typecheck +def _warn_if_collections(transformation_name): + """Prints warning message if the current graph uses common graph collections. + + NOTE(mrry): Currently a warning is only generated for lookup tables. Any + variables created will be automatically hoisted out to the outermost scope + using `init_scope()`. Some collections (such as for control-flow contexts) + are benign and should not generate a warning. + + Args: + transformation_name: A human-readable name for the transformation. + """ + if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS): + warnings.warn("Creating lookup tables inside a function passed to %s is not" + " supported. Create each table outside the function, and " + "capture it inside the function to use it." + % transformation_name) + + class MapDataset(Dataset): """A `Dataset` that maps a function over elements in its input.""" @@ -1924,6 +1943,8 @@ class MapDataset(Dataset): self._output_types = nest.pack_sequence_as( ret, [t.dtype for t in nest.flatten(ret)]) + _warn_if_collections("Dataset.map()") + # Serialize any sparse tensors. 
ret = nest.pack_sequence_as( ret, [t for t in nest.flatten(sparse.serialize_sparse_tensors(ret))]) @@ -2012,6 +2033,8 @@ class FlatMapDataset(Dataset): if not isinstance(dataset, Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + _warn_if_collections(self._transformation_name()) + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -2043,6 +2066,9 @@ class FlatMapDataset(Dataset): def output_types(self): return self._output_types + def _transformation_name(self): + return "Dataset.flat_map()" + class InterleaveDataset(FlatMapDataset): """A `Dataset` that maps a function over its input and interleaves the result. @@ -2068,6 +2094,9 @@ class InterleaveDataset(FlatMapDataset): output_shapes=nest.flatten( sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + def _transformation_name(self): + return "Dataset.interleave()" + class FilterDataset(Dataset): """A `Dataset` that filters its input according to a predicate function.""" @@ -2102,6 +2131,8 @@ class FilterDataset(Dataset): ret.shape.is_compatible_with(tensor_shape.scalar())): raise ValueError("`predicate` must return a scalar boolean tensor.") + _warn_if_collections("Dataset.filter()") + return ret self._predicate = tf_predicate diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index a73a8b5cdc..6a72ed380f 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -156,6 +156,9 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset): sparse.as_dense_shapes(self.output_shapes, self.output_classes))) # pylint: enable=protected-access + def _transformation_name(self): + return "tf.contrib.data.parallel_interleave()" + @tf_export("data.TFRecordDataset") class TFRecordDataset(dataset_ops.Dataset): -- GitLab From aba275157880076c8fe39c5ecac48741938223c5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 8 Jun 2018 15:21:34 -0700 Subject: [PATCH 079/365] Replace cout with VLOG(2). PiperOrigin-RevId: 199859711 --- tensorflow/core/grappler/optimizers/remapper.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index efd870b118..4dde7ed1b4 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -200,8 +200,8 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { - std::cout << "Optimizing fused batch norm node " << node.DebugString() - << std::endl; + VLOG(2) << "Optimizing fused batch norm node " << node.DebugString() + << std::endl; AddBatchNormNodes(optimized_graph, node); continue; } -- GitLab From c552838d342cb6e5243a88b9e08d38b95c2b2291 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 8 Jun 2018 15:39:25 -0700 Subject: [PATCH 080/365] Add TensorArrayGradWithShape op. 
PiperOrigin-RevId: 199862180 --- .../api_def_TensorArrayGradWithShape.pbtxt | 40 ++++++++++++++ .../api_def_TensorArrayGradWithShape.pbtxt | 4 ++ tensorflow/core/kernels/tensor_array.cc | 10 +++- tensorflow/core/kernels/tensor_array.h | 4 +- tensorflow/core/kernels/tensor_array_ops.cc | 46 +++++++++++++--- tensorflow/core/ops/data_flow_ops.cc | 44 +++++++++++++++ .../kernel_tests/tensor_array_ops_test.py | 54 +++++++++++++++++++ tensorflow/python/ops/tensor_array_grad.py | 1 + 8 files changed, 192 insertions(+), 11 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorArrayGradWithShape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorArrayGradWithShape.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_TensorArrayGradWithShape.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorArrayGradWithShape.pbtxt new file mode 100644 index 0000000000..dd37b94ffa --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_TensorArrayGradWithShape.pbtxt @@ -0,0 +1,40 @@ +op { + graph_op_name: "TensorArrayGradWithShape" + endpoint { + name: "TensorArrayGradWithShape" + } + in_arg { + name: "handle" + description: <