From 5e6b20e53720e8d00619d851ce983f8da77c5cf4 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 14:54:53 -0700 Subject: [PATCH 001/902] Deploy TensorFlow ecosystem jars --- tensorflow/java/maven/pom.xml | 10 +- tensorflow/java/maven/release.sh | 1 + tensorflow/java/maven/run_inside_container.sh | 42 ++++- .../pom-spark.xml.template | 19 +++ .../spark-tensorflow-connector/update.py | 152 ++++++++++++++++++ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 +++ .../java/maven/tensorflow-hadoop/update.py | 114 +++++++++++++ 7 files changed, 352 insertions(+), 4 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template create mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a09a5ea7c..21fed5a419 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0 + 1.8.0-SNAPSHOT pom https://www.tensorflow.org @@ -32,6 +32,8 @@ libtensorflow_jni_gpu tensorflow proto + tensorflow-hadoop + spark-tensorflow-connector ossrh - https://oss.sonatype.org/content/repositories/snapshots + https://tap.jfrog.io/tap/public-snapshots + ossrh @@ -74,6 +77,7 @@ + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 9012ea14ea..6c51029198 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,6 +48,7 @@ fi set -ex docker run \ + $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 6136ccfdfb..73f7ee94a0 100644 --- 
a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -32,11 +32,15 @@ if [[ "${TF_VERSION}" == *"-SNAPSHOT" ]]; then DEPLOY_BINTRAY="false" fi PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" +TF_ECOSYSTEM_URL="https://github.com/tensorflow/ecosystem.git" + if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then echo "Must deploy to at least one of Bintray or OSSRH" >&2 exit 2 fi +IS_SNAPSHOT="true" + set -ex clean() { @@ -183,6 +187,41 @@ generate_java_protos() { rm -rf "${DIR}/proto/tmp" } + +download_tf_ecosystem() { + ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" + HADOOP_DIR="${DIR}/tensorflow-hadoop" + SPARK_DIR="${DIR}/spark-tensorflow-connector" + + # Clean any previous attempts + rm -rf "${ECOSYSTEM_DIR}" + + # Clone the TensorFlow ecosystem project + mkdir -p "${ECOSYSTEM_DIR}" + cd "${ECOSYSTEM_DIR}" + git clone "${TF_ECOSYSTEM_URL}" + + # Copy the TensorFlow Hadoop source + cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" + python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ + --output_pom ${HADOOP_DIR}/pom.xml \ + --version ${TF_VERSION} + + # Copy the TensorFlow Spark connector source + cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" + python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ + --output_pom ${SPARK_DIR}/pom.xml \ + --version ${TF_VERSION} \ + --scala_version 2.11 + + # Cleanup + rm -rf "${ECOSYSTEM_DIR}" + + cd "${DIR}" +} + # Deploy artifacts using a specific profile. # Arguments: # profile - name of selected profile. @@ -240,7 +279,7 @@ cd "${DIR}" # Comment lines out appropriately if debugging/tinkering with the release # process. 
# gnupg2 is required for signing -apt-get -qq update && apt-get -qqq install -y gnupg2 +apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git clean update_version_in_pom download_libtensorflow @@ -248,6 +287,7 @@ download_libtensorflow_jni download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos +download_tf_ecosystem # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template new file mode 100644 index 0000000000..d8a3d559be --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template @@ -0,0 +1,19 @@ + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector_${scala_version} + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + + diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py new file mode 100644 index 0000000000..6185ccbb00 --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/update.py @@ -0,0 +1,152 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Spark connector pom from with deployment template. 
+ +The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import string +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" +SCALA_VERSION_TAG = "scala.binary.version" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--scala_version', + required=True, + choices=['2.10', '2.11'], + help='Scala version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_scala_version(tree, version, is_template=False): + """ Updates scala version in XML tree""" + + if is_template: + tag = "{%s}artifactId" % POM_NAMESPACE + nodes = tree.findall(tag) + + if nodes is None: + raise ValueError("Missing artifactId in template pom") + + for node in nodes: + template = string.Template(node.text) + + text = template.substitute({"scala_version": version}) + node.text = text + else: + # Update scala version property in pom + tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) + nodes = nodes = list(tree.iter(tag)) + + if len(nodes) == 0: + raise ValueError("Missing %s property in Spark connector pom") + + for node in nodes: + node.text = version + + return tree + + +def update_version(tree, version): + """ Updates 
version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Spark connector with deployment template. + + Modify the TensorFlow Spark connector pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Spark connector pom file. + + Return: + template_root: Root XML element with merged tree. + """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) + pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template new file mode 100644 index 0000000000..6a82c56cc7 --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template @@ -0,0 +1,18 @@ + + 4.0.0 + TensorFlow 
TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py new file mode 100644 index 0000000000..503062608d --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/update.py @@ -0,0 +1,114 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Hadoop pom from with deployment template. 
+ +The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_version(tree, version): + """ Updates version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Hadoop with deployment template. + + Modify the TensorFlow Hadoop pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Hadoop pom file. + + Return: + template_root: Root XML element with merged tree. 
+ """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) -- GitLab From f957cfbc4d27a57bf08d128b41042a16f1155ab0 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:40:20 -0700 Subject: [PATCH 002/902] Add TensorFlow ecosystem Spark and Hadoop jars to Maven deployment --- tensorflow/java/maven/README.md | 6 +++++ tensorflow/java/maven/pom.xml | 8 +++--- tensorflow/java/maven/release.sh | 1 - tensorflow/java/maven/run_inside_container.sh | 26 ++++++++++--------- .../maven/spark-tensorflow-connector/pom.xml | 24 +++++++++++++++++ .../java/maven/tensorflow-hadoop/pom.xml | 24 +++++++++++++++++ 6 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom.xml create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom.xml diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index c7e8f03806..fa756815a9 100644 --- a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -53,6 +53,12 @@ There are seven artifacts and thus `pom.xml`s involved in this release: 7. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. +8. `tensorflow-hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. 
+ The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/hadoop) + +9. `spark-tensorflow-connector`: A Scala library for loading and storing TensorFlow TFRecord + using Apache Spark DataFrames. The source code for this package is available + in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector) ## Updating the release diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 21fed5a419..7a95fb2556 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0-SNAPSHOT + 1.8.0 pom https://www.tensorflow.org @@ -46,8 +46,7 @@ ossrh - https://tap.jfrog.io/tap/public-snapshots - + https://oss.sonatype.org/content/repositories/snapshots ossrh @@ -77,7 +76,6 @@ - + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 6c51029198..9012ea14ea 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,7 +48,6 @@ fi set -ex docker run \ - $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 73f7ee94a0..3808104bc1 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -39,8 +39,6 @@ if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then exit 2 fi -IS_SNAPSHOT="true" - set -ex clean() { @@ -48,7 +46,9 @@ clean() { # (though if run inside a clean docker container, there won't be any dirty # artifacts lying around) mvn -q clean - rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target libtensorflow/src libtensorflow/target tensorflow-android/target + rm -rf 
libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target \ + libtensorflow/src libtensorflow/target tensorflow-android/target \ + tensorflow-hadoop/src spark-tensorflow-connector/src } update_version_in_pom() { @@ -188,6 +188,9 @@ generate_java_protos() { } +# Download the TensorFlow ecosystem source from git. +# The pom files from this repo do not inherit from the parent pom so the maven version +# is updated for each module. download_tf_ecosystem() { ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" HADOOP_DIR="${DIR}/tensorflow-hadoop" @@ -203,18 +206,15 @@ download_tf_ecosystem() { # Copy the TensorFlow Hadoop source cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" - python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ - --output_pom ${HADOOP_DIR}/pom.xml \ - --version ${TF_VERSION} + cp "${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml" "${HADOOP_DIR}" + cd "${HADOOP_DIR}" + update_version_in_pom # Copy the TensorFlow Spark connector source cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" - python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ - --output_pom ${SPARK_DIR}/pom.xml \ - --version ${TF_VERSION} \ - --scala_version 2.11 + cp "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml" "${SPARK_DIR}" + cd "${SPARK_DIR}" + update_version_in_pom # Cleanup rm -rf "${ECOSYSTEM_DIR}" @@ -280,6 +280,7 @@ cd "${DIR}" # process. 
# gnupg2 is required for signing apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git + clean update_version_in_pom download_libtensorflow @@ -288,6 +289,7 @@ download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos download_tf_ecosystem + # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml new file mode 100644 index 0000000000..8c962d111f --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml new file mode 100644 index 0000000000..ee90d8c92b --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + -- GitLab From 90b01f238d83d833ce9a843845dd96bb816a6c76 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:46:35 -0700 Subject: [PATCH 003/902] Delete templating approach for deploying TensorFlow ecosystem jars --- .../pom-spark.xml.template | 19 --- .../spark-tensorflow-connector/update.py | 152 ------------------ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 --- 
.../java/maven/tensorflow-hadoop/update.py | 114 ------------- 4 files changed, 303 deletions(-) delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template deleted file mode 100644 index d8a3d559be..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template +++ /dev/null @@ -1,19 +0,0 @@ - - 4.0.0 - TensorFlow TFRecord connector for Apache Spark DataFrames - spark-tensorflow-connector_${scala_version} - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - - diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py deleted file mode 100644 index 6185ccbb00..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/update.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Spark connector pom from with deployment template. 
- -The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import string -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" -SCALA_VERSION_TAG = "scala.binary.version" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--scala_version', - required=True, - choices=['2.10', '2.11'], - help='Scala version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_scala_version(tree, version, is_template=False): - """ Updates scala version in XML tree""" - - if is_template: - tag = "{%s}artifactId" % POM_NAMESPACE - nodes = tree.findall(tag) - - if nodes is None: - raise ValueError("Missing artifactId in template pom") - - for node in nodes: - template = string.Template(node.text) - - text = template.substitute({"scala_version": version}) - node.text = text - else: - # Update scala version property in pom - tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) - nodes = nodes = list(tree.iter(tag)) - - if len(nodes) == 0: - raise ValueError("Missing %s property in Spark connector pom") - - for node in nodes: - node.text = version - - return tree - - -def update_version(tree, version): - """ Updates 
version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Spark connector with deployment template. - - Modify the TensorFlow Spark connector pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Spark connector pom file. - - Return: - template_root: Root XML element with merged tree. - """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) - pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template deleted file mode 100644 index 6a82c56cc7..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template +++ /dev/null @@ -1,18 +0,0 @@ - - 4.0.0 - 
TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop - tensorflow-hadoop - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py deleted file mode 100644 index 503062608d..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/update.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Hadoop pom from with deployment template. 
- -The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_version(tree, version): - """ Updates version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Hadoop with deployment template. - - Modify the TensorFlow Hadoop pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Hadoop pom file. - - Return: - template_root: Root XML element with merged tree. 
- """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) -- GitLab From b01513202b657719589bb6f92256a0be5717dbc4 Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Tue, 1 May 2018 19:55:53 +0800 Subject: [PATCH 004/902] pip_package: modularize build script to allow distros to install more flexibly Gentoo Linux handles python modules slightly differently and packaging wheels is complicated. We prefer to run setup.py directly ourselves rather than build a wheel and then install from there. This modularizes build_pip_package.sh to allow running parts separately. using --src srcdir will prepare the package in a known dir so the distro package can take it from there. If only dstdir is given (either with --dst or as the only argument to preserve backwards compat) then behaviour is the same as before, the sources are prepared and the wheel is built and placed in dstdir. 
Signed-off-by: Jason Zaman --- .../tools/pip_package/build_pip_package.sh | 160 +++++++++++++----- 1 file changed, 115 insertions(+), 45 deletions(-) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 1a83c6e757..41e714b1c1 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -41,51 +41,15 @@ function is_windows() { fi } -function main() { +function prepare_src() { if [ $# -lt 1 ] ; then echo "No destination dir provided" exit 1 fi - DEST=$(real_path $1) - TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) - - PKG_NAME_FLAG="" - GPU_BUILD=0 - NIGHTLY_BUILD=0 - PROJECT_NAME="" - while true; do - if [[ "$1" == "--nightly_flag" ]]; then - NIGHTLY_BUILD=1 - elif [[ "$1" == "--gpu" ]]; then - GPU_BUILD=1 - elif [[ "$1" == "--gpudirect" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpudirect" - elif [[ "$1" == "--project_name" ]]; then - shift - if [[ -z "$1" ]]; then - break - fi - PROJECT_NAME="$1" - fi - shift - - if [[ -z "$1" ]]; then - break - fi - done - - if [[ -n ${PROJECT_NAME} ]]; then - PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" - elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly_gpu" - elif [[ ${NIGHTLY_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tf_nightly" - elif [[ ${GPU_BUILD} == "1" ]]; then - PKG_NAME_FLAG="--project_name tensorflow_gpu" - fi - - echo $(date) : "=== Using tmpdir: ${TMPDIR}" + TMPDIR="$1" + mkdir -p "$TMPDIR" + echo $(date) : "=== Preparing sources in dir: ${TMPDIR}" if [ ! -d bazel-bin/tensorflow ]; then echo "Could not find bazel-bin. Did you run from the root of the build tree?" @@ -157,17 +121,28 @@ function main() { # over so user defined ops can be compiled. 
mkdir -p ${TMPDIR}/google mkdir -p ${TMPDIR}/third_party - pushd ${RUNFILES%org_tensorflow} + pushd ${RUNFILES%org_tensorflow} > /dev/null for header in $(find protobuf_archive -name \*.h); do mkdir -p "${TMPDIR}/google/$(dirname ${header})" cp "$header" "${TMPDIR}/google/$(dirname ${header})/" done - popd + popd > /dev/null cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party cp tensorflow/tools/pip_package/MANIFEST.in ${TMPDIR} cp tensorflow/tools/pip_package/README ${TMPDIR} cp tensorflow/tools/pip_package/setup.py ${TMPDIR} +} + +function build_wheel() { + if [ $# -lt 2 ] ; then + echo "No src and dest dir provided" + exit 1 + fi + + TMPDIR="$1" + DEST="$2" + PKG_NAME_FLAG="$3" # Before we leave the top-level directory, make sure we know how to # call python. @@ -175,15 +150,110 @@ function main() { source tools/python_bin_path.sh fi - pushd ${TMPDIR} + pushd ${TMPDIR} > /dev/null rm -f MANIFEST echo $(date) : "=== Building wheel" "${PYTHON_BIN_PATH:-python}" setup.py bdist_wheel ${PKG_NAME_FLAG} >/dev/null mkdir -p ${DEST} cp dist/* ${DEST} - popd - rm -rf ${TMPDIR} + popd > /dev/null echo $(date) : "=== Output wheel file is in: ${DEST}" } +function usage() { + echo "Usage:" + echo "$0 [--src srcdir] [--dst dstdir] [options]" + echo "$0 dstdir [options]" + echo "" + echo " --src prepare sources in srcdir" + echo " will use temporary dir if not specified" + echo "" + echo " --dst build wheel in dstdir" + echo " if dstdir is not set do not build, only prepare sources" + echo "" + echo " Options:" + echo " --project_name set project name to name" + echo " --gpu build tensorflow_gpu" + echo " --gpudirect build tensorflow_gpudirect" + echo " --nightly_flag build tensorflow nightly" + echo "" + exit 1 +} + +function main() { + PKG_NAME_FLAG="" + PROJECT_NAME="" + GPU_BUILD=0 + NIGHTLY_BUILD=0 + SRCDIR="" + DSTDIR="" + CLEANSRC=1 + while true; do + if [[ "$1" == "--help" ]]; then + usage + exit 1 + elif [[ "$1" == "--nightly_flag" ]]; then + NIGHTLY_BUILD=1 + 
elif [[ "$1" == "--gpu" ]]; then + GPU_BUILD=1 + elif [[ "$1" == "--gpudirect" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpudirect" + elif [[ "$1" == "--project_name" ]]; then + shift + if [[ -z "$1" ]]; then + break + fi + PROJECT_NAME="$1" + elif [[ "$1" == "--src" ]]; then + shift + SRCDIR="$(real_path $1)" + CLEANSRC=0 + elif [[ "$1" == "--dst" ]]; then + shift + DSTDIR="$(real_path $1)" + else + DSTDIR="$(real_path $1)" + fi + shift + + if [[ -z "$1" ]]; then + break + fi + done + + if [[ -z "$DSTDIR" ]] && [[ -z "$SRCDIR" ]]; then + echo "No destination dir provided" + usage + exit 1 + fi + + if [[ -z "$SRCDIR" ]]; then + # make temp srcdir if none set + SRCDIR="$(mktemp -d -t tmp.XXXXXXXXXX)" + fi + + prepare_src "$SRCDIR" + + if [[ -z "$DSTDIR" ]]; then + # only want to prepare sources + exit + fi + + if [[ -n ${PROJECT_NAME} ]]; then + PKG_NAME_FLAG="--project_name ${PROJECT_NAME}" + elif [[ ${NIGHTLY_BUILD} == "1" && ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly_gpu" + elif [[ ${NIGHTLY_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tf_nightly" + elif [[ ${GPU_BUILD} == "1" ]]; then + PKG_NAME_FLAG="--project_name tensorflow_gpu" + fi + + build_wheel "$SRCDIR" "$DSTDIR" "$PKG_NAME_FLAG" + + if [[ $CLEANSRC -ne 0 ]]; then + rm -rf "${TMPDIR}" + fi +} + main "$@" -- GitLab From d3052b421960bb386a75448512974fb23e76186d Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Tue, 22 May 2018 10:42:31 -0700 Subject: [PATCH 005/902] Adding stop request capability to CheckpointSaverListener. An example usage of it is stopping training based on evaluation metrics. Example usage is as follows: my-estimator = tf.estimator.DNNClassifier(...) 
stopper = StopTrainingBasedOnEvaluateMetrics(my-estimator) my-estimator.train(..., saving_listeners=[stopper]) where: class StopTrainingBasedOnEvaluateMetrics(tf.train.CheckpointSaverListener): """A saver listener to run evaluate with every checkpoint.""" def __init__(self, estimator): self._estimator = estimator def after_save(self, session, global_step_value): eval_results = self._estimator.evaluate(...) if stop-if-started-overfitting(eval_results): return True PiperOrigin-RevId: 197586515 --- tensorflow/python/estimator/estimator_test.py | 1 + .../training/basic_session_run_hooks.py | 20 ++++++++++++++--- .../training/basic_session_run_hooks_test.py | 22 +++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 1b70189948..a9f20f7fa4 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -814,6 +814,7 @@ class EstimatorTrainTest(test.TestCase): def test_saving_listeners_are_used(self): listener = test.mock.Mock(spec=training.CheckpointSaverListener) + listener.after_save.return_value = None est = estimator.Estimator( model_fn=model_fn_global_step_incrementer, config=run_config.RunConfig(save_checkpoints_steps=10)) diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index df528d54d6..9b40817f55 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -336,6 +336,8 @@ class CheckpointSaverListener(object): def after_save(self, session, global_step_value): print('Done writing checkpoint.') + if decided_to_stop_training(): + return True def end(self, session, global_step_value): print('Done with the session.') @@ -354,6 +356,11 @@ class CheckpointSaverListener(object): implementors should implement the `end()` method to handle actions related 
to the last checkpoint save. But the listener should not act twice if `after_save()` already handled this last checkpoint save. + + A `CheckpointSaverListener` can request training to be stopped, by returning + True in `after_save`. Please note that, in replicated distributed training + setting, only `chief` should use this behavior. Otherwise each worker will do + their own evaluation, which may be wasteful of resources. """ def begin(self): @@ -453,7 +460,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): global_step = run_context.session.run(self._global_step_tensor) if self._timer.should_trigger_for_step(global_step): self._timer.update_last_triggered_step(global_step) - self._save(run_context.session, global_step) + if self._save(run_context.session, global_step): + run_context.request_stop() def end(self, session): last_step = session.run(self._global_step_tensor) @@ -463,7 +471,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): l.end(session, last_step) def _save(self, session, step): - """Saves the latest checkpoint.""" + """Saves the latest checkpoint, returns should_stop.""" logging.info("Saving checkpoints for %d into %s.", step, self._save_path) for l in self._listeners: @@ -475,8 +483,14 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): status=SessionLog.CHECKPOINT, checkpoint_path=self._save_path), step) + should_stop = False for l in self._listeners: - l.after_save(session, step) + if l.after_save(session, step): + logging.info( + "A CheckpointSaverListener requested that training be stopped. 
" + "listener: {}".format(l)) + should_stop = True + return should_stop def _get_saver(self): if self._saver is not None: diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index 7344ce2758..21c584f2ee 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -58,6 +58,7 @@ class MockCheckpointSaverListener( self.before_save_count = 0 self.after_save_count = 0 self.end_count = 0 + self.ask_for_stop = False def begin(self): self.begin_count += 1 @@ -67,6 +68,8 @@ class MockCheckpointSaverListener( def after_save(self, session, global_step): self.after_save_count += 1 + if self.ask_for_stop: + return True def end(self, session, global_step): self.end_count += 1 @@ -471,6 +474,25 @@ class CheckpointSaverHookTest(test.TestCase): 'end': 1 }, listener_counts) + def test_listener_stops_training_in_after_save(self): + with ops.Graph().as_default(): + scaffold = monitored_session.Scaffold() + variables.get_or_create_global_step() + train_op = training_util._increment_global_step(1) + listener = MockCheckpointSaverListener() + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, save_steps=1, scaffold=scaffold, listeners=[listener]) + with monitored_session.SingularMonitoredSession( + hooks=[hook], scaffold=scaffold, + checkpoint_dir=self.model_dir) as sess: + sess.run(train_op) + self.assertFalse(sess.should_stop()) + sess.run(train_op) + self.assertFalse(sess.should_stop()) + listener.ask_for_stop = True + sess.run(train_op) + self.assertTrue(sess.should_stop()) + def test_listener_with_default_saver(self): with ops.Graph().as_default(): global_step = variables.get_or_create_global_step() -- GitLab From dd2f3ebe3ede1e7b89819f40f53fdfb6c0433af0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 22 May 2018 11:02:30 -0700 Subject: [PATCH 006/902] * Remove the bias centering graph if it is turned off. * Create consts once. Otherwise each time the constant is passed to an Op, a new Const op is created. * Speed up the graph construction by using functions to build splits. PiperOrigin-RevId: 197590220 --- .../kernels/split_handler_ops.cc | 168 ++++++++++-------- .../learner/batch/ordinal_split_handler.py | 139 +++++++++++---- .../batch/ordinal_split_handler_test.py | 113 +++++++----- .../boosted_trees/ops/split_handler_ops.cc | 74 +++++--- .../python/ops/batch_ops_utils.py | 10 +- .../boosted_trees/python/ops/quantile_ops.py | 3 + .../python/training/functions/gbdt_batch.py | 113 +++++++----- 7 files changed, 382 insertions(+), 238 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 04e32267cc..401bec84a2 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -43,47 +43,60 @@ namespace { const int32 DUMMY_FEATURE_DIMENSION = -1; } // namespace -class BaseBuildSplitOp : public OpKernel { +class SplitBuilderState { public: - explicit BaseBuildSplitOp(OpKernelConstruction* const context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("feature_column_group_id", - &feature_column_group_id_)); + explicit SplitBuilderState(OpKernelContext* const context) { + const Tensor* l1_regularization_t; OP_REQUIRES_OK(context, - context->GetAttr("l1_regularization", &l1_regularization_)); + context->input("l1_regularization", &l1_regularization_t)); + const Tensor* l2_regularization_t; OP_REQUIRES_OK(context, - context->GetAttr("l2_regularization", &l2_regularization_)); - OP_REQUIRES_OK(context, context->GetAttr("tree_complexity_regularization", - &tree_complexity_regularization_)); + context->input("l2_regularization", 
&l2_regularization_t)); + const Tensor* tree_complexity_regularization_t; + OP_REQUIRES_OK(context, context->input("tree_complexity_regularization", + &tree_complexity_regularization_t)); + const Tensor* min_node_weight_t; OP_REQUIRES_OK(context, - context->GetAttr("min_node_weight", &min_node_weight_)); + context->input("min_node_weight", &min_node_weight_t)); - int strategy; - OP_REQUIRES_OK(context, context->GetAttr("multiclass_strategy", &strategy)); + const Tensor* feature_column_group_id_t; + OP_REQUIRES_OK(context, context->input("feature_column_group_id", + &feature_column_group_id_t)); + + const Tensor* multiclass_strategy_t; + OP_REQUIRES_OK( + context, context->input("multiclass_strategy", &multiclass_strategy_t)); + int strategy = multiclass_strategy_t->scalar()(); OP_REQUIRES( context, boosted_trees::learner::LearnerConfig_MultiClassStrategy_IsValid( strategy), errors::InvalidArgument("Wrong multiclass strategy passed.")); - multiclass_strategy_ = LearnerConfig_MultiClassStrategy(strategy); - } - NodeStats ComputeNodeStats(const GradientStats& grad_stats) { - return NodeStats(l1_regularization_, l2_regularization_, min_node_weight_, - multiclass_strategy_, grad_stats); - } + multiclass_strategy_ = LearnerConfig_MultiClassStrategy(strategy); - void ReadClassId(OpKernelContext* const context, int32* class_id) { const Tensor* class_id_t; OP_REQUIRES_OK(context, context->input("class_id", &class_id_t)); OP_REQUIRES(context, TensorShapeUtils::IsScalar(class_id_t->shape()), errors::InvalidArgument("class_id must be a scalar.")); - *class_id = class_id_t->scalar()(); + class_id_ = class_id_t->scalar()(); + + l1_regularization_ = l1_regularization_t->scalar()(); + l2_regularization_ = l2_regularization_t->scalar()(); + tree_complexity_regularization_ = + tree_complexity_regularization_t->scalar()(); + min_node_weight_ = min_node_weight_t->scalar()(); + feature_column_group_id_ = feature_column_group_id_t->scalar()(); + } + + NodeStats ComputeNodeStats(const 
GradientStats& grad_stats) { + return NodeStats(l1_regularization_, l2_regularization_, min_node_weight_, + multiclass_strategy_, grad_stats); } - void FillLeaf(const int class_id, const NodeStats& best_node_stats, + void FillLeaf(const NodeStats& best_node_stats, boosted_trees::trees::Leaf* leaf) const { - if (class_id == -1) { + if (class_id_ == -1) { // This would be the case either for TREE_PER_CLASS with only 2 classes, // or for other multiclass strategies. for (float f : best_node_stats.weight_contribution) { @@ -93,25 +106,31 @@ class BaseBuildSplitOp : public OpKernel { CHECK(best_node_stats.weight_contribution.size() == 1) << "Weight contribution size = " << best_node_stats.weight_contribution.size(); - leaf->mutable_sparse_vector()->add_index(class_id); + leaf->mutable_sparse_vector()->add_index(class_id_); leaf->mutable_sparse_vector()->add_value( best_node_stats.weight_contribution[0]); } } - protected: + int32 feature_column_group_id() { return feature_column_group_id_; } + float tree_complexity_regularization() { + return tree_complexity_regularization_; + } + + private: LearnerConfig_MultiClassStrategy multiclass_strategy_; - int32 feature_column_group_id_; float l1_regularization_; float l2_regularization_; - float min_node_weight_; float tree_complexity_regularization_; + float min_node_weight_; + int32 class_id_; + int32 feature_column_group_id_; }; -class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { +class BuildDenseInequalitySplitsOp : public OpKernel { public: explicit BuildDenseInequalitySplitsOp(OpKernelConstruction* const context) - : BaseBuildSplitOp(context) {} + : OpKernel(context) {} void Compute(OpKernelContext* const context) override { const Tensor* num_minibatches_t; @@ -139,9 +158,6 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); - int class_id; - ReadClassId(context, &class_id); - // Find the number of unique 
partitions before we allocate the output. std::vector partition_boundaries; partition_boundaries.push_back(0); @@ -185,6 +201,7 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { &output_splits_t)); tensorflow::TTypes::Vec output_splits = output_splits_t->vec(); + SplitBuilderState state(context); for (int root_idx = 0; root_idx < num_elements; ++root_idx) { float best_gain = std::numeric_limits::lowest(); int start_index = partition_boundaries[root_idx]; @@ -196,7 +213,7 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { GradientStats(*gradients_t, *hessians_t, bucket_idx); } root_gradient_stats *= normalizer_ratio; - NodeStats root_stats = ComputeNodeStats(root_gradient_stats); + NodeStats root_stats = state.ComputeNodeStats(root_gradient_stats); int32 best_bucket_idx = 0; NodeStats best_right_node_stats(0); NodeStats best_left_node_stats(0); @@ -206,10 +223,10 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { GradientStats g(*gradients_t, *hessians_t, bucket_idx); g *= normalizer_ratio; left_gradient_stats += g; - NodeStats left_stats = ComputeNodeStats(left_gradient_stats); + NodeStats left_stats = state.ComputeNodeStats(left_gradient_stats); GradientStats right_gradient_stats = root_gradient_stats - left_gradient_stats; - NodeStats right_stats = ComputeNodeStats(right_gradient_stats); + NodeStats right_stats = state.ComputeNodeStats(right_gradient_stats); if (left_stats.gain + right_stats.gain > best_gain) { best_gain = left_stats.gain + right_stats.gain; best_left_node_stats = left_stats; @@ -220,18 +237,18 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { SplitInfo split_info; auto* dense_split = split_info.mutable_split_node()->mutable_dense_float_binary_split(); - dense_split->set_feature_column(feature_column_group_id_); + dense_split->set_feature_column(state.feature_column_group_id()); dense_split->set_threshold( bucket_boundaries(bucket_ids(best_bucket_idx, 0))); auto* left_child = 
split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); - FillLeaf(class_id, best_left_node_stats, left_child); - FillLeaf(class_id, best_right_node_stats, right_child); + state.FillLeaf(best_left_node_stats, left_child); + state.FillLeaf(best_right_node_stats, right_child); split_info.SerializeToString(&output_splits(root_idx)); gains(root_idx) = - best_gain - root_stats.gain - tree_complexity_regularization_; + best_gain - root_stats.gain - state.tree_complexity_regularization(); output_partition_ids(root_idx) = partition_ids(start_index); } } @@ -239,13 +256,10 @@ class BuildDenseInequalitySplitsOp : public BaseBuildSplitOp { REGISTER_KERNEL_BUILDER(Name("BuildDenseInequalitySplits").Device(DEVICE_CPU), BuildDenseInequalitySplitsOp); -class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { +class BuildSparseInequalitySplitsOp : public OpKernel { public: explicit BuildSparseInequalitySplitsOp(OpKernelConstruction* const context) - : BaseBuildSplitOp(context) { - OP_REQUIRES_OK(context, - context->GetAttr("bias_feature_id", &bias_feature_id_)); - } + : OpKernel(context) {} void Compute(OpKernelContext* const context) override { const Tensor* num_minibatches_t; @@ -275,8 +289,10 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); - int class_id; - ReadClassId(context, &class_id); + const Tensor* bias_feature_id_t; + OP_REQUIRES_OK(context, + context->input("bias_feature_id", &bias_feature_id_t)); + int64 bias_feature_id = bias_feature_id_t->scalar()(); // For each partition (tree node), store starting index for each dimension. 
PartitionAndDimensionBoundaries partition_boundaries; @@ -354,6 +370,7 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { &output_splits_t)); tensorflow::TTypes::Vec output_splits = output_splits_t->vec(); + SplitBuilderState state(context); // For each tree node that needs to be split. for (int root_idx = 0; root_idx < num_elements; ++root_idx) { const auto& dimension_boundaries = @@ -372,7 +389,7 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { OP_REQUIRES( context, - bucket_ids_and_dimensions(bias_start_index, 0) == bias_feature_id_, + bucket_ids_and_dimensions(bias_start_index, 0) == bias_feature_id, errors::InvalidArgument("Bias feature ID missing.")); // Dimension for bias feature is always 0 @@ -388,7 +405,7 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { GradientStats root_gradient_stats(*gradients_t, *hessians_t, bias_start_index); root_gradient_stats *= normalizer_ratio; - NodeStats root_stats = ComputeNodeStats(root_gradient_stats); + NodeStats root_stats = state.ComputeNodeStats(root_gradient_stats); // Iterate through dimensions. for (int j = 0; j < dimension_boundaries.size() - 1; ++j) { @@ -408,7 +425,7 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { << bucket_ids_and_dimensions(start_index, 1) << " and for " << bucket_ids_and_dimensions(end_index - 1, 0) << " " << bucket_ids_and_dimensions(end_index - 1, 1); - if (bucket_ids_and_dimensions(start_index, 0) == bias_feature_id_) { + if (bucket_ids_and_dimensions(start_index, 0) == bias_feature_id) { // 0-dimension case which has a first bucket for catch all feature. 
CHECK(bucket_ids_and_dimensions(start_index, 1) == 0) << "Dimension of bias feature should be 0"; @@ -447,10 +464,10 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { present_gradient_stats - left_gradient_stats; { - NodeStats left_stats_default_left = - ComputeNodeStats(root_gradient_stats - right_gradient_stats); + NodeStats left_stats_default_left = state.ComputeNodeStats( + root_gradient_stats - right_gradient_stats); NodeStats right_stats_default_left = - ComputeNodeStats(right_gradient_stats); + state.ComputeNodeStats(right_gradient_stats); if (left_stats_default_left.gain + right_stats_default_left.gain > best_gain) { best_gain = @@ -466,9 +483,9 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { // enough missing examples. if (!fixed_default_direction) { NodeStats left_stats_default_right = - ComputeNodeStats(left_gradient_stats); - NodeStats right_stats_default_right = - ComputeNodeStats(root_gradient_stats - left_gradient_stats); + state.ComputeNodeStats(left_gradient_stats); + NodeStats right_stats_default_right = state.ComputeNodeStats( + root_gradient_stats - left_gradient_stats); if (left_stats_default_right.gain + right_stats_default_right.gain > best_gain) { best_gain = left_stats_default_right.gain + @@ -494,7 +511,7 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { ->mutable_sparse_float_binary_split_default_left() ->mutable_split(); } - dense_split->set_feature_column(feature_column_group_id_); + dense_split->set_feature_column(state.feature_column_group_id()); // Set the feature index for the best feature column. 
const int64 best_dimension_id = bucket_ids_and_dimensions(best_element_idx, 1); @@ -505,11 +522,11 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { auto* left_child = split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); - FillLeaf(class_id, best_left_node_stats, left_child); - FillLeaf(class_id, best_right_node_stats, right_child); + state.FillLeaf(best_left_node_stats, left_child); + state.FillLeaf(best_right_node_stats, right_child); split_info.SerializeToString(&output_splits(root_idx)); gains(root_idx) = - best_gain - root_stats.gain - tree_complexity_regularization_; + best_gain - root_stats.gain - state.tree_complexity_regularization(); output_partition_ids(root_idx) = partition_ids(bias_start_index); } } @@ -526,19 +543,14 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { // For each partition, store start indices of feature column dimensions. typedef std::vector> PartitionAndDimensionBoundaries; - - int64 bias_feature_id_; }; REGISTER_KERNEL_BUILDER(Name("BuildSparseInequalitySplits").Device(DEVICE_CPU), BuildSparseInequalitySplitsOp); -class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { +class BuildCategoricalEqualitySplitsOp : public OpKernel { public: explicit BuildCategoricalEqualitySplitsOp(OpKernelConstruction* const context) - : BaseBuildSplitOp(context) { - OP_REQUIRES_OK(context, - context->GetAttr("bias_feature_id", &bias_feature_id_)); - } + : OpKernel(context) {} void Compute(OpKernelContext* const context) override { const Tensor* num_minibatches_t; @@ -561,8 +573,10 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); - int class_id; - ReadClassId(context, &class_id); + const Tensor* bias_feature_id_t; + OP_REQUIRES_OK(context, + context->input("bias_feature_id", &bias_feature_id_t)); + int64 bias_feature_id = bias_feature_id_t->scalar()(); // Find the 
number of unique partitions before we allocate the output. std::vector partition_boundaries; @@ -605,16 +619,17 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { &output_splits_t)); tensorflow::TTypes::Vec output_splits = output_splits_t->vec(); + SplitBuilderState state(context); for (int root_idx = 0; root_idx < num_elements; ++root_idx) { float best_gain = std::numeric_limits::lowest(); int start_index = partition_boundaries[non_empty_partitions[root_idx]]; int end_index = partition_boundaries[non_empty_partitions[root_idx] + 1]; // First feature ID in each partition should be the bias feature. - OP_REQUIRES(context, feature_ids(start_index, 0) == bias_feature_id_, + OP_REQUIRES(context, feature_ids(start_index, 0) == bias_feature_id, errors::InvalidArgument("Bias feature ID missing.")); GradientStats root_gradient_stats(*gradients_t, *hessians_t, start_index); root_gradient_stats *= normalizer_ratio; - NodeStats root_stats = ComputeNodeStats(root_gradient_stats); + NodeStats root_stats = state.ComputeNodeStats(root_gradient_stats); int32 best_feature_idx = 0; NodeStats best_right_node_stats(0); NodeStats best_left_node_stats(0); @@ -625,8 +640,8 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { left_gradient_stats *= normalizer_ratio; GradientStats right_gradient_stats = root_gradient_stats - left_gradient_stats; - NodeStats left_stats = ComputeNodeStats(left_gradient_stats); - NodeStats right_stats = ComputeNodeStats(right_gradient_stats); + NodeStats left_stats = state.ComputeNodeStats(left_gradient_stats); + NodeStats right_stats = state.ComputeNodeStats(right_gradient_stats); if (left_stats.gain + right_stats.gain > best_gain) { best_gain = left_stats.gain + right_stats.gain; best_left_node_stats = left_stats; @@ -637,21 +652,18 @@ class BuildCategoricalEqualitySplitsOp : public BaseBuildSplitOp { SplitInfo split_info; auto* equality_split = split_info.mutable_split_node() ->mutable_categorical_id_binary_split(); - 
equality_split->set_feature_column(feature_column_group_id_); + equality_split->set_feature_column(state.feature_column_group_id()); equality_split->set_feature_id(feature_ids(best_feature_idx, 0)); auto* left_child = split_info.mutable_left_child(); auto* right_child = split_info.mutable_right_child(); - FillLeaf(class_id, best_left_node_stats, left_child); - FillLeaf(class_id, best_right_node_stats, right_child); + state.FillLeaf(best_left_node_stats, left_child); + state.FillLeaf(best_right_node_stats, right_child); split_info.SerializeToString(&output_splits(root_idx)); gains(root_idx) = - best_gain - root_stats.gain - tree_complexity_regularization_; + best_gain - root_stats.gain - state.tree_complexity_regularization(); output_partition_ids(root_idx) = partition_ids(start_index); } } - - private: - int64 bias_feature_id_; }; REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index f06b73c00d..23f4021c34 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -64,6 +64,8 @@ from __future__ import print_function import re from tensorflow.contrib.boosted_trees.lib.learner.batch import base_split_handler +from tensorflow.contrib.boosted_trees.python.ops import gen_quantile_ops +from tensorflow.contrib.boosted_trees.python.ops import gen_stats_accumulator_ops from tensorflow.contrib.boosted_trees.python.ops import quantile_ops from tensorflow.contrib.boosted_trees.python.ops import split_handler_ops from tensorflow.contrib.boosted_trees.python.ops import stats_accumulator_ops @@ -72,6 +74,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from 
tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -327,9 +330,6 @@ class SparseSplitHandler(InequalitySplitHandler): multiclass_strategy=multiclass_strategy, init_stamp_token=init_stamp_token, name=name) - # Register sparse_make_stats_update function as an Op to the graph. - g = ops.get_default_graph() - sparse_make_stats_update.add_to_graph(g) self._sparse_float_column = sparse_float_column def scheduled_reads(self): @@ -361,8 +361,8 @@ class SparseSplitHandler(InequalitySplitHandler): are_buckets_ready, buckets = scheduled_reads[0] with ops.name_scope(self._name, "SparseSplitHandler"): (quantile_indices, quantile_values, quantile_shapes, quantile_weights, - example_partition_ids, - feature_ids, gradients, hessians) = sparse_make_stats_update( + example_partition_ids, feature_ids, gradients, + hessians) = sparse_make_stats_update( is_active, are_buckets_ready, self._sparse_float_column.indices, self._sparse_float_column.values, self._sparse_float_column.dense_shape, buckets, @@ -379,42 +379,104 @@ class SparseSplitHandler(InequalitySplitHandler): def make_splits(self, stamp_token, next_stamp_token, class_id): """Create the best split using the accumulated stats and flush the state.""" + if (self._gradient_shape == tensor_shape.scalar() and + self._hessian_shape == tensor_shape.scalar()): + handler = make_sparse_split_scalar + else: + handler = make_sparse_split_tensor + + are_splits_ready, partition_ids, gains, split_infos = ( + handler(self._quantile_accumulator.resource(), + self._stats_accumulator.resource(), stamp_token, + next_stamp_token, self._multiclass_strategy, class_id, + self._feature_column_group_id, self._l1_regularization, + self._l2_regularization, self._tree_complexity_regularization, + self._min_node_weight)) + return are_splits_ready, partition_ids, gains, split_infos + + +def 
_specialize_sparse_split(is_multi_dimentional): + """Builds a specialized version of the function.""" + + def _make_sparse_split(quantile_accumulator_handle, stats_accumulator_handle, + stamp_token, next_stamp_token, multiclass_strategy, + class_id, feature_column_id, l1_regularization, + l2_regularization, tree_complexity_regularization, + min_node_weight, is_multi_dimentional): + """Function that builds splits for a sparse feature column.""" # Get the bucket boundaries are_splits_ready, buckets = ( - self._quantile_accumulator.get_buckets(stamp_token)) + gen_quantile_ops.quantile_accumulator_get_buckets( + quantile_accumulator_handles=[quantile_accumulator_handle], + stamp_token=stamp_token)) # After we receive the boundaries from previous iteration we can flush # the quantile accumulator. - with ops.control_dependencies([buckets]): - flush_quantiles = self._quantile_accumulator.flush( - stamp_token=stamp_token, next_stamp_token=next_stamp_token) - - with ops.device(None): - with ops.device(self._stats_accumulator.resource().device): - num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( - self._stats_accumulator.flush(stamp_token, next_stamp_token)) - - # Put quantile and stats accumulator flushing in the dependency path. - are_splits_ready = control_flow_ops.with_dependencies( - [flush_quantiles, partition_ids], are_splits_ready) - partition_ids, gains, split_infos = ( - split_handler_ops.build_sparse_inequality_splits( - num_minibatches=num_minibatches, - bucket_boundaries=buckets, - partition_ids=partition_ids, - bucket_ids=bucket_ids, - gradients=gradients, - hessians=hessians, - class_id=class_id, - feature_column_group_id=self._feature_column_group_id, - l1_regularization=self._l1_regularization, - l2_regularization=self._l2_regularization, - tree_complexity_regularization=self. 
- _tree_complexity_regularization, - min_node_weight=self._min_node_weight, - bias_feature_id=_BIAS_FEATURE_ID, - multiclass_strategy=self._multiclass_strategy)) - return (are_splits_ready, partition_ids, gains, split_infos) + with ops.control_dependencies([buckets[0]]): + flush_quantiles = gen_quantile_ops.quantile_accumulator_flush( + quantile_accumulator_handle=quantile_accumulator_handle, + stamp_token=stamp_token, + next_stamp_token=next_stamp_token) + + if is_multi_dimentional: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_tensor_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + else: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_scalar_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + + # Put quantile and stats accumulator flushing in the dependency path. + with ops.control_dependencies([flush_quantiles, partition_ids]): + are_splits_ready = array_ops.identity(are_splits_ready) + partition_ids, gains, split_infos = ( + split_handler_ops.build_sparse_inequality_splits( + num_minibatches=num_minibatches, + bucket_boundaries=buckets[0], + partition_ids=partition_ids, + bucket_ids=bucket_ids, + gradients=gradients, + hessians=hessians, + class_id=class_id, + feature_column_group_id=feature_column_id, + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, + min_node_weight=min_node_weight, + bias_feature_id=_BIAS_FEATURE_ID, + multiclass_strategy=multiclass_strategy)) + return are_splits_ready, partition_ids, gains, split_infos + + @function.Defun( + dtypes.resource, + dtypes.resource, + dtypes.int64, + dtypes.int64, + dtypes.int32, + dtypes.int32, + dtypes.int32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + dtypes.float32, + noinline=True) + def f(quantile_accumulator_handle, 
stats_accumulator_handle, stamp_token, + next_stamp_token, multiclass_strategy, class_id, feature_column_id, + l1_regularization, l2_regularization, tree_complexity_regularization, + min_node_weight): + """Function that builds splits for a sparse feature column.""" + return _make_sparse_split( + quantile_accumulator_handle, stats_accumulator_handle, stamp_token, + next_stamp_token, multiclass_strategy, class_id, feature_column_id, + l1_regularization, l2_regularization, tree_complexity_regularization, + min_node_weight, is_multi_dimentional) + + return f + + +make_sparse_split_scalar = _specialize_sparse_split(is_multi_dimentional=False) +make_sparse_split_tensor = _specialize_sparse_split(is_multi_dimentional=True) @function.Defun( @@ -540,8 +602,9 @@ def sparse_make_stats_update( empty_float = constant_op.constant([], dtype=dtypes.float32) handler_not_active = (constant_op.constant( - [], dtype=dtypes.int64, shape=[0, 2]), empty_float, constant_op.constant( - [0, 1], dtype=dtypes.int64), empty_float) + [], dtype=dtypes.int64, shape=[0, 2]), empty_float, + constant_op.constant([0, 1], dtype=dtypes.int64), + empty_float) handler_active = (sparse_column_indices, sparse_column_values, sparse_column_shape, weights) quantile_indices, quantile_values, quantile_shape, quantile_weights = ( diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py index 54d03018d9..c081a3f2c4 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.contrib.boosted_trees.lib.learner.batch import ordinal_split_handler from tensorflow.contrib.boosted_trees.proto import learner_pb2 from 
tensorflow.contrib.boosted_trees.proto import split_info_pb2 @@ -92,7 +94,9 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] + with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -105,7 +109,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -227,7 +231,9 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] + with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -240,7 +246,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -313,7 +319,8 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, 
True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -326,7 +333,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -396,7 +403,8 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, False])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -409,7 +417,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([False, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -470,7 +478,8 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), 
class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -483,7 +492,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -603,7 +612,8 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -616,7 +626,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -685,10 +695,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -713,8 +723,8 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): 
example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] - + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( 1, @@ -727,7 +737,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -811,10 +821,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -839,7 +849,8 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( @@ -853,7 +864,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), 
np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -905,10 +916,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -933,7 +944,8 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( @@ -947,7 +959,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -996,10 +1008,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -1024,7 +1036,8 @@ class 
SparseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, False])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( @@ -1038,7 +1051,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([False, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -1065,10 +1078,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -1096,7 +1109,8 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( @@ -1110,7 +1124,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, 
class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits @@ -1138,10 +1152,10 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): class_id = -1 split_handler = ordinal_split_handler.SparseSplitHandler( - l1_regularization=0, - l2_regularization=2, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0.0, + l2_regularization=2.0, + tree_complexity_regularization=0.0, + min_node_weight=0.0, epsilon=0.01, num_quantiles=2, feature_column_group_id=0, @@ -1166,7 +1180,8 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): example_weights, is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_1]): - are_splits_ready = split_handler.make_splits(0, 1, class_id)[0] + are_splits_ready = split_handler.make_splits( + np.int64(0), np.int64(1), class_id)[0] with ops.control_dependencies([are_splits_ready]): update_2 = split_handler.update_stats_sync( @@ -1180,7 +1195,7 @@ class SparseSplitHandlerTest(test_util.TensorFlowTestCase): is_active=array_ops.constant([True, True])) with ops.control_dependencies([update_2]): are_splits_ready2, partitions, gains, splits = ( - split_handler.make_splits(1, 2, class_id)) + split_handler.make_splits(np.int64(1), np.int64(2), class_id)) are_splits_ready, are_splits_ready2, partitions, gains, splits = ( sess.run([ are_splits_ready, are_splits_ready2, partitions, gains, splits diff --git a/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc index 5d0ebbf73c..ca5c7f3d8c 100644 --- a/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/split_handler_ops.cc @@ -23,12 +23,6 @@ using shape_inference::InferenceContext; using shape_inference::ShapeHandle; REGISTER_OP("BuildDenseInequalitySplits") - 
.Attr("feature_column_group_id: int") - .Attr("l1_regularization: float") - .Attr("l2_regularization: float") - .Attr("tree_complexity_regularization: float") - .Attr("min_node_weight: float") - .Attr("multiclass_strategy: int") .Input("num_minibatches: int64") .Input("partition_ids: int32") .Input("bucket_ids: int64") @@ -36,6 +30,12 @@ REGISTER_OP("BuildDenseInequalitySplits") .Input("hessians: float32") .Input("bucket_boundaries: float32") .Input("class_id: int32") + .Input("feature_column_group_id: int32") + .Input("l1_regularization: float") + .Input("l2_regularization: float") + .Input("tree_complexity_regularization: float") + .Input("min_node_weight: float") + .Input("multiclass_strategy: int32") .Output("output_partition_ids: int32") .Output("gains: float32") .Output("split_infos: string") @@ -73,6 +73,17 @@ bucket_ids: A rank 2 tensor of buckets IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. bucket_boundaries: A rank 1 tensor, thresholds that were used for bucketization. +class_id: A scalar, the class id for which we're building the splits. +feature_column_group_id: A scalar, the index of the feature we are spiltting on. +l1_regularization: A scalar, which specifies the l1 regularization term. +l2_regularization: A scalar, which specifies the l2 regularization term. +tree_complexity_regularization: A scalar, which specifies the tree complexity + regularization term. +min_node_weight: A scalar, minimum sum of example hessian needed in a child. + If a split results in a leaf node with a smaller value, the split will not + be considered. +multiclass_strategy: A scalar, specifying the multiclass handling strategy. + See LearnerConfig.MultiClassStrategy for valid values. output_partition_ids: A rank 1 tensor, the partition IDs that we created splits for. gains: A rank 1 tensor, for the computed gain for the created splits. 
@@ -81,13 +92,6 @@ split_infos: A rank 1 tensor of serialized protos which contains the )doc"); REGISTER_OP("BuildSparseInequalitySplits") - .Attr("feature_column_group_id: int") - .Attr("bias_feature_id: int") - .Attr("l1_regularization: float") - .Attr("l2_regularization: float") - .Attr("tree_complexity_regularization: float") - .Attr("min_node_weight: float") - .Attr("multiclass_strategy: int") .Input("num_minibatches: int64") .Input("partition_ids: int32") .Input("bucket_ids: int64") @@ -95,6 +99,13 @@ REGISTER_OP("BuildSparseInequalitySplits") .Input("hessians: float32") .Input("bucket_boundaries: float32") .Input("class_id: int32") + .Input("feature_column_group_id: int32") + .Input("bias_feature_id: int64") + .Input("l1_regularization: float") + .Input("l2_regularization: float") + .Input("tree_complexity_regularization: float") + .Input("min_node_weight: float") + .Input("multiclass_strategy: int32") .Output("output_partition_ids: int32") .Output("gains: float32") .Output("split_infos: string") @@ -133,6 +144,17 @@ bucket_ids: A rank 2 tensor of buckets IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. bucket_boundaries: A rank 1 tensor, thresholds that were used for bucketization. +class_id: A scalar, the class id for which we're building the splits. +feature_column_group_id: A scalar, the index of the feature we are spiltting on. +l1_regularization: A scalar, which specifies the l1 regularization term. +l2_regularization: A scalar, which specifies the l2 regularization term. +tree_complexity_regularization: A scalar, which specifies the tree complexity + regularization term. +min_node_weight: A scalar, minimum sum of example hessian needed in a child. + If a split results in a leaf node with a smaller value, the split will not + be considered. +multiclass_strategy: A scalar, specifying the multiclass handling strategy. + See LearnerConfig.MultiClassStrategy for valid values. 
output_partition_ids: A rank 1 tensor, the partition IDs that we created splits for. gains: A rank 1 tensor, for the computed gain for the created splits. @@ -141,19 +163,19 @@ split_infos: A rank 1 tensor of serialized protos which contains the )doc"); REGISTER_OP("BuildCategoricalEqualitySplits") - .Attr("feature_column_group_id: int") - .Attr("bias_feature_id: int") - .Attr("l1_regularization: float") - .Attr("l2_regularization: float") - .Attr("tree_complexity_regularization: float") - .Attr("min_node_weight: float") - .Attr("multiclass_strategy: int") .Input("num_minibatches: int64") .Input("partition_ids: int32") .Input("feature_ids: int64") .Input("gradients: float32") .Input("hessians: float32") .Input("class_id: int32") + .Input("feature_column_group_id: int32") + .Input("bias_feature_id: int64") + .Input("l1_regularization: float") + .Input("l2_regularization: float") + .Input("tree_complexity_regularization: float") + .Input("min_node_weight: float") + .Input("multiclass_strategy: int32") .Output("output_partition_ids: int32") .Output("gains: float32") .Output("split_infos: string") @@ -188,6 +210,17 @@ partition_ids: A rank 1 tensor of partition IDs. feature_ids: A rank 2 tensor of feature IDs and dimensions. gradients: A rank 1 tensor of gradients. hessians: A rank 1 tensor of hessians. +class_id: A scalar, the class id for which we're building the splits. +feature_column_group_id: A scalar, the index of the feature we are spiltting on. +l1_regularization: A scalar, which specifies the l1 regularization term. +l2_regularization: A scalar, which specifies the l2 regularization term. +tree_complexity_regularization: A scalar, which specifies the tree complexity + regularization term. +min_node_weight: A scalar, minimum sum of example hessian needed in a child. + If a split results in a leaf node with a smaller value, the split will not + be considered. +multiclass_strategy: A scalar, specifying the multiclass handling strategy. 
+ See LearnerConfig.MultiClassStrategy for valid values. output_partition_ids: A rank 1 tensor, the partition IDs that we created splits for. gains: A rank 1 tensor, for the computed gain for the created splits. @@ -196,4 +229,3 @@ split_infos: A rank 1 tensor of serialized protos which contains the )doc"); } // namespace tensorflow - // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/python/ops/batch_ops_utils.py b/tensorflow/contrib/boosted_trees/python/ops/batch_ops_utils.py index 7a5f329b7a..843420968a 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/batch_ops_utils.py +++ b/tensorflow/contrib/boosted_trees/python/ops/batch_ops_utils.py @@ -20,6 +20,8 @@ from __future__ import print_function import abc import collections +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -60,6 +62,7 @@ def _move_tensors(tensors, device): """Moves a list of tensors to a device by concatenating/splitting them.""" # Reset the device setting to avoid weird interactions with device merging # logic. 
+ zero = constant_op.constant(0, dtype=dtypes.int32) with ops.device(None): if all(tensor.shape == tensor_shape.scalar() for tensor in tensors): with ops.device(tensors[0].device): @@ -68,12 +71,11 @@ def _move_tensors(tensors, device): return array_ops.unstack(values) else: with ops.device(tensors[0].device): - sizes = array_ops.stack( - [array_ops.shape(tensor)[0] for tensor in tensors]) - values = array_ops.concat(tensors, axis=0) + sizes = array_ops.stack(array_ops.shape_n(tensors))[:, 0] + values = array_ops.concat(tensors, axis=zero) with ops.device(device): sizes = array_ops.unstack(sizes) - return list(array_ops.split(values, sizes, axis=0)) + return list(array_ops.split(values, sizes, axis=zero)) def _scheduled_stamp_resource_op_runner(batch, stamp): diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py index 50cc00afdc..19b6b3296d 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py @@ -201,3 +201,6 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): stamp_token=stamp_token, next_stamp_token=next_stamp_token) return result + + def resource(self): + return self._quantile_accumulator_handle diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 08c1dcdd02..c725f32b7c 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -180,8 +180,7 @@ def extract_features(features, feature_columns, use_core_columns): elif isinstance(fc, feature_column_lib._EmbeddingColumn): # pylint: enable=protected-access transformed_features[fc.name] = fc_core.input_layer( - features, [fc], - weight_collections=[scope]) + features, [fc], weight_collections=[scope]) else: result = 
feature_column_ops.transform_features(features, [fc]) if len(result) > 1: @@ -334,10 +333,12 @@ class GradientBoostedDecisionTreeModel(object): self._feature_columns = feature_columns self._learner_config_serialized = learner_config.SerializeToString() self._attempted_trees = variables.Variable( - initial_value=array_ops.zeros([], dtypes.int64), trainable=False, + initial_value=array_ops.zeros([], dtypes.int64), + trainable=False, name="attempted_trees") self._finalized_trees = variables.Variable( - initial_value=array_ops.zeros([], dtypes.int64), trainable=False, + initial_value=array_ops.zeros([], dtypes.int64), + trainable=False, name="finalized_trees") if not features: raise ValueError("Features dictionary must be specified.") @@ -354,9 +355,10 @@ class GradientBoostedDecisionTreeModel(object): self._sparse_int_indices = sparse_int_indices self._sparse_int_values = sparse_int_values self._sparse_int_shapes = sparse_int_shapes - self._reduce_dim = (self._learner_config.multi_class_strategy == - learner_pb2.LearnerConfig.TREE_PER_CLASS and - learner_config.num_classes == 2) + self._reduce_dim = ( + self._learner_config.multi_class_strategy == + learner_pb2.LearnerConfig.TREE_PER_CLASS and + learner_config.num_classes == 2) def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode): """Runs prediction and returns a dictionary of the prediction results. @@ -374,8 +376,8 @@ class GradientBoostedDecisionTreeModel(object): ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle, ensemble_stamp) num_handlers = ( - len(self._dense_floats) + len(self._sparse_float_shapes) + - len(self._sparse_int_shapes)) + len(self._dense_floats) + len(self._sparse_float_shapes) + len( + self._sparse_int_shapes)) # Used during feature selection. 
used_handlers = model_ops.tree_ensemble_used_handlers( ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers) @@ -432,8 +434,9 @@ class GradientBoostedDecisionTreeModel(object): # Use the current ensemble to predict on the current batch of input. # For faster prediction we check if the inputs are on the same device # as the model. If not, we create a copy of the model on the worker. - input_deps = (self._dense_floats + self._sparse_float_indices + - self._sparse_int_indices) + input_deps = ( + self._dense_floats + self._sparse_float_indices + + self._sparse_int_indices) if not input_deps: raise ValueError("No input tensors for prediction.") @@ -500,8 +503,9 @@ class GradientBoostedDecisionTreeModel(object): ValueError: if inputs are not valid. """ # Get the worker device from input dependencies. - input_deps = (self._dense_floats + self._sparse_float_indices + - self._sparse_int_indices) + input_deps = ( + self._dense_floats + self._sparse_float_indices + + self._sparse_int_indices) worker_device = input_deps[0].device # Get tensors relevant for training and form the loss. @@ -517,7 +521,7 @@ class GradientBoostedDecisionTreeModel(object): aggregation_method=None)[0] strategy = self._learner_config.multi_class_strategy - class_id = -1 + class_id = constant_op.constant(-1, dtype=dtypes.int32) # Handle different multiclass strategies. if strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS: # We build one vs rest trees. @@ -571,31 +575,39 @@ class GradientBoostedDecisionTreeModel(object): # Get the weights for each example for quantiles calculation, weights = self._get_weights(hessian_shape, squeezed_hessians) - regularization_config = self._learner_config.regularization - min_node_weight = self._learner_config.constraints.min_node_weight # Create all handlers ensuring resources are evenly allocated across PS. 
fc_name_idx = 0 handlers = [] init_stamp_token = constant_op.constant(0, dtype=dtypes.int64) + l1_regularization = constant_op.constant( + self._learner_config.regularization.l1, dtypes.float32) + l2_regularization = constant_op.constant( + self._learner_config.regularization.l2, dtypes.float32) + tree_complexity_regularization = constant_op.constant( + self._learner_config.regularization.tree_complexity, dtypes.float32) + min_node_weight = constant_op.constant( + self._learner_config.constraints.min_node_weight, dtypes.float32) + epsilon = 0.01 + num_quantiles = 100 + strategy_tensor = constant_op.constant(strategy) with ops.device(self._get_replica_device_setter(worker_device)): # Create handlers for dense float columns for dense_float_column_idx in range(len(self._dense_floats)): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.DenseSplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=dense_float_column_idx, - epsilon=0.01, - num_quantiles=100, + epsilon=epsilon, + num_quantiles=num_quantiles, dense_float_column=self._dense_floats[dense_float_column_idx], name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -604,14 +616,13 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.SparseSplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + 
l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=sparse_float_column_idx, - epsilon=0.01, - num_quantiles=100, + epsilon=epsilon, + num_quantiles=num_quantiles, sparse_float_column=sparse_tensor.SparseTensor( self._sparse_float_indices[sparse_float_column_idx], self._sparse_float_values[sparse_float_column_idx], @@ -619,7 +630,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -628,10 +639,9 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( categorical_split_handler.EqualitySplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=sparse_int_column_idx, sparse_int_column=sparse_tensor.SparseTensor( @@ -641,7 +651,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -694,11 +704,11 @@ class GradientBoostedDecisionTreeModel(object): name="continue_centering", trainable=False) stats_update_ops.append( - control_flow_ops.cond(continue_centering, - self._make_update_bias_stats_fn( - ensemble_stamp, predictions, gradients, - bias_stats_accumulator), - control_flow_ops.no_op)) + control_flow_ops.cond( + continue_centering, + 
self._make_update_bias_stats_fn(ensemble_stamp, predictions, + gradients, bias_stats_accumulator), + control_flow_ops.no_op)) # Update handler stats. handler_reads = collections.OrderedDict() @@ -720,8 +730,8 @@ class GradientBoostedDecisionTreeModel(object): shape=[len(handlers)], seed=[seed + 1, 1]) active_handlers = array_ops.stack( [active_handlers_current_layer, active_handlers_next_layer], axis=1) - active_handlers = (active_handlers < - self._learner_config.feature_fraction_per_level) + active_handlers = ( + active_handlers < self._learner_config.feature_fraction_per_level) elif subsampling_type == "feature_fraction_per_tree": seed = predictions_dict[NUM_TREES_ATTEMPTED] active_handlers_current_layer = stateless.stateless_random_uniform( @@ -729,9 +739,12 @@ class GradientBoostedDecisionTreeModel(object): active_handlers_current_layer = ( active_handlers_current_layer < self._learner_config.feature_fraction_per_tree) - active_handlers = array_ops.stack([ - active_handlers_current_layer, - array_ops.ones([len(handlers)], dtype=dtypes.bool)], axis=1) + active_handlers = array_ops.stack( + [ + active_handlers_current_layer, + array_ops.ones([len(handlers)], dtype=dtypes.bool) + ], + axis=1) else: active_handlers = array_ops.ones([len(handlers), 2], dtype=dtypes.bool) @@ -760,6 +773,7 @@ class GradientBoostedDecisionTreeModel(object): empty_hessians = constant_op.constant( [], dtype=dtypes.float32, shape=empty_hess_shape) + active_handlers = array_ops.unstack(active_handlers, axis=0) for handler_idx in range(len(handlers)): handler = handlers[handler_idx] is_active = active_handlers[handler_idx] @@ -971,7 +985,7 @@ class GradientBoostedDecisionTreeModel(object): # This is a workaround for the slowness of graph building in tf.cond. # See (b/36554864). 
split_sizes = array_ops.reshape( - array_ops.shape_n(partition_ids_list), [-1]) + array_ops.shape_n(partition_ids_list), [len(partition_ids_list)]) partition_ids = array_ops.concat(partition_ids_list, axis=0) gains = array_ops.concat(gains_list, axis=0) split_infos = array_ops.concat(split_info_list, axis=0) @@ -1036,8 +1050,11 @@ class GradientBoostedDecisionTreeModel(object): # Update ensemble. update_ops = [are_all_splits_ready] - update_model = control_flow_ops.cond(continue_centering, _center_bias_fn, - _grow_ensemble_fn) + if self._center_bias: + update_model = control_flow_ops.cond(continue_centering, + _center_bias_fn, _grow_ensemble_fn) + else: + update_model = _grow_ensemble_fn() update_ops.append(update_model) # Update ensemble stats. -- GitLab From 193dec8aec6624454ec0776dbeeaca31d5d0db95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 11:44:52 -0700 Subject: [PATCH 007/902] Actually return the value from train_and_evaluate. PiperOrigin-RevId: 197597953 --- tensorflow/python/estimator/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index dc5edc7c87..4f90bcf3a8 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -444,7 +444,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): 'For distributed training, there can only be one `evaluator` task ' '(with task id 0). Given task id {}'.format(config.task_id)) - executor.run() + return executor.run() class _StopAtSecsHook(session_run_hook.SessionRunHook): -- GitLab From b3e68e52f1f6488710c478596c30e0f0eb2dcad1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 22 May 2018 11:52:51 -0700 Subject: [PATCH 008/902] [XLA:GPU] Add lowering for input fusions with multiple reduce outputs This is limited to reduces that have the same shapes and reduced dimensions. 
Most of the code is making the individual emission code emit multiple reduction in the same loop. This requires multi-output fusion to provide a speedup. PiperOrigin-RevId: 197599248 --- .../compiler/xla/service/gpu/ir_emitter.h | 7 +- .../xla/service/gpu/ir_emitter_unnested.cc | 337 ++++++++++++------ .../xla/service/gpu/ir_emitter_unnested.h | 42 ++- .../xla/tests/multioutput_fusion_test.cc | 107 ++++++ 4 files changed, 360 insertions(+), 133 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index b0accc08d4..e55dfc6dae 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -120,10 +120,11 @@ class IrEmitter : public DfsHloVisitorWithDefault { llvm::Value* GetBasePointer(const HloInstruction& inst) const { return bindings_.GetBasePointer(inst); } - // A convenient helper for calling BufferAssignment::GetUniqueTopLevelSlice. - BufferAllocation::Slice GetAllocationSlice(const HloInstruction& hlo) const { + // A convenient helper for calling BufferAssignment::GetUniqueSlice. 
+ BufferAllocation::Slice GetAllocationSlice( + const HloInstruction& hlo, const ShapeIndex& index = {}) const { return ir_emitter_context_->buffer_assignment() - .GetUniqueTopLevelSlice(&hlo) + .GetUniqueSlice(&hlo, index) .ConsumeValueOrDie(); } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 55d4c1d13d..d07d197784 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -79,6 +79,7 @@ namespace { using llvm_ir::IrName; using tensorflow::gtl::ArraySlice; +using tensorflow::gtl::InlinedVector; using tensorflow::gtl::nullopt; using tensorflow::gtl::optional; using tensorflow::strings::StrCat; @@ -499,12 +500,24 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // initializes the output array to the initial value of the reduce. if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) { switch (root->opcode()) { + case HloOpcode::kTuple: case HloOpcode::kReduce: { VLOG(3) << "Emitting fused reduction to vector: " << fusion->ToString(); - TF_ASSIGN_OR_RETURN(std::unique_ptr initializer_thunk, - BuildInitializerThunk(fusion)); std::vector> thunks; - thunks.push_back(std::move(initializer_thunk)); + ArraySlice reduces = + root->opcode() == HloOpcode::kTuple + ? root->operands() + : ArraySlice(&root, 1); + + // For multi-output fusion emit an initializer for each tuple element. + // Otherwise it's sufficient to just initialize the single output. + for (int i = 0, e = reduces.size(); i != e; ++i) { + TF_ASSIGN_OR_RETURN( + std::unique_ptr initializer_thunk, + BuildInitializerThunk( + fusion, reduces[i] == root ? 
ShapeIndex() : ShapeIndex({i}))); + thunks.push_back(std::move(initializer_thunk)); + } thunks.push_back(BuildKernelThunk(fusion)); thunk_sequence_->emplace_back( MakeUnique(std::move(thunks), fusion)); @@ -518,11 +531,34 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { FusedIrEmitter fused_emitter(parameter_arrays, &elemental_emitter); TF_RETURN_IF_ERROR(root->Accept(&fused_emitter)); - Shape input_shape = root->operand(0)->shape(); - return EmitReductionToVector( - root, input_shape, fused_emitter.GetGenerator(root->operand(0)), - fused_emitter.GetGenerator(root->operand(1)), root->dimensions(), - root->to_apply()); + // For multi-output fusion CHECK the constraints and feed all the + // reduces into a single loop code generator. Single-output reduce + // fusion is a special case of that. + InlinedVector input_gens; + InlinedVector init_value_gens; + InlinedVector reducers; + for (const HloInstruction* reduce : reduces) { + CHECK_EQ(HloOpcode::kReduce, reduce->opcode()); + // TODO(kramerb): CHECK that layouts are equal. Currently this + // breaks multioutputfusion_test. The test has pre-fused + // instructions, but layout_assignment will not assign any layouts + // for instructions inside of a fused computation. It just removes + // the layouts instead. 
+ CHECK(ShapeUtil::Compatible(reduces[0]->shape(), reduce->shape())); + CHECK(ShapeUtil::Compatible(reduces[0]->operand(0)->shape(), + reduce->operand(0)->shape())); + CHECK(ShapeUtil::Compatible(reduces[0]->operand(1)->shape(), + reduce->operand(1)->shape())); + CHECK(reduces[0]->dimensions() == reduce->dimensions()); + input_gens.push_back(fused_emitter.GetGenerator(reduce->operand(0))); + init_value_gens.push_back( + fused_emitter.GetGenerator(reduce->operand(1))); + reducers.push_back(reduce->to_apply()); + } + const Shape& input_shape = reduces[0]->operand(0)->shape(); + return EmitReductionToVector(reduces[0], input_shape, input_gens, + init_value_gens, reduces[0]->dimensions(), + reducers); } default: LOG(FATAL) << "Bad opcode for input fusion: " @@ -909,8 +945,9 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) { Status IrEmitterUnnested::EmitReductionToScalar( HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, HloComputation* reducer) { + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers) { // Number of elements processed by a single thread. 
constexpr int64 kTileSize = 16; int64 num_elems = ShapeUtil::ElementsIn(input_shape); @@ -962,14 +999,19 @@ Status IrEmitterUnnested::EmitReductionToScalar( // auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + const int num_reduces = reducers.size(); llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_); - llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( - element_ir_type, /*ArraySize=*/nullptr, "partial_reduction_result"); - { - TF_ASSIGN_OR_RETURN(llvm::Value * init_ir_value, - init_value_gen(llvm_ir::IrArray::Index({}))); + std::vector partial_reduction_result_addresses; + for (int i = 0; i != num_reduces; ++i) { + llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( + element_ir_type, /*ArraySize=*/nullptr, + "partial_reduction_result." + llvm::Twine(i)); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](llvm_ir::IrArray::Index({}))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); + partial_reduction_result_addresses.push_back( + partial_reduction_result_address); } llvm::Value* x_in_tiles = tile_index[0]; @@ -1002,11 +1044,16 @@ Status IrEmitterUnnested::EmitReductionToScalar( llvm_ir::IrArray::Index input_index( /*linear=*/x, input_shape, &ir_builder_); llvm::Value* input_address = ir_builder_.CreateAlloca(element_ir_type); - TF_ASSIGN_OR_RETURN(llvm::Value * input_ir_value, input_gen(input_index)); - ir_builder_.CreateStore(input_ir_value, input_address); - return (EmitCallToNestedComputation( - *reducer, {partial_reduction_result_address, input_address}, - partial_reduction_result_address)); + for (int i = 0; i != num_reduces; ++i) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, + input_gens[i](input_index)); + ir_builder_.CreateStore(input_ir_value, input_address); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + 
{partial_reduction_result_addresses[i], input_address}, + partial_reduction_result_addresses[i])); + } + return Status::OK(); }; // x_end = kTileSize + x_in_tiles * kTileSize, i.e., the location that's @@ -1041,20 +1088,24 @@ Status IrEmitterUnnested::EmitReductionToScalar( : element_ir_type; for (int shuffle_distance = kWarpSize / 2; shuffle_distance >= 1; shuffle_distance /= 2) { - llvm::Value* partial_reduction_result = ir_builder_.CreateLoad( - ir_builder_.CreateBitCast(partial_reduction_result_address, - shuffle_ir_type->getPointerTo()), - "partial_reduction_result"); llvm::Value* result_from_other_lane = ir_builder_.CreateAlloca( element_ir_type, nullptr, "result_from_other_lane"); - ir_builder_.CreateStore( - EmitShuffleDown(partial_reduction_result, - ir_builder_.getInt32(shuffle_distance), &ir_builder_), - ir_builder_.CreateBitCast(result_from_other_lane, - shuffle_ir_type->getPointerTo())); - TF_RETURN_IF_ERROR(EmitCallToNestedComputation( - *reducer, {partial_reduction_result_address, result_from_other_lane}, - partial_reduction_result_address)); + for (int i = 0; i != num_reduces; ++i) { + llvm::Value* partial_reduction_result = ir_builder_.CreateLoad( + ir_builder_.CreateBitCast(partial_reduction_result_addresses[i], + shuffle_ir_type->getPointerTo()), + "partial_reduction_result"); + ir_builder_.CreateStore( + EmitShuffleDown(partial_reduction_result, + ir_builder_.getInt32(shuffle_distance), + &ir_builder_), + ir_builder_.CreateBitCast(result_from_other_lane, + shuffle_ir_type->getPointerTo())); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {partial_reduction_result_addresses[i], result_from_other_lane}, + partial_reduction_result_addresses[i])); + } } const HloInstruction* output = @@ -1070,14 +1121,25 @@ Status IrEmitterUnnested::EmitReductionToScalar( "lane_id_is_zero", &ir_builder_); llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &ir_builder_); - llvm::Value* output_address = - GetIrArray(*output, 
*output) - .EmitArrayElementAddress( - llvm_ir::IrArray::Index(/*linear=*/ir_builder_.getInt64(0), - output->shape(), &ir_builder_), - &ir_builder_, "output_element_address"); - return EmitAtomicOperationForNestedComputation( - *reducer, output_address, partial_reduction_result_address); + + for (int i = 0; i != num_reduces; ++i) { + ShapeIndex output_shape_index; + if (output->IsMultiOutputFusion()) { + output_shape_index = {i}; + } + llvm::Value* output_address = + GetIrArray(*output, *output, output_shape_index) + .EmitArrayElementAddress( + llvm_ir::IrArray::Index( + /*linear=*/ir_builder_.getInt64(0), + ShapeUtil::GetSubshape(output->shape(), + output_shape_index), + &ir_builder_), + &ir_builder_, "output_element_address"); + TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( + *reducers[i], output_address, partial_reduction_result_addresses[i])); + } + return Status::OK(); }; // Emit a parallel loop that iterates through all input tiles, one per thread. @@ -1097,8 +1159,9 @@ Status IrEmitterUnnested::EmitReductionToScalar( Status IrEmitterUnnested::EmitColumnReduction( int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, HloComputation* reducer) { + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers) { // Divide the input matrix into tiles of size Kx1. For example, when the // input matrix is 4x4 and K=2, the tiled matrix looks like // @@ -1140,15 +1203,20 @@ Status IrEmitterUnnested::EmitColumnReduction( // } auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + const int num_reduces = reducers.size(); // Emit the loop body that reduces one tile. 
llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_); - llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( - element_ir_type, /*ArraySize=*/nullptr, "partial_reduction_result"); - { - TF_ASSIGN_OR_RETURN(llvm::Value * init_ir_value, - init_value_gen(llvm_ir::IrArray::Index({}))); + std::vector partial_reduction_result_addresses; + for (int i = 0; i != num_reduces; ++i) { + llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( + element_ir_type, /*ArraySize=*/nullptr, + "partial_reduction_result." + llvm::Twine(i)); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](llvm_ir::IrArray::Index({}))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); + partial_reduction_result_addresses.push_back( + partial_reduction_result_address); } // Emit an inner for-loop that partially reduces the elements in the given @@ -1206,13 +1274,17 @@ Status IrEmitterUnnested::EmitColumnReduction( .SourceIndexOfTranspose(normalized_input_shape, input_shape, transpose_dimension_mapping, &ir_builder_); - TF_ASSIGN_OR_RETURN(llvm::Value * input_ir_value, - input_gen(input_index)); - ir_builder_.CreateStore(input_ir_value, input_address); + for (int i = 0; i != num_reduces; ++i) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, + input_gens[i](input_index)); + ir_builder_.CreateStore(input_ir_value, input_address); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {partial_reduction_result_addresses[i], input_address}, + partial_reduction_result_addresses[i])); + } + return Status::OK(); } - return (EmitCallToNestedComputation( - *reducer, {partial_reduction_result_address, input_address}, - partial_reduction_result_address)); }; // y_end = kTileSize + y_in_tiles * kTileSize, i.e., the y location that's @@ -1241,13 +1313,24 @@ Status IrEmitterUnnested::EmitColumnReduction( &ir_builder_); const HloInstruction* output = 
reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce; - llvm::Value* output_address = - GetIrArray(*output, *output) - .EmitArrayElementAddress( - llvm_ir::IrArray::Index(x, output->shape(), &ir_builder_), - &ir_builder_, "output_element_address"); - return EmitAtomicOperationForNestedComputation( - *reducer, output_address, partial_reduction_result_address); + for (int i = 0; i != num_reduces; ++i) { + ShapeIndex output_shape_index; + if (output->IsMultiOutputFusion()) { + output_shape_index = {i}; + } + llvm::Value* output_address = + GetIrArray(*output, *output, output_shape_index) + .EmitArrayElementAddress( + llvm_ir::IrArray::Index( + x, + ShapeUtil::GetSubshape(output->shape(), + output_shape_index), + &ir_builder_), + &ir_builder_, "output_element_address"); + TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( + *reducers[i], output_address, partial_reduction_result_addresses[i])); + } + return Status::OK(); }; // Emit a parallel loop that iterate through all input tiles. @@ -1267,8 +1350,10 @@ Status IrEmitterUnnested::EmitColumnReduction( Status IrEmitterUnnested::EmitRowReduction( int64 depth, int64 height, int64 width, HloInstruction* reduce, - const Shape& input_shape, const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, HloComputation* reducer) { + const Shape& input_shape, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers) { // A naive algorithm is: // 1. Divide the input tensor into tiles of size 1x1xK. // 2. Partially reduces each tile to a scalar using one thread. @@ -1358,15 +1443,20 @@ Status IrEmitterUnnested::EmitRowReduction( auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + const int num_reduces = reducers.size(); // Emit the loop body that reduces one tile. 
llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType( input_shape.element_type(), ir_emitter_context_->llvm_module()); - llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( - element_ir_type, /*ArraySize=*/nullptr, "partial_reduction_result"); - { - TF_ASSIGN_OR_RETURN(llvm::Value * init_ir_value, - init_value_gen(llvm_ir::IrArray::Index({}))); + std::vector partial_reduction_result_addresses; + for (int i = 0; i != num_reduces; ++i) { + llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( + element_ir_type, /*ArraySize=*/nullptr, + "partial_reduction_result." + llvm::Twine(i)); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](llvm_ir::IrArray::Index({}))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); + partial_reduction_result_addresses.push_back( + partial_reduction_result_address); } // Emit an inner for-loop that partially reduces the elements in the given @@ -1449,13 +1539,17 @@ Status IrEmitterUnnested::EmitRowReduction( .SourceIndexOfTranspose(normalized_input_shape, input_shape, transpose_dimension_mapping, &ir_builder_); - TF_ASSIGN_OR_RETURN(llvm::Value * input_ir_value, - input_gen(input_index)); - ir_builder_.CreateStore(input_ir_value, input_address); + for (int i = 0; i != num_reduces; ++i) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, + input_gens[i](input_index)); + ir_builder_.CreateStore(input_ir_value, input_address); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {partial_reduction_result_addresses[i], input_address}, + partial_reduction_result_addresses[i])); + } + return Status::OK(); } - return EmitCallToNestedComputation( - *reducer, {partial_reduction_result_address, input_address}, - partial_reduction_result_address); }; llvm::Value* tile_in_bounds = ir_builder_.CreateOr( @@ -1483,20 +1577,24 @@ Status IrEmitterUnnested::EmitRowReduction( : element_ir_type; for (int shuffle_distance = 16; 
shuffle_distance >= 1; shuffle_distance /= 2) { - llvm::Value* partial_reduction_result = ir_builder_.CreateLoad( - ir_builder_.CreateBitCast(partial_reduction_result_address, - shuffle_ir_type->getPointerTo()), - "partial_reduction_result"); llvm::Value* result_from_other_lane = ir_builder_.CreateAlloca( element_ir_type, nullptr, "result_from_other_lane"); - ir_builder_.CreateStore( - EmitShuffleDown(partial_reduction_result, - ir_builder_.getInt32(shuffle_distance), &ir_builder_), - ir_builder_.CreateBitCast(result_from_other_lane, - shuffle_ir_type->getPointerTo())); - TF_RETURN_IF_ERROR(EmitCallToNestedComputation( - *reducer, {partial_reduction_result_address, result_from_other_lane}, - partial_reduction_result_address)); + for (int i = 0; i != num_reduces; ++i) { + llvm::Value* partial_reduction_result = ir_builder_.CreateLoad( + ir_builder_.CreateBitCast(partial_reduction_result_addresses[i], + shuffle_ir_type->getPointerTo()), + "partial_reduction_result"); + ir_builder_.CreateStore( + EmitShuffleDown(partial_reduction_result, + ir_builder_.getInt32(shuffle_distance), + &ir_builder_), + ir_builder_.CreateBitCast(result_from_other_lane, + shuffle_ir_type->getPointerTo())); + TF_RETURN_IF_ERROR(EmitCallToNestedComputation( + *reducers[i], + {partial_reduction_result_addresses[i], result_from_other_lane}, + partial_reduction_result_addresses[i])); + } } const HloInstruction* output = @@ -1510,13 +1608,24 @@ Status IrEmitterUnnested::EmitRowReduction( "lane_id_is_zero", &ir_builder_); llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &ir_builder_); - llvm::Value* output_address = - GetIrArray(*output, *output) - .EmitArrayElementAddress( - llvm_ir::IrArray::Index(y, output->shape(), &ir_builder_), - &ir_builder_, "output_element_address"); - return EmitAtomicOperationForNestedComputation( - *reducer, output_address, partial_reduction_result_address); + for (int i = 0; i != num_reduces; ++i) { + ShapeIndex output_shape_index; + if 
(output->IsMultiOutputFusion()) { + output_shape_index = {i}; + } + llvm::Value* output_address = + GetIrArray(*output, *output, output_shape_index) + .EmitArrayElementAddress( + llvm_ir::IrArray::Index( + y, + ShapeUtil::GetSubshape(output->shape(), + output_shape_index), + &ir_builder_), + &ir_builder_, "output_element_address"); + TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( + *reducers[i], output_address, partial_reduction_result_addresses[i])); + } + return Status::OK(); }; // Emit a parallel loop that iterates through every input tiles. @@ -1543,10 +1652,10 @@ Status IrEmitterUnnested::EmitRowReduction( // elementwise. Status IrEmitterUnnested::EmitReductionToVector( HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, tensorflow::gtl::ArraySlice dimensions_to_reduce, - HloComputation* reducer) { + tensorflow::gtl::ArraySlice reducers) { // This emission requires "reduce" to have an input layout. It is either set // by LayoutAssignment (for a top-level kReduce) or by InstructionFusion (for // a fused kReduce). @@ -1581,8 +1690,8 @@ Status IrEmitterUnnested::EmitReductionToVector( // `EmitReductionToVector`, we only need to check whether the minormost // dimension of the input is to keep. if (input_dims_to_keep.empty()) { - return EmitReductionToScalar(reduce, input_shape, input_gen, init_value_gen, - reducer); + return EmitReductionToScalar(reduce, input_shape, input_gens, + init_value_gens, reducers); } else if (input_dims_to_keep.front() == LayoutUtil::Minor(input_shape.layout(), 0)) { // Column reduction. 
Treat the result of "input" as a matrix whose width @@ -1599,8 +1708,8 @@ Status IrEmitterUnnested::EmitReductionToVector( height *= input_shape.dimensions(input_dim); } } - return EmitColumnReduction(height, width, reduce, input_shape, input_gen, - init_value_gen, reducer); + return EmitColumnReduction(height, width, reduce, input_shape, input_gens, + init_value_gens, reducers); } else { // Reduce the row dimension of a matrix or reduce dimension 0 and 2 in a // 3D tensor. The size of dimension 1 (the height) is the size of the @@ -1626,7 +1735,7 @@ Status IrEmitterUnnested::EmitReductionToVector( } const int64 height = ShapeUtil::ElementsIn(reduce->shape()); return EmitRowReduction(depth, height, width, reduce, input_shape, - input_gen, init_value_gen, reducer); + input_gens, init_value_gens, reducers); } } @@ -1650,16 +1759,15 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { MakeUnique(std::move(thunks), reduce)); return EmitReductionToVector( - reduce, input->shape(), - [&](const llvm_ir::IrArray::Index& index) { + reduce, input->shape(), {[&](const llvm_ir::IrArray::Index& index) { return GetIrArray(*input, *reduce) .EmitReadArrayElement(index, &ir_builder_); - }, - [&](const llvm_ir::IrArray::Index& index) { + }}, + {[&](const llvm_ir::IrArray::Index& index) { return GetIrArray(*init_value, *reduce) .EmitReadArrayElement(index, &ir_builder_); - }, - dimensions_to_reduce, reducer); + }}, + dimensions_to_reduce, {reducer}); } thunk_sequence_->emplace_back(BuildKernelThunk(reduce)); @@ -2324,7 +2432,7 @@ std::unique_ptr IrEmitterUnnested::BuildFftThunk( } StatusOr> IrEmitterUnnested::BuildInitializerThunk( - const HloInstruction* hlo) { + const HloInstruction* hlo, const ShapeIndex& index) { bool fused = HloOpcode::kFusion == hlo->opcode(); const HloInstruction* inst = fused ? 
hlo->fused_expression_root() : hlo; const HloInstruction* init_value = [&] { @@ -2333,6 +2441,11 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( return inst->operand(2); case HloOpcode::kReduce: return inst->operand(1); + case HloOpcode::kTuple: + CHECK(hlo->IsMultiOutputFusion() && + inst->operand(index.back())->opcode() == HloOpcode::kReduce); + // For multi-output fusion look through the tuple. + return inst->operand(index.back())->operand(1); default: LOG(FATAL) << "Opcode " << inst->opcode() << " should not need an initializer."; @@ -2356,7 +2469,7 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( ArraySlice literal_bytes( reinterpret_cast(literal.untyped_data()), num_bytes); if (c_all_of(literal_bytes, [](uint8 byte) { return byte == 0; })) { - return {MakeUnique(GetAllocationSlice(*hlo), hlo)}; + return {MakeUnique(GetAllocationSlice(*hlo, index), hlo)}; } // If the literal is 8 or 16 bits wide, we can emit a 32-bit memset by @@ -2372,8 +2485,8 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( pattern16 = literal_bytes.front(); } uint32 pattern32 = uint32{pattern16} | (uint32{pattern16} << 16); - return {MakeUnique(pattern32, - GetAllocationSlice(*hlo), hlo)}; + return {MakeUnique( + pattern32, GetAllocationSlice(*hlo, index), hlo)}; } // If the literal is an even multiple of 32 bits wide, we can emit a 32-bit @@ -2383,8 +2496,8 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( literal_bytes.size() - 4) == 0) { uint32 word; memcpy(&word, literal_bytes.data(), sizeof(word)); - return {MakeUnique(word, GetAllocationSlice(*hlo), - hlo)}; + return {MakeUnique( + word, GetAllocationSlice(*hlo, index), hlo)}; } } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 14780de96d..a1d4dca5e0 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -110,28 +110,31 @@ class 
IrEmitterUnnested : public IrEmitter { // `EmitReductionToVector`. Note that input shape might not be // [height x width], but can be bitcast to [height x weight] with "height" // being the major dimension. - Status EmitColumnReduction(int64 height, int64 width, HloInstruction* reduce, - const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, - HloComputation* reducer); + Status EmitColumnReduction( + int64 height, int64 width, HloInstruction* reduce, + const Shape& input_shape, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers); // Emits code that reduces a 3D tensor of shape [depth x height x width] to a // vector of shape [height]. Other parameters have the same meaning as those // of `EmitReductionToVector`. Note that input shape might not be // [depth x height x width], but can be bitcast to [depth x height x weight] // with "depth" being the most major dimension. - Status EmitRowReduction(int64 depth, int64 height, int64 width, - HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, - HloComputation* reducer); + Status EmitRowReduction( + int64 depth, int64 height, int64 width, HloInstruction* reduce, + const Shape& input_shape, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers); // Emits code that reduces a tensor of arbitrary rank to a scalar. 
- Status EmitReductionToScalar(HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, - HloComputation* reducer); + Status EmitReductionToScalar( + HloInstruction* reduce, const Shape& input_shape, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, + tensorflow::gtl::ArraySlice reducers); // Figures out whether `reduce` is a row or column reduction, and which // dimensions to reduce, and calls either `EmitRowReduction` or @@ -141,13 +144,16 @@ class IrEmitterUnnested : public IrEmitter { // generate elements of the input and the initial value. Other parameters mean // the same as for `HandleReduce`. // + // Multiple reduces can be emitted in the same loop, assuming they have the + // same input and output shapes, and the same reduce dimensions. + // // Prerequisite: `IsReductionToVector(*reduce)` Status EmitReductionToVector( HloInstruction* reduce, const Shape& input_shape, - const llvm_ir::ElementGenerator& input_gen, - const llvm_ir::ElementGenerator& init_value_gen, + tensorflow::gtl::ArraySlice input_gens, + tensorflow::gtl::ArraySlice init_value_gens, tensorflow::gtl::ArraySlice dimensions_to_reduce, - HloComputation* reducer); + tensorflow::gtl::ArraySlice reducers); // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned @@ -166,7 +172,7 @@ class IrEmitterUnnested : public IrEmitter { // Returns a thunk that, given a reduce or select-and-scatter op, initializes // its memory to the appropriate initial value. StatusOr> BuildInitializerThunk( - const HloInstruction* hlo); + const HloInstruction* hlo, const ShapeIndex& index = {}); // Returns a thunk that calls host-to-device cuMemcpy to implement `inst`. 
std::unique_ptr BuildHostToDeviceCopyThunk(const HloInstruction* inst); diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index ec7ca20bdf..3cbb2452fb 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -273,5 +273,112 @@ XLA_TEST_F(MultiOutputFusionTest, MultiOutputLoopFeedingMap) { *result, *Literal::CreateR1({0.0, 4.0, 9.0}))); } +const char* const kScalarOps = R"( + HloModule m + + Add { + lhsadd = f32[] parameter(0) + rhsadd = f32[] parameter(1) + ROOT add = f32[] add(lhsadd, rhsadd) + } + + Max { + lhsmax = f32[] parameter(0) + rhsmax = f32[] parameter(1) + ROOT max = f32[] maximum(lhsmax, rhsmax) + } +)"; + +XLA_TEST_F(MultiOutputFusionTest, + DISABLED_ON_CPU(MultiOutputReduceFusionMinor)) { + const string testcase = tensorflow::strings::StrCat(kScalarOps, R"( + fused_reduce { + p0 = f32[2,2,2]{2,1,0} parameter(0) + c0 = f32[] constant(0) + r1 = f32[2,2]{1,0} reduce(p0, c0), dimensions={2}, to_apply=Add + mul = f32[2,2,2]{2,1,0} multiply(p0, p0) + c1 = f32[] constant(5) + r2 = f32[2,2]{1,0} reduce(mul, c1), dimensions={2}, to_apply=Max + ROOT tuple = (f32[2,2]{1,0}, f32[2,2]{1,0}) tuple(r1, r2) + } + + ENTRY reduce { + p = f32[2,2,2]{2,1,0} parameter(0) + ROOT fusion = (f32[2,2]{1,0}, f32[2,2]{1,0}) fusion(p), kind=kInput, + calls=fused_reduce + })"); + auto module = + HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) + .ValueOrDie(); + auto param = Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}); + TF_ASSERT_OK_AND_ASSIGN(auto result, + Execute(std::move(module), {param.get()})); + EXPECT_TRUE(LiteralTestUtil::Equal( + *result, + *Literal::MakeTupleOwned(Literal::CreateR2({{3, 7}, {11, 15}}), + Literal::CreateR2({{5, 16}, {36, 64}})))); +} + +XLA_TEST_F(MultiOutputFusionTest, + DISABLED_ON_CPU(MultiOutputReduceFusionMajor)) { + const string testcase = 
tensorflow::strings::StrCat(kScalarOps, R"( + fused_reduce { + p0 = f32[2,2,2]{2,1,0} parameter(0) + c0 = f32[] constant(0) + r1 = f32[2,2]{1,0} reduce(p0, c0), dimensions={0}, to_apply=Add + mul = f32[2,2,2]{2,1,0} multiply(p0, p0) + c1 = f32[] constant(5) + r2 = f32[2,2]{1,0} reduce(mul, c1), dimensions={0}, to_apply=Max + ROOT tuple = (f32[2,2]{1,0}, f32[2,2]{1,0}) tuple(r1, r2) + } + + ENTRY reduce { + p = f32[2,2,2]{2,1,0} parameter(0) + ROOT fusion = (f32[2,2]{1,0}, f32[2,2]{1,0}) fusion(p), kind=kInput, + calls=fused_reduce + })"); + auto module = + HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) + .ValueOrDie(); + auto param = Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}); + TF_ASSERT_OK_AND_ASSIGN(auto result, + Execute(std::move(module), {param.get()})); + EXPECT_TRUE(LiteralTestUtil::Equal( + *result, *Literal::MakeTupleOwned( + Literal::CreateR2({{6, 8}, {10, 12}}), + Literal::CreateR2({{25, 36}, {49, 64}})))); +} + +XLA_TEST_F(MultiOutputFusionTest, + DISABLED_ON_CPU(MultiOutputReduceFusionScalar)) { + const string testcase = tensorflow::strings::StrCat(kScalarOps, R"( + fused_reduce { + p0 = f32[2,2,2]{2,1,0} parameter(0) + c0 = f32[] constant(0) + r1 = f32[2]{0} reduce(p0, c0), dimensions={0,2}, to_apply=Add + mul = f32[2,2,2]{2,1,0} multiply(p0, p0) + c1 = f32[] constant(5) + r2 = f32[2]{0} reduce(mul, c1), dimensions={0,2}, to_apply=Max + r3 = f32[2]{0} reduce(mul, c1), dimensions={0,2}, to_apply=Add + ROOT tuple = (f32[2]{0}, f32[2]{0}, f32[2]{0}) tuple(r1, r2, r3) + } + + ENTRY reduce { + p = f32[2,2,2]{2,1,0} parameter(0) + ROOT fusion = (f32[2]{0}, f32[2]{0}, f32[2]{0}) fusion(p), kind=kInput, + calls=fused_reduce + })"); + auto module = + HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) + .ValueOrDie(); + auto param = Literal::CreateR3({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}); + TF_ASSERT_OK_AND_ASSIGN(auto result, + Execute(std::move(module), {param.get()})); + 
EXPECT_TRUE(LiteralTestUtil::Equal( + *result, *Literal::MakeTupleOwned(Literal::CreateR1({14, 22}), + Literal::CreateR1({36, 64}), + Literal::CreateR1({391, 463})))); +} + } // namespace } // namespace xla -- GitLab From 67b6696f9620734369ae99e7895fa6570d7faca6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 22 May 2018 12:34:51 -0700 Subject: [PATCH 009/902] [XLA:GPU] Emit fused reduces from batchnorm expander This is an intermediate step until we have working multi-output fusion. Once we have it, this change should be reverted as it might interfere with fusion. PiperOrigin-RevId: 197605814 --- tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index d50153d8a3..1445684e5d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -157,11 +157,13 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, if (hlo_module->config().debug_options().xla_gpu_use_cudnn_batchnorm()) { pass.AddPass(); } + // TODO(kramerb): Remove use_fusion once instruction fusion can create + // multi-output fusions from the unfused expander output. pass.AddPass( /*rewrite_training_op=*/true, /*rewrite_inference_op=*/true, /*rewrite_grad_op=*/true, - /*use_fusion=*/false); + /*use_fusion=*/true); // Rewrite gather ops into smaller ones. 
pass.AddPass(); -- GitLab From a4c9efe6a5bf143f844b1cffbdc839c399620b9b Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 22 May 2018 12:36:35 -0700 Subject: [PATCH 010/902] Detect unknown batch size in predictions dict PiperOrigin-RevId: 197606059 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 77d117ba78..f0c7564175 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1264,13 +1264,11 @@ class _ModelFnWrapper(object): 'estimator_spec used by TPU prediction must have type' '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) + self._verify_tpu_spec_predictions(tpu_estimator_spec.predictions) + captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) to_record = {} identity_fn = lambda **kwargs: kwargs - # TODO(xiejw): Adds validation for prediction dictionrary. - # TODO(xiejw): Adds support for single tensor as predictions. - if not isinstance(tpu_estimator_spec.predictions, dict): - raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions] to_record['signals'] = [identity_fn, stopping_signals] if tpu_estimator_spec.host_call is not None: @@ -1282,6 +1280,21 @@ class _ModelFnWrapper(object): return predict_step, host_calls, captured_scaffold_fn + def _verify_tpu_spec_predictions(self, predictions): + """Validates TPUEstimatorSpec.predictions dict.""" + # TODO(xiejw): Adds validation for prediction dictionrary. + # TODO(xiejw): Adds support for single tensor as predictions. 
+ if not isinstance(predictions, dict): + raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') + + for (key, tensor) in predictions.items(): + if tensor.shape[0].value is None: + raise ValueError( + 'The tensor with key ({}) in TPUEstimatorSpec.predictions has ' + 'dynamic shape (should be static). Tensor: {}'.format( + key, tensor)) + return predictions + def _call_model_fn(self, features, labels, is_export_mode=False): """Calls the model_fn with required parameters.""" model_fn_args = function_utils.fn_args(self._model_fn) -- GitLab From bf6644f9d274f549707d3f2a80c77e5eda163ebb Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 22 May 2018 12:46:30 -0700 Subject: [PATCH 011/902] Fix memory leak when going from the fast path to the slow path in eager Fixes #19385 PiperOrigin-RevId: 197607384 --- tensorflow/python/eager/pywrap_tfe_src.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 62deb41e9b..9885b3d3d7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -49,8 +49,7 @@ using AttrToInputsMap = tensorflow::gtl::FlatMap>; -tensorflow::mutex all_attr_to_input_maps_lock( - tensorflow::LINKER_INITIALIZED); +tensorflow::mutex all_attr_to_input_maps_lock(tensorflow::LINKER_INITIALIZED); tensorflow::gtl::FlatMap* GetAllAttrToInputsMaps() { static auto* all_attr_to_input_maps = new tensorflow::gtl::FlatMap; @@ -754,7 +753,7 @@ PyObject* TFE_Py_RegisterBackwardFunctionGetter(PyObject* e) { void RaiseFallbackException(const char* message) { if (fallback_exception_class != nullptr) { - PyErr_SetObject(fallback_exception_class, Py_BuildValue("s", message)); + PyErr_SetString(fallback_exception_class, message); return; } @@ -772,8 +771,9 @@ int MaybeRaiseExceptionFromTFStatus(TF_Status* status, PyObject* exception) { if (exception == nullptr) { tensorflow::mutex_lock 
l(exception_class_mutex); if (exception_class != nullptr) { - PyErr_SetObject(exception_class, - Py_BuildValue("si", msg, TF_GetCode(status))); + tensorflow::Safe_PyObjectPtr val( + Py_BuildValue("si", msg, TF_GetCode(status))); + PyErr_SetObject(exception_class, val.get()); return -1; } else { exception = PyExc_RuntimeError; @@ -791,7 +791,8 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status, if (exception == nullptr) { tensorflow::mutex_lock l(exception_class_mutex); if (exception_class != nullptr) { - PyErr_SetObject(exception_class, Py_BuildValue("si", msg, status.code())); + tensorflow::Safe_PyObjectPtr val(Py_BuildValue("si", msg, status.code())); + PyErr_SetObject(exception_class, val.get()); return -1; } else { exception = PyExc_RuntimeError; -- GitLab From 4d134bad0403ebb5722144d8f859a04a5f21efc2 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 22 May 2018 13:14:18 -0700 Subject: [PATCH 012/902] Move executor_test.cc to tensorflow/core/common_runtime/. PiperOrigin-RevId: 197611583 --- tensorflow/core/BUILD | 38 +++++++++++++++++++ .../executor_test.cc | 0 .../testlib_ops.cc} | 20 +++++++--- tensorflow/core/distributed_runtime/BUILD | 19 ++++++---- .../core/distributed_runtime/master_test.cc | 2 +- tensorflow/core/distributed_runtime/rpc/BUILD | 16 +------- 6 files changed, 67 insertions(+), 28 deletions(-) rename tensorflow/core/{distributed_runtime => common_runtime}/executor_test.cc (100%) rename tensorflow/core/{distributed_runtime/rpc/grpc_testlib_ops.cc => common_runtime/testlib_ops.cc} (84%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5d63cd68ae..05b8423e15 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1108,6 +1108,7 @@ cc_library( ":shape_inference_testutil", ":tensor_testutil", ":test", + ":testlib_ops", "//tensorflow/cc:scope", "//tensorflow/core/kernels:constant_op", "//tensorflow/core/kernels:ops_testutil", @@ -1115,6 +1116,18 @@ cc_library( ], ) +cc_library( + name = 
"testlib_ops", + testonly = 1, + srcs = ["common_runtime/testlib_ops.cc"], + linkstatic = 1, # Seems to be needed since alwayslink is broken in bazel + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + # This is a link-only library to provide a DirectSession # implementation of the Session interface. tf_cuda_library( @@ -3748,6 +3761,31 @@ tf_cc_test( ], ) +tf_cc_test( + name = "common_runtime_executor_test", + size = "small", + srcs = ["common_runtime/executor_test.cc"], + linkstatic = tf_kernel_tests_linkstatic(), + deps = [ + ":core", + ":core_cpu", + ":core_cpu_internal", + ":framework", + ":framework_internal", + ":lib", + ":lib_internal", + ":protos_all_cc", + ":test", + ":test_main", + ":testlib", + "//tensorflow/core/kernels:array", + "//tensorflow/core/kernels:control_flow_ops", + "//tensorflow/core/kernels:math", + "//tensorflow/core/kernels:random_ops", + "//tensorflow/core/kernels:state", + ], +) + tf_cc_test( name = "common_runtime_function_test", size = "small", diff --git a/tensorflow/core/distributed_runtime/executor_test.cc b/tensorflow/core/common_runtime/executor_test.cc similarity index 100% rename from tensorflow/core/distributed_runtime/executor_test.cc rename to tensorflow/core/common_runtime/executor_test.cc diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_ops.cc b/tensorflow/core/common_runtime/testlib_ops.cc similarity index 84% rename from tensorflow/core/distributed_runtime/rpc/grpc_testlib_ops.cc rename to tensorflow/core/common_runtime/testlib_ops.cc index 5597ee7a76..a0139c3ee5 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_ops.cc +++ b/tensorflow/core/common_runtime/testlib_ops.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/platform/macros.h" @@ -21,8 +22,12 @@ namespace tensorflow { namespace test { // ErrorOp::Compute returns an error. -REGISTER_OP("Error").Input("in: T").Output("out: T").Attr("T: type").Attr( - "message: string"); +REGISTER_OP("Error") + .Input("in: T") + .Output("out: T") + .Attr("T: type") + .Attr("message: string") + .SetShapeFn(shape_inference::UnknownShape); class ErrorOp : public OpKernel { public: explicit ErrorOp(OpKernelConstruction* ctx) : OpKernel(ctx) { @@ -41,7 +46,8 @@ REGISTER_KERNEL_BUILDER(Name("Error").Device(DEVICE_CPU), ErrorOp); REGISTER_OP("InvalidRefType") .Output("out: Ref(TIn)") .Attr("TIn: type") - .Attr("TOut: type"); + .Attr("TOut: type") + .SetShapeFn(shape_inference::UnknownShape); class InvalidRefType : public OpKernel { public: explicit InvalidRefType(OpKernelConstruction* ctx) : OpKernel(ctx) { @@ -63,8 +69,12 @@ REGISTER_KERNEL_BUILDER(Name("InvalidRefType").Device(DEVICE_CPU), // DelayOp::AsyncCompute sleeps for "micros"-econd and then returns // its input. -REGISTER_OP("Delay").Input("in: T").Output("out: T").Attr("T: type").Attr( - "micros: int"); +REGISTER_OP("Delay") + .Input("in: T") + .Output("out: T") + .Attr("T: type") + .Attr("micros: int") + .SetShapeFn(shape_inference::UnchangedShape); class DelayOp : public AsyncOpKernel { public: explicit DelayOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 18b7069dbe..ead698d787 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -561,17 +561,19 @@ tf_cc_test( ], ) -# TODO(mrry): Move executor_test.cc to ../common_runtime when once it no longer depends -# on grpc_testlib. 
-tf_cuda_cc_tests( - name = "executor_tests", +tf_cuda_cc_test( + name = "master_test", size = "medium", srcs = [ - "executor_test.cc", - #"master_test.cc", # TODO(b/27683709): Re-enable when not flaky. + "master_test.cc", ], linkstatic = tf_kernel_tests_linkstatic(), - tags = tf_cuda_tests_tags(), + tags = tf_cuda_tests_tags() + [ + "manual", # TODO(b/27683709): Re-enable when not flaky. + "notap", # TODO(b/27683709): Re-enable when not flaky. + "noguitar", # TODO(b/27683709): Re-enable when not flaky. + "nooss", # TODO(b/27683709): Re-enable when not flaky. + ], deps = [ ":master", ":remote_device", @@ -588,6 +590,7 @@ tf_cuda_cc_tests( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/distributed_runtime/rpc:grpc_channel", + "//tensorflow/core/distributed_runtime/rpc:grpc_master_service_impl", "//tensorflow/core/distributed_runtime/rpc:grpc_testlib", "//tensorflow/core/distributed_runtime/rpc:grpc_util", "//tensorflow/core/distributed_runtime/rpc:grpc_worker_cache", @@ -648,10 +651,10 @@ tf_cuda_cc_test( "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", "//tensorflow/core/distributed_runtime:server_lib", "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", "//tensorflow/core/distributed_runtime/rpc:grpc_session", - "//tensorflow/core/distributed_runtime/rpc:grpc_testlib_ops", "//tensorflow/core/kernels:aggregate_ops", "//tensorflow/core/kernels:array", ], diff --git a/tensorflow/core/distributed_runtime/master_test.cc b/tensorflow/core/distributed_runtime/master_test.cc index f2c1f3489c..0826a90860 100644 --- a/tensorflow/core/distributed_runtime/master_test.cc +++ b/tensorflow/core/distributed_runtime/master_test.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "grpc++/grpc++.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" +#include "tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_testlib.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/framework/allocator.h" @@ -37,7 +38,6 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/master.pb.h" -#include "tensorflow/core/protobuf/master_service.grpc.pb.h" namespace tensorflow { diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index 40028ee241..4b2747f26d 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -314,18 +314,6 @@ tf_cc_binary( ], ) -tf_cuda_library( - name = "grpc_testlib_ops", - testonly = 1, - srcs = ["grpc_testlib_ops.cc"], - linkstatic = 1, # Seems to be needed since alwayslink is broken in bazel - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ], - alwayslink = 1, -) - tf_cc_binary( name = "grpc_testlib_server", testonly = 1, @@ -334,11 +322,11 @@ tf_cc_binary( ], deps = [ ":grpc_server_lib", - ":grpc_testlib_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:testlib", "//tensorflow/core/distributed_runtime:server_lib", "//tensorflow/core/kernels:constant_op", "//tensorflow/core/kernels:cwise_op", @@ -362,12 +350,12 @@ tf_cuda_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":grpc_session", - ":grpc_testlib_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core:testlib", ], alwayslink = 1, ) -- GitLab From 
9d2c6ff2a542b9bd89b42e3b88e6299eae9bdcc4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 13:49:08 -0700 Subject: [PATCH 013/902] Collective Ops Part 7 Complete just enough of the core implementation to run multi-device collectives locally within a single process. Interfaces are still private and not available for general use. PiperOrigin-RevId: 197617132 --- .../core/common_runtime/direct_session.cc | 27 ++++++- .../core/common_runtime/direct_session.h | 3 + tensorflow/core/common_runtime/executor.cc | 18 ++++- tensorflow/core/common_runtime/executor.h | 1 + tensorflow/core/common_runtime/function.cc | 3 + .../core/common_runtime/graph_runner.cc | 3 + tensorflow/core/framework/function.h | 2 + tensorflow/core/graph/graph.cc | 3 + tensorflow/core/graph/graph.h | 2 + tensorflow/core/kernels/function_ops.cc | 1 + tensorflow/core/protobuf/config.proto | 25 +++++- tensorflow/core/protobuf/worker.proto | 8 ++ ...nsorflow.-config-proto.-experimental.pbtxt | 80 +++++++++++++++++++ .../api/golden/tensorflow.-config-proto.pbtxt | 8 ++ ...ensorflow.-run-options.-experimental.pbtxt | 80 +++++++++++++++++++ .../api/golden/tensorflow.-run-options.pbtxt | 8 ++ 16 files changed, 268 insertions(+), 4 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 0afbd02e86..07c1eafedc 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -19,15 +19,19 @@ limitations under the License. 
#include #include +#include "tensorflow/core/common_runtime/collective_executor_mgr.h" +#include "tensorflow/core/common_runtime/collective_param_resolver_local.h" #include "tensorflow/core/common_runtime/constant_folding.h" #include "tensorflow/core/common_runtime/debugger_state_interface.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" #include "tensorflow/core/common_runtime/executor.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/graph_optimizer.h" #include "tensorflow/core/common_runtime/memory_types.h" #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" #include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb_text.h" @@ -443,6 +447,18 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, // Create a run state and start execution. RunState run_state(step_id, &devices_); run_state.rendez = new IntraProcessRendezvous(device_mgr_.get()); + // Set up for collectives if the RunOption declares a key. + if (run_options.experimental().collective_graph_key() > 0) { + if (!collective_executor_mgr_) { + DeviceResolverLocal* drl = new DeviceResolverLocal(device_mgr_.get()); + collective_executor_mgr_.reset(new CollectiveExecutorMgr( + options_.config, device_mgr_.get(), drl, + new CollectiveParamResolverLocal(device_mgr_.get(), drl, + "/job:localhost/replica:0/task:0"))); + } + run_state.collective_executor.reset(new CollectiveExecutor::Handle( + collective_executor_mgr_->FindOrCreate(step_id), true /*inherit_ref*/)); + } // Start parallel Executors. 
const size_t num_executors = executors_and_keys->items.size(); @@ -459,6 +475,9 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, args.step_id = step_id; args.call_frame = call_frame; args.rendezvous = run_state.rendez; + args.collective_executor = + (run_state.collective_executor ? run_state.collective_executor->get() + : nullptr); CancellationManager step_cancellation_manager; args.cancellation_manager = &step_cancellation_manager; args.session_state = &session_state_; @@ -768,6 +787,10 @@ Status DirectSession::PRunSetup(const std::vector& input_names, args.rendezvous = run_state->rendez; args.cancellation_manager = cancellation_manager_; + // Note that Collectives are not supported in partial runs + // because RunOptions is not passed in so we can't know whether + // their use is intended. + args.collective_executor = nullptr; args.runner = [this, pool](Executor::Args::Closure c) { SchedClosure(pool, std::move(c)); }; @@ -1518,11 +1541,13 @@ DirectSession::RunState::RunState( const std::vector& pending_input_names, const std::vector& pending_output_names, int64 step_id, const std::vector* devices) - : step_container(step_id, [devices](const string& name) { + : step_container(step_id, [devices, step_id](const string& name) { for (auto d : *devices) { if (!d->resource_manager()->Cleanup(name).ok()) { // Do nothing... } + ScopedAllocatorMgr* sam = d->GetScopedAllocatorMgr(); + if (sam) sam->Cleanup(step_id); } }) { // Initially all the feeds and fetches are pending. diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 6f9c1b980b..72a2be4816 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -33,6 +33,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/common_runtime/session_factory.h" #include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/collective.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/session_state.h" #include "tensorflow/core/framework/tensor.h" @@ -175,6 +176,7 @@ class DirectSession : public Session { mutex mu_; Status status GUARDED_BY(mu_); IntraProcessRendezvous* rendez = nullptr; + std::unique_ptr collective_executor; std::unique_ptr collector; Notification executors_done; std::unordered_map pending_inputs; // true if fed @@ -352,6 +354,7 @@ class DirectSession : public Session { DirectSessionFactory* const factory_; // not owned CancellationManager* cancellation_manager_; + std::unique_ptr collective_executor_mgr_; // Map of placed stateful nodes, i.e. nodes for which is_stateful() // is true, such as "params" and "queue" nodes. Once placed these diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 802bfee890..585d777e81 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/collective.h" #include "tensorflow/core/framework/control_flow.h" #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/framework/graph.pb.h" @@ -592,7 +593,8 @@ char* GraphView::InitializeNode(char* ptr, const Node* n) { } } } - if (fwd_status.ok() && forward_from[i] == -1) { + if (fwd_status.ok() && + forward_from[i] == OpKernelContext::Params::kNoReservation) { DCHECK_EQ(forward_input.size() % 2, 0); for (int j = 0; j < forward_input.size(); j += 2) { if (forward_input[j + 1] == i) { @@ -770,7 +772,8 @@ void GraphView::SetScopedAllocatorAttrs( << use_node->name(); continue; } - // There should be exactly one output using ScopedAllocation. + // There can be more than one output using ScopedAllocation, but this + // analysis assumes they use the same ScopedAllocator. for (const auto& e : use_node->out_edges()) { if (!e->IsControlEdge()) { AllocatorAttributes attr; @@ -887,6 +890,11 @@ Status InferAllocAttr(const Node* n, const Node* dst, << " remote type " << parsed_dst_name.type; } } + if (n->IsCollective()) { + // We'll make the sweeping assumption that any collective op is going + // to be involved in network i/o. + attr->set_nic_compatible(true); + } return s; } @@ -1289,6 +1297,7 @@ class ExecutorState { int64 step_id_; // Not owned. Rendezvous* rendezvous_; + CollectiveExecutor* collective_executor_ = nullptr; SessionState* session_state_; TensorStore* tensor_store_; // Step-local container. 
@@ -1411,6 +1420,7 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl) log_memory_(LogMemory::IsEnabled()), step_id_(args.step_id), rendezvous_(args.rendezvous), + collective_executor_(args.collective_executor), session_state_(args.session_state), tensor_store_(args.tensor_store), step_container_(args.step_container), @@ -1621,6 +1631,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { params.log_memory = log_memory_; params.record_tensor_accesses = impl_->device_record_tensor_accesses_; params.rendezvous = rendezvous_; + params.collective_executor = collective_executor_; params.session_state = session_state_; params.tensor_store = tensor_store_; params.cancellation_manager = cancellation_manager_; @@ -2180,6 +2191,9 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, if (rendezvous_) { rendezvous_->StartAbort(s); } + if (collective_executor_) { + collective_executor_->StartAbort(s); + } if (cancellation_manager_) { cancellation_manager_->StartCancel(); } diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h index adf80a2417..e5d7b7c53c 100644 --- a/tensorflow/core/common_runtime/executor.h +++ b/tensorflow/core/common_runtime/executor.h @@ -89,6 +89,7 @@ class Executor { SessionState* session_state = nullptr; TensorStore* tensor_store = nullptr; ScopedStepContainer* step_container = nullptr; + CollectiveExecutor* collective_executor = nullptr; // If true, calls Sync() on the device. bool sync_on_finish = false; diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index d05564e9c4..5d9be70522 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/graph_optimizer.h" #include "tensorflow/core/common_runtime/memory_types.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/collective.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" @@ -809,6 +810,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, exec_args->cancellation_manager = run_opts.cancellation_manager; exec_args->step_container = run_opts.step_container; exec_args->runner = *run_opts.runner; + exec_args->collective_executor = run_opts.collective_executor; Item* item = nullptr; Status s = GetOrCreateItem(handle, &item); @@ -896,6 +898,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, exec_args->rendezvous = run_opts.rendezvous; exec_args->stats_collector = run_opts.stats_collector; exec_args->cancellation_manager = run_opts.cancellation_manager; + exec_args->collective_executor = run_opts.collective_executor; exec_args->step_container = run_opts.step_container; exec_args->runner = *run_opts.runner; exec_args->call_frame = frame; diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index adf2ef6f44..0a1797fa19 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -176,6 +176,9 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, args.step_id = LogMemory::CONSTANT_FOLDING_STEP_ID; args.runner = runner; args.rendezvous = rendez; + // NOTE: Use of graph runner is limited to single-device executions + // so a CollectiveExecutor should never be required. + args.collective_executor = nullptr; // Run the graph. 
TF_RETURN_IF_ERROR(executor->Run(args)); diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index e00399f97d..872906756a 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -33,6 +33,7 @@ limitations under the License. namespace tensorflow { class CancellationManager; +class CollectiveExecutor; class GraphDef; class OpKernel; class ProcessFunctionLibraryRuntime; @@ -484,6 +485,7 @@ class FunctionLibraryRuntime { int64 step_id = 0; Rendezvous* rendezvous = nullptr; CancellationManager* cancellation_manager = nullptr; + CollectiveExecutor* collective_executor = nullptr; ScopedStepContainer* step_container = nullptr; StepStatsCollector* stats_collector = nullptr; diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 71d0637dc2..0f748515ef 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -80,6 +80,9 @@ const std::unordered_map& Node::kNodeClassTable = {"Shape", NC_METADATA}, {"Rank", NC_METADATA}, {"_ScopedAllocator", NC_SCOPED_ALLOCATOR}, + {"CollectiveReduce", NC_COLLECTIVE}, + {"CollectiveBcastSend", NC_COLLECTIVE}, + {"CollectiveBcastRecv", NC_COLLECTIVE}, }); #undef REF_CLASS diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 83a69e6b2d..33fb7cb57a 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -163,6 +163,7 @@ class Node { bool IsHostSend() const { return class_ == NC_HOST_SEND; } bool IsHostRecv() const { return class_ == NC_HOST_RECV; } bool IsScopedAllocator() const { return class_ == NC_SCOPED_ALLOCATOR; } + bool IsCollective() const { return class_ == NC_COLLECTIVE; } bool IsMetadata() const { return class_ == NC_METADATA; } @@ -235,6 +236,7 @@ class Node { NC_DELETE_SESSION_TENSOR, NC_METADATA, NC_SCOPED_ALLOCATOR, + NC_COLLECTIVE, NC_OTHER // Not a special kind of node }; diff --git a/tensorflow/core/kernels/function_ops.cc 
b/tensorflow/core/kernels/function_ops.cc index 8f66f0a7b9..f2724735bf 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -254,6 +254,7 @@ class SymbolicGradientOp : public AsyncOpKernel { opts.runner = ctx->runner(); opts.stats_collector = ctx->stats_collector(); opts.step_container = ctx->step_container(); + opts.collective_executor = ctx->collective_executor(); std::vector args; args.reserve(ctx->num_inputs()); for (int i = 0; i < ctx->num_inputs(); ++i) { diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 6cd067afcb..410ad227e9 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -379,7 +379,17 @@ message ConfigProto { // shared with other sessions. bool isolate_session_state = 15; - // Next: 16 + // Everything inside Experimental is subject to change and is not subject + // to API stability guarantees in + // https://www.tensorflow.org/programmers_guide/version_compat. + message Experimental { + // Task name for group resolution. + string collective_group_leader = 1; + }; + + Experimental experimental = 16; + + // Next: 17 }; // Options for a single Run() call. @@ -414,6 +424,19 @@ message RunOptions { // Enabling this option can slow down the Run() call. bool report_tensor_allocations_upon_oom = 7; + // Everything inside Experimental is subject to change and is not subject + // to API stability guarantees in + // https://www.tensorflow.org/programmers_guide/version_compat. + message Experimental { + // If non-zero, declares that this graph is going to use collective + // ops and must synchronize step_ids with any other graph with this + // same group_key value (in a distributed computation where tasks + // run disjoint graphs). 
+ int64 collective_graph_key = 1; + }; + + Experimental experimental = 8; + reserved 4; } diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index 1cb84ca41e..b400638df1 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -122,6 +122,14 @@ message RegisterGraphRequest { // Field(s) used by TensorFlow Debugger (tfdbg). DebugOptions debug_options = 5; + + // If graph_def contains any collective ops this must be a positive + // integer used to coordinate execution with other graphs. All + // graphs in a distributed execution with the same + // collective_graph_key will coordinate to use the same step_id + // concurrently so that BufRendezvous entries will make the correct + // values accessible. + int64 collective_graph_key = 7; } message RegisterGraphResponse { diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt new file mode 100644 index 0000000000..0a0669e10c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt @@ -0,0 +1,80 @@ +path: "tensorflow.ConfigProto.Experimental" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "COLLECTIVE_GROUP_LEADER_FIELD_NUMBER" + mtype: "" + } + member { + name: "DESCRIPTOR" + mtype: "" + } + member { + name: "Extensions" + mtype: "" + } + member_method { + name: "ByteSize" + } + member_method { + name: "Clear" + } + member_method { + name: "ClearExtension" + } + member_method { + name: "ClearField" + } + member_method { + name: "CopyFrom" + } + member_method { + name: "DiscardUnknownFields" + } + member_method { + name: "FindInitializationErrors" + } + member_method { + name: "FromString" + } + member_method { + name: "HasExtension" + } + member_method { + name: "HasField" + } + member_method { + name: "IsInitialized" + } + member_method { + name: "ListFields" + } + member_method { + 
name: "MergeFrom" + } + member_method { + name: "MergeFromString" + } + member_method { + name: "ParseFromString" + } + member_method { + name: "RegisterExtension" + } + member_method { + name: "SerializePartialToString" + } + member_method { + name: "SerializeToString" + } + member_method { + name: "SetInParent" + } + member_method { + name: "WhichOneof" + } + member_method { + name: "__init__" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt index 009d64aed0..0d53d1c2b9 100644 --- a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt @@ -26,6 +26,14 @@ tf_class { name: "DeviceCountEntry" mtype: "" } + member { + name: "EXPERIMENTAL_FIELD_NUMBER" + mtype: "" + } + member { + name: "Experimental" + mtype: "" + } member { name: "Extensions" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt new file mode 100644 index 0000000000..6a5e46a0b8 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt @@ -0,0 +1,80 @@ +path: "tensorflow.RunOptions.Experimental" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "COLLECTIVE_GRAPH_KEY_FIELD_NUMBER" + mtype: "" + } + member { + name: "DESCRIPTOR" + mtype: "" + } + member { + name: "Extensions" + mtype: "" + } + member_method { + name: "ByteSize" + } + member_method { + name: "Clear" + } + member_method { + name: "ClearExtension" + } + member_method { + name: "ClearField" + } + member_method { + name: "CopyFrom" + } + member_method { + name: "DiscardUnknownFields" + } + member_method { + name: "FindInitializationErrors" + } + member_method { + name: "FromString" + } + member_method { + name: "HasExtension" + } + member_method { + name: "HasField" + } + member_method { + name: "IsInitialized" + } + 
member_method { + name: "ListFields" + } + member_method { + name: "MergeFrom" + } + member_method { + name: "MergeFromString" + } + member_method { + name: "ParseFromString" + } + member_method { + name: "RegisterExtension" + } + member_method { + name: "SerializePartialToString" + } + member_method { + name: "SerializeToString" + } + member_method { + name: "SetInParent" + } + member_method { + name: "WhichOneof" + } + member_method { + name: "__init__" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt index 2f3e7f1a84..65e55883da 100644 --- a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt @@ -10,6 +10,14 @@ tf_class { name: "DESCRIPTOR" mtype: "" } + member { + name: "EXPERIMENTAL_FIELD_NUMBER" + mtype: "" + } + member { + name: "Experimental" + mtype: "" + } member { name: "Extensions" mtype: "" -- GitLab From 8a362a264e2219872b390eb8c22286acba32d39f Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Tue, 22 May 2018 13:59:48 -0700 Subject: [PATCH 014/902] [XLA] Skip BF16 output conversion folding when CRS is the root. 
PiperOrigin-RevId: 197618934 --- .../service/bfloat16_conversion_folding.cc | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc index 08d0152e3c..1b8b2d2045 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -182,15 +182,26 @@ Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) { Status BFloat16ConversionFoldingVisitor::HandleCrossReplicaSum( HloInstruction* crs) { - if (!ShapeUtil::IsTuple(crs->shape()) || - !bfloat16_support_->SupportsMixedPrecisions(*crs)) { - return DefaultAction(crs); - } - // First use DefaultAction() to handle the operands. It can't handle // tuple-shaped output. TF_RETURN_IF_ERROR(DefaultAction(crs)); + if (!bfloat16_support_->SupportsMixedPrecisions(*crs)) { + return Status::OK(); + } + + // If the output is not a tuple, we don't need special handling. + if (!ShapeUtil::IsTuple(crs->shape())) { + return Status::OK(); + } + + // If crs is the root instruction, we should keep its original output type. + // The root instruction implicitly has a use from being the result of the + // computation, and the code below does not take this use into account. + if (crs == computation_->root_instruction()) { + return Status::OK(); + } + // Then do per-tuple-element handling on the output. 
std::vector> per_tuple_element_gtes( crs->operand_count()); -- GitLab From a39fa2d5265fecfa765eb4a417c537d627899598 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 22 May 2018 14:08:57 -0700 Subject: [PATCH 015/902] [TF:XLA] make miscomparison error messages more readable PiperOrigin-RevId: 197620560 --- tensorflow/compiler/xla/literal_comparison.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/literal_comparison.cc b/tensorflow/compiler/xla/literal_comparison.cc index 3696fdbe12..bf9679cafe 100644 --- a/tensorflow/compiler/xla/literal_comparison.cc +++ b/tensorflow/compiler/xla/literal_comparison.cc @@ -716,9 +716,11 @@ Status Equal(const LiteralSlice& expected, const LiteralSlice& actual) { } return AppendStatus(result, - tensorflow::strings::Printf("expected: %s\nactual: %s", - expected.ToString().c_str(), - actual.ToString().c_str())); + tensorflow::strings::Printf( + "\nat index: %s\nexpected: %s\nactual: %s", + Literal::MultiIndexAsString(multi_index).c_str(), + ToStringTruncated(expected).c_str(), + ToStringTruncated(actual).c_str())); } Status Near(const LiteralSlice& expected, const LiteralSlice& actual, -- GitLab From ee561a30a09a6464bd0c9a0ef69e5f0523477fc8 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Tue, 22 May 2018 14:39:47 -0700 Subject: [PATCH 016/902] [TF:XLA] Roll back the functionality change of cl/197458260 to unbreak test. PiperOrigin-RevId: 197625888 --- tensorflow/compiler/xla/service/hlo_scheduling.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 854aa94319..6397e46326 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -437,6 +437,7 @@ StatusOr> DFSMemoryScheduler( // simply users-1 for each instruction. 
By subtracting 1, we're saying that // instructions with no users or a single user don't count; instructions with // lots of fan-out will be visited earlier. + int64 cumulative_total_size = 0; tensorflow::gtl::FlatMap extra_users; tensorflow::gtl::FlatMap total_sizes; for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) { @@ -449,12 +450,14 @@ StatusOr> DFSMemoryScheduler( int64 logical_buffer_size = SumLogicalBufferSizes( points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function); total_sizes[hlo] = logical_buffer_size; + cumulative_total_size += logical_buffer_size; tensorflow::gtl::FlatSet unique_operands( hlo->operands().begin(), hlo->operands().end()); for (const HloInstruction* operand : unique_operands) { extra_users[hlo] += extra_users[operand]; total_sizes[hlo] += total_sizes[operand]; } + total_sizes[hlo] = std::min(total_sizes[hlo], cumulative_total_size); } CHECK_EQ(extra_users.size(), computation.instruction_count()); CHECK_EQ(total_sizes.size(), computation.instruction_count()); -- GitLab From a3a5e5cad0bfdd28f43223980f64ce367c732aad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 14:52:36 -0700 Subject: [PATCH 017/902] [TF:XLA] Add a helper to update HLO reachability. This can be used if the user does not care if reachability changed after an update. 
PiperOrigin-RevId: 197628007 --- .../compiler/xla/service/hlo_reachability.cc | 20 ++++++++++++++----- .../compiler/xla/service/hlo_reachability.h | 10 ++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_reachability.cc b/tensorflow/compiler/xla/service/hlo_reachability.cc index 8e167633bb..4738e46f8a 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.cc +++ b/tensorflow/compiler/xla/service/hlo_reachability.cc @@ -33,17 +33,27 @@ bool HloReachabilityMap::SetReachabilityToUnion( const HloInstruction* instruction) { BitVector& bit_vector = GetBitVector(instruction); tmp_bit_vector_ = bit_vector; + SetReachabilityToUnionHelper(inputs, instruction, &bit_vector); + return bit_vector != tmp_bit_vector_; +} +void HloReachabilityMap::FastSetReachabilityToUnion( + tensorflow::gtl::ArraySlice inputs, + const HloInstruction* instruction) { + SetReachabilityToUnionHelper(inputs, instruction, &GetBitVector(instruction)); +} + +void HloReachabilityMap::SetReachabilityToUnionHelper( + tensorflow::gtl::ArraySlice inputs, + const HloInstruction* instruction, BitVector* bit_vector) { // If instruction is part of inputs, don't reset the bit_vector. 
if (std::find(inputs.begin(), inputs.end(), instruction) == inputs.end()) { - bit_vector.SetToZero(); + bit_vector->SetToZero(); } - bit_vector.Set(GetIndex(instruction)); + bit_vector->Set(GetIndex(instruction)); for (const HloInstruction* input : inputs) { - bit_vector.OrWith(GetBitVector(input)); + bit_vector->OrWith(GetBitVector(input)); } - - return bit_vector != tmp_bit_vector_; } void HloReachabilityMap::SetReachable(const HloInstruction* a, diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h index 553ec11f6f..69bb2b3cee 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.h +++ b/tensorflow/compiler/xla/service/hlo_reachability.h @@ -57,6 +57,11 @@ class HloReachabilityMap { tensorflow::gtl::ArraySlice inputs, const HloInstruction* instruction); + // As above, but faster because it does not check if the reachability changed. + void FastSetReachabilityToUnion( + tensorflow::gtl::ArraySlice inputs, + const HloInstruction* instruction); + // Sets entry so that IsReachable(a, b) will return true // // !!! THIS FUNCTION DOES NOT COMPUTE REACHABILITY !!! It sets the adjacency @@ -133,6 +138,11 @@ class HloReachabilityMap { return bit_vectors_[GetIndex(instruction)]; } + // Helper for SetReachabilityToUnion/FastSetReachabilityToUnion. + void SetReachabilityToUnionHelper( + tensorflow::gtl::ArraySlice inputs, + const HloInstruction* instruction, BitVector* bit_vector); + // Return the index of the given instruction. The value is used to index into // the vector of BitVectors and the BitVectors themselves. 
int GetIndex(const HloInstruction* instruction) const { -- GitLab From 679d6da028392c20f4323f158743e7370b47d1f9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 22 May 2018 14:55:12 -0700 Subject: [PATCH 018/902] Extract out common code and make things safer; NFC RowMajorMatrixVectorProductEmitter and ColumnMajorMatrixVectorProductEmitter both cache* the generated LLVM IR by keying off the dimensions of the operation, the primitive type etc. Before this CL the code computing the cache key lived separately from the GEMV emitters. This pattern introduces a risk that the GEMV emitters will end up with some state not modeled in the cache key, resulting in a subtle bug. This CL reduces the risk by encapsulating the cache key generation and the input configuration to the GEMV emitters in a single class. * In the sense that two different dot operations with the same M,K,N will share a single LLVM IR function body. PiperOrigin-RevId: 197628423 --- .../xla/service/cpu/dot_op_emitter.cc | 221 +++++++++++------- 1 file changed, 140 insertions(+), 81 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index af69fc3da9..5158779910 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -79,6 +79,65 @@ class TileLoader { std::vector pointers_; }; +// The base class for the classes representing the GEMV emitter configurations. +// +// The IR emitted (modulo the LLVM values representing the input and output +// buffers) by the row major and column major GEMV emitters should be a function +// of their configuration. This is important because their configuration is +// used as a key to cache the generated IR. +class GemvConfig { + public: + // Mixin for convenience. 
+ template + struct User { + public: + PrimitiveType scalar_type() const { + return derived().config().scalar_type(); + } + int64 tile_rows() const { return derived().config().tile_rows(); } + int64 tile_cols() const { return derived().config().tile_cols(); } + int64 m() const { return derived().config().m(); } + int64 k() const { return derived().config().k(); } + int64 has_addend() const { return derived().config().has_addend(); } + + private: + const T& derived() const { return *static_cast(this); } + }; + + PrimitiveType scalar_type() const { return scalar_type_; } + int64 tile_rows() const { return tile_rows_; } + int64 tile_cols() const { return tile_cols_; } + int64 m() const { return m_; } + int64 k() const { return k_; } + bool has_addend() const { return has_addend_; } + + string GetCacheKey() const { + return tensorflow::strings::StrCat( + name_, "_", PrimitiveType_Name(scalar_type()), "_", tile_rows(), "_", + tile_cols(), "_", m(), "_", k(), has_addend() ? "_with_addend" : ""); + } + + protected: + explicit GemvConfig(string name, PrimitiveType scalar_type, int64 tile_rows, + int64 tile_cols, int64 m, int64 k, bool has_addend) + : name_(std::move(name)), + scalar_type_(scalar_type), + tile_rows_(tile_rows), + tile_cols_(tile_cols), + m_(m), + k_(k), + has_addend_(has_addend) {} + + private: + string name_; + PrimitiveType scalar_type_; + int64 tile_rows_; + int64 tile_cols_; + int64 m_; + int64 k_; + bool has_addend_; +}; + // Computes a dot product between "[M,K]{0,1} lhs" with a [K,1] vector (the // layout of the vector does not matter). This implementation uses a tiling // scheme to improve performance. @@ -140,38 +199,46 @@ class TileLoader { // TODO(sanjoy): We should investigate if using gather loads and scatter stores // can be used here have the same inner loop for both column-major and row-major // matrix-vector products. 
-class ColumnMajorMatrixVectorProductEmitter { +class ColumnMajorMatrixVectorProductEmitter + : public GemvConfig::User { public: - ColumnMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, - int64 tile_rows, int64 tile_cols, - int64 m, int64 k, llvm::Value* lhs, + class Config : public GemvConfig { + public: + explicit Config(PrimitiveType scalar_type, int64 tile_rows, int64 tile_cols, + int64 m, int64 k, bool has_addend) + : GemvConfig(/*name=*/"col_major_gemv", scalar_type, + /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols, /*m=*/m, + /*k=*/k, /*has_addend=*/has_addend) {} + }; + + ColumnMajorMatrixVectorProductEmitter(const Config& config, llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend, llvm::Value* result, llvm::IRBuilder<>* ir_builder) - : scalar_type_(scalar_type), - tile_rows_(tile_rows), - tile_cols_(tile_cols), - m_(m), - k_(k), + : config_(config), lhs_(lhs), rhs_(rhs), addend_(addend), result_(result), ir_builder_(ir_builder), ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_rows_, ir_builder_, "") { - CHECK(tile_rows_ > 0 && IsPowerOfTwo(static_cast(tile_rows_))); + vsl_(config.scalar_type(), /*vector_size=*/config.tile_rows(), + ir_builder_, "") { + CHECK(tile_rows() > 0 && IsPowerOfTwo(static_cast(tile_rows()))); + CHECK(!has_addend() || addend != nullptr); } void Emit(); + const Config& config() const { return config_; } + private: void EmitOuterLoopBody(llvm::Value* column, int64 column_count, bool is_first_column); TileLoader GetLhsTileLoader(llvm::Value* column_start, int64 column_count) { return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, - /*matrix_size_along_minor_dim=*/m_, + /*matrix_size_along_minor_dim=*/m(), /*major_dim_offset=*/column_start, /*tile_size_along_major_dim=*/column_count); } @@ -195,11 +262,7 @@ class ColumnMajorMatrixVectorProductEmitter { void EmitInnerLoopEpilogue(llvm::Value* current_tile_col, int64 columns, bool is_first_tiled_column); - PrimitiveType scalar_type_; - int64 tile_rows_; - 
int64 tile_cols_; - int64 m_; - int64 k_; + Config config_; llvm::Value* lhs_; llvm::Value* rhs_; llvm::Value* addend_; @@ -223,13 +286,13 @@ void ColumnMajorMatrixVectorProductEmitter::EmitOuterLoopBody( void ColumnMajorMatrixVectorProductEmitter::Emit() { // See the comment on the class declaration for the algorithm used here. - int64 column_remainder = k_ % tile_cols_; - int64 column_limit = k_ - column_remainder; + int64 column_remainder = k() % tile_cols(); + int64 column_limit = k() - column_remainder; ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols_, + /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(), [&](llvm::Value* column, bool is_first_column) { - EmitOuterLoopBody(column, tile_cols_, is_first_column); + EmitOuterLoopBody(column, tile_cols(), is_first_column); }); if (column_remainder != 0) { @@ -241,10 +304,10 @@ void ColumnMajorMatrixVectorProductEmitter::Emit() { void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( TileLoader* lhs_tile_loader, const std::vector& rhs_tile, int64 columns, bool is_first_column) { - int64 row_limit = m_ - (m_ % tile_rows_); + int64 row_limit = m() - (m() % tile_rows()); ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, - /*step=*/tile_rows_, [&](llvm::Value* row) { + /*step=*/tile_rows(), [&](llvm::Value* row) { std::vector lhs_tile = lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row); llvm::Value* accumulator = @@ -260,8 +323,8 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( llvm::Value* current_tile_col, int64 columns, bool is_first_tiled_column) { - int64 row_start = m_ - (m_ % tile_rows_); - if (row_start == m_) { + int64 row_start = m() - (m() % tile_rows()); + if (row_start == m()) { return; } @@ -281,11 +344,11 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( [&](llvm::Value* col, llvm::Value* is_first_scalar_col) { llvm::Value* rhs_element = 
vsl_.LoadScalar(rhs_, col); llvm::Value* total_offset = - ir_builder_->CreateMul(col, ir_builder_->getInt64(m_)); + ir_builder_->CreateMul(col, ir_builder_->getInt64(m())); llvm::Value* lhs_base_pointer = vsl_.ComputeOffsetPointer(lhs_, total_offset); ksl_.For( - "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m_, + "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m(), /*step=*/1, [&](llvm::Value* scalar_row) { llvm::Value* product = vsl_.Mul( vsl_.LoadScalar(lhs_base_pointer, scalar_row), rhs_element); @@ -365,34 +428,42 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( // // We have an inner epilogue loop to deal with the "B" sub-matrix and an outer // epilogue loop to deal with the C,D submatrix. -class RowMajorMatrixVectorProductEmitter { +class RowMajorMatrixVectorProductEmitter + : public GemvConfig::User { public: - RowMajorMatrixVectorProductEmitter(PrimitiveType scalar_type, int64 tile_rows, - int64 tile_cols, int64 m, int64 k, - llvm::Value* lhs, llvm::Value* rhs, - llvm::Value* addend, llvm::Value* result, + class Config : public GemvConfig { + public: + explicit Config(PrimitiveType scalar_type, int64 tile_rows, int64 tile_cols, + int64 m, int64 k, bool has_addend) + : GemvConfig(/*name=*/"row_major_gemv", scalar_type, + /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols, /*m=*/m, + /*k=*/k, /*has_addend=*/has_addend) {} + }; + + RowMajorMatrixVectorProductEmitter(const Config& config, llvm::Value* lhs, + llvm::Value* rhs, llvm::Value* addend, + llvm::Value* result, llvm::IRBuilder<>* ir_builder) - : scalar_type_(scalar_type), - tile_rows_(tile_rows), - tile_cols_(tile_cols), - m_(m), - k_(k), + : config_(config), lhs_(lhs), rhs_(rhs), addend_(addend), result_(result), ir_builder_(ir_builder), ksl_(ir_builder_), - vsl_(scalar_type_, /*vector_size=*/tile_cols_, ir_builder_, "") { - CHECK(tile_cols_ > 0 && IsPowerOfTwo(static_cast(tile_cols_))); + vsl_(scalar_type(), /*vector_size=*/tile_cols(), ir_builder_, "") { + 
CHECK(tile_cols() > 0 && IsPowerOfTwo(static_cast(tile_cols()))); + CHECK(!has_addend() || addend != nullptr); } void Emit(); + const Config& config() const { return config_; } + private: TileLoader GetLhsTileLoader(llvm::Value* row_start, int64 row_count) { return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, - /*matrix_size_along_minor_dim=*/k_, + /*matrix_size_along_minor_dim=*/k(), /*major_dim_offset=*/row_start, /*tile_size_along_major_dim=*/row_count); } @@ -405,11 +476,7 @@ class RowMajorMatrixVectorProductEmitter { void EmitInnerLoopEpilogue(llvm::Value* current_tile_row, int64 rows, std::vector* scalar_accumulators); - PrimitiveType scalar_type_; - int64 tile_rows_; - int64 tile_cols_; - int64 m_; - int64 k_; + Config config_; llvm::Value* lhs_; llvm::Value* rhs_; llvm::Value* addend_; @@ -466,12 +533,12 @@ void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, void RowMajorMatrixVectorProductEmitter::Emit() { // See the comment on the class declaration for the algorithm used here. 
- int64 row_remainder = m_ % tile_rows_; - int64 row_limit = m_ - row_remainder; + int64 row_remainder = m() % tile_rows(); + int64 row_limit = m() - row_remainder; ksl_.For("dot.outer.tiled", - /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows_, - [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows_); }); + /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(), + [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows()); }); if (row_remainder != 0) { EmitOuterLoopBody(ir_builder_->getInt64(row_limit), row_remainder); @@ -481,10 +548,10 @@ void RowMajorMatrixVectorProductEmitter::Emit() { void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( TileLoader* lhs_tile_loader, int64 rows, std::vector* vector_accumulators) { - int64 column_limit = k_ - (k_ % tile_cols_); + int64 column_limit = k() - (k() % tile_cols()); ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, - /*step=*/tile_cols_, [&](llvm::Value* col) { + /*step=*/tile_cols(), [&](llvm::Value* col) { std::vector lhs_tile = lhs_tile_loader->LoadTile(/*minor_dim_offset=*/col); llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); @@ -499,18 +566,18 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue( llvm::Value* current_tile_row, int64 rows, std::vector* scalar_accumulators) { - int64 column_start = k_ - (k_ % tile_cols_); - if (column_start == k_) { + int64 column_start = k() - (k() % tile_cols()); + if (column_start == k()) { return; } for (int r = 0; r < rows; r++) { llvm::Value* total_offset = ir_builder_->CreateMul( ir_builder_->CreateAdd(ir_builder_->getInt64(r), current_tile_row), - ir_builder_->getInt64(k_)); + ir_builder_->getInt64(k())); llvm::Value* lhs_base_pointer = vsl_.ComputeOffsetPointer(lhs_, total_offset); - ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k_, + ksl_.For("dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k(), /*step=*/1, [&](llvm::Value* scalar_col) { 
llvm::Value* product = vsl_.Mul(vsl_.LoadScalar(lhs_base_pointer, scalar_col), @@ -942,47 +1009,39 @@ bool DotOpEmitter::EmitLlvmIrDotIfProfitable() { if (is_column_major_matrix_vector) { VLOG(2) << "Emitting column major matrix-vector multiply with m = " << m << " and k = " << k; - int64 tile_rows = vector_register_element_size; - int64 tile_cols = tiling_factor; - - string kernel_name = tensorflow::strings::StrCat( - "col_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows, - "_", tile_cols, "_", m, "_", k, addend_array_ ? "_with_addend" : ""); + ColumnMajorMatrixVectorProductEmitter::Config config( + /*scalar_type=*/primitive_type, + /*tile_rows=*/vector_register_element_size, /*tile_cols=*/tiling_factor, + /*m=*/m, /*k=*/k, /*has_addend=*/addend_array_ != nullptr); KernelSupportLibrary::EmitAndCallOutlinedKernel( /*enable_fast_math=*/enable_fast_math, - /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name, - lhs_op, rhs_op, + /*optimize_for_size=*/optimize_for_size, ir_builder_, + config.GetCacheKey(), lhs_op, rhs_op, addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op, - [this, tile_rows, tile_cols, m, k, primitive_type]( - llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op, - llvm::Value* result_op) { + [this, config](llvm::Value* lhs_op, llvm::Value* rhs_op, + llvm::Value* addend_op, llvm::Value* result_op) { ColumnMajorMatrixVectorProductEmitter emitter( - primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op, - addend_op, result_op, ir_builder_); + config, lhs_op, rhs_op, addend_op, result_op, ir_builder_); emitter.Emit(); }); } else { VLOG(2) << "Emitting row major matrix-vector multiply with m = " << m << " and k = " << k; - int64 tile_rows = tiling_factor; - int64 tile_cols = vector_register_element_size; - - string kernel_name = tensorflow::strings::StrCat( - "row_major_gemv_", PrimitiveType_Name(primitive_type), "_", tile_rows, - "_", tile_cols, "_", m, "_", k, addend_array_ ? 
"_with_addend" : ""); + RowMajorMatrixVectorProductEmitter::Config config( + /*scalar_type=*/primitive_type, + /*tile_rows=*/tiling_factor, /*tile_cols=*/vector_register_element_size, + /*m=*/m, /*k=*/k, /*has_addend=*/addend_array_ != nullptr); KernelSupportLibrary::EmitAndCallOutlinedKernel( /*enable_fast_math=*/enable_fast_math, - /*optimize_for_size=*/optimize_for_size, ir_builder_, kernel_name, - lhs_op, rhs_op, + /*optimize_for_size=*/optimize_for_size, ir_builder_, + config.GetCacheKey(), lhs_op, rhs_op, addend_array_ ? addend_array_->GetBasePointer() : nullptr, result_op, - [this, tile_rows, tile_cols, m, k, primitive_type]( - llvm::Value* lhs_op, llvm::Value* rhs_op, llvm::Value* addend_op, - llvm::Value* result_op) { + [this, config](llvm::Value* lhs_op, llvm::Value* rhs_op, + llvm::Value* addend_op, llvm::Value* result_op) { RowMajorMatrixVectorProductEmitter emitter( - primitive_type, tile_rows, tile_cols, m, k, lhs_op, rhs_op, - addend_op, result_op, ir_builder_); + config, lhs_op, rhs_op, addend_op, result_op, ir_builder_); emitter.Emit(); }); } -- GitLab From 56502dc77e7ead9c9a4f63bf3405a937307a6f37 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Tue, 22 May 2018 15:01:15 -0700 Subject: [PATCH 019/902] [TF:XLA] Add clarification to the DFS scheduler. PiperOrigin-RevId: 197629355 --- tensorflow/compiler/xla/service/hlo_scheduling.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 6397e46326..29c337746c 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -457,6 +457,13 @@ StatusOr> DFSMemoryScheduler( extra_users[hlo] += extra_users[operand]; total_sizes[hlo] += total_sizes[operand]; } + // total_sizes[hlo] transitively includes the sizes of all nodes that + // lead to it. 
But computation is a DAG, so we are double-counting nodes, + // which can lead to overflows for large programs. + // cumulative_total_size caps the size to prevent overflows. + // NOTE(dimvar): this is quite ugly and should be changed. It's unclear + // why we care about transitive sizes; when scheduling a node, its input + // and output buffers should be all that matters, not its "history". total_sizes[hlo] = std::min(total_sizes[hlo], cumulative_total_size); } CHECK_EQ(extra_users.size(), computation.instruction_count()); -- GitLab From 17272b4d1ccb5c7bd0bc3015c34f8bd769516354 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 15:24:01 -0700 Subject: [PATCH 020/902] Adds a kernel that checks whether vector is zero or not. PiperOrigin-RevId: 197633182 --- .../contrib/lite/kernels/fully_connected.cc | 8 ++----- .../lite/kernels/internal/kernel_utils.cc | 11 +++------ .../internal/optimized/neon_tensor_utils.cc | 24 +++++++++++++++++++ .../internal/optimized/neon_tensor_utils.h | 5 ++++ .../internal/optimized/tensor_utils_impl.h | 4 ++++ .../reference/portable_tensor_utils.cc | 7 ++++++ .../reference/portable_tensor_utils.h | 6 +++++ .../lite/kernels/internal/tensor_utils.h | 3 +++ .../kernels/internal/tensor_utils_test.cc | 19 +++++++++++++++ 9 files changed, 73 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index 3374923e6e..1b942a1910 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -101,6 +101,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { input_size *= input->dims->data[i]; } + TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2); const int batch_size = input_size / filter->dims->data[1]; const int num_units = filter->dims->data[0]; @@ -109,8 +110,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { 
TF_LITE_ENSURE_EQ(context, NumElements(bias), SizeOfDimension(filter, 0)); } - TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2); - // Note that quantized inference requires that all tensors have their // parameters set. This is usually done during quantized training. TfLiteType data_type = input->type; @@ -218,11 +217,8 @@ TfLiteStatus EvalPieQuantized(TfLiteContext* context, TfLiteNode* node, tensor_utils::ZeroVector(output->data.f, batch_size * num_units); } - // TODO(mirkov): change std::minmax_element with a vectorized call. - auto minmax_element = - std::minmax_element(input->data.f, input->data.f + total_input_size); // Save matrix multiplication computation for all zero input. - if (*minmax_element.first == 0.0 && *minmax_element.second == 0.0) { + if (tensor_utils::IsZeroVector(input->data.f, total_input_size)) { tensor_utils::ApplyActivationToVector(output->data.f, batch_size * num_units, params->activation, output->data.f); diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 5f9cfc450d..3bbaaa6a9d 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -57,12 +57,8 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr, tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size, output_ptr_batch); - // TODO(mirkov): change std::minmax_element with a vectorized call. - auto minmax_element = std::minmax_element( - input_ptr_batch, input_ptr_batch + batch_size * input_size); - // Save quantization and matmul computation for all zero input. - if (!(*minmax_element.first == 0.0 && *minmax_element.second == 0.0)) { + if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) { // Quantize input from float to uint8 + quantization params (scaling // factor). 
float unused_min, unused_max; @@ -83,10 +79,9 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr, delete[] scaling_factors; } - minmax_element = std::minmax_element( - hidden_state_ptr_batch, hidden_state_ptr_batch + batch_size * num_units); // Save quantization and matmul computation for all zero input. - if (!(*minmax_element.first == 0.0 && *minmax_element.second == 0.0)) { + if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch, + batch_size * num_units)) { // Quantize hidden_state float unused_min, unused_max; float* scaling_factors = new float[batch_size]; diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc index 08f7cfa5a5..38ad32c734 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -352,6 +352,30 @@ void NeonSub1Vector(const float* vector, int v_size, float* result) { } } +bool NeonIsZeroVector(const float* vector, int v_size) { + // If v_size is not divisible by kFloatWeightsPerNeonLane, we cannot + // use the main vectorized loop, and we need to process sequentially. + // postamble_start shows the start index where this should happen. 
+ const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + const float32x4_t zero_x4_float = vmovq_n_f32(0.0f); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + const float32x4_t i_x4_float = vld1q_f32(vector + v); + uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float); + if (vgetq_lane_u32(cmp_result, 0) == 0) return false; + if (vgetq_lane_u32(cmp_result, 1) == 0) return false; + if (vgetq_lane_u32(cmp_result, 2) == 0) return false; + if (vgetq_lane_u32(cmp_result, 3) == 0) return false; + } + + // Postamble loop + for (int v = postamble_start; v < v_size; ++v) { + if (vector[v] != 0.0) return false; + } + return true; +} + void NeonClipVector(const float* vector, int v_size, float abs_limit, float* result) { // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h index 9e60d0657b..7a5a8fc541 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.h @@ -100,6 +100,11 @@ void ZeroVector(float* vector, int v_size) { float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); } +// Check if all entries of a vector are zero. 
+bool IsZeroVector(const float* vector, int v_size) { + return NEON_OR_PORTABLE(IsZeroVector, vector, v_size); +} + void ClipVector(const float* vector, int v_size, float abs_limit, float* result) { NEON_OR_PORTABLE(ClipVector, vector, v_size, abs_limit, result); diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h b/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h index d570dadd86..f14667090f 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h @@ -127,6 +127,10 @@ void PortableZeroVector(float* vector, int v_size); // Limit a float input f between +abs_limit and -abs_limit. float PortableClip(float f, float abs_limit); +// Check if all entries of a vector are zero. +bool PortableIsZeroVector(const float* vector, int v_size); +bool NeonIsZeroVector(const float* vector, int v_size); + // Symmetric quantizer. void PortableSymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc index 2607adc0c1..cc86729fdd 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -29,6 +29,13 @@ float PortableClip(float f, float abs_limit) { return result; } +bool PortableIsZeroVector(const float* vector, int v_size) { + for (int i = 0; i < v_size; ++i) { + if (*vector++ != 0.0f) return false; + } + return true; +} + void PortableSymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min, float* max, float* scaling_factor) { diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h 
b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h index 1757a9f5e5..d2e1fecd25 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.h @@ -25,6 +25,8 @@ namespace tensor_utils { // Limit a float input f between +abs_limit and -abs_limit. float PortableClip(float f, float abs_limit); +bool PortableIsZeroVector(const float* vector, int v_size); + void PortableSymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min, float* max, float* scaling_factor); @@ -112,6 +114,10 @@ void PortableReductionSumVector(const float* input_vector, float* output_vector, float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); } +bool IsZeroVector(const float* vector, int v_size) { + return PortableIsZeroVector(vector, v_size); +} + void SymmetricQuantizeFloats(const float* values, const int size, int8_t* quantized_values, float* min, float* max, float* scaling_factor) { diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h index e1c9ccd84b..5160e22307 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils.h @@ -23,6 +23,9 @@ namespace tensor_utils { // Limit a float input f between +abs_limit and -abs_limit. float Clip(float f, float abs_limit); +// Checks if all entries of vector are zero. +bool IsZeroVector(const float* vector, int v_size); + // Quantizes a buffer of floating point values using a symmetric quantization // (i.e. linear quantization without an offset) to 8-bit signed integers. 
// It also outputs the range (min, max) of the floating point buffer, and the diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc index 3d8a2eada0..14ee528394 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/tensor_utils_test.cc @@ -32,6 +32,25 @@ TEST(uKernels, ClipTest) { {0.0, -0.5, 1.0, -1.5, 2.0, -2.0, 2.0, -2.0, 2.0, -2.0}))); } +TEST(uKernels, IsZeroTest) { + constexpr int kVectorSize = 21; + static float zeros[kVectorSize] = {0.0}; + EXPECT_TRUE(IsZeroVector(zeros, kVectorSize)); + + static float nonzeros[kVectorSize] = { + 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, 1e-11, 1e-12, + 1e-13, 1e-14, 1e-15, 1e-16, 1e-17, 1e-18, 1e-19, + 1e-20, 1e-21, 1e-22, 1e-23, 1e-24, 1e-25, 1e-26}; + EXPECT_FALSE(IsZeroVector(nonzeros, kVectorSize)); +} + +TEST(uKernels, GeneratedIsZeroTest) { + constexpr int kVectorSize = 39; + std::vector input(kVectorSize); + ZeroVector(input.data(), kVectorSize); + EXPECT_TRUE(IsZeroVector(input.data(), kVectorSize)); +} + TEST(uKernels, SymmetricQuantizeFloatsTest) { constexpr int kVectorSize = 9; static float input[kVectorSize] = {-640, -635.0, -630, 10.0, 2.0, -- GitLab From b7d3d31a78ce90fd9733d67247ae34c694199d19 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 22 May 2018 15:30:02 -0700 Subject: [PATCH 021/902] Remove reservoir sampling from SummaryDbWriter PiperOrigin-RevId: 197634162 --- .../tensorboard/db/summary_db_writer.cc | 153 +++--------------- .../tensorboard/db/summary_db_writer_test.cc | 6 +- 2 files changed, 27 insertions(+), 132 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index 6590d6f7df..d5d8e4100f 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -14,6 +14,8 @@ limitations under the 
License. ==============================================================================*/ #include "tensorflow/contrib/tensorboard/db/summary_db_writer.h" +#include + #include "tensorflow/contrib/tensorboard/db/summary_converter.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -66,14 +68,9 @@ const char* kImagePluginName = "images"; const char* kAudioPluginName = "audio"; const char* kHistogramPluginName = "histograms"; -const int kScalarSlots = 10000; -const int kImageSlots = 10; -const int kAudioSlots = 10; -const int kHistogramSlots = 1; -const int kTensorSlots = 10; - const int64 kReserveMinBytes = 32; const double kReserveMultiplier = 1.5; +const int64 kPreallocateRows = 1000; // Flush is a misnomer because what we're actually doing is having lots // of commits inside any SqliteTransaction that writes potentially @@ -139,22 +136,6 @@ void PatchPluginName(SummaryMetadata* metadata, const char* name) { } } -int GetSlots(const Tensor& t, const SummaryMetadata& metadata) { - if (metadata.plugin_data().plugin_name() == kScalarPluginName) { - return kScalarSlots; - } else if (metadata.plugin_data().plugin_name() == kImagePluginName) { - return kImageSlots; - } else if (metadata.plugin_data().plugin_name() == kAudioPluginName) { - return kAudioSlots; - } else if (metadata.plugin_data().plugin_name() == kHistogramPluginName) { - return kHistogramSlots; - } else if (t.dims() == 0 && t.dtype() != DT_STRING) { - return kScalarSlots; - } else { - return kTensorSlots; - } -} - Status SetDescription(Sqlite* db, int64 id, const StringPiece& markdown) { const char* sql = R"sql( INSERT OR REPLACE INTO Descriptions (id, description) VALUES (?, ?) 
@@ -481,24 +462,6 @@ class RunMetadata { return insert.StepAndReset(); } - Status GetIsWatching(Sqlite* db, bool* is_watching) - SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) { - mutex_lock lock(mu_); - if (experiment_id_ == kAbsent) { - *is_watching = true; - return Status::OK(); - } - const char* sql = R"sql( - SELECT is_watching FROM Experiments WHERE experiment_id = ? - )sql"; - SqliteStatement stmt; - TF_RETURN_IF_ERROR(db->Prepare(sql, &stmt)); - stmt.BindInt(1, experiment_id_); - TF_RETURN_IF_ERROR(stmt.StepOnce()); - *is_watching = stmt.ColumnInt(0) != 0; - return Status::OK(); - } - private: Status InitializeUser(Sqlite* db, uint64 now) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (user_id_ != kAbsent || user_name_.empty()) return Status::OK(); @@ -659,43 +622,15 @@ class RunMetadata { /// \brief Tensor writer for a single series, e.g. Tag. /// -/// This class can be used to write an infinite stream of Tensors to the -/// database in a fixed block of contiguous disk space. This is -/// accomplished using Algorithm R reservoir sampling. -/// -/// The reservoir consists of a fixed number of rows, which are inserted -/// using ZEROBLOB upon receiving the first sample, which is used to -/// predict how big the other ones are likely to be. This is done -/// transactionally in a way that tries to be mindful of other processes -/// that might be trying to access the same DB. -/// -/// Once the reservoir fills up, rows are replaced at random, and writes -/// gradually become no-ops. This allows long training to go fast -/// without configuration. The exception is when someone is actually -/// looking at TensorBoard. When that happens, the "keep last" behavior -/// is turned on and Append() will always result in a write. -/// -/// If no one is watching training, this class still holds on to the -/// most recent "dangling" Tensor, so if Finish() is called, the most -/// recent training state can be written to disk. 
-/// -/// The randomly selected sampling points should be consistent across -/// multiple instances. -/// /// This class is thread safe. class SeriesWriter { public: - SeriesWriter(int64 series, int slots, RunMetadata* meta) - : series_{series}, - slots_{slots}, - meta_{meta}, - rng_{std::mt19937_64::default_seed} { + SeriesWriter(int64 series, RunMetadata* meta) : series_{series}, meta_{meta} { DCHECK(series_ > 0); - DCHECK(slots_ > 0); } Status Append(Sqlite* db, int64 step, uint64 now, double computed_time, - Tensor t) SQLITE_TRANSACTIONS_EXCLUDED(*db) + const Tensor& t) SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); if (rowids_.empty()) { @@ -705,41 +640,20 @@ class SeriesWriter { return s; } } - DCHECK(rowids_.size() == slots_); - int64 rowid; - size_t i = count_; - if (i < slots_) { - rowid = last_rowid_ = rowids_[i]; - } else { - i = rng_() % (i + 1); - if (i < slots_) { - rowid = last_rowid_ = rowids_[i]; - } else { - bool keep_last; - TF_RETURN_IF_ERROR(meta_->GetIsWatching(db, &keep_last)); - if (!keep_last) { - ++count_; - dangling_tensor_.reset(new Tensor(std::move(t))); - dangling_step_ = step; - dangling_computed_time_ = computed_time; - return Status::OK(); - } - rowid = last_rowid_; - } - } + int64 rowid = rowids_.front(); Status s = Write(db, rowid, step, computed_time, t); if (s.ok()) { ++count_; - dangling_tensor_.reset(); } + rowids_.pop_front(); return s; } Status Finish(Sqlite* db) SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); - // Short runs: Delete unused pre-allocated Tensors. - if (count_ < rowids_.size()) { + // Delete unused pre-allocated Tensors. + if (!rowids_.empty()) { SqliteTransaction txn(*db); const char* sql = R"sql( DELETE FROM Tensors WHERE rowid = ? 
@@ -747,19 +661,13 @@ class SeriesWriter { SqliteStatement deleter; TF_RETURN_IF_ERROR(db->Prepare(sql, &deleter)); for (size_t i = count_; i < rowids_.size(); ++i) { - deleter.BindInt(1, rowids_[i]); + deleter.BindInt(1, rowids_.front()); TF_RETURN_IF_ERROR(deleter.StepAndReset()); + rowids_.pop_front(); } TF_RETURN_IF_ERROR(txn.Commit()); rowids_.clear(); } - // Long runs: Make last sample be the very most recent one. - if (dangling_tensor_) { - DCHECK(last_rowid_ != kAbsent); - TF_RETURN_IF_ERROR(Write(db, last_rowid_, dangling_step_, - dangling_computed_time_, *dangling_tensor_)); - dangling_tensor_.reset(); - } return Status::OK(); } @@ -783,7 +691,6 @@ class SeriesWriter { Status Update(Sqlite* db, int64 step, double computed_time, const Tensor& t, const StringPiece& data, int64 rowid) { - // TODO(jart): How can we ensure reservoir fills on replace? const char* sql = R"sql( UPDATE OR REPLACE Tensors @@ -878,7 +785,7 @@ class SeriesWriter { // TODO(jart): Maybe preallocate index pages by setting step. This // is tricky because UPDATE OR REPLACE can have a side // effect of deleting preallocated rows. 
- for (int64 i = 0; i < slots_; ++i) { + for (int64 i = 0; i < kPreallocateRows; ++i) { insert.BindInt(1, series_); insert.BindInt(2, reserved_bytes); TF_RETURN_WITH_CONTEXT_IF_ERROR(insert.StepAndReset(), "i=", i); @@ -902,16 +809,10 @@ class SeriesWriter { mutex mu_; const int64 series_; - const int slots_; RunMetadata* const meta_; - std::mt19937_64 rng_ GUARDED_BY(mu_); uint64 count_ GUARDED_BY(mu_) = 0; - int64 last_rowid_ GUARDED_BY(mu_) = kAbsent; - std::vector rowids_ GUARDED_BY(mu_); + std::deque rowids_ GUARDED_BY(mu_); uint64 unflushed_bytes_ GUARDED_BY(mu_) = 0; - std::unique_ptr dangling_tensor_ GUARDED_BY(mu_); - int64 dangling_step_ GUARDED_BY(mu_) = 0; - double dangling_computed_time_ GUARDED_BY(mu_) = 0.0; TF_DISALLOW_COPY_AND_ASSIGN(SeriesWriter); }; @@ -928,10 +829,10 @@ class RunWriter { explicit RunWriter(RunMetadata* meta) : meta_{meta} {} Status Append(Sqlite* db, int64 tag_id, int64 step, uint64 now, - double computed_time, Tensor t, int slots) + double computed_time, const Tensor& t) SQLITE_TRANSACTIONS_EXCLUDED(*db) LOCKS_EXCLUDED(mu_) { - SeriesWriter* writer = GetSeriesWriter(tag_id, slots); - return writer->Append(db, step, now, computed_time, std::move(t)); + SeriesWriter* writer = GetSeriesWriter(tag_id); + return writer->Append(db, step, now, computed_time, t); } Status Finish(Sqlite* db) SQLITE_TRANSACTIONS_EXCLUDED(*db) @@ -948,11 +849,11 @@ class RunWriter { } private: - SeriesWriter* GetSeriesWriter(int64 tag_id, int slots) LOCKS_EXCLUDED(mu_) { + SeriesWriter* GetSeriesWriter(int64 tag_id) LOCKS_EXCLUDED(mu_) { mutex_lock sl(mu_); auto spot = series_writers_.find(tag_id); if (spot == series_writers_.end()) { - SeriesWriter* writer = new SeriesWriter(tag_id, slots, meta_); + SeriesWriter* writer = new SeriesWriter(tag_id, meta_); series_writers_[tag_id].reset(writer); return writer; } else { @@ -1082,8 +983,7 @@ class SummaryDbWriter : public SummaryWriterInterface { TF_RETURN_IF_ERROR( meta_.GetTagId(db_, now, computed_time, 
tag, &tag_id, metadata)); TF_RETURN_WITH_CONTEXT_IF_ERROR( - run_.Append(db_, tag_id, step, now, computed_time, t, - GetSlots(t, metadata)), + run_.Append(db_, tag_id, step, now, computed_time, t), meta_.user_name(), "/", meta_.experiment_name(), "/", meta_.run_name(), "/", tag, "@", step); return Status::OK(); @@ -1155,8 +1055,7 @@ class SummaryDbWriter : public SummaryWriterInterface { int64 tag_id; TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(), &tag_id, s->metadata())); - return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t, - GetSlots(t, s->metadata())); + return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t); } // TODO(jart): Refactor Summary -> Tensor logic into separate file. @@ -1169,8 +1068,7 @@ class SummaryDbWriter : public SummaryWriterInterface { PatchPluginName(s->mutable_metadata(), kScalarPluginName); TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(), &tag_id, s->metadata())); - return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), - std::move(t), kScalarSlots); + return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t); } Status MigrateHistogram(const Event* e, Summary::Value* s, uint64 now) { @@ -1195,8 +1093,7 @@ class SummaryDbWriter : public SummaryWriterInterface { PatchPluginName(s->mutable_metadata(), kHistogramPluginName); TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(), &tag_id, s->metadata())); - return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), - std::move(t), kHistogramSlots); + return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t); } Status MigrateImage(const Event* e, Summary::Value* s, uint64 now) { @@ -1210,8 +1107,7 @@ class SummaryDbWriter : public SummaryWriterInterface { PatchPluginName(s->mutable_metadata(), kImagePluginName); TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(), &tag_id, s->metadata())); - return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), - 
std::move(t), kImageSlots); + return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t); } Status MigrateAudio(const Event* e, Summary::Value* s, uint64 now) { @@ -1224,8 +1120,7 @@ class SummaryDbWriter : public SummaryWriterInterface { PatchPluginName(s->mutable_metadata(), kAudioPluginName); TF_RETURN_IF_ERROR(meta_.GetTagId(db_, now, e->wall_time(), s->tag(), &tag_id, s->metadata())); - return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), - std::move(t), kAudioSlots); + return run_.Append(db_, tag_id, e->step(), now, e->wall_time(), t); } Env* const env_; diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index 29b8063218..c34b6763a1 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -139,7 +139,7 @@ TEST_F(SummaryDbWriterTest, TensorsWritten_RowsGetInitialized) { ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Experiments")); ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Runs")); ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags")); - ASSERT_EQ(10000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); + ASSERT_EQ(1000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); int64 user_id = QueryInt("SELECT user_id FROM Users"); int64 experiment_id = QueryInt("SELECT experiment_id FROM Experiments"); @@ -188,7 +188,7 @@ TEST_F(SummaryDbWriterTest, EmptyParentNames_NoParentsCreated) { ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Experiments")); ASSERT_EQ(0LL, QueryInt("SELECT COUNT(*) FROM Runs")); ASSERT_EQ(1LL, QueryInt("SELECT COUNT(*) FROM Tags")); - ASSERT_EQ(10000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); + ASSERT_EQ(1000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); } TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { @@ -205,7 +205,7 @@ TEST_F(SummaryDbWriterTest, WriteEvent_Scalar) { TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); TF_ASSERT_OK(writer_->Flush()); 
ASSERT_EQ(2LL, QueryInt("SELECT COUNT(*) FROM Tags")); - ASSERT_EQ(20000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); + ASSERT_EQ(2000LL, QueryInt("SELECT COUNT(*) FROM Tensors")); int64 tag1_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'π'"); int64 tag2_id = QueryInt("SELECT tag_id FROM Tags WHERE tag_name = 'φ'"); EXPECT_GT(tag1_id, 0LL); -- GitLab From 7f75fc526898c4c030b5c6f30deb331fcff7b70c Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 22 May 2018 15:51:17 -0700 Subject: [PATCH 022/902] Always append the trailing slash when look up or insert a directory path in the stat cache. PiperOrigin-RevId: 197637482 --- .../core/platform/cloud/gcs_file_system.cc | 3 ++- .../core/platform/cloud/gcs_file_system_test.cc | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index a7be527c13..8307758936 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -1103,7 +1103,8 @@ Status GcsFileSystem::FolderExists(const string& dirname, bool* result) { } }; GcsFileStat stat; - Status s = stat_cache_->LookupOrCompute(dirname, &stat, compute_func); + Status s = stat_cache_->LookupOrCompute(MaybeAppendSlash(dirname), &stat, + compute_func); if (s.ok()) { *result = stat.base.is_directory; return Status::OK(); diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index bb4ace65a9..3f73b238ad 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -1107,7 +1107,7 @@ TEST(GcsFileSystemTest, FileExists_StatCache) { "\"updated\": \"2016-04-29T23:15:24.896Z\"}")), new FakeHttpRequest( "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" - "path%2Fsubfolder?fields=size%2Cgeneration%2Cupdated\n" + 
"path%2Fsubfolder%2F?fields=size%2Cgeneration%2Cupdated\n" "Auth Token: fake_token\n" "Timeouts: 5 1 10\n", "", errors::NotFound("404"), 404), @@ -1133,7 +1133,7 @@ TEST(GcsFileSystemTest, FileExists_StatCache) { // HTTP requests. for (int i = 0; i < 10; i++) { TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt")); - TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder")); + TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder/")); } } @@ -1932,6 +1932,14 @@ TEST(GcsFileSystemTest, RenameFile_Object) { "Range: 0-15\n" "Timeouts: 5 1 20\n", "76543210"), + // IsDirectory is checking whether there are children objects. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?" + "fields=items%2Fname%2CnextPageToken&prefix=path%2Fsrc.txt%2F" + "&maxResults=1\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + "{}"), // Copying to the new location. new FakeHttpRequest( "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" @@ -2318,7 +2326,7 @@ TEST(GcsFileSystemTest, Stat_Cache) { "\"updated\": \"2016-04-29T23:15:24.896Z\"}")), new FakeHttpRequest( "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" - "subfolder?fields=size%2Cgeneration%2Cupdated\n" + "subfolder%2F?fields=size%2Cgeneration%2Cupdated\n" "Auth Token: fake_token\n" "Timeouts: 5 1 10\n", "", errors::NotFound("404"), 404), @@ -2348,7 +2356,7 @@ TEST(GcsFileSystemTest, Stat_Cache) { EXPECT_EQ(1010, stat.length); EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); - TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder/", &stat)); EXPECT_EQ(0, stat.length); EXPECT_EQ(0, stat.mtime_nsec); EXPECT_TRUE(stat.is_directory); -- GitLab From 86aedb620a3a9de73b4c6e2d24763ff22aa45d03 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 22 May 2018 16:03:16 -0700 Subject: [PATCH 023/902] Expose the new collective reduce and broacast ops as non-public python interface functions. Note that they are not yet fully implemented; this change is to facilitate further development. PiperOrigin-RevId: 197639372 --- .../api_def_CollectiveBcastRecv.pbtxt | 2 +- .../api_def_CollectiveBcastSend.pbtxt | 2 +- .../base_api/api_def_CollectiveReduce.pbtxt | 2 +- .../api_def_CollectiveBcastRecv.pbtxt | 6 + .../api_def_CollectiveBcastSend.pbtxt | 6 + .../python_api/api_def_CollectiveReduce.pbtxt | 6 + tensorflow/python/BUILD | 35 ++++- tensorflow/python/ops/collective_ops.py | 133 ++++++++++++++++++ tensorflow/python/ops/collective_ops_test.py | 80 +++++++++++ 9 files changed, 268 insertions(+), 4 deletions(-) create mode 100644 tensorflow/core/api_def/python_api/api_def_CollectiveBcastRecv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CollectiveBcastSend.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CollectiveReduce.pbtxt create mode 100644 tensorflow/python/ops/collective_ops.py create mode 100644 tensorflow/python/ops/collective_ops_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveBcastRecv.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveBcastRecv.pbtxt index 88049bca36..988bf0a0f8 100644 --- a/tensorflow/core/api_def/base_api/api_def_CollectiveBcastRecv.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_CollectiveBcastRecv.pbtxt @@ -1,5 +1,5 @@ op { graph_op_name: "CollectiveBcastRecv" - visibility: SKIP summary: "Receives a tensor value broadcast from another device." 
+ visibility: HIDDEN } diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveBcastSend.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveBcastSend.pbtxt index 7ff70f5b17..d212f6dce7 100644 --- a/tensorflow/core/api_def/base_api/api_def_CollectiveBcastSend.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_CollectiveBcastSend.pbtxt @@ -1,5 +1,5 @@ op { graph_op_name: "CollectiveBcastSend" - visibility: SKIP summary: "Broadcasts a tensor value to one or more other devices." + visibility: HIDDEN } diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveReduce.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveReduce.pbtxt index 10d9771d46..fdd9443ba5 100644 --- a/tensorflow/core/api_def/base_api/api_def_CollectiveReduce.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_CollectiveReduce.pbtxt @@ -1,5 +1,5 @@ op { graph_op_name: "CollectiveReduce" - visibility: SKIP summary: "Mutually reduces multiple tensors of identical type and shape." + visibility: HIDDEN } diff --git a/tensorflow/core/api_def/python_api/api_def_CollectiveBcastRecv.pbtxt b/tensorflow/core/api_def/python_api/api_def_CollectiveBcastRecv.pbtxt new file mode 100644 index 0000000000..78034ccffd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CollectiveBcastRecv.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "CollectiveBcastRecv" + endpoint { + name: "collective.broadcast_recv" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_CollectiveBcastSend.pbtxt b/tensorflow/core/api_def/python_api/api_def_CollectiveBcastSend.pbtxt new file mode 100644 index 0000000000..9d6b2f83fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CollectiveBcastSend.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "CollectiveBcastSend" + endpoint { + name: "collective.broadcast_send" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_CollectiveReduce.pbtxt b/tensorflow/core/api_def/python_api/api_def_CollectiveReduce.pbtxt new file mode 100644 
index 0000000000..27ae8a833a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CollectiveReduce.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "CollectiveReduce" + endpoint { + name: "collective.all_reduce" + } +} diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7201e12c50..539e5d310e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -79,6 +79,7 @@ py_library( ":check_ops", ":client", ":client_testlib", + ":collective_ops", ":confusion_matrix", ":control_flow_ops", ":cudnn_rnn_ops_gen", @@ -1435,6 +1436,14 @@ tf_gen_op_wrapper_private_py( ], ) +tf_gen_op_wrapper_private_py( + name = "collective_ops_gen", + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/core:collective_ops_op_lib", + ], +) + tf_gen_op_wrapper_private_py( name = "control_flow_ops_gen", visibility = [ @@ -1736,9 +1745,33 @@ py_test( ], ) +py_library( + name = "collective_ops", + srcs = ["ops/collective_ops.py"], + srcs_version = "PY2AND3", + deps = [ + ":collective_ops_gen", + ":framework_for_generated_wrappers", + ], +) + +py_test( + name = "collective_ops_test", + size = "small", + srcs = ["ops/collective_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":collective_ops", + ":framework_for_generated_wrappers", + "//third_party/py/numpy", + ], +) + py_library( name = "control_flow_grad", - srcs = ["ops/control_flow_grad.py"], + srcs = + ["ops/control_flow_grad.py"], srcs_version = "PY2AND3", deps = [ ":control_flow_ops", diff --git a/tensorflow/python/ops/collective_ops.py b/tensorflow/python/ops/collective_ops.py new file mode 100644 index 0000000000..a05fd15eca --- /dev/null +++ b/tensorflow/python/ops/collective_ops.py @@ -0,0 +1,133 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow collective Ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import device +from tensorflow.python.ops import gen_collective_ops + + +def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op, + subdiv_offsets=(0)): + """Reduces tensors collectively, across devices. + + Args: + t: the tensor to be reduced. + group_size: the total number of tensors to be collectively reduced. + Each must reside on a different device. + group_key: an integer identifying the group of devices. + instance_key: an integer identifying the participating group of Ops. + merge_op: string naming the binary Op to be applied to compute each + partial reduction. + final_op: string naming the unary Op to be applied to each fully + reduced value. Can be 'Id' for no operation. + subdiv_offsets: a list of integer offsets into the tensor at which each + independent subdivision should begin. Use [0] if no subdivision should + be done. + + Returns: + An Op implementing the distributed reduction. + + Raises: + ValueError: if any of the input parameter constraints are not met. 
+ """ + if not device.canonical_name(t.device): + raise ValueError('Device assignment required for collective ops') + if group_size <= 1: + raise ValueError('Parameter group_size to add_reduce must be at least 2.') + return gen_collective_ops.collective_reduce(t, + group_size=group_size, + group_key=group_key, + instance_key=instance_key, + merge_op=merge_op, + final_op=final_op, + subdiv_offsets=subdiv_offsets) + + +def broadcast_send(t, shape, dtype, group_size, group_key, instance_key): + """Broadcasts one tensor to a group of others, across devices. + + Args: + t: the tensor to be sent. + shape: the shape of the tensor being sent, which must agree with t. + dtype: the type of the tensor being sent, which must agree with t. + group_size: one plus the number of receiving tensors, i.e. the total + number of devices participating. Each tensor must reside on a + different device. + group_key: an integer identifying the group of devices. + instance_key: an integer identifying the participating group of Ops. + + Returns: + An Op implementing the distributed broadcast send. + + Raises: + ValueError: if any of the input parameter constraints are not met. + + Note that the shape and dtype arguments appear redundant since they + should be obtainable from t. There are two reasons for including + them. First, the shape and type of tensors passed via broadcast must + be known ahead of time in their most specific form so that the receive + side can allocate memory for the operation and shape/type inference can + carry forward from there. Including the same declarations on the + send side clarifies a commitment already made. Secondly, having nearly + identical use syntax for send and receive sides may simplify tool-driven + generation of broadcast. 
+ """ + if not device.canonical_name(t.device): + raise ValueError('Device assignment required for collective ops') + if group_size <= 1: + raise ValueError( + 'Parameter group_size to broadcast_send must be at least 2.') + if t.shape != shape: + raise ValueError( + 'Shape of broadcast_send tensor not equal to delcared shape') + if t.dtype != dtype: + raise ValueError( + 'Type of broadcast_send tensor not equal to declared type') + return gen_collective_ops.collective_bcast_send(t, + shape=shape, + group_size=group_size, + group_key=group_key, + instance_key=instance_key) + + +def broadcast_recv(shape, dtype, group_size, group_key, instance_key): + """Receives a broadcast tensor, across devices. + + Args: + shape: Shape of the tensor to be received. + dtype: Type of the tensor to be received. + group_size: one plus the number of receiving tensors, i.e. the total + number of devices participating. Each tensor must reside on a + different device. + group_key: an integer identifying the group of devices. + instance_key: an integer identifying the participating group of Ops. + + Returns: + An Op implementing the broadcast receive. + + Raises: + ValueError: if any of the input parameter constraints are not met. + """ + if group_size <= 1: + raise ValueError( + 'Parameter group_size to broadcast_send must be at least 2.') + return gen_collective_ops.collective_bcast_recv(shape=shape, + T=dtype, + group_size=group_size, + group_key=group_key, + instance_key=instance_key) diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py new file mode 100644 index 0000000000..8e16cffdf4 --- /dev/null +++ b/tensorflow/python/ops/collective_ops_test.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Collective Operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import collective_ops +from tensorflow.python.platform import test + +# TODO(tucker): Make these ops work in eager mode. b/79776476 + + +class CollectiveOpTest(test.TestCase): + + def _testCollectiveReduce(self, t0, t1, expected): + group_key = 1 + instance_key = 1 + with self.test_session( + config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess: + with ops.device('/CPU:0'): + in0 = constant_op.constant(t0) + colred0 = collective_ops.all_reduce(in0, 2, group_key, instance_key, + 'Add', 'Div', [0]) + with ops.device('/CPU:1'): + in1 = constant_op.constant(t1) + colred1 = collective_ops.all_reduce(in1, 2, group_key, instance_key, + 'Add', 'Div', [0]) + run_options = config_pb2.RunOptions() + run_options.experimental.collective_graph_key = 1 + results = sess.run([colred0, colred1], options=run_options) + self.assertAllClose(results[0], expected, rtol=1e-5, atol=1e-5) + self.assertAllClose(results[1], expected, rtol=1e-5, atol=1e-5) + + def testCollectiveReduce(self): + self._testCollectiveReduce([0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1], + [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3], + [0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2]) + + def 
_testCollectiveBroadcast(self, t0): + group_key = 1 + instance_key = 1 + with self.test_session( + config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess: + with ops.device('/CPU:0'): + in0 = constant_op.constant(t0) + out0 = collective_ops.broadcast_send(in0, in0.shape, in0.dtype, + 2, group_key, instance_key) + with ops.device('/CPU:1'): + c1 = constant_op.constant(t0) + out1 = collective_ops.broadcast_recv(c1.shape, c1.dtype, + 2, group_key, instance_key) + run_options = config_pb2.RunOptions() + run_options.experimental.collective_graph_key = 1 + results = sess.run([out0, out1], options=run_options) + self.assertAllClose(results[0], t0, rtol=1e-5, atol=1e-5) + self.assertAllClose(results[1], t0, rtol=1e-5, atol=1e-5) + + def testCollectiveBroadcast(self): + self._testCollectiveBroadcast([0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1]) + + +if __name__ == '__main__': + test.main() -- GitLab From 12ea31462d02326f14475516f8290d6e224ee70d Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Tue, 22 May 2018 16:31:32 -0700 Subject: [PATCH 024/902] Fix the LSTM test in TFLite. 
PiperOrigin-RevId: 197643581 --- tensorflow/contrib/lite/build_def.bzl | 3 ++- .../contrib/lite/testing/tflite_driver.cc | 20 +++++++++++++++++++ .../contrib/lite/testing/tflite_driver.h | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 9bfc0a0fbe..c8820ab29b 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -212,12 +212,13 @@ def generated_test_models(): "global_batch_norm", "greater", "greater_equal", - "l2_pool", "l2norm", + "l2_pool", "less", "less_equal", "local_response_norm", "log_softmax", + "lstm", "max_pool", "maximum", "mean", diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 58fe5bd6e4..1f07068aee 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -143,6 +143,7 @@ void TfLiteDriver::AllocateTensors() { Invalidate("Failed to allocate tensors"); return; } + ResetLSTMStateTensors(); must_allocate_tensors_ = false; } } @@ -281,5 +282,24 @@ bool TfLiteDriver::CheckResults() { return success; } +void TfLiteDriver::ResetLSTMStateTensors() { + // This is a workaround for initializing state tensors for LSTM. + // TODO(ycling): Refactoring and find a better way to initialize state + // tensors. Maybe write the reset instructions into the test data. + for (auto node_index : interpreter_->execution_plan()) { + const auto& node_and_reg = interpreter_->node_and_registration(node_index); + const auto& node = node_and_reg->first; + const auto& registration = node_and_reg->second; + if (registration.builtin_code == tflite::BuiltinOperator_LSTM && + node.outputs->size >= 2) { + // The first 2 outputs of LSTM are state tensors. 
+ for (int i = 0; i < 2; ++i) { + int node_index = node.outputs->data[i]; + ResetTensor(node_index); + } + } + } +} + } // namespace testing } // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h index 02b7de1534..5493ba3631 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.h +++ b/tensorflow/contrib/lite/testing/tflite_driver.h @@ -48,6 +48,8 @@ class TfLiteDriver : public TestRunner { string ReadOutput(int id) override { return "no-op"; } private: + void ResetLSTMStateTensors(); + class Expectation; bool use_nnapi_ = false; -- GitLab From 09620a1fd3f28cc23f6627884927b6098717355e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 22 May 2018 16:36:22 -0700 Subject: [PATCH 025/902] [TF:XLA] Bump open source llvm revision to r333002 PiperOrigin-RevId: 197644290 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f05fac9f3b..758e87b09d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -453,11 +453,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/710be84a0052819c99b54fb403985e94903a53c5.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/710be84a0052819c99b54fb403985e94903a53c5.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/638915a37f90f26599941977846408864f70ab35.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/638915a37f90f26599941977846408864f70ab35.tar.gz", ], - sha256 = "eecbe2ca5c8161976bb8b088e7fbcdd6eca64c8008a07023def16ecb5f670529", - strip_prefix = "llvm-710be84a0052819c99b54fb403985e94903a53c5", + sha256 = "aae3cacefa318cef030b4ca1e81ee9906752bbd89013cf9d47e156b5ad04b3a5", + strip_prefix = "llvm-638915a37f90f26599941977846408864f70ab35", build_file = 
clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From 25ad31da87086a88d1d14ed5db8731bb9fc90787 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 17:16:44 -0700 Subject: [PATCH 026/902] Add convolution with NHWC layout to stream executor. PiperOrigin-RevId: 197650067 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 70 ++------------------- tensorflow/stream_executor/dnn.h | 2 + 2 files changed, 7 insertions(+), 65 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 5ece80e551..c2c0c283b3 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -457,6 +457,9 @@ class ScopedFilterDescriptor { case dnn::FilterLayout::kOutputInputYX: format = CUDNN_TENSOR_NCHW; break; + case dnn::FilterLayout::kOutputYXInput: + format = CUDNN_TENSOR_NHWC; + break; case dnn::FilterLayout::kOutputInputYX4: format = CUDNN_TENSOR_NCHW_VECT_C; break; @@ -3046,53 +3049,6 @@ bool CudnnSupport::DoFusedConvolve( output_profile_result); } -namespace { -// NOTE(keveman): Temporary data layout transformation until cuDNN supports -// kBatchYXDepth for backward pass. This function allocates temporary memory, -// lays out the source data into the temporary but in the kBatchDepthXY -// layout, and returns the temporary memory. The caller is responsible for -// deallocating the temporary. Since the allocation is done using Stream's -// AllocateTemporaryMemory, a later BlockHostUntilDone could be used for -// deallocation. -// -// transform_scratch is populated with a legitimate temporary allocation iff -// the original output data needs to be transformed. 
-template -DeviceMemory MaybeTransformLayout( - Stream* stream, const CudnnHandle& cudnn, - dnn::BatchDescriptor* output_descriptor, - DeviceMemory backward_output_data, - std::unique_ptr>* transform_scratch) { - if (output_descriptor->layout() == dnn::DataLayout::kBatchDepthYX) { - return backward_output_data; - } - CHECK(output_descriptor->layout() == dnn::DataLayout::kBatchYXDepth); - *transform_scratch = - stream->AllocateTemporaryArray(backward_output_data.ElementCount()) - .ConsumeValueOrDie(); - dnn::BatchDescriptor transformed_output_descriptor; - transformed_output_descriptor.CloneFrom(*output_descriptor); - transformed_output_descriptor.set_layout(dnn::DataLayout::kBatchDepthYX); - cudnnDataType_t cudnn_type = GetCudnnDataType(); - ScopedTensorDescriptor orig_out_back_nd(*output_descriptor, cudnn_type); - ScopedTensorDescriptor transformed_out_back_nd(transformed_output_descriptor, - cudnn_type); - - float alpha = 1.0f; - float beta = 0.0f; - auto status = cudnnTransformTensor( - cudnn.handle(), &alpha, orig_out_back_nd.handle(), - backward_output_data.opaque(), &beta, transformed_out_back_nd.handle(), - (*transform_scratch)->mutable_device_memory()->opaque()); - - if (status != CUDNN_STATUS_SUCCESS) { - LOG(FATAL) << "Failed to transform the data layout."; - } - output_descriptor->set_layout(dnn::DataLayout::kBatchDepthYX); - return (*transform_scratch)->device_memory(); -} -} // namespace - bool CudnnSupport::DoTransformTensor(Stream* stream, const dnn::BatchDescriptor& input_desc, dnn::DataType input_type, @@ -3124,7 +3080,7 @@ template bool CudnnSupport::DoConvolveBackwardDataImpl( Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, - const dnn::BatchDescriptor& output_descriptor_in, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::BatchDescriptor& input_descriptor, @@ -3145,14 +3101,6 @@ bool 
CudnnSupport::DoConvolveBackwardDataImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); - // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. - dnn::BatchDescriptor output_descriptor; - output_descriptor.CloneFrom(output_descriptor_in); - std::unique_ptr> transform_scratch; - backward_output_data = - MaybeTransformLayout(stream, cudnn, &output_descriptor, - backward_output_data, &transform_scratch); - ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type); ScopedTensorDescriptor in_back_nd(input_descriptor, cudnn_type); ScopedFilterDescriptor filter(filter_descriptor, cudnn_type); @@ -3386,7 +3334,7 @@ template bool CudnnSupport::DoConvolveBackwardFilterImpl( Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory& input_data, - const dnn::BatchDescriptor& output_descriptor_in, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::FilterDescriptor& filter_descriptor, @@ -3407,14 +3355,6 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( auto cudnn = cudnn_->GetHandle(parent_, stream); - // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. 
- dnn::BatchDescriptor output_descriptor; - output_descriptor.CloneFrom(output_descriptor_in); - std::unique_ptr> transform_scratch; - backward_output_data = - MaybeTransformLayout(stream, cudnn, &output_descriptor, - backward_output_data, &transform_scratch); - ScopedTensorDescriptor out_back_nd(output_descriptor, cudnn_type); ScopedTensorDescriptor input_nd(input_descriptor, cudnn_type); ScopedFilterDescriptor filter(filter_descriptor, cudnn_type); diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 38abc66079..3df5365c23 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -349,6 +349,8 @@ enum class FilterLayout : int64 { kOutputInputYX = 0, // cuDNN's default filter layout, laid out as: // (major) output feature maps >> input feature maps >> // rows >> columns (minor). + kOutputYXInput, // major to minor: + // (output features, row, columns, input features) kOutputInputYX4, // laid out the same as kOutputInputYX but each element is a // vector of 4 feature maps. kInputYXOutput, // Same as dist_belief's default filter layout. -- GitLab From 2cf614fcab604f1b3d6271004f681f7d635eda5f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 17:17:17 -0700 Subject: [PATCH 027/902] Fix an issue when mixing sparse and dense features in the same model. 
PiperOrigin-RevId: 197650140 --- .../learner/batch/ordinal_split_handler.py | 9 +- .../training/functions/gbdt_batch_test.py | 167 +++++++++++++++--- 2 files changed, 150 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index 23f4021c34..ecb2f60187 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -78,6 +78,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops + _BIAS_FEATURE_ID = -1 # Pattern to remove all non alpha numeric from a string. _PATTERN = re.compile(r"[\W_]+") @@ -409,10 +410,14 @@ def _specialize_sparse_split(is_multi_dimentional): gen_quantile_ops.quantile_accumulator_get_buckets( quantile_accumulator_handles=[quantile_accumulator_handle], stamp_token=stamp_token)) + # quantile_accumulator_get_buckets returns a list of results per handle that + # we pass to it. In this case we're getting results just for one resource. + are_splits_ready = are_splits_ready[0] + buckets = buckets[0] # After we receive the boundaries from previous iteration we can flush # the quantile accumulator. 
- with ops.control_dependencies([buckets[0]]): + with ops.control_dependencies([buckets]): flush_quantiles = gen_quantile_ops.quantile_accumulator_flush( quantile_accumulator_handle=quantile_accumulator_handle, stamp_token=stamp_token, @@ -433,7 +438,7 @@ def _specialize_sparse_split(is_multi_dimentional): partition_ids, gains, split_infos = ( split_handler_ops.build_sparse_inequality_splits( num_minibatches=num_minibatches, - bucket_boundaries=buckets[0], + bucket_boundaries=buckets, partition_ids=partition_ids, bucket_ids=bucket_ids, gradients=gradients, diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index f9c22283b7..289fb195db 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -31,7 +31,6 @@ from tensorflow.python.feature_column import feature_column_lib as core_feature_ from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib from tensorflow.contrib.learn.python.learn.estimators import model_fn - from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -97,8 +96,8 @@ class GbdtTest(test_util.TensorFlowTestCase): array_ops.zeros([2], dtypes.int64)) features["sparse_int"] = sparse_tensor.SparseTensor( array_ops.zeros([2, 2], dtypes.int64), - array_ops.zeros([2], dtypes.int64), - array_ops.zeros([2], dtypes.int64)) + array_ops.zeros([2], dtypes.int64), array_ops.zeros([2], + dtypes.int64)) (fc_names, dense_floats, sparse_float_indices, sparse_float_values, sparse_float_shapes, sparse_int_indices, sparse_int_values, sparse_int_shapes) = ( @@ -139,8 +138,8 @@ class GbdtTest(test_util.TensorFlowTestCase): array_ops.zeros([2], dtypes.int64)) features["sparse_categorical"] = 
sparse_tensor.SparseTensor( array_ops.zeros([2, 2], dtypes.int64), - array_ops.zeros( - [2], dtypes.string), array_ops.zeros([2], dtypes.int64)) + array_ops.zeros([2], dtypes.string), array_ops.zeros([2], + dtypes.int64)) feature_columns = set() feature_columns.add(layers.real_valued_column("dense_float")) feature_columns.add( @@ -235,7 +234,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -316,6 +316,113 @@ class GbdtTest(test_util.TensorFlowTestCase): }""" self.assertProtoEquals(expected_tree, output.trees[0]) + def testTrainFnChiefSparseAndDense(self): + """Tests the train function with sparse and dense features.""" + with self.test_session() as sess: + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config="", name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + features["dense_float"] = array_ops.ones([4, 1], dtypes.float32) + features["sparse_float"] = sparse_tensor.SparseTensor( + array_ops.zeros([2, 2], dtypes.int64), + array_ops.zeros([2], dtypes.float32), + array_ops.constant([4, 1], dtypes.int64)) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = 
array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + predictions_dict = { + "predictions": predictions, + "predictions_no_dropout": predictions, + "partition_ids": partition_ids, + "ensemble_stamp": ensemble_stamp, + "num_trees": 12, + } + + labels = array_ops.ones([4, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 0) + self.assertEquals(len(output.tree_weights), 0) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. 
+ sess.run([ensemble_stamp.assign_add(1)]) + + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 1) + self.assertAllClose(output.tree_weights, [0.1]) + self.assertEquals(stamp_token.eval(), 2) + expected_tree = """ + nodes { + sparse_float_binary_split_default_right { + split{ + left_id: 1 + right_id: 2 + } + } + node_metadata { + gain: 1.125 + } + } + nodes { + leaf { + vector { + value: 1.0 + } + } + } + nodes { + leaf { + vector { + value: -0.5 + } + } + }""" + self.assertProtoEquals(expected_tree, output.trees[0]) + def testTrainFnChiefScalingNumberOfExamples(self): """Tests the train function running on chief without bias centering.""" with self.test_session() as sess: @@ -339,7 +446,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=num_examples_fn, learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -442,7 +550,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -513,7 +622,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -576,7 +686,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, 
learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -622,7 +733,8 @@ class GbdtTest(test_util.TensorFlowTestCase): with self.test_session() as sess: # Create ensemble with one bias node. ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() - text_format.Merge(""" + text_format.Merge( + """ trees { nodes { leaf { @@ -659,14 +771,15 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=1, features=features) + logits_dimension=1, + features=features) # Create predict op. mode = model_fn.ModeKeys.EVAL predictions_dict = sess.run(gbdt_model.predict(mode)) self.assertEquals(predictions_dict["ensemble_stamp"], 3) - self.assertAllClose(predictions_dict["predictions"], [[0.25], [0.25], - [0.25], [0.25]]) + self.assertAllClose(predictions_dict["predictions"], + [[0.25], [0.25], [0.25], [0.25]]) self.assertAllClose(predictions_dict["partition_ids"], [0, 0, 0, 0]) def testTrainFnMulticlassFullHessian(self): @@ -698,7 +811,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=5, features=features) + logits_dimension=5, + features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -801,7 +915,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=5, features=features) + logits_dimension=5, + features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -893,8 +1008,8 @@ class GbdtTest(test_util.TensorFlowTestCase): learner_config.constraints.max_tree_depth = 1 learner_config.constraints.min_node_weight = 0 features = { - 
"dense_float": array_ops.constant( - [[1.0], [1.5], [2.0]], dtypes.float32), + "dense_float": + array_ops.constant([[1.0], [1.5], [2.0]], dtypes.float32), } gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( @@ -904,7 +1019,8 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - logits_dimension=5, features=features) + logits_dimension=5, + features=features) batch_size = 3 predictions = array_ops.constant( @@ -986,7 +1102,8 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertAllClose( 0.893284678459, output.trees[0].nodes[2].leaf.sparse_vector.value[0], - atol=1e-4, rtol=1e-4) + atol=1e-4, + rtol=1e-4) def testTrainFnChiefFeatureSelectionReachedLimitNoGoodSplit(self): """Tests the train function running on chief with feature selection.""" @@ -1230,9 +1347,9 @@ class GbdtTest(test_util.TensorFlowTestCase): tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() tree = tree_ensemble_config.trees.add() - _set_float_split(tree.nodes.add() - .sparse_float_binary_split_default_right.split, 2, 4.0, - 1, 2) + _set_float_split( + tree.nodes.add().sparse_float_binary_split_default_right.split, 2, + 4.0, 1, 2) _append_to_leaf(tree.nodes.add().leaf, 0, 0.5) _append_to_leaf(tree.nodes.add().leaf, 1, 1.2) tree_ensemble_config.tree_weights.append(1.0) @@ -1241,7 +1358,8 @@ class GbdtTest(test_util.TensorFlowTestCase): metadata.num_layers_grown = 1 tree_ensemble_config = tree_ensemble_config.SerializeToString() ensemble_handle = model_ops.tree_ensemble_variable( - stamp_token=0, tree_ensemble_config=tree_ensemble_config, + stamp_token=0, + tree_ensemble_config=tree_ensemble_config, name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 @@ -1333,5 +1451,6 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertEquals(output.growing_metadata.num_layers_attempted, 2) + if __name__ == "__main__": 
googletest.main() -- GitLab From 9db974d895201b8ab2e9e34e142917db898da823 Mon Sep 17 00:00:00 2001 From: Bjarke Hammersholt Roune Date: Tue, 22 May 2018 18:22:37 -0700 Subject: [PATCH 028/902] Add interfaces to Compiler that are sufficient to implement a backend-independent offline auto-tuner for backend configurations of ops as well as automatic testing across candidate configurations. Also add a simple Scanner class that is handy for parsing things. PiperOrigin-RevId: 197657512 --- tensorflow/compiler/xla/BUILD | 31 +++ tensorflow/compiler/xla/scanner.cc | 197 ++++++++++++++++++ tensorflow/compiler/xla/scanner.h | 102 +++++++++ tensorflow/compiler/xla/scanner_test.cc | 121 +++++++++++ tensorflow/compiler/xla/service/compiler.cc | 6 + tensorflow/compiler/xla/service/compiler.h | 11 + tensorflow/compiler/xla/service/hlo_runner.cc | 22 +- tensorflow/compiler/xla/service/hlo_runner.h | 12 +- tensorflow/compiler/xla/util.h | 6 + 9 files changed, 496 insertions(+), 12 deletions(-) create mode 100644 tensorflow/compiler/xla/scanner.cc create mode 100644 tensorflow/compiler/xla/scanner.h create mode 100644 tensorflow/compiler/xla/scanner_test.cc diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index c6deb959a5..c08db7e3fb 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -499,6 +499,37 @@ cc_library( ], ) +cc_library( + name = "scanner", + srcs = ["scanner.cc"], + hdrs = ["scanner.h"], + visibility = [":internal"], + deps = [ + ":status", + ":status_macros", + ":types", + ":util", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + +tf_cc_test( + name = "scanner_test", + srcs = ["scanner_test.cc"], + deps = [ + ":scanner", + ":status", + ":status_macros", + ":test", + ":types", + ":util", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "text_literal_reader", srcs = ["text_literal_reader.cc"], diff --git 
a/tensorflow/compiler/xla/scanner.cc b/tensorflow/compiler/xla/scanner.cc new file mode 100644 index 0000000000..f23a1417fc --- /dev/null +++ b/tensorflow/compiler/xla/scanner.cc @@ -0,0 +1,197 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/scanner.h" + +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace xla { +namespace { + +// Returns true if c can be the first character in an identifier. +bool IsIdentifierFirst(int c) { return std::isalpha(c) || c == '_'; } + +// Returns true if c can be the non-first character in an identifier. +bool IsIdentifierLater(int c) { return std::isalnum(c) || c == '_'; } + +// Returns true if str is an identifier. 
+bool IsIdentifier(tensorflow::StringPiece str) { + if (str.empty() || !IsIdentifierFirst(str[0])) { + return false; + } + for (int64 i = 1; i < str.size(); ++i) { + if (!IsIdentifierLater(str[i])) { + return false; + } + } + return true; +} + +} // namespace + +Scanner::Scanner(tensorflow::StringPiece input) : input_(input), position_(0) {} + +bool Scanner::ok() const { return status().ok(); } + +const Status& Scanner::status() const { return status_; } + +bool Scanner::Match(tensorflow::StringPiece match) { + SkipWhitespace(); + if (ok() && position_ + match.size() <= input_.size() && + std::equal(match.begin(), match.end(), input_.begin() + position_)) { + SkipChars(match.size()); + + VLOG(10) << "Matched \"" << match << "\""; + return true; + } else { + return false; + } +} + +void Scanner::Expect(tensorflow::StringPiece expect) { + if (!Match(expect)) { + SetError(tensorflow::strings::StrCat("Expected \"", expect, "\".")); + } +} + +bool Scanner::MatchReadIdentifier(string* identifier) { + SkipWhitespace(); + if (!IsIdentifierFirst(PeekChar())) { + return false; + } + identifier->clear(); + do { + *identifier += ReadChar(); + } while (IsIdentifierLater(PeekChar())); + + VLOG(10) << "Read identifier " << *identifier; + CHECK(IsIdentifier(*identifier)); + return true; +} + +string Scanner::ReadIdentifier() { + string identifier; + if (!MatchReadIdentifier(&identifier)) { + SetError("Expected identifier."); + } + return identifier; +} + +void Scanner::ExpectIdentifier(tensorflow::StringPiece expect) { + CHECK(IsIdentifier(expect)); + + string identifier; + if (!MatchReadIdentifier(&identifier)) { + SetError(tensorflow::strings::StrCat("Expected identifier ", expect, ".")); + } + if (identifier != expect) { + SetError(tensorflow::strings::StrCat("Expected identifier ", expect, + ", but got ", identifier, ".")); + } +} + +// Matches the end of the input, also known as End Of File (EOF).
+bool Scanner::MatchEof() { + SkipWhitespace(); + return PeekChar() == EOF; +} + +void Scanner::ExpectEof() { + if (!MatchEof()) { + SetError("Expected end of input."); + } +} + +// Reads a vector of the format "(1, 2, 3)". +std::vector Scanner::ReadIntVector() { + std::vector ints; + Expect("("); + if (!Match(")") && ok()) { + ints.push_back(ReadInt()); + while (Match(",")) { + ints.push_back(ReadInt()); + } + Expect(")"); + } + + VLOG(10) << "Read int vector with " << ints.size() << " elements."; + return ints; +} + +int64 Scanner::ReadInt() { + bool negative = Match("-"); + if (!PeekDigit()) { + SetError("Expected integer."); + return 0; + } + + int64 integer = 0; + do { + integer = (ReadChar() - '0') + integer * 10; + } while (PeekDigit()); + integer = negative ? -integer : integer; + + VLOG(10) << "Read integer " << integer; + return integer; +} + +void Scanner::SkipWhitespace() { + while (PeekWhitespace()) { + SkipChars(1); + } +} + +int Scanner::ReadChar() { + int c = PeekChar(); + SkipChars(1); + + VLOG(20) << "Read char " << c; + return c; +} + +int Scanner::PeekChar() const { + return ok() && position_ < input_.size() ? input_[position_] : EOF; +} + +bool Scanner::PeekDigit() const { + // Do not use std::isdigit since it depends on the locale and we do not + // handle any digits beyond 0-9. + const char c = PeekChar(); + return '0' <= c && c <= '9'; +} + +bool Scanner::PeekAlnum() const { return std::isalnum(PeekChar()); } + +bool Scanner::PeekWhitespace() const { return std::isspace(PeekChar()); } + +void Scanner::SkipChars(int64 count) { + CHECK_GE(count, 0); + position_ += count; +} + +void Scanner::SetError(string error_message) { + // Only the first error is recorded since any later errors will likely be a + // consequence of the first error. 
+ if (ok()) { + status_ = InvalidArgumentStrCat(std::move(error_message)); + position_ = input_.size(); + VLOG(10) << "Failed scanner with error " << status_.ToString(); + } else { + VLOG(10) << "Error on already failed scanner is " << error_message; + } +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/scanner.h b/tensorflow/compiler/xla/scanner.h new file mode 100644 index 0000000000..86b04ae7f9 --- /dev/null +++ b/tensorflow/compiler/xla/scanner.h @@ -0,0 +1,102 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SCANNER_H_ +#define TENSORFLOW_COMPILER_XLA_SCANNER_H_ + +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { + +// Simple class for parsing data. The concepts for the interface are: +// +// Match(x): Returns true if x is next in the input and in that case skips +// past it. Otherwise returns false. +// +// Expect(x): As Match(x), but requires x to be next in the input. +// +// MatchReadX(x): Returns true if an X is next in the input and in that case +// skips past it and assigns it to x. Otherwise returns false. +// +// ReadX(): As MatchReadX(), but requires an X to be next in the input and +// returns it.
+// +// PeekX(): Returns true if an X is next in the input and does not skip +// past it either way. +// +// All of these, except those that work on individual characters, skip +// whitespace. +// +// If a requirement is not met, the error is available in status(). A Scanner +// with a failed status() will behave as though the rest of the input is EOF and +// will not record further errors after that point. +class Scanner { + public: + Scanner(tensorflow::StringPiece input); + + bool ok() const; + const Status& status() const; + + bool Match(tensorflow::StringPiece match); + void Expect(tensorflow::StringPiece expect); + + // Match-reads an identifier. An identifier starts with an alphabetic + // character or an underscore followed by any number of characters that are + // each alphanumeric or underscore. + bool MatchReadIdentifier(string* identifier); + + string ReadIdentifier(); + + void ExpectIdentifier(tensorflow::StringPiece expect); + + // Matches the end of the input, also known as End Of File (EOF). + bool MatchEof(); + void ExpectEof(); + + // Reads a vector of the format "(1, 4, 5)". + std::vector ReadIntVector(); + + // Reads an integer. Can start with a minus but not a plus. + int64 ReadInt(); + + // Keeps skipping until encountering a non-whitespace character. + void SkipWhitespace(); + + // *** Below here are character-level methods that do not skip whitespace. + + int ReadChar(); + int PeekChar() const; + bool PeekDigit() const; + bool PeekAlnum() const; + bool PeekWhitespace() const; + + // Skip past the next count characters. + void SkipChars(int64 count); + + private: + // Sets a failed status. The input is in effect replaced with EOF after + // this. Only the first error is recorded. 
+ void SetError(string error_message); + + const tensorflow::StringPiece input_; + int64 position_; + Status status_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SCANNER_H_ diff --git a/tensorflow/compiler/xla/scanner_test.cc b/tensorflow/compiler/xla/scanner_test.cc new file mode 100644 index 0000000000..d392a240b1 --- /dev/null +++ b/tensorflow/compiler/xla/scanner_test.cc @@ -0,0 +1,121 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/scanner.h" + +#include + +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/env.h" + +namespace xla { +namespace { + +TEST(Scanner, Empty) { + Scanner scanner(""); + + EXPECT_EQ(scanner.PeekChar(), EOF); + EXPECT_TRUE(scanner.MatchEof()); + EXPECT_TRUE(scanner.Match("")); + EXPECT_FALSE(scanner.Match("1")); + EXPECT_TRUE(scanner.ok()); +} + +TEST(Scanner, Prefix) { + Scanner scanner("1234 5"); + EXPECT_FALSE(scanner.MatchEof()); + EXPECT_TRUE(scanner.Match("12")); + EXPECT_TRUE(scanner.Match("34 ")); + EXPECT_FALSE(scanner.MatchEof()); + EXPECT_FALSE(scanner.Match("5 ")); + EXPECT_TRUE(scanner.Match("5")); + EXPECT_TRUE(scanner.MatchEof()); +} + +TEST(Scanner, Whitespace) { + Scanner scanner(" \t\n\r 1\t2\n\n"); + + EXPECT_FALSE(scanner.Match(" ")); + EXPECT_TRUE(scanner.Match("1")); + EXPECT_TRUE(scanner.Match("2")); + EXPECT_TRUE(scanner.MatchEof()); + EXPECT_TRUE(scanner.ok()); +} + +TEST(Scanner, Fail) { + Scanner scanner("153 4q"); + + scanner.Expect("5"); + EXPECT_FALSE(scanner.ok()); + EXPECT_FALSE(scanner.status().ok()); + + EXPECT_TRUE(scanner.MatchEof()); +} + +TEST(Scanner, Identifier) { + Scanner scanner("1 q1 _1_ _1a= qqb"); + + string identifier = "foo"; + EXPECT_FALSE(scanner.MatchReadIdentifier(&identifier)); + EXPECT_EQ(identifier, "foo"); + scanner.Match("1"); + + EXPECT_TRUE(scanner.MatchReadIdentifier(&identifier)); + EXPECT_EQ(identifier, "q1"); + + scanner.ExpectIdentifier("_1_"); + EXPECT_TRUE(scanner.ok()); + + scanner.ExpectIdentifier("_1a"); + EXPECT_TRUE(scanner.ok()); + + // The = after _1a is not included in the identifier. + scanner.Expect("="); + + // The expected identifier matches a prefix but is not the full identifier in + // the input. 
+ EXPECT_TRUE(scanner.ok()); + scanner.ExpectIdentifier("qq"); + EXPECT_FALSE(scanner.ok()); +} + +TEST(Scanner, Int) { + Scanner scanner("1_2 3% -1 124345 -363 0 -0"); + EXPECT_EQ(1, scanner.ReadInt()); + EXPECT_TRUE(scanner.Match("_")); + EXPECT_EQ(2, scanner.ReadInt()); + EXPECT_EQ(3, scanner.ReadInt()); + EXPECT_TRUE(scanner.Match("%")); + EXPECT_EQ(-1, scanner.ReadInt()); + EXPECT_EQ(124345, scanner.ReadInt()); + EXPECT_EQ(-363, scanner.ReadInt()); + EXPECT_EQ(0, scanner.ReadInt()); + EXPECT_EQ(0, scanner.ReadInt()); + EXPECT_TRUE(scanner.MatchEof()); +} + +TEST(Scanner, IntVector) { + Scanner scanner("()(0) (-1,2) ( 3 , 4 )"); + EXPECT_THAT(scanner.ReadIntVector(), testing::IsEmpty()); + EXPECT_THAT(scanner.ReadIntVector(), testing::ElementsAre(0)); + EXPECT_THAT(scanner.ReadIntVector(), testing::ElementsAre(-1, 2)); + EXPECT_THAT(scanner.ReadIntVector(), testing::ElementsAre(3, 4)); + EXPECT_TRUE(scanner.MatchEof()); + EXPECT_TRUE(scanner.ok()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index 8b01a6c4b5..31f84e88f8 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -28,6 +28,12 @@ namespace xla { /* static */ tensorflow::mutex Compiler::platform_compiler_mutex_( tensorflow::LINKER_INITIALIZED); +std::vector Compiler::ComputeBackendConfigs( + const HloInstruction& hlo, se::StreamExecutor* executor) const { + CHECK(executor != nullptr); + return {}; +} + /* static */ std::map* Compiler::GetPlatformCompilerFactories() { static auto* r = new std::map; diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index a4b59d1ba9..c39db58b78 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -24,9 +24,11 @@ limitations under the License. 
#include #include #include +#include #include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" @@ -153,6 +155,15 @@ class Compiler { std::vector> stream_exec, DeviceMemoryAllocator* device_allocator) = 0; + // Returns the backend configurations that the backend will consider for the + // given HLO. Returns no configurations if the backend does not support + // configurations for the given HLO. + // + // The stream executor is passed in to provide information about the hardware + // that the backend configurations would be targeting. + virtual std::vector ComputeBackendConfigs( + const HloInstruction& hlo, se::StreamExecutor* executor) const; + // Compiles the HLO module for ahead-of-time execution. This is intended for // use in static compilation. 
virtual StatusOr>> diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 2a601ec3d1..7127adf456 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -94,8 +94,8 @@ HloRunner::~HloRunner() {} StatusOr> HloRunner::Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, - bool run_hlo_passes) { + const tensorflow::gtl::ArraySlice arguments, bool run_hlo_passes, + ExecutionProfile* profile) { TF_ASSIGN_OR_RETURN(std::unique_ptr executable, CreateExecutable(std::move(module), run_hlo_passes)); se::Stream stream(backend().default_stream_executor()); @@ -127,7 +127,7 @@ StatusOr> HloRunner::Execute( TF_ASSIGN_OR_RETURN( ScopedShapedBuffer result, executable->ExecuteOnStreamWrapper( - &service_run_options, /*profile=*/nullptr, argument_buffer_ptrs)); + &service_run_options, /*profile=*/profile, argument_buffer_ptrs)); auto result_literal = backend().transfer_manager()->TransferLiteralFromDevice( stream.parent(), result); @@ -141,6 +141,18 @@ StatusOr> HloRunner::Execute( return result_literal; } +StatusOr> HloRunner::Execute( + std::unique_ptr module, + const tensorflow::gtl::ArraySlice> arguments, + bool run_hlo_passes, ExecutionProfile* profile) { + // Construct a vector of plain pointers for the arguments. 
+ std::vector argument_pointers; + c_transform( + arguments, std::back_inserter(argument_pointers), + [](const std::unique_ptr& literal) { return literal.get(); }); + return Execute(std::move(module), argument_pointers, run_hlo_passes, profile); +} + StatusOr>> HloRunner::ExecuteReplicated( std::unique_ptr module, const ReplicatedExecuteOptions& options) { @@ -295,4 +307,8 @@ Backend& HloRunner::backend() { return *backend_; } +const Backend& HloRunner::backend() const { + return const_cast(this)->backend(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index 53f7c6fe4a..aa62659ac3 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -110,19 +110,12 @@ class HloRunner { StatusOr> Execute( std::unique_ptr module, const tensorflow::gtl::ArraySlice arguments, - bool run_hlo_passes = true); + bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); StatusOr> Execute( std::unique_ptr module, const tensorflow::gtl::ArraySlice> arguments, - bool run_hlo_passes = true) { - // Construct a vector of plain pointers for the arguments. - std::vector argument_pointers; - c_transform( - arguments, std::back_inserter(argument_pointers), - [](const std::unique_ptr& literal) { return literal.get(); }); - return Execute(std::move(module), argument_pointers, run_hlo_passes); - } + bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); // Executes a given HLO module into a set of replicas, and returns a map // with the replica number as key, and the corresponding returned literal as @@ -137,6 +130,7 @@ class HloRunner { // This creates the backend lazily so it's possible to instantiate an // HloRunner in a program without any backends linked in. Backend& backend(); + const Backend& backend() const; private: // Creates an executable object given an HLO module. 
If run_hlo_passes is diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index be33bd6dd1..b7309885b4 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -218,6 +218,12 @@ Status Unavailable(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2); // Passed-varargs variant of the InvalidArgument factory above. Status InvalidArgumentV(const char* format, va_list args); +template +Status InvalidArgumentStrCat(Args&&... concat) { + return InvalidArgument( + "%s", tensorflow::strings::StrCat(std::forward(concat)...).c_str()); +} + template Status UnimplementedStrCat(Args&&... concat) { return Unimplemented( -- GitLab From bab1cf5d49a4ab8c5ea8930612a9ba1d5b0d5ef3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 22 May 2018 19:07:13 -0700 Subject: [PATCH 029/902] Internal Change PiperOrigin-RevId: 197661636 --- tensorflow/python/debug/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 183994ddaa..16ae74a19f 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -1003,6 +1003,7 @@ cuda_py_test( "no_oss", # Test flaky due to port collisions. "no_windows", "noasan", # Times out due to size of test (b/73731462). + "optonly", # Test flaky (b/80130873) "oss_serial", ], ) -- GitLab From 69895ff3b6910238b677477d4ad1cc6cf05121c5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 22 May 2018 21:57:14 -0700 Subject: [PATCH 030/902] Use the right attributes when creating placeholder nodes. 
PiperOrigin-RevId: 197673355 --- .../core/grappler/optimizers/function_optimizer.cc | 3 +++ tensorflow/core/grappler/utils/functions.cc | 5 ++++- tensorflow/core/grappler/utils/functions_test.cc | 12 ++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 611d871eea..fa228c68a1 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -610,6 +610,9 @@ Status InlineFunction(const NodeDef& func_node, const FunctionDef& func, // Turn input placeholders into identity nodes. CHECK_EQ(0, func_body_node.input_size()); func_body_node.set_op("Identity"); + (*func_body_node.mutable_attr())["T"] = func_body_node.attr().at("dtype"); + func_body_node.mutable_attr()->erase("dtype"); + func_body_node.mutable_attr()->erase("shape"); int input_idx = input_placeholders_idx[func_body_node.name()]; func_body_node.add_input( strings::StrCat(func_inputs->name(), ":", input_idx)); diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 5a5dc47fa0..d64cb49715 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" @@ -525,7 +526,9 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, NodeDef* placeholder = function_body.add_node(); placeholder->set_name(input.name()); placeholder->set_op("Placeholder"); - (*placeholder->mutable_attr())["T"].set_type(input_data_type); + (*placeholder->mutable_attr())["dtype"].set_type(input_data_type); + (*placeholder->mutable_attr())["shape"].mutable_shape()->set_unknown_rank( + true); InputArgExpansion input_expansion{/*input_name=*/input.name(), /*data_type=*/input_data_type, diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 302f02dd39..8c3cc70351 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -256,7 +256,7 @@ TEST_F(FunctionsTest, FromSimpleFunctionDef) { for (const NodeDef &node : item.function_body().node()) { if (node.name() == "x" && count++) { EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type()); EXPECT_EQ(0, node.input_size()); } else if (node.name() == "two" && count++) { EXPECT_EQ("Const", node.op()); @@ -333,7 +333,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithMultiOutputNodes) { if (node.name() == "x" || node.name() == "y" || node.name() == "dz") { count++; EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type()); EXPECT_EQ(0, node.input_size()); } else if (node.name() == "rx" && count++) { EXPECT_EQ("BroadcastGradientArgs", node.op()); @@ -402,7 +402,7 @@ TEST_F(FunctionsTest, 
FromFunctionDefWithNestedFuncs) { if (node.name() == "x" || node.name() == "y") { count++; EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type()); EXPECT_EQ(0, node.input_size()); } else if (node.name() == "a0" && count++) { EXPECT_EQ("Swap", node.op()); @@ -465,7 +465,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithOutputMappings) { for (const NodeDef &node : item.function_body().node()) { if (node.name() == "in" && count++) { EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type()); EXPECT_EQ(0, node.input_size()); } else if (node.name() == "Linear_func" && count++) { EXPECT_EQ("Identity", node.op()); @@ -517,9 +517,9 @@ TEST_F(FunctionsTest, FromFunctionDefWithInputForwarding) { count++; EXPECT_EQ("Placeholder", node.op()); if (node.name() == "arg3") { - EXPECT_EQ(DT_INT32, node.attr().at("T").type()); + EXPECT_EQ(DT_INT32, node.attr().at("dtype").type()); } else { - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(DT_FLOAT, node.attr().at("dtype").type()); } } EXPECT_EQ(5, count); -- GitLab From 0c420d1036d0951a270654b43e4eab451bb956c7 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 23 May 2018 05:19:00 -0700 Subject: [PATCH 031/902] [TF:XLA:GPU] Relax test tolerance due to flakiness. 
PiperOrigin-RevId: 197708758 --- tensorflow/compiler/tests/jit_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py index 4b0043b6b4..6e0db54b7a 100644 --- a/tensorflow/compiler/tests/jit_test.py +++ b/tensorflow/compiler/tests/jit_test.py @@ -125,7 +125,7 @@ class JitLaunchTest(test.TestCase): for (x, y) in zip(compiled, direct): self.assertAllClose(x, y, rtol=1e-1) else: - self.assertAllClose(compiled, direct) + self.assertAllClose(compiled, direct, rtol=1e-2) def testNoOutputs(self): with session_lib.Session() as sess: -- GitLab From d1f44e1c60d38cc36bc438b59338c3a4eecf0615 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 23 May 2018 06:45:12 -0700 Subject: [PATCH 032/902] Add a method XlaTensor:ReleaseShapedBuffer() to relinquish the shaped buffer owned by an XlaTensor. Add an equality operator for xla::ShapeIndexView. PiperOrigin-RevId: 197716313 --- tensorflow/compiler/jit/xla_device_context.cc | 14 ++++++++++---- tensorflow/compiler/jit/xla_tensor.cc | 8 +++++--- tensorflow/compiler/jit/xla_tensor.h | 11 +++++++---- tensorflow/compiler/xla/shape_util.cc | 17 +++++++++++++++++ tensorflow/compiler/xla/shape_util.h | 3 +++ tensorflow/core/framework/tensor.h | 1 + 6 files changed, 43 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index ff30b62bad..c7648347a8 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -60,10 +60,11 @@ Status XlaTransferManager::TransferLiteralToDevice( const Tensor& host_tensor, Tensor* device_tensor) const { xla::Literal literal; TF_RETURN_IF_ERROR(HostTensorToLiteral(host_tensor, &literal)); - VLOG(1) << "Transfer to device as literal: " << literal.ToString(); const xla::ShapedBuffer& shaped_buffer = XlaTensor::FromTensor(device_tensor)->shaped_buffer(); + VLOG(1) << "Transfer to 
device as literal: " << literal.ToString() << " " + << shaped_buffer.ToString(); return transfer_manager_->TransferLiteralToDevice(stream_->parent(), literal, shaped_buffer); } @@ -76,7 +77,8 @@ Status XlaTransferManager::TransferLiteralFromDevice( TF_ASSIGN_OR_RETURN(std::unique_ptr literal, transfer_manager_->TransferLiteralFromDevice( stream_->parent(), shaped_buffer)); - VLOG(1) << "Transfer from device as literal: " << literal->ToString(); + VLOG(1) << "Transfer from device as literal: " << literal->ToString() << " " + << shaped_buffer.ToString(); Tensor tensor; TF_RETURN_IF_ERROR( LiteralToHostTensor(*literal, host_tensor->dtype(), &tensor)); @@ -98,7 +100,9 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, << " " << reinterpret_cast( device_tensor->tensor_data().data()) - << " " << cpu_tensor->NumElements(); + << " " << cpu_tensor->NumElements() << " " + << cpu_tensor->shape().DebugString() << " " + << device_tensor->shape().DebugString(); void* src_ptr = const_cast(DMAHelper::base(cpu_tensor)); const int64 total_bytes = cpu_tensor->TotalBytes(); @@ -165,7 +169,9 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor, device_tensor->tensor_data().data()) << " " << reinterpret_cast(cpu_tensor->tensor_data().data()) - << device_tensor->NumElements(); + << " " << device_tensor->NumElements() << " " + << cpu_tensor->shape().DebugString() << " " + << device_tensor->shape().DebugString(); const int64 total_bytes = cpu_tensor->TotalBytes(); se::DeviceMemoryBase dev_src_ptr = diff --git a/tensorflow/compiler/jit/xla_tensor.cc b/tensorflow/compiler/jit/xla_tensor.cc index a7211c9c7e..3c44c4ae6d 100644 --- a/tensorflow/compiler/jit/xla_tensor.cc +++ b/tensorflow/compiler/jit/xla_tensor.cc @@ -18,7 +18,7 @@ limitations under the License. 
namespace tensorflow { -/*static*/ XlaTensor* XlaTensor::FromTensor(Tensor* tensor) { +/*static*/ XlaTensor* XlaTensor::FromTensor(const Tensor* tensor) { if (tensor->NumElements() == 0) { return nullptr; } @@ -27,8 +27,8 @@ namespace tensorflow { return xla_tensor; } -/*static*/ const XlaTensor* XlaTensor::FromTensor(const Tensor* tensor) { - return FromTensor(const_cast(tensor)); +/*static*/ bool XlaTensor::RefCountIsOne(const Tensor& tensor) { + return tensor.RefCountIsOne(); } /*static*/ se::DeviceMemoryBase XlaTensor::DeviceMemoryFromTensor( @@ -67,6 +67,8 @@ Status XlaTensor::AllocateShapedBuffer(DataType dtype, const TensorShape& shape, index_to_buffer.second = buffer.Forget(); } + VLOG(4) << shaped_buffer.ToString(); + set_shaped_buffer(std::move(shaped_buffer)); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_tensor.h b/tensorflow/compiler/jit/xla_tensor.h index 6b29c82ec1..c54001a999 100644 --- a/tensorflow/compiler/jit/xla_tensor.h +++ b/tensorflow/compiler/jit/xla_tensor.h @@ -34,10 +34,9 @@ class XlaTensor { public: // Downcast from a Tensor to an XlaTensor. Return nullptr if the downcast // fails. - static XlaTensor* FromTensor(Tensor* tensor); - // Downcast from a Tensor to an XlaTensor. Return nullptr if the downcast - // fails. - static const XlaTensor* FromTensor(const Tensor* tensor); + static XlaTensor* FromTensor(const Tensor* tensor); + + static bool RefCountIsOne(const Tensor& tensor); // Create a DeviceMemoryBase from a Tensor. The Tensor can be an XlaTensor, in // which case the returned value is shaped_buffer()->root_buffer(), or a @@ -62,6 +61,10 @@ class XlaTensor { CHECK(has_shaped_buffer()); return *shaped_buffer_; } + xla::ShapedBuffer& shaped_buffer() { + CHECK(has_shaped_buffer()); + return *shaped_buffer_; + } // Mutates the XlaTensor to set the ShapedBuffer. 
void set_shaped_buffer(xla::ScopedShapedBuffer shaped_buffer) { shaped_buffer_ = diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 7a897f6f8f..2cdee30340 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -55,6 +55,23 @@ string ShapeIndexView::ToString() const { "}"); } +bool ShapeIndexView::operator==(const ShapeIndexView& other) const { + if (size() != other.size()) { + return false; + } + for (auto it = begin(), other_it = other.begin(); it != end(); + ++it, ++other_it) { + if (*it != *other_it) { + return false; + } + } + return true; +} + +bool ShapeIndexView::operator!=(const ShapeIndexView& other) const { + return !(*this == other); +} + std::ostream& operator<<(std::ostream& out, const ShapeIndex& shape_index) { out << shape_index.ToString(); return out; diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index cb8bf5a2b9..73e014805f 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -132,6 +132,9 @@ class ShapeIndexView { return ShapeIndexView(new_begin, end_); } + bool operator==(const ShapeIndexView& other) const; + bool operator!=(const ShapeIndexView& other) const; + string ToString() const; private: diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 58fbced606..d2f2609d3b 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -484,6 +484,7 @@ class Tensor { friend class TensorTestHelper; // For access to set_shape friend class OpKernelContext; // For access to RefCountIsOne(). friend class ScopedAllocator; // For access to buf_. + friend class XlaTensor; // For access to RefCountIsOne(). friend class XlaTensorBuffer; // For access to the private constructor taking // the buffer template -- GitLab From 7a82d0fd10901f4b59f38e838a24a04df8305f73 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 23 May 2018 09:16:52 -0700 Subject: [PATCH 033/902] Support batch size > 1 in L2Normalization 8 bit quantized implementations. PiperOrigin-RevId: 197736184 --- .../internal/optimized/optimized_ops.h | 39 ++++++++++-------- .../internal/reference/reference_ops.h | 41 ++++++++++--------- .../contrib/lite/kernels/l2norm_test.cc | 30 +++++++++++++- 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 6e5ceec85e..1b4660ef4f 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2353,24 +2353,27 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); - TFLITE_DCHECK_EQ(outer_size, 1); - int32 square_l2_norm = 0; - for (int i = 0; i < depth; i++) { - int32 diff = input_data[i] - input_zero_point; - square_l2_norm += diff * diff; - } - int32 inv_l2norm_multiplier; - int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, - &inv_l2norm_shift); - - for (int i = 0; i < depth; i++) { - int32 diff = input_data[i] - input_zero_point; - int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( - 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); - int32 unclamped_output_val = 128 + rescaled_diff; - int32 output_val = std::min(255, std::max(0, unclamped_output_val)); - output_data[i] = static_cast(output_val); + for (int i = 0; i < outer_size; ++i) { + int32 square_l2_norm = 0; + for (int c = 0; c < depth; c++) { + int32 diff = input_data[c] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + 
int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); + + for (int c = 0; c < depth; c++) { + int32 diff = *input_data - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + *output_data = static_cast(output_val); + ++input_data; + ++output_data; + } } } diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index a56fc0635b..cd4af48bee 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -895,25 +895,28 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, const Dims<4>& output_dims) { const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); const int outer_size = MatchingFlatSizeSkipDim(input_dims, 0, output_dims); - TFLITE_DCHECK_EQ(outer_size, 1); - int32 square_l2_norm = 0; - for (int i = 0; i < depth; i++) { - int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point; - square_l2_norm += diff * diff; - } - int32 inv_l2norm_multiplier; - int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, - &inv_l2norm_shift); - - for (int i = 0; i < depth; i++) { - int32 diff = input_data[Offset(input_dims, i, 0, 0, 0)] - input_zero_point; - int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( - 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); - int32 unclamped_output_val = 128 + rescaled_diff; - int32 output_val = std::min(255, std::max(0, unclamped_output_val)); - output_data[Offset(output_dims, i, 0, 0, 0)] = - static_cast(output_val); + for (int i = 0; i < outer_size; ++i) { + int32 
square_l2_norm = 0; + for (int c = 0; c < depth; c++) { + int32 diff = + input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); + + for (int c = 0; c < depth; c++) { + int32 diff = + input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + output_data[Offset(output_dims, c, i, 0, 0)] = + static_cast(output_val); + } } } diff --git a/tensorflow/contrib/lite/kernels/l2norm_test.cc b/tensorflow/contrib/lite/kernels/l2norm_test.cc index 11cc666bad..070ed60040 100644 --- a/tensorflow/contrib/lite/kernels/l2norm_test.cc +++ b/tensorflow/contrib/lite/kernels/l2norm_test.cc @@ -67,7 +67,7 @@ class L2NormOpModel : public SingleOpModel { int output_; }; -TEST(L2NormOpTest, SimpleTest) { +TEST(L2NormOpTest, SimpleFloatTest) { L2NormOpModel m({1, 1, 1, 6}, TensorType_FLOAT32, ActivationFunctionType_NONE); m.SetInput({-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); @@ -76,7 +76,7 @@ TEST(L2NormOpTest, SimpleTest) { ElementsAreArray({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); } -TEST(L2NormOpTest, MultipleBatchesTest) { +TEST(L2NormOpTest, MultipleBatchFloatTest) { L2NormOpModel m({3, 1, 1, 6}, TensorType_FLOAT32, ActivationFunctionType_NONE); m.SetInput({ @@ -105,6 +105,32 @@ TEST(L2NormOpTest, SimpleUint8Test) { ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}, 0.1))); } +TEST(L2NormOpTest, MultipleBatchUint8Test) { + L2NormOpModel m({3, 1, 1, 6}, TensorType_UINT8, ActivationFunctionType_NONE); + + m.QuantizeAndPopulate(m.input(), + { + -1.1, 0.6, 0.7, 1.2, -0.7, 0.1, // batch 1 + -1.1, 0.6, 0.7, 1.2, -0.7, 0.1, // batch 2 + -1.1, 0.6, 0.7, 1.2, 
-0.7, 0.1, // batch 3 + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({ + 58, 166, 173, 205, 83, 134, // batch 1 + 58, 166, 173, 205, 83, 134, // batch 2 + 58, 166, 173, 205, 83, 134, // batch 3 + })); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 1 + -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 2 + -0.55, 0.3, 0.35, 0.6, -0.35, 0.05, // batch 3 + }, + 0.1))); +} + } // namespace } // namespace tflite -- GitLab From 98d0286770d182de0dbf952c1ec0e079d8471a26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 09:20:12 -0700 Subject: [PATCH 034/902] Add 'platform_' libraries in core/BUILD. PiperOrigin-RevId: 197736600 --- tensorflow/core/BUILD | 305 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 265 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 05b8423e15..9b32a6e192 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -294,43 +294,18 @@ cc_library( ], ) -PLATFORM_BASE_HDRS = [ - "platform/env_time.h", - "platform/logging.h", - "platform/macros.h", - "platform/types.h", - "platform/byte_order.h", -] - -PLATFORM_OTHER_HDRS = [ - "platform/abi.h", - "platform/stacktrace.h", - "platform/stacktrace_handler.h", - "platform/context.h", - "platform/cpu_info.h", - "platform/cpu_feature_guard.h", - "platform/dynamic_annotations.h", - "platform/error.h", - "platform/env.h", - "platform/file_system.h", - "platform/file_system_helper.h", - "platform/fingerprint.h", - "platform/init_main.h", - "platform/mem.h", - "platform/mutex.h", - "platform/net.h", - "platform/notification.h", - "platform/null_file_system.h", - "platform/prefetch.h", - "platform/profile_utils/clock_cycle_profiler.h", - "platform/profile_utils/cpu_utils.h", - "platform/protobuf.h", - "platform/strong_hash.h", - "platform/subprocess.h", - "platform/thread_annotations.h", -] +filegroup( + name = 
"platform_base_hdrs", + srcs = [ + "platform/byte_order.h", + "platform/env_time.h", + "platform/logging.h", + "platform/macros.h", + "platform/types.h", + ], + visibility = ["//visibility:private"], +) -# Smaller platform libraries that don't depend on "lib" or "lib_internal". cc_library( name = "platform_base", srcs = tf_platform_hdrs([ @@ -342,16 +317,261 @@ cc_library( ]) + [ "platform/env_time.cc", ], - hdrs = PLATFORM_BASE_HDRS, + hdrs = [":platform_base_hdrs"], copts = tf_copts(), - # TODO(ahentz): remove use of this library so we can move it into 'platform' tags = ["avoid_dep"], + visibility = ["//tensorflow/core:__subpackages__"], deps = [ ":lib_platform", "//tensorflow/core/platform/default/build_config:base", ], ) +filegroup( + name = "platform_port_hdrs", + srcs = [ + "platform/cpu_info.h", + "platform/dynamic_annotations.h", + "platform/init_main.h", + "platform/mem.h", + "platform/mutex.h", + "platform/thread_annotations.h", + ], + visibility = ["//visibility:private"], +) + +# Headers that are not exported as part of ":lib". +filegroup( + name = "platform_port_internal_hdrs", + srcs = [ + "platform/demangle.h", + "platform/host_info.h", + "platform/snappy.h", + ], + visibility = ["//visibility:private"], +) + +cc_library( + name = "platform_port", + srcs = tf_platform_hdrs([ + "cpu_info.h", + "dynamic_annotations.h", + "thread_annotations.h", + "mutex.h", + ]) + tf_platform_srcs([ + "port.cc", + ]) + [ + "platform/cpu_info.cc", + ], + hdrs = [ + ":platform_port_hdrs", + ":platform_port_internal_hdrs", + ], + copts = tf_copts(), + visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":lib_platform", + ":platform_base", + "//tensorflow/core/platform/default/build_config:port", + "@snappy", + ], +) + +filegroup( + name = "platform_protobuf_hdrs", + srcs = [ + "platform/protobuf.h", + ], + visibility = ["//visibility:private"], +) + +# Headers that are not exported as part of ":lib". 
+filegroup( + name = "platform_protobuf_internal_hdrs", + srcs = [ + "platform/protobuf_internal.h", + ], + visibility = ["//visibility:private"], +) + +cc_library( + name = "platform_protobuf", + srcs = tf_platform_hdrs([ + "protobuf.h", + ]) + tf_platform_srcs([ + "protobuf.cc", + ]) + [ + "platform/protobuf_util.cc", + ], + hdrs = [ + ":platform_protobuf_hdrs", + ":platform_protobuf_internal_hdrs", + ], + copts = tf_copts(), + visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":lib_platform", + ":platform_base", + ":platform_port", + "//tensorflow/core/platform/default/build_config:protobuf", + "@protobuf_archive//:protobuf", + ], +) + +filegroup( + name = "platform_env_hdrs", + srcs = [ + "platform/env.h", + "platform/file_statistics.h", + "platform/file_system.h", + ], + visibility = ["//visibility:private"], +) + +# Headers that are not exported as part of ":lib". +filegroup( + name = "platform_env_internal_hdrs", + srcs = [ + "platform/load_library.h", + ], + visibility = ["//visibility:private"], +) + +cc_library( + name = "platform_env", + srcs = tf_platform_srcs([ + "env.cc", + "load_library.cc", + ]) + tf_platform_hdrs([ + ]) + [ + "platform/env.cc", + "platform/file_system.cc", + ], + hdrs = [ + ":platform_env_hdrs", + ":platform_env_internal_hdrs", + ], + copts = tf_copts(), + visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":error_codes_proto_cc", + ":lib", + ":lib_internal", + ":lib_platform", + ":platform_base", + ":platform_port", + ":platform_protobuf", + "//tensorflow/core/platform/default/build_config:env", + ], +) + +filegroup( + name = "platform_file_system_hdrs", + srcs = [ + "platform/file_system_helper.h", + "platform/null_file_system.h", + ], + visibility = ["//visibility:private"], +) + +cc_library( + name = "platform_file_system", + srcs = tf_platform_srcs([ + ]) + tf_platform_hdrs([ + "windows_file_system.h", + ]) + [ + "platform/file_system_helper.cc", + ], + hdrs = [ + ":platform_file_system_hdrs", + ], 
+ copts = tf_copts(), + visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":lib", + ":lib_platform", + ":platform_env", + ], +) + +filegroup( + name = "platform_other_hdrs", + srcs = [ + "platform/abi.h", + "platform/context.h", + "platform/cpu_feature_guard.h", + "platform/error.h", + "platform/fingerprint.h", + "platform/net.h", + "platform/notification.h", + "platform/prefetch.h", + "platform/profile_utils/android_armv7a_cpu_utils_helper.h", + "platform/profile_utils/clock_cycle_profiler.h", + "platform/profile_utils/cpu_utils.h", + "platform/profile_utils/i_cpu_utils_helper.h", + "platform/stacktrace.h", + "platform/stacktrace_handler.h", + "platform/strong_hash.h", + "platform/subprocess.h", + ], + visibility = ["//visibility:private"], +) + +# Headers that are not exported as part of ":lib". +filegroup( + name = "platform_other_internal_hdrs", + srcs = [ + "platform/denormal.h", + "platform/setround.h", + "platform/tracing.h", + ], + visibility = ["//visibility:private"], +) + +cc_library( + name = "platform_other", + srcs = tf_platform_srcs([ + "subprocess.cc", + "net.cc", + "tracing.cc", + ]) + tf_platform_hdrs([ + "tracing.h", + "error.h", + "context.h", + "fingerprint.h", + "notification.h", + "stacktrace.h", + "strong_hash.h", + "subprocess.h", + "tracing_impl.h", + ]) + [ + "platform/cpu_feature_guard.cc", + "platform/setround.cc", + "platform/tracing.cc", + "platform/denormal.cc", + "platform/profile_utils/android_armv7a_cpu_utils_helper.cc", + "platform/profile_utils/clock_cycle_profiler.cc", + "platform/profile_utils/cpu_utils.cc", + ], + hdrs = [ + ":platform_other_hdrs", + ":platform_other_internal_hdrs", + ], + copts = tf_copts(), + visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":lib", + ":lib_platform", + ":platform_base", + ":platform_env", + ":platform_port", + ":platform_protobuf", + "//tensorflow/core/platform/default/build_config:other", + "//tensorflow/core/platform/default/build_config:platformlib", + 
"//tensorflow/core/platform/default/build_config:port", + ], +) + # Minimal lib so that tools used for mobile compilation # don't have to depend on lib/platformlib. cc_library( @@ -385,8 +605,7 @@ cc_library( # tf_cc_test and tf_cc_binary will include the necessary symbols. cc_library( name = "lib", - hdrs = PLATFORM_BASE_HDRS + - PLATFORM_OTHER_HDRS + [ + hdrs = [ "lib/bfloat16/bfloat16.h", "lib/core/arena.h", "lib/core/bitmap.h", @@ -433,6 +652,12 @@ cc_library( "lib/strings/str_util.h", "lib/strings/strcat.h", "lib/strings/stringprintf.h", + ":platform_base_hdrs", + ":platform_env_hdrs", + ":platform_file_system_hdrs", + ":platform_other_hdrs", + ":platform_port_hdrs", + ":platform_protobuf_hdrs", ], visibility = ["//visibility:public"], deps = [ -- GitLab From 8a2972f5ddc9a986b702397f18203d1513511d2b Mon Sep 17 00:00:00 2001 From: Bjarke Hammersholt Roune Date: Wed, 23 May 2018 09:44:39 -0700 Subject: [PATCH 035/902] Quick fix for Kokoro breakage. PiperOrigin-RevId: 197739982 --- tensorflow/compiler/xla/scanner_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/scanner_test.cc b/tensorflow/compiler/xla/scanner_test.cc index d392a240b1..10cd0c6a04 100644 --- a/tensorflow/compiler/xla/scanner_test.cc +++ b/tensorflow/compiler/xla/scanner_test.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// TODO(b/80179519): Fix open source build for real. +#if 0 #include "tensorflow/compiler/xla/scanner.h" #include @@ -119,3 +121,4 @@ TEST(Scanner, IntVector) { } // namespace } // namespace xla +#endif -- GitLab From 787f185cd73a8b090a3dadc1432e9bfdf527e345 Mon Sep 17 00:00:00 2001 From: Patrick Nguyen Date: Wed, 23 May 2018 09:54:06 -0700 Subject: [PATCH 036/902] Fix typo in error message. 
PiperOrigin-RevId: 197741341 --- tensorflow/core/framework/op.cc | 4 ++-- tensorflow/core/graph/graph_constructor_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/op.cc b/tensorflow/core/framework/op.cc index 0873d4e47b..b8309eafb0 100644 --- a/tensorflow/core/framework/op.cc +++ b/tensorflow/core/framework/op.cc @@ -97,7 +97,7 @@ Status OpRegistry::LookUp(const string& op_type_name, "Make sure the Op and Kernel are registered in the " "binary running in this process. Note that if you " "are loading a saved graph which used ops from " - "tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done" + "tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done " "before importing the graph, as contrib ops are lazily registered " "when the module is first accessed."); VLOG(1) << status.ToString(); @@ -256,7 +256,7 @@ Status OpListOpRegistry::LookUp(const string& op_type_name, "Make sure the Op and Kernel are registered in the " "binary running in this process. Note that if you " "are loading a saved graph which used ops from " - "tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done" + "tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done " "before importing the graph, as contrib ops are lazily registered " "when the module is first accessed."); } diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index c54b4fa269..6309870190 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -3170,7 +3170,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_UnknownOps) { {"Make sure the Op and Kernel are registered in the " "binary running in this process. Note that if you " "are loading a saved graph which used ops from " - "tf.contrib, accessing (e.g.) `tf.contrib.resampler` should be done" + "tf.contrib, accessing (e.g.) 
`tf.contrib.resampler` should be done " "before importing the graph, as contrib ops are lazily registered " "when the module is first accessed."}); } -- GitLab From 9d7daede6e94090f0efa4e9ac59328c0d3887cc8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 09:58:30 -0700 Subject: [PATCH 037/902] PiperOrigin-RevId: 197741984 --- tensorflow/contrib/android/BUILD | 2 ++ tensorflow/contrib/lite/examples/android/BUILD | 2 ++ tensorflow/contrib/lite/java/demo/app/src/main/BUILD | 2 ++ tensorflow/contrib/lite/java/ovic/BUILD | 2 ++ tensorflow/contrib/lite/java/ovic/demo/app/BUILD | 2 ++ .../lite/java/src/testhelper/java/org/tensorflow/lite/BUILD | 2 ++ .../contrib/lite/models/smartreply/demo/app/src/main/BUILD | 2 ++ tensorflow/examples/android/BUILD | 2 ++ 8 files changed, 16 insertions(+) diff --git a/tensorflow/contrib/android/BUILD b/tensorflow/contrib/android/BUILD index c10179ba8b..1c19d07bcc 100644 --- a/tensorflow/contrib/android/BUILD +++ b/tensorflow/contrib/android/BUILD @@ -1,6 +1,8 @@ # Description: # JNI-based Java inference interface for TensorFlow. +load("//tools/build_defs/android:rules.bzl", "android_library") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/examples/android/BUILD b/tensorflow/contrib/lite/examples/android/BUILD index 5700007256..88f435e871 100644 --- a/tensorflow/contrib/lite/examples/android/BUILD +++ b/tensorflow/contrib/lite/examples/android/BUILD @@ -1,6 +1,8 @@ # Description: # TensorFlow camera demo app for Android. 
+load("//tools/build_defs/android:rules.bzl", "android_binary") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index d6fbef9cc9..a45871f7cf 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -1,3 +1,5 @@ +load("//tools/build_defs/android:rules.bzl", "android_binary") + package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD index 362d93636f..5f05906bc5 100644 --- a/tensorflow/contrib/lite/java/ovic/BUILD +++ b/tensorflow/contrib/lite/java/ovic/BUILD @@ -1,6 +1,8 @@ # Description: # OVIC Benchmarker Java API. +load("//tools/build_defs/android:rules.bzl", "android_library") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD index 83974f4b33..156a4ab8f5 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD +++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD @@ -1,3 +1,5 @@ +load("//tools/build_defs/android:rules.bzl", "android_binary") + # Sample app for OVIC benchmarking. licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD index b524246d43..770062c90d 100644 --- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD @@ -1,6 +1,8 @@ # Description: # Internal helper function to test TF Lite API. 
+load("//tools/build_defs/android:rules.bzl", "android_library") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD index f8767b443a..8a86ecbf91 100644 --- a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD @@ -1,3 +1,5 @@ +load("//tools/build_defs/android:rules.bzl", "android_binary") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 07f096418f..118138296c 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -1,6 +1,8 @@ # Description: # TensorFlow camera demo app for Android. +load("//tools/build_defs/android:rules.bzl", "android_binary") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 -- GitLab From ee303929b18745e7892d872ceac46c326a32ea93 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 23 May 2018 09:59:25 -0700 Subject: [PATCH 038/902] Add a "--no_search_hints" flag to the api-docs generator. 
PiperOrigin-RevId: 197742114 --- tensorflow/tools/docs/generate_lib.py | 25 ++++++++-- tensorflow/tools/docs/parser.py | 60 ++++++++++++++++++++++++ tensorflow/tools/docs/pretty_docs.py | 66 ++------------------------- 3 files changed, 85 insertions(+), 66 deletions(-) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 111d54d820..853ec6194f 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -50,7 +50,11 @@ def _is_free_function(py_object, full_name, index): return True -def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): +def write_docs(output_dir, + parser_config, + yaml_toc, + root_title='TensorFlow', + search_hints=True): """Write previously extracted docs to disk. Write a docs page for each symbol included in the indices of parser_config to @@ -66,6 +70,8 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): indices. yaml_toc: Set to `True` to generate a "_toc.yaml" file. root_title: The title name for the root level index.md. + search_hints: (bool) include meta-data search hints at the top of each + output file. Raises: ValueError: if `output_dir` is not an absolute path @@ -134,7 +140,13 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): if not os.path.exists(directory): os.makedirs(directory) # This function returns raw bytes in PY2 or unicode in PY3. 
- text = pretty_docs.build_md_page(page_info) + if search_hints: + content = [page_info.get_metadata_html()] + else: + content = [''] + + content.append(pretty_docs.build_md_page(page_info)) + text = '\n'.join(content) if six.PY3: text = text.encode('utf-8') with open(path, 'wb') as f: @@ -467,6 +479,12 @@ class DocGenerator(object): self._do_not_descend_map = _get_default_do_not_descend_map() self.yaml_toc = True + self.argument_parser.add_argument( + '--no_search_hints', + dest='search_hints', + action='store_false', + default=True) + def add_output_dir_argument(self): self.argument_parser.add_argument( '--output_dir', @@ -553,7 +571,8 @@ class DocGenerator(object): output_dir, parser_config, yaml_toc=self.yaml_toc, - root_title=root_title) + root_title=root_title, + search_hints=getattr(flags, 'search_hints', True)) _other_docs(flags.src_dir, flags.output_dir, reference_resolver) parser_config.reference_resolver.log_errors() diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index fb0bd2c2ff..7363e4f8b5 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -21,6 +21,7 @@ from __future__ import print_function import ast import collections import functools +import itertools import json import os import re @@ -906,6 +907,9 @@ class _FunctionPageInfo(object): def add_decorator(self, dec): self._decorators.append(dec) + def get_metadata_html(self): + return _Metadata(self.full_name).build_html() + class _ClassPageInfo(object): """Collects docs for a class page. 
@@ -1099,6 +1103,14 @@ class _ClassPageInfo(object): """Returns a list of `_LinkInfo` pointing to any nested classes.""" return self._classes + def get_metadata_html(self): + meta_data = _Metadata(self.full_name) + for item in itertools.chain(self.classes, self.properties, self.methods, + self.other_members): + meta_data.append(item) + + return meta_data.build_html() + def _add_class(self, short_name, full_name, obj, doc, url): """Adds a `_LinkInfo` for a nested class to `classes` list. @@ -1330,6 +1342,16 @@ class _ModulePageInfo(object): self._other_members.append( _OtherMemberInfo(short_name, full_name, obj, doc)) + def get_metadata_html(self): + meta_data = _Metadata(self.full_name) + + # Objects with their own pages are not added to the matadata list for the + # module, the module only has a link to the object page. No docs. + for item in self.other_members: + meta_data.append(item) + + return meta_data.build_html() + def collect_docs_for_module(self, parser_config): """Collect information necessary specifically for a module's doc page. @@ -1656,3 +1678,41 @@ def generate_global_index(library_name, index, reference_resolver): # TODO(markdaoust): use a _ModulePageInfo -> prety_docs.build_md_page() return '\n'.join(lines) + + +class _Metadata(object): + """A class for building a page's Metadata block. + + Attributes: + name: The name of the page being described by the Metadata block. + """ + + def __init__(self, name): + """Creates a Metadata builder. + + Args: + name: The name of the page being described by the Metadata block. + """ + self.name = name + self._content = [] + + def append(self, item): + """Adds an item from the page to the Metadata block. + + Args: + item: The parsed page section to add. + """ + self._content.append(item.short_name) + + def build_html(self): + """Returns the Metadata block as an Html string.""" + schema = 'http://developers.google.com/ReferenceObject' + parts = ['
' % schema] + + parts.append('' % self.name) + for item in self._content: + parts.append('' % item) + + parts.extend(['
', '']) + + return '\n'.join(parts) diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py index 55ab5bdd49..63d4fef91c 100644 --- a/tensorflow/tools/docs/pretty_docs.py +++ b/tensorflow/tools/docs/pretty_docs.py @@ -27,7 +27,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import itertools import textwrap @@ -58,8 +57,7 @@ def build_md_page(page_info): def _build_function_page(page_info): """Given a FunctionPageInfo object Return the page as an md string.""" - parts = [_Metadata(page_info.full_name).build_html()] - parts.append('# %s\n\n' % page_info.full_name) + parts = ['# %s\n\n' % page_info.full_name] if len(page_info.aliases) > 1: parts.append('### Aliases:\n\n') @@ -83,17 +81,7 @@ def _build_function_page(page_info): def _build_class_page(page_info): """Given a ClassPageInfo object Return the page as an md string.""" - meta_data = _Metadata(page_info.full_name) - for item in itertools.chain( - page_info.classes, - page_info.properties, - page_info.methods, - page_info.other_members): - meta_data.append(item) - - parts = [meta_data.build_html()] - - parts.append('# {page_info.full_name}\n\n'.format(page_info=page_info)) + parts = ['# {page_info.full_name}\n\n'.format(page_info=page_info)] parts.append('## Class `%s`\n\n' % page_info.full_name.split('.')[-1]) if page_info.bases: @@ -186,17 +174,7 @@ def _build_class_page(page_info): def _build_module_page(page_info): """Given a ClassPageInfo object Return the page as an md string.""" - meta_data = _Metadata(page_info.full_name) - - # Objects with their own pages are not added to the matadata list for the - # module, as the only thing on the module page is a link to the object's page. 
- for item in page_info.other_members: - meta_data.append(item) - - parts = [meta_data.build_html()] - - parts.append( - '# Module: {full_name}\n\n'.format(full_name=page_info.full_name)) + parts = ['# Module: {full_name}\n\n'.format(full_name=page_info.full_name)] if len(page_info.aliases) > 1: parts.append('### Aliases:\n\n') @@ -317,41 +295,3 @@ def _build_function_details(function_details): parts.append(''.join(sub)) return '\n'.join(parts) - - -class _Metadata(object): - """A class for building a page's Metadata block. - - Attributes: - name: The name of the page being described by the Metadata block. - """ - - def __init__(self, name): - """Create a Metadata builder. - - Args: - name: The name of the page being described by the Metadata block. - """ - self.name = name - self._content = [] - - def append(self, item): - """Add an item from the page to the Metadata block. - - Args: - item: The parsed page section to add. - """ - self._content.append(item.short_name) - - def build_html(self): - """Return the Metadata block as an Html string.""" - schema = 'http://developers.google.com/ReferenceObject' - parts = ['
' % schema] - - parts.append('' % self.name) - for item in self._content: - parts.append('' % item) - - parts.extend(['
', '', '']) - - return '\n'.join(parts) -- GitLab From 896a6d74959c02b5c41087f96e77ef166fe484e3 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 23 May 2018 10:01:15 -0700 Subject: [PATCH 039/902] Keep column order in make_csv_dataset. PiperOrigin-RevId: 197742412 --- .../python/kernel_tests/csv_dataset_op_test.py | 17 +++++++++++++++++ tensorflow/contrib/data/python/ops/readers.py | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py index 641a389c03..f9f11a1555 100644 --- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py @@ -308,6 +308,23 @@ class CsvDatasetOpTest(test.TestCase): record_defaults=record_defaults, ) + def testMakeCsvDataset_fieldOrder(self): + data = [[ + '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19', + '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19' + ]] + file_path = self.setup_files(data) + + with ops.Graph().as_default() as g: + ds = readers.make_csv_dataset( + file_path, batch_size=1, shuffle=False, num_epochs=1) + next_batch = ds.make_one_shot_iterator().get_next() + + with self.test_session(graph=g) as sess: + result = list(sess.run(next_batch).values()) + + self.assertEqual(result, sorted(result)) + class CsvDatasetBenchmark(test.Benchmark): """Benchmarks for the various ways of creating a dataset from CSV files. 
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 75c31a944a..f938153f5f 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import csv import numpy as np @@ -467,11 +468,11 @@ def make_csv_dataset( Args: *columns: list of `Tensor`s corresponding to one csv record. Returns: - A dictionary of feature names to values for that particular record. If + An OrderedDict of feature names to values for that particular record. If label_name is provided, extracts the label feature to be returned as the second element of the tuple. """ - features = dict(zip(column_names, columns)) + features = collections.OrderedDict(zip(column_names, columns)) if label_name is not None: label = features.pop(label_name) return features, label -- GitLab From 8441db8afeee0efbeac9b457016d8558fbcde2b6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 10:05:58 -0700 Subject: [PATCH 040/902] Combine op-profiles collected from individual TPUs. 
PiperOrigin-RevId: 197743291 --- tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index 816897499b..e51f64521f 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -79,6 +79,7 @@ ProfileRequest PopulateProfileRequest(int duration_ms, request.set_repository_root(repository_root); request.set_session_id(session_id); } + request.add_tools("op_profile"); request.add_tools("input_pipeline"); request.add_tools("overview_page"); *request.mutable_opts() = opts; -- GitLab From c78d4e8e7e032986789b0755b399b6c9ad274b5d Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 23 May 2018 10:29:58 -0700 Subject: [PATCH 041/902] Update build visibility of //third_party/tensorflow/contrib/signal PiperOrigin-RevId: 197747430 --- tensorflow/contrib/signal/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/signal/BUILD b/tensorflow/contrib/signal/BUILD index fdecceff52..6bd58c4d32 100644 --- a/tensorflow/contrib/signal/BUILD +++ b/tensorflow/contrib/signal/BUILD @@ -1,4 +1,4 @@ -package(default_visibility = ["//tensorflow:__subpackages__"]) +package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 -- GitLab From 7b78417a00e6805557d530c1f1fcc8b2a44d6e2e Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 23 May 2018 10:43:28 -0700 Subject: [PATCH 042/902] Add a checkpointable list data structure Allows tracking of Layers and other checkpointable objects by number. Fixes #19250. 
PiperOrigin-RevId: 197749961 --- tensorflow/contrib/checkpoint/__init__.py | 6 +- tensorflow/contrib/checkpoint/python/BUILD | 16 +- .../examples/rnn_colorbot/rnn_colorbot.py | 10 +- .../eager/python/examples/rnn_ptb/rnn_ptb.py | 10 +- tensorflow/python/keras/BUILD | 1 + tensorflow/python/keras/engine/network.py | 6 +- .../python/training/checkpointable/BUILD | 39 +++- .../python/training/checkpointable/base.py | 8 +- .../checkpointable/data_structures.py | 218 ++++++++++++++++++ .../checkpointable/data_structures_base.py | 27 +++ .../checkpointable/data_structures_test.py | 142 ++++++++++++ 11 files changed, 454 insertions(+), 29 deletions(-) create mode 100644 tensorflow/python/training/checkpointable/data_structures.py create mode 100644 tensorflow/python/training/checkpointable/data_structures_base.py create mode 100644 tensorflow/python/training/checkpointable/data_structures_test.py diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py index af8df72618..bd0bc9e56b 100644 --- a/tensorflow/contrib/checkpoint/__init__.py +++ b/tensorflow/contrib/checkpoint/__init__.py @@ -18,11 +18,14 @@ Visualization and inspection: @@dot_graph_from_checkpoint @@object_metadata -Creating and managing dependencies: +Managing dependencies: @@Checkpointable @@CheckpointableObjectGraph @@NoDependency @@split_dependency + +Checkpointable data structures: +@@List @@UniqueNameTracker """ @@ -36,6 +39,7 @@ from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkp from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import CheckpointableObjectGraph from tensorflow.python.training.checkpointable.base import Checkpointable from tensorflow.python.training.checkpointable.base import NoDependency +from tensorflow.python.training.checkpointable.data_structures import List from tensorflow.python.training.checkpointable.util import object_metadata from tensorflow.python.util.all_util import remove_undocumented diff 
--git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD index 53f4e97f99..0b67619c11 100644 --- a/tensorflow/contrib/checkpoint/python/BUILD +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -11,6 +11,7 @@ py_library( ":containers", ":split_dependency", ":visualize", + "//tensorflow/python/training/checkpointable:data_structures", ], ) @@ -30,8 +31,8 @@ py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:training", "//tensorflow/python/training/checkpointable:base", + "//tensorflow/python/training/checkpointable:util", "@six_archive//:six", ], ) @@ -44,6 +45,7 @@ py_library( deps = [ "//tensorflow/python:control_flow_ops", "//tensorflow/python:training", + "//tensorflow/python/training/checkpointable:base", ], ) @@ -55,8 +57,9 @@ py_test( "//tensorflow/python:array_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:training", "//tensorflow/python/eager:test", + "//tensorflow/python/training/checkpointable:base", + "//tensorflow/python/training/checkpointable:util", ], ) @@ -67,6 +70,8 @@ py_library( visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/python:pywrap_tensorflow", + "//tensorflow/python/training/checkpointable:base", + "//tensorflow/python/training/checkpointable:util", ], ) @@ -75,10 +80,13 @@ py_test( srcs = ["visualize_test.py"], deps = [ ":visualize", - "//tensorflow/python:array_ops", - "//tensorflow/python:framework_test_lib", + "//tensorflow/python:constant_op", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:training", + "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", + "//tensorflow/python/keras:engine", + "//tensorflow/python/keras:layers", + "//tensorflow/python/training/checkpointable:util", ], ) diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py 
b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py index 492adbe1d8..5ee2176154 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py @@ -152,7 +152,7 @@ class RNNColorbot(tf.keras.Model): self.label_dimension = label_dimension self.keep_prob = keep_prob - self.cells = self._add_cells( + self.cells = tf.contrib.checkpoint.List( [tf.nn.rnn_cell.BasicLSTMCell(size) for size in rnn_cell_sizes]) self.relu = layers.Dense( label_dimension, activation=tf.nn.relu, name="relu") @@ -204,14 +204,6 @@ class RNNColorbot(tf.keras.Model): hidden_states = tf.gather_nd(chars, indices) return self.relu(hidden_states) - def _add_cells(self, cells): - # "Magic" required for keras.Model classes to track all the variables in - # a list of layers.Layer objects. - # TODO(ashankar): Figure out API so user code doesn't have to do this. - for i, c in enumerate(cells): - setattr(self, "cell-%d" % i, c) - return cells - def loss(labels, predictions): """Computes mean squared loss.""" diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index 74701b2f4f..c2340a293a 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -50,7 +50,7 @@ class RNN(tf.keras.Model): def __init__(self, hidden_dim, num_layers, keep_ratio): super(RNN, self).__init__() self.keep_ratio = keep_ratio - self.cells = self._add_cells([ + self.cells = tf.contrib.checkpoint.List([ tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim) for _ in range(num_layers) ]) @@ -74,14 +74,6 @@ class RNN(tf.keras.Model): # tuple (output, output_states). return [input_seq] - def _add_cells(self, cells): - # "Magic" required for keras.Model classes to track all the variables in - # a list of Layer objects. 
- # TODO(ashankar): Figure out API so user code doesn't have to do this. - for i, c in enumerate(cells): - setattr(self, "cell-%d" % i, c) - return cells - class Embedding(layers.Layer): """An Embedding layer.""" diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 5d730695b9..fe40c9fbed 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -135,6 +135,7 @@ py_library( deps = [ ":backend", "//tensorflow/python/data", + "//tensorflow/python/training/checkpointable:data_structures_base", "@six_archive//:six", ], ) diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 4a0e16f073..6e818ec62a 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -41,6 +41,7 @@ from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras.utils.layer_utils import print_summary as print_layer_summary from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.checkpointable import base as checkpointable +from tensorflow.python.training.checkpointable import data_structures_base from tensorflow.python.training.checkpointable import util as checkpointable_utils from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -321,7 +322,10 @@ class Network(base_layer.Layer): no_dependency = isinstance(value, checkpointable.NoDependency) if no_dependency: value = value.value - if isinstance(value, (base_layer.Layer, Network)): + if isinstance(value, ( + base_layer.Layer, + Network, + data_structures_base.CheckpointableDataStructureBase)): try: is_graph_network = self._is_graph_network except AttributeError: diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD index a7ae6e50a9..87ba4dc91c 100644 --- a/tensorflow/python/training/checkpointable/BUILD +++ 
b/tensorflow/python/training/checkpointable/BUILD @@ -22,8 +22,9 @@ py_library( "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:io_ops_gen", - "//tensorflow/python:ops", + "//tensorflow/python:platform", "//tensorflow/python:saveable_object", "//tensorflow/python:util", "//tensorflow/python/eager:context", @@ -40,6 +41,42 @@ py_test( ], ) +py_library( + name = "data_structures_base", + srcs = ["data_structures_base.py"], + srcs_version = "PY2AND3", + deps = [ + ":base", + ], +) + +py_library( + name = "data_structures", + srcs = ["data_structures.py"], + srcs_version = "PY2AND3", + deps = [ + ":base", + ":data_structures_base", + ], +) + +py_test( + name = "data_structures_test", + srcs = ["data_structures_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":data_structures", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:layers", + "//tensorflow/python:math_ops", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "//tensorflow/python/keras:engine", + "//tensorflow/python/keras:layers", + ], +) + py_library( name = "util", srcs = ["util.py"], diff --git a/tensorflow/python/training/checkpointable/base.py b/tensorflow/python/training/checkpointable/base.py index e378f0e898..cfe7259e1b 100644 --- a/tensorflow/python/training/checkpointable/base.py +++ b/tensorflow/python/training/checkpointable/base.py @@ -591,11 +591,11 @@ class CheckpointableBase(object): self._unconditional_checkpoint_dependencies): if name == old_name: self._unconditional_checkpoint_dependencies[index] = new_reference - else: + elif current_object is None: self._unconditional_checkpoint_dependencies.append(new_reference) - - self._unconditional_dependency_names[name] = checkpointable - self._handle_deferred_dependencies(name=name, checkpointable=checkpointable) + 
self._unconditional_dependency_names[name] = checkpointable + self._handle_deferred_dependencies( + name=name, checkpointable=checkpointable) return checkpointable def _handle_deferred_dependencies(self, name, checkpointable): diff --git a/tensorflow/python/training/checkpointable/data_structures.py b/tensorflow/python/training/checkpointable/data_structures.py new file mode 100644 index 0000000000..b514f7bdb2 --- /dev/null +++ b/tensorflow/python/training/checkpointable/data_structures.py @@ -0,0 +1,218 @@ +"""Checkpointable data structures.""" +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.keras.engine import base_layer +from tensorflow.python.training.checkpointable import base as checkpointable_lib +from tensorflow.python.training.checkpointable import data_structures_base + + +# TODO(allenl): We could track regular Python data structures which get assigned +# to Checkpointable objects. 
Making this work with restore-on-create would be +# tricky; we'd need to re-create nested structures with our own wrapped objects +# on assignment to an attribute, and track the user's original structure to make +# sure they don't modify it except through the wrappers (since we could save the +# user's updated structure, but would have no way to support restore-on-create +# for those modifications). +# TODO(allenl): A dictionary data structure would be good too. +class CheckpointableDataStructure( + data_structures_base.CheckpointableDataStructureBase): + """Base class for data structures which contain checkpointable objects.""" + + def __init__(self): + self._layers = [] + self.trainable = True + + def _track_value(self, value, name): + """Add a dependency on `value`.""" + if isinstance(value, checkpointable_lib.CheckpointableBase): + self._track_checkpointable(value, name=name) + else: + raise ValueError( + ("Only checkpointable objects (such as Layers or Optimizers) may be " + "stored in a List object. Got %s, which does not inherit from " + "CheckpointableBase.") % (value,)) + if isinstance(value, ( + base_layer.Layer, + data_structures_base.CheckpointableDataStructureBase)): + if value not in self._layers: + self._layers.append(value) + if hasattr(value, "_use_resource_variables"): + # In subclassed models, legacy layers (tf.layers) must always use + # resource variables. 
+ value._use_resource_variables = True # pylint: disable=protected-access + + @property + def layers(self): + return self._layers + + @property + def trainable_weights(self): + if not self.trainable: + return [] + weights = [] + for layer in self.layers: + weights += layer.trainable_weights + return weights + + @property + def non_trainable_weights(self): + weights = [] + for layer in self.layers: + weights += layer.non_trainable_weights + if not self.trainable: + trainable_weights = [] + for layer in self.layers: + trainable_weights += layer.trainable_weights + return trainable_weights + weights + return weights + + @property + def weights(self): + return self.trainable_weights + self.non_trainable_weights + + @property + def variables(self): + return self.weights + + @property + def updates(self): + """Aggregate updates from any `Layer` instances.""" + # Updates and conditional losses are forwarded as-is rather than being + # filtered based on inputs, since this is just a container and won't ever + # have any inputs. + aggregated = [] + for layer in self.layers: + aggregated += layer.updates + return aggregated + + @property + def losses(self): + """Aggregate losses from any `Layer` instances.""" + aggregated = [] + for layer in self.layers: + aggregated += layer.losses + return aggregated + + def __hash__(self): + # Support object-identity hashing, so these structures can be used as keys + # in sets/dicts. + return id(self) + + def __eq__(self, other): + # Similar to Tensors, checkpointable data structures use object-identity + # equality to support set/dict membership. + return self is other + + +class List(CheckpointableDataStructure, collections.Sequence): + """An append-only sequence type which is checkpointable. + + Maintains checkpoint dependencies on its contents (which must also be + checkpointable), and forwards any `Layer` metadata such as updates and losses. + + Note that `List` is purely a container. 
It lets a `tf.keras.Model` or + other checkpointable object know about its contents, but does not call any + `Layer` instances which are added to it. To indicate a sequence of `Layer` + instances which should be called sequentially, use `tf.keras.Sequential`. + + Example usage: + ```python + class HasList(tf.keras.Model): + + def __init__(self): + super(HasList, self).__init__() + self.layer_list = tf.contrib.checkpoint.List([layers.Dense(3)]) + self.layer_list.append(layers.Dense(4)) + + def call(self, x): + aggregation = 0. + for l in self.layer_list: + x = l(x) + aggregation += tf.reduce_sum(x) + return aggregation + ``` + + This kind of wrapping is necessary because `Checkpointable` objects do not + (yet) deeply inspect regular Python data structures, so for example assigning + a regular list (`self.layer_list = [layers.Dense(3)]`) does not create a + checkpoint dependency and does not add the `Layer` instance's weights to its + parent `Model`. + """ + + def __init__(self, *args, **kwargs): + """Construct a new sequence. 
Arguments are passed to `list()`.""" + super(List, self).__init__() + self._storage = list(*args, **kwargs) + for index, element in enumerate(self._storage): + self._track_value(element, name=self._name_element(index)) + + def _name_element(self, index): + return "%d" % (index,) + + def append(self, value): + """Add a new checkpointable value.""" + self._track_value(value, self._name_element(len(self._storage))) + self._storage.append(value) + + def extend(self, values): + """Add a sequence of checkpointable values.""" + for index_offset, value in enumerate(values): + self._track_value( + value, name=self._name_element(len(self._storage) + index_offset)) + self._storage.extend(values) + + def __iadd__(self, values): + self.extend(values) + return self + + def __add__(self, other): + if isinstance(other, List): + return List(self._storage + other._storage) # pylint: disable=protected-access + else: + return List(self._storage + other) + + def __getitem__(self, key): + return self._storage[key] + + def __len__(self): + return len(self._storage) + + def __repr__(self): + return "List(%s)" % (repr(self._storage),) + + @property + def updates(self): + """Aggregate updates from any `Layer` instances.""" + # Updates and conditional losses are forwarded as-is rather than being + # filtered based on inputs, since this is just a container and won't ever + # have any inputs. 
+ aggregated = [] + for layer in self.layers: + aggregated += layer.updates + return aggregated + + @property + def losses(self): + """Aggregate losses from any `Layer` instances.""" + aggregated = [] + for layer in self.layers: + aggregated += layer.losses + return aggregated diff --git a/tensorflow/python/training/checkpointable/data_structures_base.py b/tensorflow/python/training/checkpointable/data_structures_base.py new file mode 100644 index 0000000000..f1b2cf105b --- /dev/null +++ b/tensorflow/python/training/checkpointable/data_structures_base.py @@ -0,0 +1,27 @@ +"""A trivial base class to avoid circular imports for isinstance checks.""" +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.training.checkpointable import base as checkpointable_lib + + +class CheckpointableDataStructureBase(checkpointable_lib.CheckpointableBase): + """Base class for data structures which contain checkpointable objects.""" + + pass diff --git a/tensorflow/python/training/checkpointable/data_structures_test.py b/tensorflow/python/training/checkpointable/data_structures_test.py new file mode 100644 index 0000000000..6cabbea771 --- /dev/null +++ b/tensorflow/python/training/checkpointable/data_structures_test.py @@ -0,0 +1,142 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.keras.engine import training +from tensorflow.python.keras.layers import core +from tensorflow.python.keras.layers import normalization +from tensorflow.python.layers import core as non_keras_core +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training.checkpointable import data_structures + + +class HasList(training.Model): + + def __init__(self): + super(HasList, self).__init__() + self.layer_list = data_structures.List([core.Dense(3)]) + self.layer_list.append(core.Dense(4)) + self.layer_list.extend( + [core.Dense(5), + core.Dense(6, kernel_regularizer=math_ops.reduce_sum)]) + self.layer_list += [ + core.Dense(7, bias_regularizer=math_ops.reduce_sum), + core.Dense(8) + ] + self.layer_list += ( + data_structures.List([core.Dense(9)]) + data_structures.List( + [core.Dense(10)])) + self.layer_list.extend( + data_structures.List( + list(sequence=[core.Dense(11)]) + [core.Dense(12)])) + self.layers_with_updates = data_structures.List( + sequence=(normalization.BatchNormalization(),)) + + def call(self, x): + aggregation = 0. 
+ for l in self.layer_list: + x = l(x) + aggregation += math_ops.reduce_sum(x) + bn, = self.layers_with_updates + return bn(x) / aggregation + + +class ListTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testTracking(self): + model = HasList() + output = model(array_ops.ones([32, 2])) + self.assertAllEqual([32, 12], output.shape) + self.assertEqual(2, len(model.layers)) + self.assertIs(model.layer_list, model.layers[0]) + self.assertEqual(10, len(model.layers[0].layers)) + for index in range(10): + self.assertEqual(3 + index, model.layers[0].layers[index].units) + self.assertEqual(2, len(model._checkpoint_dependencies)) + self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref) + self.assertIs(model.layers_with_updates, + model._checkpoint_dependencies[1].ref) + self.assertEqual( + 10, len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies)) + self.evaluate([v.initializer for v in model.variables]) + self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3]))) + model.load_weights(save_path) + self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], + self.evaluate(model.variables[0])) + + def testUpdatesForwarded(self): + with context.graph_mode(): + model = HasList() + model_input = array_ops.ones([32, 2]) + model(model_input) + self.assertGreater(len(model.layers_with_updates[0].updates), 0) + self.assertEqual(set(model.layers_with_updates[0].updates), + set(model.updates)) + + with context.eager_mode(): + model = HasList() + model_input = array_ops.ones([32, 2]) + model(model_input) + self.assertEqual(0, len(model.updates)) + + @test_util.run_in_graph_and_eager_modes() + def testLossesForwarded(self): + model = HasList() + model_input = array_ops.ones([32, 2]) + model(model_input) + self.assertEqual(2, len(model.losses)) + + def 
testNotCheckpointable(self): + class NotCheckpointable(object): + pass + + with self.assertRaises(ValueError): + data_structures.List([NotCheckpointable()]) + + def testCallNotImplemented(self): + with self.assertRaisesRegexp(TypeError, "not callable"): + data_structures.List()(1.) + + def testNoPop(self): + with self.assertRaises(AttributeError): + data_structures.List().pop() + + def testNesting(self): + with context.graph_mode(): + inner = data_structures.List() + outer = data_structures.List([inner]) + inner.append(non_keras_core.Dense(1)) + inner[0](array_ops.ones([2, 3])) + self.assertEqual(2, len(outer.variables)) + self.assertIsInstance( + outer.variables[0], + resource_variable_ops.ResourceVariable) + + +if __name__ == "__main__": + test.main() -- GitLab From f6e5089c41fc234ca19fabe2e529ee877a09a33d Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 23 May 2018 11:31:36 -0700 Subject: [PATCH 043/902] [TF:XLA] Register a real implementation of ControlTrigger on XLA devices. 
PiperOrigin-RevId: 197759239 --- tensorflow/compiler/jit/xla_device_ops.h | 4 +++- tensorflow/compiler/tests/xla_device_test.py | 7 +++++++ tensorflow/compiler/tf2xla/kernels/no_op.cc | 5 ++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 9c00a0682c..59822a18c0 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -73,7 +73,9 @@ class XlaDeviceDummyOp : public OpKernel { \ REGISTER_KERNEL_BUILDER( \ Name("VarHandleOp").Device(DEVICE).HostMemory("resource"), \ - ResourceHandleOp); + ResourceHandleOp); \ + REGISTER_KERNEL_BUILDER(Name("ControlTrigger").Device(DEVICE), \ + ControlTriggerOp); } // namespace tensorflow diff --git a/tensorflow/compiler/tests/xla_device_test.py b/tensorflow/compiler/tests/xla_device_test.py index b707bd0963..f0b010fa67 100644 --- a/tensorflow/compiler/tests/xla_device_test.py +++ b/tensorflow/compiler/tests/xla_device_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.compiler.tests.xla_test import XLATestCase from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.platform import test @@ -46,6 +47,12 @@ class XlaDeviceTest(XLATestCase): result = sess.run(z, {x: inputs}) self.assertAllCloseAccordingToType(result, inputs + inputs) + def testControlTrigger(self): + with self.test_session() as sess: + with self.test_scope(): + x = gen_control_flow_ops.control_trigger() + sess.run(x) + if __name__ == "__main__": test.main() diff --git a/tensorflow/compiler/tf2xla/kernels/no_op.cc b/tensorflow/compiler/tf2xla/kernels/no_op.cc index 8c8a9bbe78..65ab9da8d7 100644 --- a/tensorflow/compiler/tf2xla/kernels/no_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/no_op.cc @@ -24,8 +24,7 @@ namespace tensorflow { REGISTER_XLA_OP(Name("NoOp").CompilationOnly(), NoOp); // We register 
ControlTrigger as a no-op. This is correct since nodes seen -// by the XLA compiler are never dead. This may need rethinking when we add -// support for conditionals to XLA. -REGISTER_XLA_OP(Name("ControlTrigger"), NoOp); +// by the XLA compiler are never dead. +REGISTER_XLA_OP(Name("ControlTrigger").CompilationOnly(), NoOp); } // namespace tensorflow -- GitLab From d2309fe5895ba431a4bb11e79564d12028cc93d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 12:18:23 -0700 Subject: [PATCH 044/902] Introduce Encoder and Decoder classes so that platform/*coding* doesn't have to depend on framework/resource_handler and framework/variant. PiperOrigin-RevId: 197768387 --- .../contrib/cmake/tf_core_framework.cmake | 6 -- tensorflow/core/BUILD | 16 +-- tensorflow/core/framework/resource_handle.cc | 25 +++++ tensorflow/core/framework/resource_handle.h | 9 ++ tensorflow/core/framework/tensor.cc | 13 ++- tensorflow/core/framework/variant.cc | 33 +++++++ .../core/framework/variant_encode_decode.h | 10 ++ .../core/platform/default/build_config.bzl | 8 -- .../core/platform/default/string_coding.cc | 30 ++++++ .../core/platform/default/string_coding.h | 98 +++++++++++++++++++ tensorflow/core/platform/tensor_coding.cc | 36 +------ tensorflow/core/platform/tensor_coding.h | 10 +- tensorflow/core/platform/variant_coding.cc | 71 -------------- tensorflow/core/platform/variant_coding.h | 40 -------- 14 files changed, 219 insertions(+), 186 deletions(-) create mode 100644 tensorflow/core/platform/default/string_coding.cc create mode 100644 tensorflow/core/platform/default/string_coding.h delete mode 100644 tensorflow/core/platform/variant_coding.cc delete mode 100644 tensorflow/core/platform/variant_coding.h diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index b47c32f1c4..dac84ccb0d 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ 
b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -213,10 +213,6 @@ else() list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude}) endif() -file(GLOB tf_core_platform_exclude_srcs - "${tensorflow_source_dir}/tensorflow/core/platform/variant_coding.cc") -list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_exclude_srcs}) - list(APPEND tf_core_lib_srcs ${tf_core_platform_srcs}) if(UNIX) @@ -286,8 +282,6 @@ set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.c file(GLOB_RECURSE tf_core_framework_srcs "${tensorflow_source_dir}/tensorflow/core/framework/*.h" "${tensorflow_source_dir}/tensorflow/core/framework/*.cc" - "${tensorflow_source_dir}/tensorflow/core/platform/variant_coding.h" - "${tensorflow_source_dir}/tensorflow/core/platform/variant_coding.cc" "${tensorflow_source_dir}/tensorflow/core/graph/edgeset.h" "${tensorflow_source_dir}/tensorflow/core/graph/edgeset.cc" "${tensorflow_source_dir}/tensorflow/core/graph/graph.h" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9b32a6e192..19e88d6ff1 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -111,8 +111,6 @@ load( "tf_additional_lib_deps", "tf_additional_lib_hdrs", "tf_additional_lib_srcs", - "tf_additional_framework_hdrs", - "tf_additional_framework_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_proto_hdrs", "tf_additional_proto_srcs", @@ -2003,8 +2001,6 @@ cc_library( "platform/**/device_tracer.cc", "platform/**/logging.cc", "platform/abi.cc", - "platform/variant_coding.cc", - "platform/**/variant_cord_coding.cc", ], ) + tf_additional_lib_srcs( exclude = [ @@ -2017,8 +2013,6 @@ cc_library( "platform/**/device_tracer.cc", "platform/**/logging.cc", "platform/abi.cc", - "platform/variant_coding.cc", - "platform/**/variant_cord_coding.cc", ] + # Protobuf deps already included through the ":lib_proto_parsing" # dependency. 
@@ -2268,7 +2262,6 @@ cc_library( ) FRAMEWORK_INTERNAL_PRIVATE_HEADERS = [ - "platform/variant_coding.h", "graph/edgeset.h", "graph/graph.h", "graph/graph_def_builder.h", @@ -2309,14 +2302,13 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", "framework/variant.h", - "platform/variant_coding.h", "util/command_line_flags.h", "util/env_var.h", "util/equal_graph_def.h", "util/presized_cuckoo_map.h", "util/tensor_slice_set.h", "util/tensor_slice_util.h", -] + tf_additional_framework_hdrs() +] tf_cuda_library( name = "framework_internal", @@ -2358,9 +2350,7 @@ cc_header_only_library( tf_cuda_library( name = "framework_internal_impl", - srcs = FRAMEWORK_INTERNAL_PRIVATE_HEADERS + [ - "platform/variant_coding.cc", - ] + glob( + srcs = FRAMEWORK_INTERNAL_PRIVATE_HEADERS + glob( [ "example/**/*.cc", "framework/**/*.cc", @@ -2394,7 +2384,7 @@ tf_cuda_library( "util/memmapped_file_system.cc", "util/memmapped_file_system_writer.cc", ], - }) + tf_additional_framework_srcs(), + }), hdrs = FRAMEWORK_INTERNAL_PUBLIC_HEADERS, copts = tf_copts(), linkopts = select({ diff --git a/tensorflow/core/framework/resource_handle.cc b/tensorflow/core/framework/resource_handle.cc index 39ef82765f..fc3a329b3b 100644 --- a/tensorflow/core/framework/resource_handle.cc +++ b/tensorflow/core/framework/resource_handle.cc @@ -66,4 +66,29 @@ string ProtoDebugString(const ResourceHandle& handle) { return handle.DebugString(); } +void EncodeResourceHandleList(const ResourceHandle* p, int64 n, + std::unique_ptr e) { + ResourceHandleProto proto; + for (int i = 0; i < n; ++i) { + p[i].AsProto(&proto); + e->Append(proto); + } + e->Finalize(); +} + +bool DecodeResourceHandleList(std::unique_ptr d, + ResourceHandle* ps, int64 n) { + std::vector sizes(n); + if (!d->ReadSizes(&sizes)) return false; + + ResourceHandleProto proto; + for (int i = 0; i < n; ++i) { + if (!proto.ParseFromArray(d->Data(sizes[i]), sizes[i])) { + 
return false; + } + ps[i].FromProto(proto); + } + return true; +} + } // namespace tensorflow diff --git a/tensorflow/core/framework/resource_handle.h b/tensorflow/core/framework/resource_handle.h index 06df1b9046..db213669a3 100644 --- a/tensorflow/core/framework/resource_handle.h +++ b/tensorflow/core/framework/resource_handle.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_FRAMEWORK_RESOURCE_HANDLE_H_ #define TENSORFLOW_FRAMEWORK_RESOURCE_HANDLE_H_ +#include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -77,6 +78,14 @@ class ResourceHandle { // For backwards compatibility for when this was a proto string ProtoDebugString(const ResourceHandle& handle); +// Encodes a list of ResourceHandle protos in the given StringListEncoder. +void EncodeResourceHandleList(const ResourceHandle* p, int64 n, + std::unique_ptr e); + +// Decodes a list of ResourceHandle protos from the given StringListDecoder. +bool DecodeResourceHandleList(std::unique_ptr d, + ResourceHandle* ps, int64 n); + } // namespace tensorflow #endif // TENSORFLOW_FRAMEWORK_RESOURCE_HANDLE_H_ diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 167e0eaa6e..384a42fc11 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -51,7 +51,6 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/platform/variant_coding.h" namespace tensorflow { @@ -207,7 +206,8 @@ struct Helper { // "out", which is usually the TensorProto::tensor_content. 
template static void Encode(TensorBuffer* in, int64 n, Destination* out) { - port::EncodeResourceHandleList(in->base(), n, out); + EncodeResourceHandleList(in->base(), n, + port::NewStringListEncoder(out)); } // Decodes "n" elements of type string from "in" and constructs a @@ -217,7 +217,8 @@ struct Helper { static TensorBuffer* Decode(Allocator* a, const Source& in, int64 n) { auto* buf = new Buffer(a, n); ResourceHandle* ps = buf->template base(); - if (ps == nullptr || !port::DecodeResourceHandleList(in, ps, n)) { + if (ps == nullptr || + !DecodeResourceHandleList(port::NewStringListDecoder(in), ps, n)) { buf->Unref(); return nullptr; } @@ -237,7 +238,8 @@ struct Helper { // "out", which is usually the TensorProto::tensor_content. template static void Encode(TensorBuffer* in, int64 n, Destination* out) { - port::EncodeVariantList(in->base(), n, out); + EncodeVariantList(in->base(), n, + port::NewStringListEncoder(out)); } // Decodes "n" elements of type Variant from "in" and constructs a @@ -247,7 +249,8 @@ struct Helper { static TensorBuffer* Decode(Allocator* a, const Source& in, int64 n) { auto* buf = new Buffer(a, n); Variant* ps = buf->template base(); - if (ps == nullptr || !port::DecodeVariantList(in, ps, n)) { + if (ps == nullptr || + !DecodeVariantList(port::NewStringListDecoder(in), ps, n)) { buf->Unref(); return nullptr; } diff --git a/tensorflow/core/framework/variant.cc b/tensorflow/core/framework/variant.cc index 6ad2fafee7..5a507804b0 100644 --- a/tensorflow/core/framework/variant.cc +++ b/tensorflow/core/framework/variant.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -73,4 +74,36 @@ bool DecodeVariant(const string& buf, VariantTensorDataProto* value) { return value->ParseFromString(buf); } +void EncodeVariantList(const Variant* variant_array, int64 n, + std::unique_ptr e) { + for (int i = 0; i < n; ++i) { + string s; + variant_array[i].Encode(&s); + e->Append(s); + } + e->Finalize(); +} + +bool DecodeVariantList(std::unique_ptr d, + Variant* variant_array, int64 n) { + std::vector sizes(n); + if (!d->ReadSizes(&sizes)) return false; + + for (int i = 0; i < n; ++i) { + if (variant_array[i].is_empty()) { + variant_array[i] = VariantTensorDataProto(); + } + string str(d->Data(sizes[i]), sizes[i]); + if (!variant_array[i].Decode(str)) return false; + if (!DecodeUnaryVariant(&variant_array[i])) { + LOG(ERROR) << "Could not decode variant with type_name: \"" + << variant_array[i].TypeName() + << "\". Perhaps you forgot to register a " + "decoder via REGISTER_UNARY_VARIANT_DECODE_FUNCTION?"; + return false; + } + } + return true; +} + } // end namespace tensorflow diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h index 5a84f9d943..ded04b2a30 100644 --- a/tensorflow/core/framework/variant_encode_decode.h +++ b/tensorflow/core/framework/variant_encode_decode.h @@ -259,6 +259,16 @@ void EncodeVariant(const VariantTensorDataProto& value, string* buf); template <> bool DecodeVariant(const string& buf, VariantTensorDataProto* value); +// Encodes an array of Variant objects in to the given StringListEncoder. +// `variant_array` is assumed to point to an array of `n` Variant objects. 
+void EncodeVariantList(const Variant* variant_array, int64 n, + std::unique_ptr e); + +// Decodes an array of Variant objects from the given StringListDecoder. +// `variant_array` is assumed to point to an array of `n` Variant objects. +bool DecodeVariantList(std::unique_ptr d, + Variant* variant_array, int64 n); + } // end namespace tensorflow #endif // TENSORFLOW_FRAMEWORK_VARIANT_ENCODE_DECODE_H_ diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index b4b756b866..284581b41c 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -495,14 +495,6 @@ def tf_additional_lib_srcs(exclude = []): ], exclude = exclude), }) -# pylint: disable=unused-argument -def tf_additional_framework_hdrs(exclude = []): - return [] - -def tf_additional_framework_srcs(exclude = []): - return [] -# pylint: enable=unused-argument - def tf_additional_minimal_lib_srcs(): return [ "platform/default/integral_types.h", diff --git a/tensorflow/core/platform/default/string_coding.cc b/tensorflow/core/platform/default/string_coding.cc new file mode 100644 index 0000000000..7410ee6782 --- /dev/null +++ b/tensorflow/core/platform/default/string_coding.cc @@ -0,0 +1,30 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/platform/default/string_coding.h" + +namespace tensorflow { +namespace port { + +std::unique_ptr NewStringListEncoder(string* out) { + return std::unique_ptr(new StringListEncoder(out)); +} + +std::unique_ptr NewStringListDecoder(const string& in) { + return std::unique_ptr(new StringListDecoder(in)); +} + +} // namespace port +} // namespace tensorflow diff --git a/tensorflow/core/platform/default/string_coding.h b/tensorflow/core/platform/default/string_coding.h new file mode 100644 index 0000000000..70b8ab0144 --- /dev/null +++ b/tensorflow/core/platform/default/string_coding.h @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ +#define TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ + +// IWYU pragma: private, include "third_party/tensorflow/core/platform/tensor_coding.h" +// IWYU pragma: friend third_party/tensorflow/core/platform/tensor_coding.h + +#include "tensorflow/core/lib/core/coding.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace port { + +// Encodes sequences of strings and serialized protocol buffers into a string. 
+// Normal usage consists of zero or more calls to Append() and a single call to +// Finalize(). +class StringListEncoder { + public: + explicit StringListEncoder(string* out) : out_(out) {} + + // Encodes the given protocol buffer. This may not be called after Finalize(). + void Append(const protobuf::MessageLite& m) { + core::PutVarint32(out_, m.ByteSize()); + m.AppendToString(&rest_); + } + + // Encodes the given string. This may not be called after Finalize(). + void Append(const string& s) { + core::PutVarint32(out_, s.length()); + strings::StrAppend(&rest_, s); + } + + // Signals end of the encoding process. No other calls are allowed after this. + void Finalize() { strings::StrAppend(out_, rest_); } + + private: + string* out_; + string rest_; +}; + +// Decodes a string into sequences of strings (which may represent serialized +// protocol buffers). Normal usage involves a single call to ReadSizes() in +// order to retrieve the length of all the strings in the sequence. For each +// size returned a call to Data() is expected and will return the actual +// string. +class StringListDecoder { + public: + explicit StringListDecoder(const string& in) : reader_(in) {} + + // Populates the given vector with the lengths of each string in the sequence + // being decoded. Upon returning the vector is guaranteed to contain as many + // elements as there are strings in the sequence. + bool ReadSizes(std::vector* sizes) { + int64 total = 0; + for (auto& size : *sizes) { + if (!core::GetVarint32(&reader_, &size)) return false; + total += size; + } + if (total != static_cast(reader_.size())) { + return false; + } + return true; + } + + // Returns a pointer to the next string in the sequence, then prepares for the + // next call by advancing 'size' characters in the sequence. 
+ const char* Data(uint32 size) { + const char* data = reader_.data(); + reader_.remove_prefix(size); + return data; + } + + private: + StringPiece reader_; +}; + +std::unique_ptr NewStringListEncoder(string* out); +std::unique_ptr NewStringListDecoder(const string& in); + +} // namespace port +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ diff --git a/tensorflow/core/platform/tensor_coding.cc b/tensorflow/core/platform/tensor_coding.cc index 17dc81f7e0..84601de39a 100644 --- a/tensorflow/core/platform/tensor_coding.cc +++ b/tensorflow/core/platform/tensor_coding.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/core/platform/tensor_coding.h" #include -#include "tensorflow/core/framework/resource_handle.pb.h" + #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -66,39 +66,5 @@ void CopyFromArray(string* s, const char* base, size_t bytes) { s->assign(base, bytes); } -void EncodeResourceHandleList(const ResourceHandle* p, int64 n, string* out) { - out->clear(); - string rest; - ResourceHandleProto proto; - for (int i = 0; i < n; ++i) { - p[i].AsProto(&proto); - core::PutVarint32(out, proto.ByteSize()); - proto.AppendToString(&rest); - } - *out += rest; -} - -bool DecodeResourceHandleList(const string& in, ResourceHandle* ps, int64 n) { - std::vector sizes(n); - StringPiece reader(in); - int64 total = 0; - for (auto& size : sizes) { - if (!core::GetVarint32(&reader, &size)) return false; - total += size; - } - if (total != static_cast(reader.size())) { - return false; - } - ResourceHandleProto proto; - for (int i = 0; i < n; ++i) { - if (!proto.ParseFromArray(reader.data(), sizes[i])) { - return false; - } - ps[i].FromProto(proto); - reader.remove_prefix(sizes[i]); - } - return true; -} - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/tensor_coding.h b/tensorflow/core/platform/tensor_coding.h index 19f53e6374..6c6d75830d 
100644 --- a/tensorflow/core/platform/tensor_coding.h +++ b/tensorflow/core/platform/tensor_coding.h @@ -18,7 +18,6 @@ limitations under the License. #define TENSORFLOW_PLATFORM_TENSOR_CODING_H_ #include -#include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/platform.h" @@ -26,6 +25,8 @@ limitations under the License. #ifdef PLATFORM_GOOGLE #include "tensorflow/core/platform/google/cord_coding.h" +#else +#include "tensorflow/core/platform/default/string_coding.h" #endif namespace tensorflow { @@ -51,13 +52,6 @@ bool DecodeStringList(const string& src, string* strings, int64 n); // Assigns base[0..bytes-1] to *s void CopyFromArray(string* s, const char* base, size_t bytes); -// Encodes a list of ResourceHandle protos in the given string. -void EncodeResourceHandleList(const ResourceHandle* handles, int64 n, - string* out); - -// Decodes a list of ResourceHandle protos from the given string. -bool DecodeResourceHandleList(const string& in, ResourceHandle* ps, int64 n); - } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/variant_coding.cc b/tensorflow/core/platform/variant_coding.cc deleted file mode 100644 index 48c5389d29..0000000000 --- a/tensorflow/core/platform/variant_coding.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/platform/variant_coding.h" - -#include -#include "tensorflow/core/framework/tensor.pb.h" -#include "tensorflow/core/framework/variant_op_registry.h" -#include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/strings/strcat.h" - -namespace tensorflow { -namespace port { - -void EncodeVariantList(const Variant* variant_array, int64 n, string* out) { - out->clear(); - string rest; - for (int i = 0; i < n; ++i) { - string s; - variant_array[i].Encode(&s); - core::PutVarint32(out, s.length()); - strings::StrAppend(&rest, s); - } - strings::StrAppend(out, rest); -} - -bool DecodeVariantList(const string& in, Variant* variant_array, int64 n) { - std::vector sizes(n); - StringPiece reader(in); - int64 total = 0; - for (auto& size : sizes) { - if (!core::GetVarint32(&reader, &size)) return false; - total += size; - } - if (total != static_cast(reader.size())) { - return false; - } - - for (int i = 0; i < n; ++i) { - if (variant_array[i].is_empty()) { - variant_array[i] = VariantTensorDataProto(); - } - string str(reader.data(), sizes[i]); - if (!variant_array[i].Decode(str)) return false; - if (!DecodeUnaryVariant(&variant_array[i])) { - LOG(ERROR) << "Could not decode variant with type_name: \"" - << variant_array[i].TypeName() - << "\". Perhaps you forgot to register a " - "decoder via REGISTER_UNARY_VARIANT_DECODE_FUNCTION?"; - return false; - } - reader.remove_prefix(sizes[i]); - } - return true; -} - -} // end namespace port -} // end namespace tensorflow diff --git a/tensorflow/core/platform/variant_coding.h b/tensorflow/core/platform/variant_coding.h deleted file mode 100644 index a971857e4a..0000000000 --- a/tensorflow/core/platform/variant_coding.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_PLATFORM_VARIANT_CODING_H_ -#define TENSORFLOW_PLATFORM_VARIANT_CODING_H_ - -#include "tensorflow/core/framework/variant.h" -#include "tensorflow/core/framework/variant_encode_decode.h" - -#ifdef PLATFORM_GOOGLE -#include "tensorflow/core/platform/google/variant_cord_coding.h" -#endif - -namespace tensorflow { -namespace port { - -// Encodes an array of Variant objects in to the given string. -// `variant_array` is assumed to point to an array of `n` Variant objects. -void EncodeVariantList(const Variant* variant_array, int64 n, string* out); - -// Decodes an array of Variant objects from the given string. -// `variant_array` is assumed to point to an array of `n` Variant objects. 
-bool DecodeVariantList(const string& in, Variant* variant_array, int64 n); - -} // end namespace port -} // end namespace tensorflow - -#endif // TENSORFLOW_PLATFORM_VARIANT_CODING_H_ -- GitLab From 0c8b05d00b7c77802ff37200bb009b5ec8ee1eed Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 23 May 2018 12:26:50 -0700 Subject: [PATCH 045/902] Automated g4 rollback of changelist 197741984 PiperOrigin-RevId: 197769770 --- tensorflow/contrib/android/BUILD | 2 -- tensorflow/contrib/lite/examples/android/BUILD | 2 -- tensorflow/contrib/lite/java/demo/app/src/main/BUILD | 2 -- tensorflow/contrib/lite/java/ovic/BUILD | 2 -- tensorflow/contrib/lite/java/ovic/demo/app/BUILD | 2 -- .../lite/java/src/testhelper/java/org/tensorflow/lite/BUILD | 2 -- .../contrib/lite/models/smartreply/demo/app/src/main/BUILD | 2 -- tensorflow/examples/android/BUILD | 2 -- 8 files changed, 16 deletions(-) diff --git a/tensorflow/contrib/android/BUILD b/tensorflow/contrib/android/BUILD index 1c19d07bcc..c10179ba8b 100644 --- a/tensorflow/contrib/android/BUILD +++ b/tensorflow/contrib/android/BUILD @@ -1,8 +1,6 @@ # Description: # JNI-based Java inference interface for TensorFlow. -load("//tools/build_defs/android:rules.bzl", "android_library") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/examples/android/BUILD b/tensorflow/contrib/lite/examples/android/BUILD index 88f435e871..5700007256 100644 --- a/tensorflow/contrib/lite/examples/android/BUILD +++ b/tensorflow/contrib/lite/examples/android/BUILD @@ -1,8 +1,6 @@ # Description: # TensorFlow camera demo app for Android. 
-load("//tools/build_defs/android:rules.bzl", "android_binary") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index a45871f7cf..d6fbef9cc9 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -1,5 +1,3 @@ -load("//tools/build_defs/android:rules.bzl", "android_binary") - package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD index 5f05906bc5..362d93636f 100644 --- a/tensorflow/contrib/lite/java/ovic/BUILD +++ b/tensorflow/contrib/lite/java/ovic/BUILD @@ -1,8 +1,6 @@ # Description: # OVIC Benchmarker Java API. -load("//tools/build_defs/android:rules.bzl", "android_library") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD index 156a4ab8f5..83974f4b33 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD +++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD @@ -1,5 +1,3 @@ -load("//tools/build_defs/android:rules.bzl", "android_binary") - # Sample app for OVIC benchmarking. licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD index 770062c90d..b524246d43 100644 --- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD @@ -1,8 +1,6 @@ # Description: # Internal helper function to test TF Lite API. 
-load("//tools/build_defs/android:rules.bzl", "android_library") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD index 8a86ecbf91..f8767b443a 100644 --- a/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/models/smartreply/demo/app/src/main/BUILD @@ -1,5 +1,3 @@ -load("//tools/build_defs/android:rules.bzl", "android_binary") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 118138296c..07f096418f 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -1,8 +1,6 @@ # Description: # TensorFlow camera demo app for Android. -load("//tools/build_defs/android:rules.bzl", "android_binary") - package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 -- GitLab From a0106575e1f445dde23c96a85b650f38251a2ca3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 12:35:05 -0700 Subject: [PATCH 046/902] Extracts the SimplifyReshape optimization into its own method. 
PiperOrigin-RevId: 197770994 --- .../grappler/optimizers/constant_folding.cc | 75 ++++++++++--------- .../grappler/optimizers/constant_folding.h | 19 +++-- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8bdb164b03..4ebe1cabfc 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1631,20 +1631,20 @@ Status ConstantFolding::ReplaceOperationWithConstant( return Status::OK(); } -Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, - GraphProperties* properties, - bool use_shape_info) { +Status ConstantFolding::SimplifyGraph(bool use_shape_info, + GraphDef* optimized_graph, + GraphProperties* properties) { for (int i = 0; i < optimized_graph->node_size(); ++i) { - TF_RETURN_IF_ERROR(SimplifyNode(optimized_graph->mutable_node(i), - optimized_graph, properties, - use_shape_info)); + TF_RETURN_IF_ERROR(SimplifyNode(use_shape_info, + optimized_graph->mutable_node(i), + optimized_graph, properties)); } return Status::OK(); } -Status ConstantFolding::SimplifyNode(NodeDef* node, GraphDef* optimized_graph, - GraphProperties* properties, - bool use_shape_info) { +Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, + GraphDef* optimized_graph, + GraphProperties* properties) { if (IsSplit(*node) && node->attr().at("num_split").i() == 1) { ReplaceOperationWithIdentity(1, *properties, node, optimized_graph); return Status::OK(); @@ -2150,20 +2150,16 @@ Status ConstantFolding::SimplifyNode(NodeDef* node, GraphDef* optimized_graph, graph_modified_ = true; return Status::OK(); } - if (use_shape_info && IsSimplifiableReshape(*node, *properties)) { - DataType output_type = node->attr().at("T").type(); - node->set_op("Identity"); - node->clear_attr(); - (*node->mutable_attr())["T"].set_type(output_type); - *node->mutable_input(1) = 
AsControlDependency(node->input(1)); + + if (SimplifyReshape(*properties, use_shape_info, node)) { graph_modified_ = true; return Status::OK(); } bool arithmetic_simplification_succeed = false; - Status simplify_arithmetic_status = SimplifyArithmeticOperations( - optimized_graph, properties, node, use_shape_info, - &arithmetic_simplification_succeed); + Status simplify_arithmetic_status = + SimplifyArithmeticOperations(*properties, use_shape_info, optimized_graph, + node, &arithmetic_simplification_succeed); if (!simplify_arithmetic_status.ok()) { return simplify_arithmetic_status; } else if (arithmetic_simplification_succeed) { @@ -2204,9 +2200,21 @@ Status ConstantFolding::SimplifyNode(NodeDef* node, GraphDef* optimized_graph, return Status::OK(); } +bool ConstantFolding::SimplifyReshape(const GraphProperties& properties, + bool use_shape_info, NodeDef* node) { + if (!use_shape_info) return false; + if (!IsSimplifiableReshape(*node, properties)) return false; + DataType output_type = node->attr().at("T").type(); + node->set_op("Identity"); + node->clear_attr(); + (*node->mutable_attr())["T"].set_type(output_type); + *node->mutable_input(1) = AsControlDependency(node->input(1)); + return true; +} + Status ConstantFolding::SimplifyArithmeticOperations( - GraphDef* optimized_graph, GraphProperties* properties, NodeDef* node, - bool use_shape_info, bool* success) { + const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success) { const bool is_mul = IsMul(*node) || IsLogicalAnd(*node); const bool is_matmul = IsMatMul(*node); const bool is_add = IsAdd(*node) || IsBiasAdd(*node) || IsLogicalOr(*node); @@ -2215,8 +2223,8 @@ Status ConstantFolding::SimplifyArithmeticOperations( // Simplify arithmetic operations with ones or zeros. 
if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && - properties->HasInputProperties(node->name()) && - properties->HasOutputProperties(node->name())) { + properties.HasInputProperties(node->name()) && + properties.HasOutputProperties(node->name())) { const NodeDef* x = node_map_->GetNode(node->input(0)); const NodeDef* y = node_map_->GetNode(node->input(1)); if (x == nullptr || y == nullptr) { @@ -2224,19 +2232,19 @@ Status ConstantFolding::SimplifyArithmeticOperations( node->DebugString()); } const TensorShapeProto& output_shape = - properties->GetOutputProperties(node->name())[0].shape(); + properties.GetOutputProperties(node->name())[0].shape(); // Simplify element-wise multiplication by ones or addition/subtraction // of zeros. const TensorShapeProto& y_shape = - properties->GetInputProperties(node->name())[1].shape(); + properties.GetInputProperties(node->name())[1].shape(); const bool x_is_zero = IsZeros(*x); const bool x_is_one = x_is_zero ? false : IsOnes(*x); const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { // 1 * y = y or 0 + y = y. - ReplaceOperationWithSnapshot(1, *properties, node, optimized_graph); + ReplaceOperationWithSnapshot(1, properties, node, optimized_graph); *success = true; return Status::OK(); } @@ -2259,14 +2267,14 @@ Status ConstantFolding::SimplifyArithmeticOperations( } const TensorShapeProto& x_shape = - properties->GetInputProperties(node->name())[0].shape(); + properties.GetInputProperties(node->name())[0].shape(); const bool y_is_zero = IsZeros(*y); const bool y_is_one = y_is_zero ? 
false : IsOnes(*y); const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) || ((is_add || is_sub) && y_is_zero))) { // x * 1 = x or x / 1 = x or x +/- 0 = x - ReplaceOperationWithSnapshot(0, *properties, node, optimized_graph); + ReplaceOperationWithSnapshot(0, properties, node, optimized_graph); *success = true; return Status::OK(); } @@ -2276,9 +2284,8 @@ Status ConstantFolding::SimplifyArithmeticOperations( const PartialTensorShape shp(output_shape); if (shp.IsFullyDefined() && IsLogicalOr(*node) && (y_is_one || x_is_one)) { bool replace_succeed = false; - Status replace_op_status = - ReplaceOperationWithConstant(1, *properties, output_shape, node, - optimized_graph, &replace_succeed); + Status replace_op_status = ReplaceOperationWithConstant( + 1, properties, output_shape, node, optimized_graph, &replace_succeed); if (!replace_op_status.ok()) { return replace_op_status; } else if (replace_succeed) { @@ -2296,7 +2303,7 @@ Status ConstantFolding::SimplifyArithmeticOperations( if (shp.IsFullyDefined()) { bool replace_succeed = false; Status replace_op_status = - ReplaceOperationWithConstant(0, *properties, output_shape, node, + ReplaceOperationWithConstant(0, properties, output_shape, node, optimized_graph, &replace_succeed); if (!replace_op_status.ok()) { return replace_op_status; @@ -2309,11 +2316,11 @@ Status ConstantFolding::SimplifyArithmeticOperations( // matches the output shape and thus forward the corresponding zero // input. 
if ((is_mul || is_any_div) && x_is_zero && x_matches_output_shape) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); *success = true; return Status::OK(); } else if (is_mul && y_is_zero && y_matches_output_shape) { - ReplaceOperationWithIdentity(1, *properties, node, optimized_graph); + ReplaceOperationWithIdentity(1, properties, node, optimized_graph); *success = true; return Status::OK(); } @@ -2855,7 +2862,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, TF_RETURN_IF_ERROR(FoldGraph(optimized_graph)); node_map_.reset(new NodeMap(optimized_graph)); TF_RETURN_IF_ERROR( - SimplifyGraph(optimized_graph, &properties, can_use_shape_info)); + SimplifyGraph(can_use_shape_info, optimized_graph, &properties)); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index e477934f30..3cf379fbc0 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -97,10 +97,10 @@ class ConstantFolding : public GraphOptimizer { const GraphProperties& properties) const; bool IsSimplifiableReshape(const NodeDef& node, const GraphProperties& properties) const; - Status SimplifyGraph(GraphDef* output, GraphProperties* properties, - bool use_shape_info); - Status SimplifyNode(NodeDef* node, GraphDef* optimized_graph, - GraphProperties* properties, bool use_shape_info); + Status SimplifyGraph(bool use_shape_info, GraphDef* optimized_graph, + GraphProperties* properties); + Status SimplifyNode(bool use_shape_info, NodeDef* node, + GraphDef* optimized_graph, GraphProperties* properties); Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, GraphDef* output); @@ -134,11 +134,16 @@ class ConstantFolding : public GraphOptimizer { // Simplifies arithmetic operations with ones or zeros. 
Returns the status, // and updates the success input argument that denotes if any simplification // was applied. - Status SimplifyArithmeticOperations(GraphDef* optimized_graph, - GraphProperties* properties, - NodeDef* node, bool use_shape_info, + Status SimplifyArithmeticOperations(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success); + // Simplifies a Reshape operation to an Identity operation if the input node + // to the operation is a constant. + bool SimplifyReshape(const GraphProperties& properties, bool use_shape_info, + NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 85404e8f113c79dbeec5685166a4e797abffd505 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 23 May 2018 13:17:39 -0700 Subject: [PATCH 047/902] Adding utility class for manipulating a GraphDef. PiperOrigin-RevId: 197777416 --- .../core/grappler/optimizers/data/BUILD | 34 +++ .../grappler/optimizers/data/graph_utils.cc | 217 ++++++++++++++++++ .../grappler/optimizers/data/graph_utils.h | 81 +++++++ .../optimizers/data/graph_utils_test.cc | 142 ++++++++++++ 4 files changed, 474 insertions(+) create mode 100644 tensorflow/core/grappler/optimizers/data/BUILD create mode 100644 tensorflow/core/grappler/optimizers/data/graph_utils.cc create mode 100644 tensorflow/core/grappler/optimizers/data/graph_utils.h create mode 100644 tensorflow/core/grappler/optimizers/data/graph_utils_test.cc diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD new file mode 100644 index 0000000000..29ebb9a69e --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -0,0 +1,34 @@ +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all") + +cc_library( + name = "graph_utils", + 
srcs = ["graph_utils.cc"], + hdrs = [ + "graph_utils.h", + ], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:graph_view", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:grappler_item_builder", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:virtual_cluster", + "//tensorflow/core/grappler/optimizers:meta_optimizer", + ] + tf_protos_all(), +) + +tf_cc_test( + name = "graph_utils_test", + srcs = ["graph_utils_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc new file mode 100644 index 0000000000..df12de37da --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc @@ -0,0 +1,217 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" + +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" +#include "tensorflow/core/grappler/graph_view.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/grappler_item_builder.h" +#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace tensorflow { +namespace grappler { +namespace graph_utils { +namespace { + +int FindNodeWithPredicate(const std::function& predicate, + const GraphDef& graph) { + for (int i = 0; i < graph.node_size(); ++i) { + if (predicate(graph.node(i))) { + return i; + } + } + return -1; +} + +std::vector CreateNameIndex(const GraphDef& graph) { + std::map names; + for (int i = 0; i < graph.node_size(); ++i) { + names[graph.node(i).name()] = i; + } + std::vector index(graph.node_size()); + int i = 0; + for (const auto& pair : names) { + index[i++] = pair.second; + } + return index; +} + +std::vector CreateInputIndex(const NodeDef& node) { + std::map inputs; + for (int i = 0; i < node.input_size(); ++i) { + inputs[node.input(i)] = i; + } + std::vector index(node.input_size()); + int i = 0; + for (const auto& pair : inputs) { + index[i++] = pair.second; + } + return index; +} + +Status AddScalarConstNodeHelper( + DataType dtype, const std::function& add_value, + GraphDef* graph, NodeDef** result) { + NodeDef* node = graph->add_node(); + const string& name = strings::StrCat("Const/_", graph->node_size()); + node->set_name(name); + node->set_op("Const"); + (*node->mutable_attr())["dtype"].set_type(dtype); + std::unique_ptr tensor = + tensorflow::MakeUnique(); + std::unique_ptr tensor_shape = + tensorflow::MakeUnique(); + tensor->set_allocated_tensor_shape(tensor_shape.release()); + tensor->set_dtype(dtype); + add_value(tensor.get()); + 
(*node->mutable_attr())["value"].set_allocated_tensor(tensor.release()); + *result = node; + return Status::OK(); +} + +} // namespace + +Status AddNode(const string& name, const string& op, + const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph, NodeDef** result) { + NodeDef* node = graph->add_node(); + if (!name.empty()) { + node->set_name(name); + } else { + node->set_name(strings::StrCat(op, "/_", graph->node_size())); + } + node->set_op(op); + for (const string& input : inputs) { + node->add_input(input); + } + for (auto attr : attributes) { + (*node->mutable_attr())[attr.first] = attr.second; + } + *result = node; + return Status::OK(); +} + +template <> +Status AddScalarConstNode(bool v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_BOOL, [v](TensorProto* proto) { proto->add_bool_val(v); }, graph, + result); +} + +template <> +Status AddScalarConstNode(double v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_DOUBLE, [v](TensorProto* proto) { proto->add_double_val(v); }, graph, + result); +} + +template <> +Status AddScalarConstNode(float v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_FLOAT, [v](TensorProto* proto) { proto->add_float_val(v); }, graph, + result); +} + +template <> +Status AddScalarConstNode(int v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_INT32, [v](TensorProto* proto) { proto->add_int_val(v); }, graph, + result); +} + +template <> +Status AddScalarConstNode(int64 v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_INT64, [v](TensorProto* proto) { proto->add_int64_val(v); }, graph, + result); +} + +template <> +Status AddScalarConstNode(StringPiece v, GraphDef* graph, NodeDef** result) { + return AddScalarConstNodeHelper( + DT_STRING, + [v](TensorProto* proto) { proto->add_string_val(v.data(), v.size()); }, + graph, result); +} + +bool Compare(const GraphDef& g1, 
const GraphDef& g2) { + if (g1.node_size() != g2.node_size()) { + return false; + } + std::vector name_index1 = CreateNameIndex(g1); + std::vector name_index2 = CreateNameIndex(g2); + for (int i = 0; i < g1.node_size(); ++i) { + int idx1 = name_index1[i]; + int idx2 = name_index2[i]; + if (g1.node(idx1).op() != g2.node(idx2).op()) { + return false; + } + if (g1.node(idx1).name() != g2.node(idx2).name()) { + return false; + } + if (g1.node(idx1).input_size() != g2.node(idx2).input_size()) { + return false; + } + std::vector input_index1 = CreateInputIndex(g1.node(idx1)); + std::vector input_index2 = CreateInputIndex(g2.node(idx2)); + for (int j = 0; j < g1.node(idx1).input_size(); ++j) { + if (!IsSameInput(g1.node(idx1).input(input_index1[j]), + g2.node(idx2).input(input_index2[j]))) { + return false; + } + } + } + return true; +} + +bool ContainsNodeWithName(const string& name, const GraphDef& graph) { + return FindNodeWithName(name, graph) != -1; +} + +bool ContainsNodeWithOp(const string& op, const GraphDef& graph) { + return FindNodeWithOp(op, graph) != -1; +} + +Status DeleteNodes(const std::set& nodes_to_delete, GraphDef* graph) { + int last = graph->node_size() - 1; + for (int i = graph->node_size() - 1; i >= 0; --i) { + const NodeDef& node = graph->node(i); + if (nodes_to_delete.find(node.name()) != nodes_to_delete.end()) { + graph->mutable_node()->SwapElements(i, last); + last--; + } + } + graph->mutable_node()->DeleteSubrange(last + 1, + graph->node_size() - last - 1); + return Status::OK(); +} + +int FindNodeWithName(const string& name, const GraphDef& graph) { + return FindNodeWithPredicate( + [name](const NodeDef& node) { return node.name() == name; }, graph); +} + +int FindNodeWithOp(const string& op, const GraphDef& graph) { + return FindNodeWithPredicate( + [op](const NodeDef& node) { return node.op() == op; }, graph); +} + +} // end namespace graph_utils +} // end namespace grappler +} // end namespace tensorflow diff --git 
a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h new file mode 100644 index 0000000000..b40ca44d78 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h @@ -0,0 +1,81 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_ + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace grappler { +namespace graph_utils { + +// Adds a node to the graph. +Status AddNode(const string& name, const string& op, + const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph, NodeDef** result); + +// Adds a Const node with the given value to the graph. 
+template +Status AddScalarConstNode(T v, GraphDef* graph, NodeDef** result) { + return errors::Unimplemented("Type %s is not supported.", + DataTypeToEnum::value); +} +template <> +Status AddScalarConstNode(bool v, GraphDef* graph, NodeDef** result); +template <> +Status AddScalarConstNode(double v, GraphDef* graph, NodeDef** result); +template <> +Status AddScalarConstNode(float v, GraphDef* graph, NodeDef** result); +template <> +Status AddScalarConstNode(int v, GraphDef* graph, NodeDef** result); +template <> +Status AddScalarConstNode(int64 v, GraphDef* graph, NodeDef** result); +template <> +Status AddScalarConstNode(StringPiece v, GraphDef* graph, NodeDef** result); + +// Checks whether the two graphs are the same. +bool Compare(const GraphDef& g1, const GraphDef& g2); + +// Checks whether the graph contains a node with the given name. +bool ContainsNodeWithName(const string& name, const GraphDef& graph); + +// Checks whether the graph contains a node with the given op. +bool ContainsNodeWithOp(const string& op, const GraphDef& graph); + +// Deletes nodes from the graph. +Status DeleteNodes(const std::set& nodes_to_delete, GraphDef* graph); + +// Returns the index of the node with the given name or -1 if the node does +// not exist. +int FindNodeWithName(const string& name, const GraphDef& graph); + +// Returns the index of a node with the given op or -1 if no such node +// exists. +int FindNodeWithOp(const string& op, const GraphDef& graph); + +} // end namespace graph_utils +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_ diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc new file mode 100644 index 0000000000..b34726044e --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc @@ -0,0 +1,142 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace graph_utils { +namespace { + +class GraphUtilsTest : public ::testing::Test {}; + +TEST_F(GraphUtilsTest, AddScalarConstNodeBool) { + GraphDef graph; + NodeDef* bool_node; + TF_EXPECT_OK(AddScalarConstNode(true, &graph, &bool_node)); + EXPECT_TRUE(ContainsNodeWithName(bool_node->name(), graph)); + EXPECT_EQ(bool_node->attr().at("value").tensor().bool_val(0), true); +} + +TEST_F(GraphUtilsTest, AddScalarConstNodeDouble) { + GraphDef graph; + NodeDef* double_node; + TF_EXPECT_OK(AddScalarConstNode(3.14, &graph, &double_node)); + EXPECT_TRUE(ContainsNodeWithName(double_node->name(), graph)); + EXPECT_FLOAT_EQ(double_node->attr().at("value").tensor().double_val(0), 3.14); +} + +TEST_F(GraphUtilsTest, AddScalarConstNodeFloat) { + GraphDef graph; + NodeDef* float_node; + TF_EXPECT_OK(AddScalarConstNode(3.14, &graph, &float_node)); + EXPECT_TRUE(ContainsNodeWithName(float_node->name(), graph)); + EXPECT_FLOAT_EQ(float_node->attr().at("value").tensor().float_val(0), 3.14); +} + +TEST_F(GraphUtilsTest, AddScalarConstNodeInt) { + GraphDef graph; + NodeDef* int_node; + TF_EXPECT_OK(AddScalarConstNode(42, &graph, &int_node)); + 
EXPECT_TRUE(ContainsNodeWithName(int_node->name(), graph)); + EXPECT_EQ(int_node->attr().at("value").tensor().int_val(0), 42); +} + +TEST_F(GraphUtilsTest, AddScalarConstNodeInt64) { + GraphDef graph; + NodeDef* int64_node; + TF_EXPECT_OK(AddScalarConstNode(42, &graph, &int64_node)); + EXPECT_TRUE(ContainsNodeWithName(int64_node->name(), graph)); + EXPECT_EQ(int64_node->attr().at("value").tensor().int64_val(0), 42); +} + +TEST_F(GraphUtilsTest, AddScalarConstNodeString) { + GraphDef graph; + NodeDef* string_node; + TF_EXPECT_OK(AddScalarConstNode("hello", &graph, &string_node)); + EXPECT_TRUE(ContainsNodeWithName(string_node->name(), graph)); + EXPECT_EQ(string_node->attr().at("value").tensor().string_val(0), "hello"); +} + +TEST_F(GraphUtilsTest, Compare) { + GraphDef graphA; + GraphDef graphB; + EXPECT_TRUE(Compare(graphA, graphB)); + + NodeDef* nodeA; + TF_EXPECT_OK(AddNode("A", "OpA", {}, {}, &graphA, &nodeA)); + NodeDef* nodeB; + TF_EXPECT_OK(AddNode("B", "OpB", {"A"}, {}, &graphA, &nodeB)); + EXPECT_FALSE(Compare(graphA, graphB)); + + graphB.mutable_node()->CopyFrom(graphA.node()); + EXPECT_TRUE(Compare(graphA, graphB)); +} + +TEST_F(GraphUtilsTest, ContainsNodeWithName) { + GraphDef graph; + EXPECT_TRUE(!ContainsNodeWithName("A", graph)); + + NodeDef* node; + TF_EXPECT_OK(AddNode("A", "OpA", {}, {}, &graph, &node)); + EXPECT_TRUE(ContainsNodeWithName("A", graph)); + + TF_EXPECT_OK(DeleteNodes({"A"}, &graph)); + EXPECT_TRUE(!ContainsNodeWithName("A", graph)); +} + +TEST_F(GraphUtilsTest, ContainsNodeWithOp) { + GraphDef graph; + EXPECT_TRUE(!ContainsNodeWithOp("OpA", graph)); + + NodeDef* node; + TF_EXPECT_OK(AddNode("A", "OpA", {}, {}, &graph, &node)); + EXPECT_TRUE(ContainsNodeWithOp("OpA", graph)); + + TF_EXPECT_OK(DeleteNodes({"A"}, &graph)); + EXPECT_TRUE(!ContainsNodeWithOp("OpA", graph)); +} + +TEST_F(GraphUtilsTest, FindNodeWithName) { + GraphDef graph; + EXPECT_EQ(FindNodeWithName("A", graph), -1); + + NodeDef* node; + TF_EXPECT_OK(AddNode("A", 
"OpA", {}, {}, &graph, &node)); + EXPECT_NE(FindNodeWithName("A", graph), -1); + + TF_EXPECT_OK(DeleteNodes({"A"}, &graph)); + EXPECT_EQ(FindNodeWithName("A", graph), -1); +} + +TEST_F(GraphUtilsTest, FindNodeWithOp) { + GraphDef graph; + EXPECT_EQ(FindNodeWithOp("OpA", graph), -1); + + NodeDef* node; + TF_EXPECT_OK(AddNode("A", "OpA", {}, {}, &graph, &node)); + EXPECT_NE(FindNodeWithOp("OpA", graph), -1); + + TF_EXPECT_OK(DeleteNodes({"A"}, &graph)); + EXPECT_EQ(FindNodeWithOp("OpA", graph), -1); +} + +} // namespace +} // namespace graph_utils +} // namespace grappler +} // namespace tensorflow -- GitLab From b9895025c9019e69f59fa197d5b216b697d66bfe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 13:22:27 -0700 Subject: [PATCH 048/902] Internal Change PiperOrigin-RevId: 197778159 --- tensorflow/python/kernel_tests/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 61f3f69e84..3c944b16e1 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2322,6 +2322,9 @@ cuda_py_test( "//tensorflow/python:nn_ops", ], shard_count = 2, + tags = [ + "nogpu", # Flaky: b/80127739 + ], ) cuda_py_test( -- GitLab From 0920a3c59ee85d1917e27fc69ed5cbd8e4b7a6c4 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 23 May 2018 13:45:14 -0700 Subject: [PATCH 049/902] Add a test to reproduce copy-on-read bug for variables PiperOrigin-RevId: 197781741 --- tensorflow/compiler/tests/eager_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py index 52d8d6d295..583a2c26d4 100644 --- a/tensorflow/compiler/tests/eager_test.py +++ b/tensorflow/compiler/tests/eager_test.py @@ -136,6 +136,23 @@ class EagerTest(XLATestCase): grads = backprop.implicit_grad(f)() self.assertEqual(2., grads[0][0].numpy()) + def 
testMultipleVariableReads(self): + # TODO(b/79715516): Currently, whenever we read a variable by going + # through XLA, we create a copy. This leads large memory usage. + self.skipTest('When variable is read through XLA, a copy is created.') + + with self.test_scope(): + # Create 128MiB variables + var = resource_variable_ops.ResourceVariable( + array_ops.ones([32, 1024, 1024])) + + # Read the same variable 100 times. If the underlying tensor + # is not copied, this is a trivial operation. If it is copied, + # this will eat over 13GB and OOM. + values = [] + for _ in range(100): + values.append(var.value()) + class EagerFunctionTest(XLATestCase): -- GitLab From 190d2d57bc8d2e3f43fbe6cbf649d878efdff878 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 23 May 2018 13:55:32 -0700 Subject: [PATCH 050/902] Clear docstrings for auto-generated module files, and detach github links from generated files. PiperOrigin-RevId: 197783520 --- tensorflow/tools/docs/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 7363e4f8b5..50c9052741 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -615,6 +615,9 @@ def _parse_md_docstring(py_object, relative_path_to_root, reference_resolver): docstring, compatibility = _handle_compatibility(raw_docstring) docstring, function_details = _parse_function_details(docstring) + if 'Generated by: tensorflow/tools/api/generator' in docstring: + docstring = '' + return _DocstringInfo( docstring.split('\n')[0], docstring, function_details, compatibility) @@ -1597,7 +1600,8 @@ class _GeneratedFile(object): return True def __str__(self): - return 'Defined in `%s%s`.\n\n' % (self.path_prefix, self.path) + return 'Defined in generated file: `%s%s`.\n\n' % (self.path_prefix, + self.path) def _get_defined_in(py_object, parser_config): @@ -1634,6 +1638,8 @@ def _get_defined_in(py_object, parser_config): if 
re.match(r'.*/gen_[^/]*\.py$', path): return _GeneratedFile(path, parser_config) + if 'genfiles' in path or 'tools/api/generator' in path: + return _GeneratedFile(path, parser_config) elif re.match(r'.*_pb2\.py$', path): # The _pb2.py files all appear right next to their defining .proto file. return _ProtoFile(path[:-7] + '.proto', parser_config) -- GitLab From f9de7982610c77ff16f96f5c6c8b60cc992e207b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 14:12:40 -0700 Subject: [PATCH 051/902] New quantized log(x) for x > 1. Used for LogSoftmax. PiperOrigin-RevId: 197786738 --- .../internal/optimized/optimized_ops.h | 163 ++++++++++- .../internal/reference/reference_ops.h | 257 +++++++++++++++++- 2 files changed, 406 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 1b4660ef4f..025e2825c6 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -140,6 +140,45 @@ MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data, return MatrixMap(data, rows, cols); } +// This is like the template-parameter version, except that the power-of-two is +// passed as a function parameter. The template version is to be preferred, +// since some target hardware optimizations depend on the range of the exponent. 
+template +IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) { + if (exponent == 0) { + return x; + } + using ScalarIntegerType = + typename gemmlowp::FixedPointRawTypeTraits::ScalarRawType; + const IntegerType min = + gemmlowp::Dup(std::numeric_limits::min()); + const IntegerType max = + gemmlowp::Dup(std::numeric_limits::max()); + const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); + + const std::int32_t threshold = + ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1); + const IntegerType positive_mask = + gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup(threshold)); + const IntegerType negative_mask = + gemmlowp::MaskIfLessThan(x, gemmlowp::Dup(-threshold)); + + IntegerType result = gemmlowp::ShiftLeft(x, exponent); + result = gemmlowp::SelectUsingMask(positive_mask, max, result); + result = gemmlowp::SelectUsingMask(negative_mask, min, result); + return result; +} + +// This is like the template-parameter version, except that the power-of-two is +// passed as a function parameter. See raw-integer version for further comments. +template +gemmlowp::FixedPoint +SaturatingRoundingMultiplyByPOTParam( + gemmlowp::FixedPoint a, int exponent) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); +} + // DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE // BROADCASTING. 
// @@ -4559,6 +4598,119 @@ inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, } } +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1_impl( + gemmlowp::FixedPoint input_val) { + // assert(__builtin_clz(0u) >= std::numeric_limits::digits - 1); + // assert(__builtin_clz(0u) <= std::numeric_limits::digits); + using FixedPoint0 = gemmlowp::FixedPoint; + // The reason for accumulating the result with an extra bit of headroom is + // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled * + // recip_denom will otherwise introduce an error. + static constexpr int kAccumIntegerBits = OutputIntegerBits + 1; + using FixedPointAccum = gemmlowp::FixedPoint; + + const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1488522236, std::log(2.0)); + const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5))); + const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1518500250, std::sqrt(0.5)); + const FixedPoint0 one_quarter = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0); + + const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1057819769, + 2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0))); + + const FixedPointAccum shifted_quarter = + gemmlowp::Rescale(one_quarter); + + // Reinterpret the input value as Q0.31, because we will figure out the + // required shift "ourselves" instead of using, say, Rescale. 
+ FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw()); + // z_a_pow_2 = input_integer_bits - z_a_headroom; + int z_a_headroom_plus_1 = __builtin_clz(static_cast(z_a.raw())); + FixedPoint0 r_a_tmp = + SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1)); + const int32 r_a_raw = + SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1); + // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25); + // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25, + // InputIntegerBits - z_b_headroom - 0.25); + const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)), + shifted_quarter); + + // z_b is treated like z_a, but premultiplying by sqrt(0.5). + FixedPoint0 z_b = z_a * sqrt_half; + int z_b_headroom = __builtin_clz(static_cast(z_b.raw())) - 1; + const int32 r_b_raw = + SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom); + const FixedPointAccum z_b_pow_2_adj = SaturatingSub( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)), + shifted_quarter); + + const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw)); + const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw( + std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw())); + + const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half); + FixedPoint0 q = r - sqrt_sqrt_half; + q = q + q; + + const FixedPoint0 common_sq = q * q; + const FixedPoint0 num = q * r + q * common_sq * alpha_n; + const FixedPoint0 denom_minus_one_0 = + p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q; + const FixedPoint0 recip_denom = + one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0); + + const FixedPointAccum num_scaled = gemmlowp::Rescale(num); + return gemmlowp::Rescale(z_pow_2_adj * log_2 + + num_scaled * recip_denom); +} + +// Minimum output bits to 
accommodate log of maximum input range. It actually +// does not matter if one considers, say, [-64,64] or [-64,64). +// +// For example, run this through Octave: +// [0:127; ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))] +constexpr int min_log_x_output_bits(int input_bits) { + return input_bits > 90 + ? 7 + : input_bits > 44 + ? 6 + : input_bits > 21 + ? 5 + : input_bits > 10 + ? 4 + : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1; +} + +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1( + gemmlowp::FixedPoint input_val) { + static_assert( + OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits), + "Output integer bits must be sufficent to accommodate logs of inputs."); + return log_x_for_x_greater_than_or_equal_to_1_impl( + input_val); +} + // Currently just a copy of the reference code. inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int32 input_left_shift, @@ -4604,13 +4756,10 @@ inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, } } - // TODO(b/77858996): Implement fixed-point log(). - // Not a fully-quantized implementation: floating-point log(). 
- const float float_log_sum_of_exps = - std::log(static_cast(sum_of_exps.raw()) / - (1 << (31 - kAccumulationIntegerBits))); - const int32 fixed_log_sum_of_exps = static_cast(TfLiteRound( - float_log_sum_of_exps * (1 << (31 - kScaledDiffIntegerBits)))); + const int32 fixed_log_sum_of_exps = + log_x_for_x_greater_than_or_equal_to_1( + sum_of_exps) + .raw(); // rescaled_diff_min is smallest representable in // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index cd4af48bee..f6d8d3257b 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -33,8 +33,139 @@ limitations under the License. #include "tensorflow/contrib/lite/kernels/internal/types.h" namespace tflite { + +// TODO(b/77858996): Add these to gemmlowp. +template +IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + return a; +} + +template <> +inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) { + std::int64_t a64 = a; + std::int64_t b64 = b; + std::int64_t sum = a64 + b64; + return static_cast(std::min( + static_cast(std::numeric_limits::max()), + std::max( + static_cast(std::numeric_limits::min()), + sum))); +} + +template +gemmlowp::FixedPoint SaturatingAddNonGemmlowp( + gemmlowp::FixedPoint a, + gemmlowp::FixedPoint b) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingAddNonGemmlowp(a.raw(), b.raw())); +} + +template +IntegerType SaturatingSub(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + return a; +} + +template <> +inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) { + std::int32_t a32 = a; + std::int32_t b32 = b; + std::int32_t diff = a32 - b32; + return static_cast(std::min(32767, 
std::max(-32768, diff))); +} + +template <> +inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) { + std::int64_t a64 = a; + std::int64_t b64 = b; + std::int64_t diff = a64 - b64; + return static_cast(std::min( + static_cast(std::numeric_limits::max()), + std::max( + static_cast(std::numeric_limits::min()), + diff))); +} + +template +gemmlowp::FixedPoint SaturatingSub( + gemmlowp::FixedPoint a, + gemmlowp::FixedPoint b) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingSub(a.raw(), b.raw())); +} +// End section to be moved to gemmlowp. + namespace reference_ops { +inline int32 MultiplyByQuantizedMultiplierSmallerThanOne( + int32 x, int32 quantized_multiplier, int right_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift); +} + +inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( + int32 x, int32 quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +template +int CountLeadingZeros(T integer_input) { + static_assert(std::is_unsigned::value, + "Only unsigned integer types handled."); + if (integer_input == 0) { + return std::numeric_limits::digits; + } + const T one_in_leading_positive = static_cast(1) + << (std::numeric_limits::digits - 1); + int leading_zeros = 0; + while (integer_input < one_in_leading_positive) { + integer_input <<= 1; + ++leading_zeros; + } + return leading_zeros; +} + +template +IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) { + if (exponent == 0) { + return x; + } + using ScalarIntegerType = + typename gemmlowp::FixedPointRawTypeTraits::ScalarRawType; + const IntegerType min = + gemmlowp::Dup(std::numeric_limits::min()); + const IntegerType max = + gemmlowp::Dup(std::numeric_limits::max()); + const int 
ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); + + const std::int32_t threshold = + ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1); + const IntegerType positive_mask = + gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup(threshold)); + const IntegerType negative_mask = + gemmlowp::MaskIfLessThan(x, gemmlowp::Dup(-threshold)); + + IntegerType result = gemmlowp::ShiftLeft(x, exponent); + result = gemmlowp::SelectUsingMask(positive_mask, max, result); + result = gemmlowp::SelectUsingMask(negative_mask, min, result); + return result; +} + +// If we want to leave IntegerBits fixed, then multiplication +// by a power of two has to be saturating/rounding, not exact anymore. +template +gemmlowp::FixedPoint +SaturatingRoundingMultiplyByPOTParam( + gemmlowp::FixedPoint a, int exponent) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); +} + // DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE // BROADCASTING. // @@ -2642,6 +2773,121 @@ inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, } } +// Although currently the name of this function says that it cannot handle +// values less than 1, in practice it can handle as low as 1/x_max, where +// x_max is the largest representable input. In other words, the output range +// is symmetric. +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1_impl( + gemmlowp::FixedPoint input_val) { + using FixedPoint0 = gemmlowp::FixedPoint; + // The reason for accumulating the result with an extra bit of headroom is + // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled * + // recip_denom will otherwise introduce an error. 
+ static constexpr int kAccumIntegerBits = OutputIntegerBits + 1; + using FixedPointAccum = gemmlowp::FixedPoint; + + const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1488522236, std::log(2.0)); + const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5))); + const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1518500250, std::sqrt(0.5)); + const FixedPoint0 one_quarter = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0); + + const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1057819769, + 2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0))); + + const FixedPointAccum shifted_quarter = + gemmlowp::Rescale(one_quarter); + + // Reinterpret the input value as Q0.31, because we will figure out the + // required shift "ourselves" instead of using, say, Rescale. 
+ FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw()); + // z_a_pow_2 = input_integer_bits - z_a_headroom; + int z_a_headroom_plus_1 = CountLeadingZeros(static_cast(z_a.raw())); + FixedPoint0 r_a_tmp = + SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1)); + const int32 r_a_raw = + SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1); + // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25); + // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25, + // InputIntegerBits - z_b_headroom - 0.25); + const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)), + shifted_quarter); + + // z_b is treated like z_a, but premultiplying by sqrt(0.5). + FixedPoint0 z_b = z_a * sqrt_half; + int z_b_headroom = CountLeadingZeros(static_cast(z_b.raw())) - 1; + const int32 r_b_raw = + SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom); + const FixedPointAccum z_b_pow_2_adj = SaturatingSub( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)), + shifted_quarter); + + const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw)); + const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw( + std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw())); + + const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half); + FixedPoint0 q = r - sqrt_sqrt_half; + q = q + q; + + const FixedPoint0 common_sq = q * q; + const FixedPoint0 num = q * r + q * common_sq * alpha_n; + const FixedPoint0 denom_minus_one_0 = + p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q; + const FixedPoint0 recip_denom = + one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0); + + const FixedPointAccum num_scaled = gemmlowp::Rescale(num); + return gemmlowp::Rescale(z_pow_2_adj * log_2 + + num_scaled * recip_denom); +} + +// Minimum output 
bits to accommodate log of maximum input range. It actually +// does not matter if one considers, say, [-64,64] or [-64,64). +// +// For example, run this through Octave: +// [0:127; ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))] +constexpr int min_log_x_output_bits(int input_bits) { + return input_bits > 90 + ? 7 + : input_bits > 44 + ? 6 + : input_bits > 21 + ? 5 + : input_bits > 10 + ? 4 + : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1; +} + +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1( + gemmlowp::FixedPoint input_val) { + static_assert( + OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits), + "Output integer bits must be sufficent to accommodate logs of inputs."); + return log_x_for_x_greater_than_or_equal_to_1_impl( + input_val); +} + inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int32 input_left_shift, int32 reverse_scaling_divisor, @@ -2684,13 +2930,10 @@ inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, } } - // TODO(b/77858996): Implement fixed-point log(). - // Not a fully-quantized implementation: floating-point log(). - const float float_log_sum_of_exps = - std::log(static_cast(sum_of_exps.raw()) / - (1 << (31 - kAccumulationIntegerBits))); - const int32 fixed_log_sum_of_exps = static_cast(TfLiteRound( - float_log_sum_of_exps * (1 << (31 - kScaledDiffIntegerBits)))); + const int32 fixed_log_sum_of_exps = + log_x_for_x_greater_than_or_equal_to_1( + sum_of_exps) + .raw(); // rescaled_diff_min is smallest representable in // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the -- GitLab From e0090257a0c88c1a1e16f92a88423de01ef231ce Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 23 May 2018 14:33:59 -0700 Subject: [PATCH 052/902] Add NNAPI delegation for EMBEDING_LOOKUP, RNN, SVDF PiperOrigin-RevId: 197790679 --- tensorflow/contrib/lite/nnapi_delegate.cc | 158 +++++++++++++++++----- tensorflow/contrib/lite/nnapi_delegate.h | 8 ++ 2 files changed, 132 insertions(+), 34 deletions(-) diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 107c84e666..eed57d412b 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -155,7 +155,6 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, nn_type, static_cast(tensor->dims->size), reinterpret_cast(tensor->dims->data), scale, zeroPoint}; CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); - // TODO(aselle): Based on Michael's suggestion, limiting this to read // only memory if (tensor->allocation_type == kTfLiteMmapRo) { @@ -168,7 +167,12 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, CHECK_NN(ANeuralNetworksModel_setOperandValue( nn_model, next_id, tensor->data.raw, tensor->bytes)); } + } else if (tensor->bytes == 0) { + // These size 0 tensors are optional tensors reserved. + CHECK_NN( + ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0)); } + ++next_id; } return next_id; @@ -177,7 +181,9 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, // Adds the operations and their parameters to the NN API model. // 'next-id' is the operand ID of the next operand of the model. 
void AddOpsAndParams(tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, uint32_t next_id) { + ANeuralNetworksModel* nn_model, uint32_t next_id, + std::vector* model_state_inputs, + std::vector* model_state_outputs) { for (size_t i = 0; i < interpreter->nodes_size(); i++) { const auto* node_and_registration = interpreter->node_and_registration(i); const TfLiteNode& node = node_and_registration->first; @@ -188,6 +194,8 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, // Add the parameters. std::vector augmented_inputs( node.inputs->data, node.inputs->data + node.inputs->size); + std::vector augmented_outputs( + node.outputs->data, node.outputs->data + node.outputs->size); auto add_scalar_int32 = [&nn_model, &augmented_inputs, &next_id](int value) { @@ -207,12 +215,23 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, augmented_inputs.push_back(next_id++); }; + // Handle state tensors of RNN, LSTM, SVDF. + // For each state_out tensor, a corresponding state_in operand needs to be + // created for NNAPI. 
auto duplicate_state_tensor_float32 = - [interpreter, &nn_model, &augmented_inputs](int tensor_id) { + [interpreter, &nn_model, &next_id, &augmented_inputs, + &model_state_inputs, &model_state_outputs](int tensor_id) { const TfLiteTensor* tensor = interpreter->tensor(tensor_id); - CHECK_NN(ANeuralNetworksModel_setOperandValue( - nn_model, tensor_id, tensor->data.raw, tensor->bytes)); - augmented_inputs.push_back(tensor_id); + ANeuralNetworksOperandType operand_type{ + ANEURALNETWORKS_TENSOR_FLOAT32, + static_cast(tensor->dims->size), + reinterpret_cast(tensor->dims->data), + tensor->params.scale, tensor->params.zero_point}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); + augmented_inputs.push_back(next_id); + model_state_inputs->push_back(next_id); + model_state_outputs->push_back(tensor_id); + next_id++; }; auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); }; @@ -275,28 +294,51 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, add_scalar_float32(builtin->proj_clip); }; + // LSTM in NNAPI requires scratch tensor as an output operand. 
+ auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model, + &next_id, &augmented_outputs]() { + int scratch_buffer_index = node.temporaries->data[0]; + const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index); + ANeuralNetworksOperandType operand_type{ + ANEURALNETWORKS_TENSOR_FLOAT32, + static_cast(tensor->dims->size), + reinterpret_cast(tensor->dims->data), tensor->params.scale, + tensor->params.zero_point}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); + augmented_outputs.insert(augmented_outputs.begin(), next_id++); + }; + auto add_mean_params = [&add_scalar_int32](void* data) { auto builtin = reinterpret_cast(data); add_scalar_int32(builtin->keep_dims); }; -#if 0 - auto add_reshape_params = [&](void* data) { - auto builtin = reinterpret_cast(data); - uint32_t tensor_size_shape = builtin->num_dimensions; - ANeuralNetworksOperandType operand_type{ - ANEURALNETWORKS_TENSOR_INT32, - {static_cast(1), - reinterpret_cast(&tensor_size_shape)}, - 0, - 0}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) - CHECK_NN(ANeuralNetworksModel_setOperandValue( - nn_model, next_id, builtin->shape, - sizeof(int) * builtin->num_dimensions)); - augmented_inputs.push_back(next_id++); + auto add_svdf_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->rank); + add_scalar_int32(builtin->activation); }; -#endif + + auto add_rnn_params = [&add_scalar_int32](void* data) { + auto builtin = reinterpret_cast(data); + add_scalar_int32(builtin->activation); + }; + + // Handle optional input tensors. 
+ auto add_optional_tensors = [&nn_model, &augmented_inputs, + &next_id](int nn_type) { + for (size_t idx = 0; idx < augmented_inputs.size(); idx++) { + if (augmented_inputs[idx] == kOptionalTensor) { + const std::vector dim = {0, 0}; + ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0}; + CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)) + CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, + nullptr, 0)) + augmented_inputs[idx] = next_id++; + } + } + }; + int nnapi_version = 10; ANeuralNetworksOperationType nn_op_type; @@ -366,13 +408,31 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, break; case tflite::BuiltinOperator_LSTM: { duplicate_state_tensor_float32( - node.outputs->data[/*kOutputStateTensor*/ 1]); + node.outputs->data[/*kOutputStateTensor*/ 0]); duplicate_state_tensor_float32( - node.outputs->data[/*kCellStateTensor*/ 2]); + node.outputs->data[/*kCellStateTensor*/ 1]); add_lstm_params(node.builtin_data); + add_lstm_scratch_tensor_float32(); + add_optional_tensors(ANEURALNETWORKS_TENSOR_FLOAT32); nn_op_type = ANEURALNETWORKS_LSTM; break; } + case tflite::BuiltinOperator_SVDF: { + duplicate_state_tensor_float32(node.outputs->data[/*kStateTensor*/ 0]); + add_svdf_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_SVDF; + break; + } + case tflite::BuiltinOperator_RNN: { + duplicate_state_tensor_float32( + node.outputs->data[/*kHiddenStateTensor*/ 0]); + add_rnn_params(node.builtin_data); + nn_op_type = ANEURALNETWORKS_RNN; + break; + } + case tflite::BuiltinOperator_EMBEDDING_LOOKUP: + nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP; + break; case tflite::BuiltinOperator_PAD: nnapi_version = 11; // require NNAPI 1.1 nn_op_type = ANEURALNETWORKS_PAD; @@ -392,12 +452,9 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, break; case tflite::BuiltinOperator_CONCAT_EMBEDDINGS: case tflite::BuiltinOperator_LSH_PROJECTION: - case tflite::BuiltinOperator_SVDF: case 
tflite::BuiltinOperator_HASHTABLE_LOOKUP: - case tflite::BuiltinOperator_RNN: case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: - case tflite::BuiltinOperator_EMBEDDING_LOOKUP: case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: @@ -450,8 +507,9 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, // Add the operation. CHECK_NN(ANeuralNetworksModel_addOperation( nn_model, nn_op_type, static_cast(augmented_inputs.size()), - augmented_inputs.data(), static_cast(node.outputs->size), - reinterpret_cast(node.outputs->data))); + augmented_inputs.data(), + static_cast(augmented_outputs.size()), + reinterpret_cast(augmented_outputs.data()))); } } @@ -475,12 +533,25 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { } uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list); - AddOpsAndParams(interpreter, nn_model_, next_id); + AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, + &model_states_outputs_); + + std::vector augmented_inputs = interpreter->inputs(); + std::vector augmented_outputs = interpreter->outputs(); + + // All state tensors input/output need to be treated as model input/output. 
+ augmented_inputs.insert(augmented_inputs.end(), + model_states_inputs_.begin(), + model_states_inputs_.end()); + augmented_outputs.insert(augmented_outputs.end(), + model_states_outputs_.begin(), + model_states_outputs_.end()); + CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs( - nn_model_, static_cast(interpreter->inputs().size()), - reinterpret_cast(interpreter->inputs().data()), - static_cast(interpreter->outputs().size()), - reinterpret_cast(interpreter->outputs().data()))); + nn_model_, static_cast(augmented_inputs.size()), + reinterpret_cast(augmented_inputs.data()), + static_cast(augmented_outputs.size()), + reinterpret_cast(augmented_outputs.data()))); CHECK_NN(ANeuralNetworksModel_finish(nn_model_)); } if (!nn_compiled_model_) { @@ -507,6 +578,7 @@ TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) { CHECK_NN(ANeuralNetworksExecution_setInput( execution, i, nullptr, tensor->data.raw, tensor->bytes)); } + // Tell nn api where to place final data. for (size_t i = 0; i < interpreter->outputs().size(); i++) { int output = interpreter->outputs()[i]; @@ -514,6 +586,24 @@ TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) { CHECK_NN(ANeuralNetworksExecution_setOutput( execution, i, nullptr, tensor->data.raw, tensor->bytes)); } + + // The state_out of previous invocation need to be mapped to state_in of + // current invocation. + for (size_t i = 0; i < model_states_outputs_.size(); i++) { + int state_tensor_idx = model_states_outputs_[i]; + TfLiteTensor* tensor = interpreter->tensor(state_tensor_idx); + // Here we are using a deep copy for state_in tensors so that we are not + // reading and writing into the same buffer during a invocation. + // TODO(miaowang): using double shared buffer to minimize the copies. + CHECK_NN(ANeuralNetworksExecution_setInput( + execution, i + interpreter->inputs().size(), nullptr, tensor->data.raw, + tensor->bytes)); + // Tell NNAPI where to output the state_out. 
+ CHECK_NN(ANeuralNetworksExecution_setOutput( + execution, i + interpreter->outputs().size(), nullptr, tensor->data.raw, + tensor->bytes)); + } + // Currently use blocking compute. ANeuralNetworksEvent* event = nullptr; CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event)); diff --git a/tensorflow/contrib/lite/nnapi_delegate.h b/tensorflow/contrib/lite/nnapi_delegate.h index e98000929a..94dea4f9b2 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.h +++ b/tensorflow/contrib/lite/nnapi_delegate.h @@ -59,6 +59,14 @@ class NNAPIDelegate { ANeuralNetworksModel* nn_model_ = nullptr; // The NN API compilation handle ANeuralNetworksCompilation* nn_compiled_model_ = nullptr; + + // List of state tensors for LSTM, RNN, SVDF. + // NN API does not allow ops to maintain states across multiple + // invocations. We need to manually create state input tensors from + // corresponding state output tensors of TFLite operations, and map them + // correctly. + std::vector model_states_inputs_; + std::vector model_states_outputs_; }; } // namespace tflite -- GitLab From d01df010c325288d9ea88cfd8038c81a35df9766 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 23 May 2018 14:36:23 -0700 Subject: [PATCH 053/902] Modify model output names to not be unique when in distribution context. 
PiperOrigin-RevId: 197791115 --- tensorflow/contrib/distribute/python/BUILD | 18 +++ .../contrib/distribute/python/keras_test.py | 148 ++++++++++++++++++ tensorflow/python/estimator/keras.py | 23 ++- 3 files changed, 184 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/distribute/python/keras_test.py diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 64a77bbed1..00161b2d48 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -547,3 +547,21 @@ cuda_py_test( "no_pip", ], ) + +cuda_py_test( + name = "keras_test", + srcs = ["keras_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/contrib/distribute/python:mirrored_strategy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:training", + "//tensorflow/python/estimator:keras", + "//tensorflow/python/estimator:run_config", + "//tensorflow/python/keras", + ], + tags = [ + "multi_and_single_gpu", + "notsan", + ], +) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py new file mode 100644 index 0000000000..75ecd90dcf --- /dev/null +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -0,0 +1,148 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Keras Sequential and Functional models.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import numpy as np + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.python import keras +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.estimator import keras as keras_lib +from tensorflow.python.estimator import run_config as run_config_lib +from tensorflow.python.framework import test_util +from tensorflow.python.keras import testing_utils +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import rmsprop + +_RANDOM_SEED = 1337 +_TRAIN_SIZE = 200 +_INPUT_SIZE = (10,) +_NUM_CLASS = 2 + + +def simple_sequential_model(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE)) + model.add(keras.layers.Dropout(0.1)) + model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax')) + return model + + +def simple_functional_model(): + a = keras.layers.Input(shape=_INPUT_SIZE) + b = keras.layers.Dense(16, activation='relu')(a) + b = keras.layers.Dropout(0.1)(b) + b = keras.layers.Dense(_NUM_CLASS, activation='softmax')(b) + model = keras.models.Model(inputs=[a], outputs=[b]) + return model + + +def get_ds_train_input_fn(): + np.random.seed(_RANDOM_SEED) + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, + input_shape=_INPUT_SIZE, + num_classes=_NUM_CLASS) + y_train = keras.utils.to_categorical(y_train) + + dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train)) + dataset = dataset.batch(32) + return dataset + + +def get_ds_test_input_fn(): + np.random.seed(_RANDOM_SEED) + _, (x_test, y_test) = 
testing_utils.get_test_data( + train_samples=_TRAIN_SIZE, + test_samples=50, + input_shape=_INPUT_SIZE, + num_classes=_NUM_CLASS) + y_test = keras.utils.to_categorical(y_test) + + dataset = dataset_ops.Dataset.from_tensor_slices((x_test, y_test)) + dataset = dataset.batch(32) + return dataset + + +class TestKerasDistributionStrategy(test_util.TensorFlowTestCase): + + def setUp(self): + self._base_dir = os.path.join(self.get_temp_dir(), + 'keras_mirrored_strategy_test') + gfile.MakeDirs(self._base_dir) + self._config = run_config_lib.RunConfig( + tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir) + + def tearDown(self): + writer_cache.FileWriterCache.clear() + if os.path.isdir(self._base_dir): + gfile.DeleteRecursively(self._base_dir) + + def test_train_functional_with_distribution_strategy(self): + dist = mirrored_strategy.MirroredStrategy( + devices=['/device:GPU:0', '/device:GPU:1']) + keras_model = simple_functional_model() + keras_model.compile( + loss='categorical_crossentropy', + optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01)) + config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED, + model_dir=self._base_dir, + train_distribute=dist) + with self.test_session(): + est_keras = keras_lib.model_to_estimator( + keras_model=keras_model, config=config) + before_eval_results = est_keras.evaluate( + input_fn=get_ds_test_input_fn, steps=1) + est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16) + after_eval_results = est_keras.evaluate(input_fn=get_ds_test_input_fn, + steps=1) + self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + writer_cache.FileWriterCache.clear() + gfile.DeleteRecursively(self._config.model_dir) + + def test_train_sequential_with_distribution_strategy(self): + dist = mirrored_strategy.MirroredStrategy( + devices=['/device:GPU:0', '/device:GPU:1']) + keras_model = simple_sequential_model() + keras_model.compile( + loss='categorical_crossentropy', + 
optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01)) + config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED, + model_dir=self._base_dir, + train_distribute=dist) + with self.test_session(): + est_keras = keras_lib.model_to_estimator( + keras_model=keras_model, config=config) + before_eval_results = est_keras.evaluate( + input_fn=get_ds_test_input_fn, steps=1) + est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16) + after_eval_results = est_keras.evaluate(input_fn=get_ds_test_input_fn, + steps=1) + self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + writer_cache.FileWriterCache.clear() + gfile.DeleteRecursively(self._config.model_dir) + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 9519d33525..9961fa74c2 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -20,7 +20,7 @@ from __future__ import division from __future__ import print_function import os - +import re from tensorflow.python.client import session from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import export as export_lib @@ -42,10 +42,12 @@ from tensorflow.python.ops import metrics as metrics_module from tensorflow.python.ops import variables as variables_module from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export + _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -136,8 +138,9 @@ def _in_place_subclassed_model_reset(model): To "instantiate" an identical model in a new TF graph, we reuse the original model object, but we clear its state. 
- After calling this function on a model intance, you can use the model instance - as if it were a model clone (in particular you can use it in a new graph). + After calling this function on a model instance, you can use the model + instance as if it were a model clone (in particular you can use it in a new + graph). This method clears the state of the input model. It is thus destructive. However the original state can be restored fully by calling @@ -220,7 +223,6 @@ def _in_place_subclassed_model_reset(model): for name in attributes_to_cache: attributes_cache[name] = getattr(model, name) model._original_attributes_cache = attributes_cache - # Reset built state model.built = False model.inputs = None @@ -340,8 +342,19 @@ def _create_keras_model_fn(keras_model, custom_objects=None): """model_fn for keras Estimator.""" model = _clone_and_build_model(mode, keras_model, custom_objects, features, labels) + model_output_names = [] + # We need to make sure that the output names of the last layer in the model + # is the same for each of the cloned models. This is required for mirrored + # strategy when we call regroup. + if distribute_lib.has_distribution_strategy(): + for name in model.output_names: + name = re.compile(r'_\d$').sub('', name) + model_output_names.append(name) + else: + model_output_names = model.output_names + # Get inputs to EstimatorSpec - predictions = dict(zip(model.output_names, model.outputs)) + predictions = dict(zip(model_output_names, model.outputs)) loss = None train_op = None -- GitLab From 6291321143ed2b122063593ef8a1e77f201f715f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 23 May 2018 14:56:55 -0700 Subject: [PATCH 054/902] [XLA] Draw hollow arrowheads for small arrays in dumped HLO graphs. The intent is to make it easier to tell what's "big" and what's "small". 
PiperOrigin-RevId: 197794902 --- .../compiler/xla/service/hlo_graph_dumper.cc | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 17e3c405f1..81f8743dca 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1154,6 +1154,20 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { return Join(lines, "<br/>"); } +// Gets the total number of array elements in the given shape. For tuples, this +// is the sum of all the sizes of all of the array elements recursively in the +// tuple. +static int64 TotalElementsInShape(const Shape& shape) { + int64 elems = 0; + ShapeUtil::ForEachSubshape( + shape, [&](const Shape& subshape, const ShapeIndex& /*index*/) { + if (ShapeUtil::IsArray(subshape)) { + elems += ShapeUtil::ElementsIn(subshape); + } + }); + return elems; +} + void HloDotDumper::AddInstructionIncomingEdges(const HloInstruction* instr) { auto add_edge = [&](const HloInstruction* from, const HloInstruction* to, int64 operand_num, bool control_edge = false) { @@ -1173,9 +1187,16 @@ void HloDotDumper::AddInstructionIncomingEdges(const HloInstruction* instr) { } else if (control_edge) { edge_label = "style=\"dotted\" color=\"gray\" label=\"ctrl\""; } - const char* kEdgeFmt = R"(%s -> %s [tooltip="%s -> %s" %s];)"; + + // We print "small" arrays using a hollow arrowhead and "large" arrays using + // a filled arrowhead. For now, we use an arbitrary cutoff for what "big" + // means. + bool is_big_array = TotalElementsInShape(from->shape()) >= 4096; + + const char* kEdgeFmt = R"(%s -> %s [arrowhead=%s tooltip="%s -> %s" %s];)"; edges_.push_back(Printf(kEdgeFmt, InstructionId(from), InstructionId(to), - from->name(), to->name(), edge_label)); + (is_big_array ? "normal" : "empty"), from->name(), + to->name(), edge_label)); }; // Add edges from instr's operands to instr. Parameters within fusion -- GitLab From aee7ebade2a975bdc3518bc47aef7d4f29614eb6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 14:57:23 -0700 Subject: [PATCH 055/902] Allow vars_to_warm_start to be a list of strings or Variables, which allows for non-TRAINABLE_VARIABLES to be warm-started.
PiperOrigin-RevId: 197794989 --- tensorflow/python/estimator/estimator.py | 17 +++- .../python/training/warm_starting_util.py | 89 +++++++++++++++---- .../training/warm_starting_util_test.py | 41 +++++++++ 3 files changed, 124 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index a2e84c8092..ecb5659716 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1746,10 +1746,19 @@ class WarmStartSettings( ckpt_to_initialize_from: [Required] A string specifying the directory with checkpoint file(s) or path to checkpoint from which to warm-start the model parameters. - vars_to_warm_start: [Optional] A regular expression that captures which - variables to warm-start (see tf.get_collection). Defaults to `'.*'`, - which warm-starts all variables. If `None` is explicitly given, only - variables specified in `var_name_to_vocab_info` will be warm-started. + vars_to_warm_start: [Optional] One of the following: + + - A regular expression (string) that captures which variables to + warm-start (see tf.get_collection). This expression will only consider + variables in the TRAINABLE_VARIABLES collection. + - A list of Variables to warm-start. + - A list of strings, each representing a full variable name to warm-start. + - `None`, in which case only variables specified in + `var_name_to_vocab_info` will be warm-started. + + Defaults to `'.*'`, which warm-starts all variables in the + TRAINABLE_VARIABLES collection. Note that this excludes variables such as + accumulators and moving statistics from batch norm. var_name_to_vocab_info: [Optional] Dict of variable names (strings) to VocabInfo. The variable names should be "full" variables, not the names of the partitions. 
If not explicitly provided, the variable is assumed to diff --git a/tensorflow/python/training/warm_starting_util.py b/tensorflow/python/training/warm_starting_util.py index b0f37f8cb9..ec740abdd1 100644 --- a/tensorflow/python/training/warm_starting_util.py +++ b/tensorflow/python/training/warm_starting_util.py @@ -237,6 +237,62 @@ def _warm_start_var_with_vocab(var, # pylint: enable=protected-access +def _get_grouped_variables(vars_to_warm_start): + """Collects and groups (possibly partitioned) variables into a dictionary. + + The variables can be provided explicitly through vars_to_warm_start, or they + are retrieved from collections (see below). + + Args: + vars_to_warm_start: One of the following: + + - A regular expression (string) that captures which variables to + warm-start (see tf.get_collection). This expression will only consider + variables in the TRAINABLE_VARIABLES collection. + - A list of Variables to warm-start. + - A list of strings, each representing a full variable name to warm-start. + - `None`, in which case only variables specified in + `var_name_to_vocab_info` will be warm-started. + Returns: + A dictionary mapping variable names (strings) to lists of Variables. + Raises: + ValueError: If vars_to_warm_start is not a string, `None`, a list of + `Variables`, or a list of strings. + """ + if isinstance(vars_to_warm_start, str) or vars_to_warm_start is None: + # Both vars_to_warm_start = '.*' and vars_to_warm_start = None will match + # everything (in TRAINABLE_VARIABLES) here. 
+ list_of_vars = ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES, + scope=vars_to_warm_start) + elif isinstance(vars_to_warm_start, list): + if all([isinstance(v, str) for v in vars_to_warm_start]): + list_of_vars = [] + for v in vars_to_warm_start: + list_of_vars += ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, + scope=v) + elif all([_is_variable(v) for v in vars_to_warm_start]): + list_of_vars = vars_to_warm_start + else: + raise ValueError("If `vars_to_warm_start` is a list, it must be all " + "`Variable` or all `str`. Given types are {}".format( + [type(v) for v in vars_to_warm_start])) + else: + raise ValueError("`vars_to_warm_start must be a `list` or `str`. Given " + "type is {}".format(type(vars_to_warm_start))) + # We have to deal with partitioned variables, since get_collection flattens + # out the list. + grouped_variables = {} + for v in list_of_vars: + if not isinstance(v, list): + var_name = _infer_var_name([v]) + else: + var_name = _infer_var_name(v) + grouped_variables.setdefault(var_name, []).append(v) + + return grouped_variables + + @tf_export("train.warm_start") def warm_start(ckpt_to_initialize_from, vars_to_warm_start=".*", @@ -251,10 +307,19 @@ def warm_start(ckpt_to_initialize_from, ckpt_to_initialize_from: [Required] A string specifying the directory with checkpoint file(s) or path to checkpoint from which to warm-start the model parameters. - vars_to_warm_start: [Optional] A regular expression that captures which - variables to warm-start (see tf.get_collection). Defaults to `'.*'`, - which warm-starts all variables. If `None` is explicitly given, only - variables specified in `var_name_to_vocab_info` will be warm-started. + vars_to_warm_start: [Optional] One of the following: + + - A regular expression (string) that captures which variables to + warm-start (see tf.get_collection). This expression will only consider + variables in the TRAINABLE_VARIABLES collection. + - A list of Variables to warm-start. 
+ - A list of strings, each representing a full variable name to warm-start. + - `None`, in which case only variables specified in + `var_name_to_vocab_info` will be warm-started. + + Defaults to `'.*'`, which warm-starts all variables in the + TRAINABLE_VARIABLES collection. Note that this excludes variables such as + accumulators and moving statistics from batch norm. var_name_to_vocab_info: [Optional] Dict of variable names (strings) to VocabInfo. The variable names should be "full" variables, not the names of the partitions. If not explicitly provided, the variable is assumed to @@ -274,21 +339,7 @@ def warm_start(ckpt_to_initialize_from, if var_name_to_prev_var_name is None: var_name_to_prev_var_name = {} logging.info("Warm-starting from: %s", (ckpt_to_initialize_from,)) - # We have to deal with partitioned variables, since get_collection flattens - # out the list. - grouped_variables = {} - # Both vars_to_warm_start = '.*' and - # vars_to_warm_start = None will match everything here. - for v in ops.get_collection( - # TODO(eddz): Allow for different collections here (to support - # warm-starting accumulators). - ops.GraphKeys.TRAINABLE_VARIABLES, - scope=vars_to_warm_start): - if not isinstance(v, list): - var_name = _infer_var_name([v]) - else: - var_name = _infer_var_name(v) - grouped_variables.setdefault(var_name, []).append(v) + grouped_variables = _get_grouped_variables(vars_to_warm_start) # Keep track of which var_names in var_name_to_prev_var_name and # var_name_to_vocab_info have been used. 
Err on the safer side by throwing an diff --git a/tensorflow/python/training/warm_starting_util_test.py b/tensorflow/python/training/warm_starting_util_test.py index 7e8cbd6bae..6a4c207d79 100644 --- a/tensorflow/python/training/warm_starting_util_test.py +++ b/tensorflow/python/training/warm_starting_util_test.py @@ -36,6 +36,7 @@ from tensorflow.python.training import warm_starting_util as ws_util ones = init_ops.ones_initializer norms = init_ops.truncated_normal_initializer rand = init_ops.random_uniform_initializer +zeros = init_ops.zeros_initializer class WarmStartingUtilTest(test.TestCase): @@ -305,6 +306,46 @@ class WarmStartingUtilTest(test.TestCase): self.assertAllEqual([[0.5], [0.], [0.]], fruit_weights_vars[1].eval(sess)) + def testWarmStart_ListOfVariables(self): + # Save checkpoint from which to warm-start. + _, prev_int_val = self._create_prev_run_var("v1", shape=[10, 1], + initializer=ones()) + # Verify we initialized the values correctly. + self.assertAllEqual(np.ones([10, 1]), prev_int_val) + + # New graph, new session with warm-starting. + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + # Initialize with zeros. + var = variable_scope.get_variable( + "v1", + shape=[10, 1], + initializer=zeros()) + ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=[var]) + sess.run(variables.global_variables_initializer()) + # Verify weights were correctly warm-started (init overridden to ones). + self.assertAllEqual(var.eval(), prev_int_val) + + def testWarmStart_ListOfStrings(self): + # Save checkpoint from which to warm-start. + _, prev_int_val = self._create_prev_run_var("v1", shape=[10, 1], + initializer=ones()) + # Verify we initialized the values correctly. + self.assertAllEqual(np.ones([10, 1]), prev_int_val) + + # New graph, new session with warm-starting. + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + # Initialize with zeros. 
+ var = variable_scope.get_variable( + "v1", + shape=[10, 1], + initializer=zeros()) + ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=["v1"]) + sess.run(variables.global_variables_initializer()) + # Verify weights were correctly warm-started (init overridden to ones). + self.assertAllEqual(var.eval(), prev_int_val) + def testWarmStart_SparseColumnIntegerized(self): # Create feature column. sc_int = fc.categorical_column_with_identity("sc_int", num_buckets=10) -- GitLab From 7de8d665ab473582db92f9ed7dcba0406f16705d Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 23 May 2018 14:58:08 -0700 Subject: [PATCH 056/902] [XLA] Fix exhaustive_f32_elementwise_test's size marker. "enormous" is a size, not a tag. PiperOrigin-RevId: 197795125 --- tensorflow/compiler/xla/tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 4883380be1..fd54ac761c 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -619,6 +619,7 @@ xla_test( xla_test( name = "exhaustive_f32_elementwise_op_test", + size = "enormous", srcs = ["exhaustive_f32_elementwise_op_test.cc"], backends = [ "cpu", @@ -626,7 +627,6 @@ xla_test( ], shard_count = 48, tags = [ - "enormous", "manual", "notap", ], -- GitLab From 5a44ff92f0f467642e0ea872a1287990c13284d7 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Wed, 23 May 2018 15:17:03 -0700 Subject: [PATCH 057/902] [TF:XLA] Add tests to show that the List scheduler handles tuples correctly (in and out of fusions). 
PiperOrigin-RevId: 197798787 --- .../compiler/xla/service/hlo_scheduling.cc | 2 + .../xla/service/hlo_scheduling_test.cc | 95 +++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 29c337746c..68b2cde83a 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -299,6 +299,8 @@ class ListScheduler { auto best_it = ready_queue.end(); --best_it; const HloInstruction* best = best_it->second.instruction; + VLOG(2) << "Schedule instruction: " << best->ToShortString() + << " Bytes freed: " << best_it->first.first; ready_queue.erase(best_it); ready_instructions.erase(best); schedule.push_back(best); diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index c018ba2ffc..0bc930f9ea 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -289,5 +289,100 @@ TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) { EXPECT_TRUE(ordering.ExecutesBefore(transpose, add)); } +TEST_F(HloSchedulingTest, TuplesAreAccountedCorrectly) { + auto builder = HloComputation::Builder(TestName()); + const auto TUPLE_SIZE = 1; + const Shape r1f32 = ShapeUtil::MakeShape(xla::F32, {6}); + + // Wrap lit in abs because constants are considered free by + // IgnoreInstruction, and it skews the accounting. 
+ auto lit = builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({1, 1, 1, 1, 1, 1}))); + auto abs_const = builder.AddInstruction( + HloInstruction::CreateUnary(r1f32, HloOpcode::kAbs, lit)); + + auto abs_abs1 = builder.AddInstruction( + HloInstruction::CreateUnary(r1f32, HloOpcode::kAbs, abs_const)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple( + tensorflow::gtl::ArraySlice({abs_abs1}))); + auto tuple_elm = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(r1f32, tuple, 0)); + + auto abs_abs2 = builder.AddInstruction( + HloInstruction::CreateUnary(r1f32, HloOpcode::kAbs, abs_const)); + + builder.AddInstruction(HloInstruction::CreateBinary(r1f32, HloOpcode::kAdd, + tuple_elm, abs_abs2)); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + TF_ASSERT_OK_AND_ASSIGN( + SequentialHloOrdering::HloModuleSequence sequence, + CreateMemoryMinimizingSequence(*module, + [&TUPLE_SIZE](const BufferValue& buffer) { + return ShapeUtil::ByteSizeOf( + buffer.shape(), TUPLE_SIZE); + }, + ListMemoryScheduler)); + + // Verify that all instructions are in the sequence. + EXPECT_EQ(module->entry_computation()->instruction_count(), + sequence.at(module->entry_computation()).size()); + SequentialHloOrdering ordering(module.get(), sequence); + // tuple allocates the tuple buffer and doesn't free anything. + // abs_abs2 uses the same buffer for input/output, so its bytes-freed is 0. + // abs_abs2 should be scheduled before tuple by List. 
+ EXPECT_TRUE(ordering.ExecutesBefore(abs_abs2, tuple)); +} + +TEST_F(HloSchedulingTest, MultiOutputFusionAccountedCorrectly) { + const Shape r1f32 = ShapeUtil::MakeShape(xla::F32, {5}); + HloComputation::Builder builder(TestName()); + + auto c1 = builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({1, 1, 1, 1, 1}))); + auto c2 = builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({1, 2, 3, 4, 5}))); + auto c3 = builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({0, 2, 4, 6, 8}))); + + auto add = builder.AddInstruction( + HloInstruction::CreateBinary(r1f32, HloOpcode::kAdd, c1, c2)); + auto mul = builder.AddInstruction( + HloInstruction::CreateBinary(r1f32, HloOpcode::kMultiply, add, c3)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({add, mul})); + + auto tuple_elm = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(r1f32, tuple, 0)); + + auto exp = builder.AddInstruction( + HloInstruction::CreateUnary(r1f32, HloOpcode::kExp, c3)); + + builder.AddInstruction( + HloInstruction::CreateBinary(r1f32, HloOpcode::kAdd, tuple_elm, exp)); + + auto module = CreateNewModule(); + auto* computation = module->AddEntryComputation(builder.Build()); + + auto fusion = computation->CreateFusionInstruction( + {tuple, mul, add}, HloInstruction::FusionKind::kLoop); + + TF_ASSERT_OK_AND_ASSIGN(SequentialHloOrdering::HloModuleSequence sequence, + CreateMemoryMinimizingSequence( + *module, + [](const BufferValue& buffer) { + return ShapeUtil::ByteSizeOf(buffer.shape(), 2); + }, + ListMemoryScheduler)); + + // Verify that all instructions are in the sequence. + EXPECT_EQ(module->entry_computation()->instruction_count(), + sequence.at(module->entry_computation()).size()); + SequentialHloOrdering ordering(module.get(), sequence); + // fusion allocates memory for the tuple elements and doesn't free anything, + // so it's more expensive than exp. 
+ EXPECT_TRUE(ordering.ExecutesBefore(exp, fusion)); +} + } // namespace } // namespace xla -- GitLab From f504a2445051c4c48eb9edd6a023b1f33a2793f2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 15:24:47 -0700 Subject: [PATCH 058/902] Adding scatter_nd* ops to Android build. PiperOrigin-RevId: 197799974 --- tensorflow/core/kernels/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index e1911361ce..1f0157acf4 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5171,7 +5171,6 @@ filegroup( "partitioned_function_ops.cc", # Excluded due to experimental status: "debug_ops.*", - "scatter_nd_op*", "mutex_ops.*", "batch_kernels.*", "regex_replace_op.cc", -- GitLab From 78c3a8870d2f748f356415e8d7acf9748d09c197 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 15:46:03 -0700 Subject: [PATCH 059/902] Add support for partitioned variables to SDCA.
PiperOrigin-RevId: 197803127 --- .../learn/python/learn/estimators/linear.py | 6 +- .../python/learn/estimators/linear_test.py | 112 ++++++++ .../python/kernel_tests/sdca_ops_test.py | 71 ++++- .../linear_optimizer/python/ops/sdca_ops.py | 252 ++++++++++++++---- .../linear_optimizer/python/sdca_estimator.py | 29 +- .../python/sdca_estimator_test.py | 84 ++++++ .../linear_optimizer/python/sdca_optimizer.py | 29 +- 7 files changed, 512 insertions(+), 71 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 70b70af98c..e100bc7a1e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -31,7 +31,6 @@ import six from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values -from tensorflow.python.training import training_util from tensorflow.contrib.layers.python.layers import feature_column from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.learn.python.learn.estimators import head as head_lib @@ -51,6 +50,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import session_run_hook from tensorflow.python.training import training as train +from tensorflow.python.training import training_util # The default learning rate of 0.2 is a historical artifact of the initial @@ -244,7 +244,9 @@ def sdca_model_fn(features, labels, mode, params): parent_scope = "linear" with variable_scope.variable_scope( - values=features.values(), name_or_scope=parent_scope) as scope: + values=features.values(), + name_or_scope=parent_scope, + partitioner=optimizer.partitioner) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, 
columns_to_variables, bias = ( diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py index 0a863f0e20..597ca4e86d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py @@ -43,6 +43,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import test from tensorflow.python.training import ftrl from tensorflow.python.training import input as input_lib @@ -966,6 +967,63 @@ class LinearClassifierTest(test.TestCase): scores = classifier.evaluate(input_fn=input_fn, steps=1) self.assertGreater(scores['accuracy'], 0.9) + def testSdcaOptimizerPartitionedVariables(self): + """Tests LinearClassifier with SDCAOptimizer with partitioned variables.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.6], [0.8], [0.3]]), + 'sq_footage': + constant_op.constant([[900.0], [700.0], [600.0]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [1.0], [1.0]]) + }, constant_op.constant([[1], [0], [1]]) + + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + + sdca_optimizer = sdca_optimizer_lib.SDCAOptimizer( + example_id_column='example_id', 
+ partitioner=partitioned_variables.fixed_size_partitioner( + num_shards=2, axis=0)) + + tf_config = { + 'cluster': { + run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1'] + } + } + with test.mock.patch.dict('os.environ', + {'TF_CONFIG': json.dumps(tf_config)}): + config = run_config.RunConfig() + # Because we did not start a distributed cluster, we need to pass an + # empty ClusterSpec, otherwise the device_setter will look for + # distributed jobs, such as "/job:ps" which are not present. + config._cluster_spec = server_lib.ClusterSpec({}) + + classifier = linear.LinearClassifier( + feature_columns=[price, sq_footage_bucket, country, sq_footage_country], + weight_column_name='weights', + optimizer=sdca_optimizer, + config=config) + classifier.fit(input_fn=input_fn, steps=50) + scores = classifier.evaluate(input_fn=input_fn, steps=1) + print('all scores = {}'.format(scores)) + self.assertGreater(scores['accuracy'], 0.9) + def testEval(self): """Tests that eval produces correct metrics. """ @@ -1540,6 +1598,60 @@ class LinearRegressorTest(test.TestCase): loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.05) + def testSdcaOptimizerPartitionedVariables(self): + """Tests LinearRegressor with SDCAOptimizer with partitioned variables.""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([0.6, 0.8, 0.3]), + 'sq_footage': + constant_op.constant([[900.0], [700.0], [600.0]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [5.0], [7.0]]) + }, constant_op.constant([[1.55], [-1.25], [-3.0]]) + + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = 
feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + sdca_optimizer = sdca_optimizer_lib.SDCAOptimizer( + example_id_column='example_id', symmetric_l2_regularization=1.0, + partitioner=partitioned_variables.fixed_size_partitioner( + num_shards=2, axis=0)) + tf_config = { + 'cluster': { + run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1'] + } + } + with test.mock.patch.dict('os.environ', + {'TF_CONFIG': json.dumps(tf_config)}): + config = run_config.RunConfig() + # Because we did not start a distributed cluster, we need to pass an + # empty ClusterSpec, otherwise the device_setter will look for + # distributed jobs, such as "/job:ps" which are not present. + config._cluster_spec = server_lib.ClusterSpec({}) + + regressor = linear.LinearRegressor( + feature_columns=[price, sq_footage_bucket, country, sq_footage_country], + weight_column_name='weights', + optimizer=sdca_optimizer, + config=config) + regressor.fit(input_fn=input_fn, steps=20) + loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 0.05) + def testSdcaOptimizerSparseFeaturesWithL1Reg(self): """Tests LinearClassifier with SDCAOptimizer and sparse features.""" diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index b5741967ab..d0c32b43cc 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -35,6 +35,8 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_sdca_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variable_scope from 
tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import googletest @@ -132,15 +134,22 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero): return examples_dict, variables_dict -def make_variable_dict(max_age, max_gender): +def make_variable_dict(max_age, max_gender, partitioned=False): # TODO(sibyl-toe9oF2e): Figure out how to derive max_age & max_gender from # examples_dict. - age_weights = variables_lib.Variable( - array_ops.zeros( - [max_age + 1], dtype=dtypes.float32)) - gender_weights = variables_lib.Variable( - array_ops.zeros( - [max_gender + 1], dtype=dtypes.float32)) + partitioner = None + if partitioned: + partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2, + axis=0) + with variable_scope.variable_scope( + name_or_scope='variables', + partitioner=partitioner): + age_weights = variables_lib.Variable( + array_ops.zeros( + [max_age + 1], dtype=dtypes.float32)) + gender_weights = variables_lib.Variable( + array_ops.zeros( + [max_gender + 1], dtype=dtypes.float32)) return dict( sparse_features_weights=[age_weights, gender_weights], dense_features_weights=[]) @@ -265,6 +274,54 @@ class SdcaWithLogisticLossTest(SdcaModelTest): self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + def testPartitionedPrimals(self): + # Setup test data + example_protos = [ + make_example_proto({ + 'age': [0], + 'gender': [0] + }, 0), + make_example_proto({ + 'age': [1], + 'gender': [1] + }, 1), + ] + example_weights = [1.0, 1.0] + for num_shards in _SHARD_NUMBERS: + with self._single_threaded_test_session(): + examples = make_example_dict(example_protos, example_weights) + variables = make_variable_dict(1, 1, partitioned=True) + options = dict( + symmetric_l2_regularization=1, + symmetric_l1_regularization=0, + num_table_shards=num_shards, + loss_type='logistic_loss') + + lr = SdcaModel(examples, variables, options) + 
variables_lib.global_variables_initializer().run() + unregularized_loss = lr.unregularized_loss(examples) + loss = lr.regularized_loss(examples) + predictions = lr.predictions(examples) + self.assertAllClose(0.693147, unregularized_loss.eval()) + self.assertAllClose(0.693147, loss.eval()) + train_op = lr.minimize() + for _ in range(_MAX_ITERATIONS): + train_op.run() + lr.update_weights(train_op).run() + # The high tolerance in unregularized_loss comparisons is due to the + # fact that it's possible to trade off unregularized_loss vs. + # regularization and still have a sum that is quite close to the + # optimal regularized_loss value. SDCA's duality gap only ensures that + # the regularized_loss is within 0.01 of optimal. + # 0.525457 is the optimal regularized_loss. + # 0.411608 is the unregularized_loss at that optimum. + self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) + self.assertAllClose(0.525457, loss.eval(), atol=0.01) + predicted_labels = get_binary_predictions_for_logistic(predictions) + self.assertAllEqual([0, 1], predicted_labels.eval()) + self.assertAllClose( + 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + def testSparseRandom(self): dim = 20 num_examples = 1000 diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index f980746a19..0047d5753a 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -22,12 +22,14 @@ import collections from six.moves import range from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework.ops import internal_convert_to_tensor from tensorflow.python.framework.ops import name_scope from 
tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_sdca_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -43,9 +45,6 @@ __all__ = ['SdcaModel'] class SdcaModel(object): """Stochastic dual coordinate ascent solver for linear models. - This class currently only supports a single machine (multi-threaded) - implementation. We expect the weights and duals to fit in a single machine. - Loss functions supported: * Binary logistic loss @@ -182,18 +181,41 @@ class SdcaModel(object): # TODO(sibyl-Aix6ihai): Use optimizer interface to make use of slot creation logic. def _create_slots(self): - # Make internal variables which have the updates before applying L1 - # regularization. + """Make unshrinked internal variables (slots).""" + # Unshrinked variables have the updates before applying L1 regularization. + # Each unshrinked slot variable is either a `Variable` or list of + # `Variable`, depending on the value of its corresponding primary variable. + # We avoid using `PartitionedVariable` for the unshrinked slots since we do + # not need any of the extra information. self._slots = collections.defaultdict(list) for name in ['sparse_features_weights', 'dense_features_weights']: for var in self._variables[name]: - with ops.device(var.device): - # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 is - # fixed - self._slots['unshrinked_' + name].append( - var_ops.Variable( - array_ops.zeros_like(var.initialized_value(), dtypes.float32), - name=var.op.name + '_unshrinked/SDCAOptimizer')) + # Our primary variable may be either a PartitionedVariable, or a list + # of Variables (each representing a partition). 
+ if (isinstance(var, var_ops.PartitionedVariable) or + isinstance(var, list)): + var_list = [] + # pylint: disable=protected-access + for v in var: + with ops.colocate_with(v): + # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 + # is fixed. + slot_var = var_ops.Variable( + initial_value=array_ops.zeros_like(v.initialized_value(), + dtypes.float32), + name=v.op.name + '_unshrinked/SDCAOptimizer') + var_list.append(slot_var) + self._slots['unshrinked_' + name].append(var_list) + # pylint: enable=protected-access + else: + with ops.device(var.device): + # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 is + # fixed. + self._slots['unshrinked_' + name].append( + var_ops.Variable( + array_ops.zeros_like(var.initialized_value(), + dtypes.float32), + name=var.op.name + '_unshrinked/SDCAOptimizer')) def _assertSpecified(self, items, check_in): for x in items: @@ -205,16 +227,25 @@ class SdcaModel(object): if not isinstance(check_in[x], list): raise ValueError(x + ' must be a list.') + def _var_to_list(self, var): + """Wraps var in a list if it is not a list or PartitionedVariable.""" + if not (isinstance(var, list) or + isinstance(var, var_ops.PartitionedVariable)): + var = [var] + return var + def _l1_loss(self): """Computes the (un-normalized) l1 loss of the model.""" with name_scope('sdca/l1_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: - for weights in self._convert_n_to_tensor(self._variables[name]): - with ops.device(weights.device): - sums.append( - math_ops.reduce_sum( - math_ops.abs(math_ops.cast(weights, dtypes.float64)))) + for var in self._variables[name]: + for v in self._var_to_list(var): + weights = internal_convert_to_tensor(v) + with ops.device(weights.device): + sums.append( + math_ops.reduce_sum( + math_ops.abs(math_ops.cast(weights, dtypes.float64)))) # SDCA L1 regularization cost is: l1 * sum(|weights|) return self._options['symmetric_l1_regularization'] * math_ops.add_n(sums) @@ 
-223,17 +254,37 @@ class SdcaModel(object): with name_scope('sdca/l2_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: - for weights in self._convert_n_to_tensor(self._variables[name]): - with ops.device(weights.device): - sums.append( - math_ops.reduce_sum( - math_ops.square(math_ops.cast(weights, dtypes.float64)))) + for var in self._variables[name]: + for v in self._var_to_list(var): + weights = internal_convert_to_tensor(v) + with ops.device(weights.device): + sums.append(math_ops.reduce_sum(math_ops.square(math_ops.cast( + weights, dtypes.float64)))) # SDCA L2 regularization cost is: l2 * sum(weights^2) / 2 return l2 * math_ops.add_n(sums) / 2.0 def _convert_n_to_tensor(self, input_list, as_ref=False): """Converts input list to a set of tensors.""" - return [internal_convert_to_tensor(x, as_ref=as_ref) for x in input_list] + # input_list can be a list of Variables (that are implicitly partitioned), + # in which case the underlying logic in internal_convert_to_tensor will not + # concatenate the partitions together. This method takes care of the + # concatenating (we only allow partitioning on the first axis). + output_list = [] + for x in input_list: + tensor_to_convert = x + if isinstance(x, list) or isinstance(x, var_ops.PartitionedVariable): + # We only allow for partitioning on the first axis. 
+ tensor_to_convert = array_ops.concat(x, axis=0) + output_list.append(internal_convert_to_tensor( + tensor_to_convert, as_ref=as_ref)) + return output_list + + def _get_first_dimension_size_statically(self, w, num_partitions): + """Compute the static size of the first dimension for a sharded variable.""" + dim_0_size = w[0].get_shape()[0] + for p in range(1, num_partitions): + dim_0_size += w[p].get_shape()[0] + return dim_0_size def _linear_predictions(self, examples): """Returns predictions of the form w*x.""" @@ -286,6 +337,28 @@ class SdcaModel(object): result = math_ops.sigmoid(result) return result + def _get_partitioned_update_ops(self, + v_num, + num_partitions_by_var, + p_assignments_by_var, + gather_ids_by_var, + weights, + full_update, + p_assignments, + num_partitions): + """Get updates for partitioned variables.""" + num_partitions = num_partitions_by_var[v_num] + p_assignments = p_assignments_by_var[v_num] + gather_ids = gather_ids_by_var[v_num] + updates = data_flow_ops.dynamic_partition( + full_update, p_assignments, num_partitions) + update_ops = [] + for p in range(num_partitions): + with ops.colocate_with(weights[p]): + result = state_ops.scatter_add(weights[p], gather_ids[p], updates[p]) + update_ops.append(result) + return update_ops + def minimize(self, global_step=None, name=None): """Add operations to train a linear model by minimizing the loss function. @@ -318,18 +391,89 @@ class SdcaModel(object): # Solver returns example_state_update, new delta sparse_feature_weights # and delta dense_feature_weights. - weights_tensor = self._convert_n_to_tensor(self._slots[ - 'unshrinked_sparse_features_weights']) sparse_weights = [] sparse_indices = [] - for w, i in zip(weights_tensor, sparse_feature_indices): - # Find the feature ids to lookup in the variables. 
- with ops.device(w.device): - sparse_indices.append( - math_ops.cast( - array_ops.unique(math_ops.cast(i, dtypes.int32))[0], - dtypes.int64)) - sparse_weights.append(array_ops.gather(w, sparse_indices[-1])) + # If we have partitioned variables, keep a few lists of Tensors around + # that we need for the assign_add after the op call to + # gen_sdca_ops.sdca_optimizer(). + num_partitions_by_var = [] + p_assignments_by_var = [] + gather_ids_by_var = [] + for w, i in zip(self._slots['unshrinked_sparse_features_weights'], + sparse_feature_indices): + # Append the sparse_indices (in full-variable space). + sparse_idx = math_ops.cast( + array_ops.unique(math_ops.cast(i, dtypes.int32))[0], + dtypes.int64) + sparse_indices.append(sparse_idx) + if isinstance(w, list) or isinstance(w, var_ops.PartitionedVariable): + num_partitions = len(w) + flat_ids = array_ops.reshape(sparse_idx, [-1]) + # We use div partitioning, which is easiest to support downstream. + # Compute num_total_ids as the sum of dim-0 of w, then assign + # to partitions based on a constant number of ids per partition. + # Optimize if we already know the full shape statically. 
+ dim_0_size = self._get_first_dimension_size_statically( + w, num_partitions) + + if dim_0_size.value: + num_total_ids = constant_op.constant(dim_0_size.value, + flat_ids.dtype) + else: + dim_0_sizes = [] + for p in range(num_partitions): + if w[p].get_shape()[0].value is not None: + dim_0_sizes.append(w[p].get_shape()[0].value) + else: + with ops.colocate_with(w[p]): + dim_0_sizes.append(array_ops.shape(w[p])[0]) + num_total_ids = math_ops.reduce_sum( + math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype)) + ids_per_partition = num_total_ids // num_partitions + extras = num_total_ids % num_partitions + + p_assignments = math_ops.maximum( + flat_ids // (ids_per_partition + 1), + (flat_ids - extras) // ids_per_partition) + + # Emulate a conditional using a boolean indicator tensor + new_ids = array_ops.where(p_assignments < extras, + flat_ids % (ids_per_partition + 1), + (flat_ids - extras) % ids_per_partition) + + # Cast partition assignments to int32 for use in dynamic_partition. + # There really should not be more than 2^32 partitions. + p_assignments = math_ops.cast(p_assignments, dtypes.int32) + # Partition list of ids based on assignments into num_partitions + # separate lists. + gather_ids = data_flow_ops.dynamic_partition(new_ids, + p_assignments, + num_partitions) + # Append these to the lists for use in the later update. + num_partitions_by_var.append(num_partitions) + p_assignments_by_var.append(p_assignments) + gather_ids_by_var.append(gather_ids) + + # Gather the weights from each partition. + partition_gathered_weights = [] + for p in range(num_partitions): + with ops.colocate_with(w[p]): + partition_gathered_weights.append( + array_ops.gather(w[p], gather_ids[p])) + + # Stitch the weights back together in the same order they were before + # we dynamic_partitioned them. 
+ condition_indices = data_flow_ops.dynamic_partition( + math_ops.range(array_ops.shape(new_ids)[0]), + p_assignments, num_partitions) + batch_gathered_weights = data_flow_ops.dynamic_stitch( + condition_indices, partition_gathered_weights) + else: + w_as_tensor = internal_convert_to_tensor(w) + with ops.device(w_as_tensor.device): + batch_gathered_weights = array_ops.gather( + w_as_tensor, sparse_idx) + sparse_weights.append(batch_gathered_weights) # pylint: disable=protected-access esu, sfw, dfw = gen_sdca_ops.sdca_optimizer( @@ -355,12 +499,25 @@ class SdcaModel(object): with ops.control_dependencies([esu]): update_ops = [self._hashtable.insert(example_ids_hashed, esu)] # Update the weights before the proximal step. - for w, i, u in zip(self._slots['unshrinked_sparse_features_weights'], - sparse_indices, sfw): - update_ops.append(state_ops.scatter_add(w, i, u)) + for v_num, (w, i, u) in enumerate( + zip(self._slots['unshrinked_sparse_features_weights'], + sparse_indices, sfw)): + if (isinstance(w, var_ops.PartitionedVariable) or + isinstance(w, list)): + update_ops += self._get_partitioned_update_ops( + v_num, num_partitions_by_var, p_assignments_by_var, + gather_ids_by_var, w, u, p_assignments, num_partitions) + else: + update_ops.append(state_ops.scatter_add(w, i, u)) for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw): - update_ops.append(w.assign_add(u)) - + if (isinstance(w, var_ops.PartitionedVariable) or + isinstance(w, list)): + split_updates = array_ops.split( + u, num_or_size_splits=[v.shape.as_list()[0] for v in w]) + for v, split_update in zip(w, split_updates): + update_ops.append(state_ops.assign_add(v, split_update)) + else: + update_ops.append(state_ops.assign_add(w, u)) if not global_step: return control_flow_ops.group(*update_ops) with ops.control_dependencies(update_ops): @@ -385,21 +542,22 @@ class SdcaModel(object): for name in ['sparse_features_weights', 'dense_features_weights']: for var, slot_var in 
zip(self._variables[name], self._slots['unshrinked_' + name]): - update_ops.append(var.assign(slot_var)) + for v, sv in zip(self._var_to_list(var), self._var_to_list(slot_var)): + update_ops.append(v.assign(sv)) # Apply proximal step. with ops.control_dependencies(update_ops): update_ops = [] for name in ['sparse_features_weights', 'dense_features_weights']: for var in self._variables[name]: - with ops.device(var.device): - # pylint: disable=protected-access - update_ops.append( - gen_sdca_ops.sdca_shrink_l1( - self._convert_n_to_tensor( - [var], as_ref=True), - l1=self._symmetric_l1_regularization(), - l2=self._symmetric_l2_regularization())) + for v in self._var_to_list(var): + with ops.device(v.device): + # pylint: disable=protected-access + update_ops.append( + gen_sdca_ops.sdca_shrink_l1( + self._convert_n_to_tensor([v], as_ref=True), + l1=self._symmetric_l1_regularization(), + l2=self._symmetric_l2_regularization())) return control_flow_ops.group(*update_ops) def approximate_duality_gap(self): diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py index d4e54c82f9..200e7de6b9 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py @@ -116,6 +116,7 @@ def sdca_model_fn(features, labels, mode, params, config=None): num_loss_partitions = params["num_loss_partitions"] weight_column_name = params["weight_column_name"] update_weights_hook = params.get("update_weights_hook", None) + partitioner = params["partitioner"] loss_type = None if isinstance(head, head_lib._BinarySvmHead): # pylint: disable=protected-access @@ -136,12 +137,14 @@ def sdca_model_fn(features, labels, mode, params, config=None): example_id_column=example_id_column, num_loss_partitions=n_loss_partitions, symmetric_l1_regularization=l1_regularization, - symmetric_l2_regularization=l2_regularization) + 
symmetric_l2_regularization=l2_regularization, + partitioner=partitioner) parent_scope = "linear" with variable_scope.variable_scope( - values=features.values(), name_or_scope=parent_scope) as scope: + values=features.values(), name_or_scope=parent_scope, + partitioner=partitioner) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( @@ -213,7 +216,8 @@ class _SDCAEstimator(estimator.Estimator): l2_regularization=1.0, num_loss_partitions=None, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + partitioner=None): """Construct a `_SDCAEstimator` estimator object. Args: @@ -241,6 +245,8 @@ class _SDCAEstimator(estimator.Estimator): feature_engineering_fn: Feature engineering function. Takes features and labels which are the output of `input_fn` and returns features and labels which will be fed into the model. + partitioner: Variable partitioner for the primal weights (`div` + partitioning strategy will be used). Returns: A `_SDCAEstimator` estimator. @@ -267,6 +273,7 @@ class _SDCAEstimator(estimator.Estimator): "l2_regularization": l2_regularization, "weight_column_name": weight_column_name, "update_weights_hook": _SdcaUpdateWeightsHook(), + "partitioner": partitioner, } super(_SDCAEstimator, self).__init__( @@ -336,7 +343,8 @@ class SDCALogisticClassifier(_SDCAEstimator): l2_regularization=1.0, num_loss_partitions=None, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + partitioner=None): """Construct a `SDCALogisticClassifier` object. Args: @@ -361,6 +369,8 @@ class SDCALogisticClassifier(_SDCAEstimator): feature_engineering_fn: Feature engineering function. Takes features and labels which are the output of `input_fn` and returns features and labels which will be fed into the model. + partitioner: Variable partitioner for the primal weights (`div` + partitioning strategy will be used). 
Returns: A `SDCALogisiticClassifier` estimator. @@ -376,7 +386,8 @@ class SDCALogisticClassifier(_SDCAEstimator): l2_regularization=l2_regularization, num_loss_partitions=num_loss_partitions, config=config, - feature_engineering_fn=None) + feature_engineering_fn=None, + partitioner=partitioner) def predict_classes(self, input_fn=None): """Runs inference to determine the predicted class. @@ -463,7 +474,8 @@ class SDCALinearRegressor(_SDCAEstimator): l2_regularization=1.0, num_loss_partitions=None, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + partitioner=None): """Construct a `SDCALinearRegressor` estimator object. @@ -489,6 +501,8 @@ class SDCALinearRegressor(_SDCAEstimator): feature_engineering_fn: Feature engineering function. Takes features and labels which are the output of `input_fn` and returns features and labels which will be fed into the model. + partitioner: Variable partitioner for the primal weights (`div` + partitioning strategy will be used). Returns: A `SDCALinearRegressor` estimator. @@ -503,7 +517,8 @@ class SDCALinearRegressor(_SDCAEstimator): l2_regularization=l2_regularization, num_loss_partitions=num_loss_partitions, config=config, - feature_engineering_fn=None) + feature_engineering_fn=None, + partitioner=partitioner) def predict_scores(self, input_fn): """Returns predicted scores for given features. 
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py index bed3d5139f..6476671882 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator_test.py @@ -25,6 +25,7 @@ from tensorflow.contrib.linear_optimizer.python import sdca_estimator from tensorflow.core.protobuf import config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import test @@ -273,6 +274,47 @@ class SDCALogisticClassifierTest(test.TestCase): metrics = classifier.evaluate(input_fn=input_fn, steps=1) self.assertGreater(metrics['accuracy'], 0.9) + def testPartitionedMixedFeatures(self): + """Tests SDCALogisticClassifier with a mix of features (partitioned).""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.6], [0.8], [0.3]]), + 'sq_footage': + constant_op.constant([900.0, 700.0, 600.0]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [1.0], [1.0]]) + }, constant_op.constant([[1], [0], [1]]) + + with self._single_threaded_test_session(): + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + classifier = sdca_estimator.SDCALogisticClassifier( + example_id_column='example_id', + feature_columns=[ + price, sq_footage_bucket, 
country, sq_footage_country + ], + weight_column_name='weights', + partitioner=partitioned_variables.fixed_size_partitioner( + num_shards=2, axis=0)) + classifier.fit(input_fn=input_fn, steps=50) + metrics = classifier.evaluate(input_fn=input_fn, steps=1) + self.assertGreater(metrics['accuracy'], 0.9) + class SDCALinearRegressorTest(test.TestCase): @@ -350,6 +392,48 @@ class SDCALinearRegressorTest(test.TestCase): loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] self.assertLess(loss, 0.05) + def testMixedFeaturesArbitraryWeightsPartitioned(self): + """Tests SDCALinearRegressor works with a mix of features (partitioned).""" + + def input_fn(): + return { + 'example_id': + constant_op.constant(['1', '2', '3']), + 'price': + constant_op.constant([[0.6], [0.8], [0.3]]), + 'sq_footage': + constant_op.constant([[900.0], [700.0], [600.0]]), + 'country': + sparse_tensor.SparseTensor( + values=['IT', 'US', 'GB'], + indices=[[0, 0], [1, 3], [2, 1]], + dense_shape=[3, 5]), + 'weights': + constant_op.constant([[3.0], [5.0], [7.0]]) + }, constant_op.constant([[1.55], [-1.25], [-3.0]]) + + with self._single_threaded_test_session(): + price = feature_column_lib.real_valued_column('price') + sq_footage_bucket = feature_column_lib.bucketized_column( + feature_column_lib.real_valued_column('sq_footage'), + boundaries=[650.0, 800.0]) + country = feature_column_lib.sparse_column_with_hash_bucket( + 'country', hash_bucket_size=5) + sq_footage_country = feature_column_lib.crossed_column( + [sq_footage_bucket, country], hash_bucket_size=10) + regressor = sdca_estimator.SDCALinearRegressor( + example_id_column='example_id', + feature_columns=[ + price, sq_footage_bucket, country, sq_footage_country + ], + l2_regularization=1.0, + weight_column_name='weights', + partitioner=partitioned_variables.fixed_size_partitioner( + num_shards=2, axis=0)) + regressor.fit(input_fn=input_fn, steps=20) + loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss'] + self.assertLess(loss, 
0.05) + def testSdcaOptimizerSparseFeaturesWithL1Reg(self): """SDCALinearRegressor works with sparse features and L1 regularization.""" diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 12039ecc6f..9872c6f97c 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -64,7 +64,8 @@ class SDCAOptimizer(object): of workers running the train steps. It defaults to 1 (single machine). `num_table_shards` defines the number of shards for the internal state table, typically set to match the number of parameter servers for large - data sets. + data sets. You can also specify a `partitioner` object to partition the primal + weights during training (`div` partitioning strategy will be used). """ def __init__(self, @@ -73,13 +74,15 @@ class SDCAOptimizer(object): num_table_shards=None, symmetric_l1_regularization=0.0, symmetric_l2_regularization=1.0, - adaptive=True): + adaptive=True, + partitioner=None): self._example_id_column = example_id_column self._num_loss_partitions = num_loss_partitions self._num_table_shards = num_table_shards self._symmetric_l1_regularization = symmetric_l1_regularization self._symmetric_l2_regularization = symmetric_l2_regularization self._adaptive = adaptive + self._partitioner = partitioner def get_name(self): return 'SDCAOptimizer' @@ -108,6 +111,10 @@ class SDCAOptimizer(object): def adaptive(self): return self._adaptive + @property + def partitioner(self): + return self._partitioner + def get_train_step(self, columns_to_variables, weight_column_name, loss_type, features, targets, global_step): """Returns the training operation of an SdcaModel optimizer.""" @@ -175,10 +182,12 @@ class SDCAOptimizer(object): sparse_feature_column = _dense_tensor_to_sparse_feature_column( dense_bucket_tensor) sparse_feature_with_values.append(sparse_feature_column) - # For bucketized 
columns, the variables list contains exactly one - # element. - sparse_feature_with_values_weights.append( - columns_to_variables[column][0]) + # If a partitioner was used during variable creation, we will have a + # list of Variables here larger than 1. + vars_to_append = columns_to_variables[column][0] + if len(columns_to_variables[column]) > 1: + vars_to_append = columns_to_variables[column] + sparse_feature_with_values_weights.append(vars_to_append) elif isinstance( column, ( @@ -226,8 +235,12 @@ class SDCAOptimizer(object): array_ops.shape(ids)[0]), [-1]) sparse_feature_with_values.append( SparseFeatureColumn(example_ids_filtered, reproject_ids, weights)) - sparse_feature_with_values_weights.append( - columns_to_variables[column][0]) + # If a partitioner was used during variable creation, we will have a + # list of Variables here larger than 1. + vars_to_append = columns_to_variables[column][0] + if len(columns_to_variables[column]) > 1: + vars_to_append = columns_to_variables[column] + sparse_feature_with_values_weights.append(vars_to_append) else: raise ValueError('SDCAOptimizer does not support column type %s.' % type(column).__name__) -- GitLab From ad978e4181f551d92c728dda33ca2f9c03520c70 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Wed, 23 May 2018 15:58:28 -0700 Subject: [PATCH 060/902] Fix CurlHttpRequest handling unexpectedly large responses This fixes a few issues with CurlHttpRequest (and correspondingly GcsFileSystem): - Return status FAILED_PRECONDITION (i.e. "your buffer was too small") when CurlHttpRequest has a direct response buffer and the response is too large for the buffer, instead of UNAVAILABLE, since if the server resource is actually a fixed size, retrying automatically won't help at all. Also, include the message about the too-small buffer size in the returned Status as opposed to logging it, making it more obvious that it's treated as a message about a hard failure versus just a warning. 
- If the response was actually a 416 Range Not Satisfied response, fully pretend that the response had no body even if we got one (I'm looking at you GCS... it returns a 177-byte error message). This means: - Ignore a "buffer too small" error produced by the logic described above - Don't report the length of that body in GetResultBufferDirectBytesTransferred(), which looks to the client like data corruption, just report 0 (this fix makes it match the behavior of the non-direct-buffer response handling) I also tweaked the error messages, e.g. the message that includes an HTTP response code shouldn't report the CURLcode since it will always be CURLE_OK at that point. PiperOrigin-RevId: 197805003 --- .../core/platform/cloud/curl_http_request.cc | 107 ++++++++++-------- .../core/platform/cloud/curl_http_request.h | 18 +-- .../platform/cloud/curl_http_request_test.cc | 76 +++++++++---- 3 files changed, 120 insertions(+), 81 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 081d4cf043..a1be4aacce 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -112,10 +112,6 @@ class LibCurlProxy : public LibCurl { } void curl_free(void* p) override { ::curl_free(p); } - - const char* curl_easy_strerror(CURLcode errornum) override { - return ::curl_easy_strerror(errornum); - } }; } // namespace @@ -313,7 +309,7 @@ void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { CHECK(buffer != nullptr); CheckNotSent(); - direct_response_ = DirectResponseState{buffer, size, 0}; + direct_response_ = DirectResponseState{buffer, size, 0, 0}; CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, reinterpret_cast(this))); CHECK_CURL_OK(libcurl_->curl_easy_setopt( @@ -335,24 +331,15 @@ size_t CurlHttpRequest::WriteCallbackDirect(const void* ptr, size_t size, size_t curl_bytes_received = size * nmemb; size_t 
user_buffer_bytes_available = state->buffer_size_ - state->bytes_transferred_; - - // The HTTP server may send a response body that is longer than what we - // expected. We must not use CHECK() for this situation, because that would - // imply a code bug (in this client code) where none exists; the violation of - // expectations would have been caused by the server, not the client. So we - // report a log warning, if an HTTP server is misbehaving. - if (curl_bytes_received > user_buffer_bytes_available) { - LOG(WARNING) << "The HTTP response body that we received is longer than we " - "requested or expected. " - << "Total bytes requested: " << state->buffer_size_ - << " Bytes received (so far) in HTTP response body: " - << (state->bytes_transferred_ + curl_bytes_received); - } - size_t bytes_to_copy = std::min(curl_bytes_received, user_buffer_bytes_available); memcpy(&state->buffer_[state->bytes_transferred_], ptr, bytes_to_copy); state->bytes_transferred_ += bytes_to_copy; + state->bytes_received_ += curl_bytes_received; + // If we didn't have room to store the full response, returning less than + // curl_bytes_received here will abort the transfer and curl_easy_perform() + // will return CURLE_WRITE_ERROR. We will detect and handle this error there, + // and can use state->bytes_received_ as stored above for logging purposes. return bytes_to_copy; } @@ -447,23 +434,7 @@ Status CurlHttpRequest::Send() { } const CURLcode curl_result = libcurl_->curl_easy_perform(curl_); - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - curl_result, "Performing request. 
Detailed error: ", error_buffer); - - auto get_error_message = [this, curl_result, &error_buffer]() -> string { - StringPiece response = GetResponse(); - string error_message = strings::StrCat( - "Error executing an HTTP request (HTTP response code ", response_code_, - ", error code ", curl_result, ", error message '", error_buffer, "')"); - if (!response.empty()) { - return strings::StrCat( - error_message, ", response '", - response.substr(0, - std::min(response.size(), response_to_error_limit_)), - "'"); - } - return error_message; - }; + TF_RETURN_IF_ERROR(CURLcodeToStatus(curl_result, error_buffer)); double written_size = 0; CHECK_CURL_OK(libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, @@ -472,6 +443,18 @@ Status CurlHttpRequest::Send() { CHECK_CURL_OK(libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &response_code_)); + auto get_error_message = [this]() -> string { + string error_message = strings::StrCat( + "Error executing an HTTP request: HTTP response code ", response_code_); + StringPiece body = GetResponse(); + if (!body.empty()) { + return strings::StrCat( + error_message, " with body '", + body.substr(0, std::min(body.size(), response_to_error_limit_)), "'"); + } + return error_message; + }; + Status result; switch (response_code_) { // The group of response codes indicating that the request achieved @@ -485,9 +468,12 @@ Status CurlHttpRequest::Send() { case 416: // Requested Range Not Satisfiable // The requested range had no overlap with the available range. - // This doesn't indicate an error, but this does mean an empty response - // body. + // This doesn't indicate an error, but we should produce an empty response + // body. (Not all servers do; GCS returns a short error message body.) 
response_buffer_->clear(); + if (IsDirectResponse()) { + direct_response_.bytes_transferred_ = 0; + } result = Status::OK(); break; @@ -613,14 +599,13 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal, << " bytes for " << now - that->last_progress_timestamp_ << " seconds and will be aborted. CURL timing information: " << "lookup time: " << lookup_time << " (" - << that->libcurl_->curl_easy_strerror(lookup_time_status) + << curl_easy_strerror(lookup_time_status) << "), connect time: " << connect_time << " (" - << that->libcurl_->curl_easy_strerror(connect_time_status) + << curl_easy_strerror(connect_time_status) << "), pre-transfer time: " << pretransfer_time << " (" - << that->libcurl_->curl_easy_strerror(pretransfer_time_status) + << curl_easy_strerror(pretransfer_time_status) << "), start-transfer time: " << starttransfer_time << " (" - << that->libcurl_->curl_easy_strerror(starttransfer_time_status) - << ")"; + << curl_easy_strerror(starttransfer_time_status) << ")"; return 1; // Will abort the request. } @@ -628,12 +613,36 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal, return 0; } -Status CURLcodeToStatus(CURLcode code) { - // Return Unavailable to retry by default. We probably should distinguish - // between permanent or temporary failures. - return errors::Unavailable("Error executing an HTTP request (error code ", - code, ", error message '", - curl_easy_strerror(code), "')"); +Status CurlHttpRequest::CURLcodeToStatus(CURLcode code, + const char* error_buffer) { + if (code == CURLE_OK) { + return Status::OK(); + } + string error_message = strings::StrCat( + "Error executing an HTTP request: libcurl code ", code, " meaning '", + curl_easy_strerror(code), "', error details: "); + // Special-case response-too-large errors as FAILED_PRECONDITION. 
+ if (code == CURLE_WRITE_ERROR && IsDirectResponse() && + direct_response_.bytes_received_ > direct_response_.buffer_size_) { + string overflow_message = strings::StrCat( + "Received ", direct_response_.bytes_received_, " response bytes ", + "for a ", direct_response_.buffer_size_, "-byte buffer"); + uint64 response_code = 0; + const CURLcode get_response_result = libcurl_->curl_easy_getinfo( + curl_, CURLINFO_RESPONSE_CODE, &response_code); + // Special-case 416 Range Not Satisfied responses; they sometimes have + // a response body (e.g. GCS sends one with an error message) but we + // pretend as though they don't, so actually ignore this error. + if (get_response_result == CURLE_OK && response_code == 416) { + return Status::OK(); + } + return errors::FailedPrecondition( + strings::StrCat(error_message, overflow_message)); + } + // Return Unavailable to retry by default. There may be other permanent + // failures that should be distinguished. + return errors::Unavailable( + strings::StrCat(error_message, *error_buffer ? error_buffer : "(none)")); } } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index e658948ab9..1b2029926d 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -167,6 +167,10 @@ class CurlHttpRequest : public HttpRequest { void CheckNotSent() const; StringPiece GetResponse() const; + /// Helper to convert the given CURLcode and error buffer, representing the + /// result of performing a transfer, into a Status with an error message. 
+ Status CURLcodeToStatus(CURLcode code, const char* error_buffer); + LibCurl* libcurl_; Env* env_; @@ -181,6 +185,7 @@ class CurlHttpRequest : public HttpRequest { char* buffer_; size_t buffer_size_; size_t bytes_transferred_; + size_t bytes_received_; }; DirectResponseState direct_response_ = {}; @@ -261,21 +266,8 @@ class LibCurl { virtual void curl_slist_free_all(curl_slist* list) = 0; virtual char* curl_easy_escape(CURL* curl, const char* str, int length) = 0; virtual void curl_free(void* p) = 0; - - virtual const char* curl_easy_strerror(CURLcode errornum) = 0; }; -Status CURLcodeToStatus(CURLcode code); - -#define TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR(_code, ...) \ - do { \ - if (_code != CURLE_OK) { \ - ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ - ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ - return _status; \ - } \ - } while (0) - } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_CLOUD_CURL_HTTP_REQUEST_H_ diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 522b717568..eb9023d708 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -149,8 +149,12 @@ class FakeLibCurl : public LibCurl { } while (bytes_read > 0); } if (write_data_ || write_callback_) { - write_callback_(response_content_.c_str(), 1, response_content_.size(), - write_data_); + size_t bytes_handled = write_callback_( + response_content_.c_str(), 1, response_content_.size(), write_data_); + // Mimic real libcurl behavior by checking write callback return value. 
+ if (bytes_handled != response_content_.size()) { + curl_easy_perform_result_ = CURLE_WRITE_ERROR; + } } for (const auto& header : response_headers_) { header_callback_(header.c_str(), 1, header.size(), header_data_); @@ -219,10 +223,6 @@ class FakeLibCurl : public LibCurl { } void curl_free(void* p) override { port::Free(p); } - const char* curl_easy_strerror(CURLcode errornum) override { - return ""; - } - // Variables defining the behavior of this fake. string response_content_; uint64 response_code_; @@ -302,7 +302,7 @@ TEST(CurlHttpRequestTest, GetRequest_Direct) { string expected_response = "get response"; size_t response_bytes_transferred = http_request.GetResultBufferDirectBytesTransferred(); - EXPECT_EQ(response_bytes_transferred, expected_response.size()); + EXPECT_EQ(expected_response.size(), response_bytes_transferred); EXPECT_EQ( "get response", string(scratch.begin(), scratch.begin() + response_bytes_transferred)); @@ -318,6 +318,48 @@ TEST(CurlHttpRequestTest, GetRequest_Direct) { EXPECT_EQ(200, http_request.GetResponseCode()); } +TEST(CurlHttpRequestTest, GetRequest_Direct_ResponseTooLarge) { + FakeLibCurl libcurl("get response", 200); + CurlHttpRequest http_request(&libcurl); + + std::vector scratch(5, 0); + + http_request.SetUri("http://www.testuri.com"); + http_request.SetResultBufferDirect(scratch.data(), scratch.size()); + const Status& status = http_request.Send(); + EXPECT_EQ(error::FAILED_PRECONDITION, status.code()); + EXPECT_EQ( + "Error executing an HTTP request: libcurl code 23 meaning " + "'Failed writing received data to disk/application', error details: " + "Received 12 response bytes for a 5-byte buffer", + status.error_message()); + + // As long as the request clearly fails, ok to leave truncated response here. 
+ EXPECT_EQ(5, http_request.GetResultBufferDirectBytesTransferred()); + EXPECT_EQ("get r", string(scratch.begin(), scratch.begin() + 5)); +} + +TEST(CurlHttpRequestTest, GetRequest_Direct_RangeOutOfBound) { + FakeLibCurl libcurl("get response", 416); + CurlHttpRequest http_request(&libcurl); + + const string initialScratch = "abcde"; + std::vector scratch; + scratch.insert(scratch.end(), initialScratch.begin(), initialScratch.end()); + + http_request.SetUri("http://www.testuri.com"); + http_request.SetRange(0, 4); + http_request.SetResultBufferDirect(scratch.data(), scratch.size()); + TF_EXPECT_OK(http_request.Send()); + EXPECT_EQ(416, http_request.GetResponseCode()); + + // Some servers (in particular, GCS) return an error message payload with a + // 416 Range Not Satisfiable response. We should pretend it's not there when + // reporting bytes transferred, but it's ok if it writes to scratch. + EXPECT_EQ(0, http_request.GetResultBufferDirectBytesTransferred()); + EXPECT_EQ("get r", string(scratch.begin(), scratch.end())); +} + TEST(CurlHttpRequestTest, GetRequest_Empty) { FakeLibCurl libcurl("", 200); CurlHttpRequest http_request(&libcurl); @@ -357,28 +399,26 @@ TEST(CurlHttpRequestTest, GetRequest_RangeOutOfBound) { http_request.SetResultBuffer(&scratch); TF_EXPECT_OK(http_request.Send()); + // Some servers (in particular, GCS) return an error message payload with a + // 416 Range Not Satisfiable response. We should pretend it's not there. 
EXPECT_TRUE(scratch.empty()); EXPECT_EQ(416, http_request.GetResponseCode()); } TEST(CurlHttpRequestTest, GetRequest_503) { FakeLibCurl libcurl("get response", 503); - libcurl.curl_easy_perform_result_ = CURLE_WRITE_ERROR; CurlHttpRequest http_request(&libcurl); std::vector scratch; scratch.insert(scratch.end(), kTestContent.begin(), kTestContent.end()); http_request.SetUri("http://www.testuri.com"); - http_request.AddAuthBearerHeader("fake-bearer"); - http_request.SetRange(100, 199); http_request.SetResultBuffer(&scratch); const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (error code 23, error message 'Failed " - "writing received data to disk/application')\n\tPerforming request. " - "Detailed error: ", + "Error executing an HTTP request: HTTP response code 503 with body " + "'get response'", status.error_message()); } @@ -395,9 +435,8 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (error code 28, error message 'Timeout " - "was reached')\n\tPerforming request. Detailed error: Operation timed " - "out", + "Error executing an HTTP request: libcurl code 28 meaning " + "'Timeout was reached', error details: Operation timed out", status.error_message()); EXPECT_EQ(0, http_request.GetResponseCode()); } @@ -630,9 +669,8 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { auto status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (error code 42, error message " - "'Operation was aborted by an application callback')\n\tPerforming " - "request. 
Detailed error: ", + "Error executing an HTTP request: libcurl code 42 meaning 'Operation " + "was aborted by an application callback', error details: (none)", status.error_message()); } -- GitLab From 07cd8f2565cd1c7a44be681379eb7dfc64a77b1c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 16:02:19 -0700 Subject: [PATCH 061/902] added support for calling fit on Dataset objects PiperOrigin-RevId: 197805615 --- tensorflow/python/keras/engine/training.py | 88 ++++++++------- .../python/keras/engine/training_test.py | 101 ++++++++++++++++-- .../python/keras/engine/training_utils.py | 15 +-- 3 files changed, 151 insertions(+), 53 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index ff50d0b6e2..0db805cc84 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -112,6 +112,8 @@ class Model(Network): super(Model, self).__init__(*args, **kwargs) # Create a cache for iterator get_next op. self._iterator_get_next = weakref.WeakKeyDictionary() + # Create a cache for dataset - uninitialized iterators + self._dataset_iterator_cache = weakref.WeakKeyDictionary() def compile(self, optimizer, @@ -670,12 +672,12 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset iterator, - `y` should not be specified + tensor targets, or inversely). If `x` is a dataset or a + dataset iterator, `y` should not be specified (since targets will be obtained from the iterator). 
sample_weight: An optional sample-weight array passed by the user to weight the importance of each sample in `x`. @@ -706,11 +708,16 @@ class Model(Network): RuntimeError: If the model was never compiled. """ if isinstance(x, dataset_ops.Dataset): - raise ValueError('You passed a `Dataset` instance to your model (%s), ' - 'which is not supported. Instead, pass an `Iterator`, ' - 'which you can obtain e.g. via ' - '`dataset.make_one_shot_iterator()` (the exact method ' - 'to use will depend on your specific dataset).' % x) + if context.executing_eagerly(): + x = x.make_one_shot_iterator() + else: + if x in self._dataset_iterator_cache: + x = self._dataset_iterator_cache[x] + else: + iterator = x.make_initializable_iterator() + self._dataset_iterator_cache[x] = iterator + x = iterator + K.get_session().run(x.initializer) # Validates `steps` argument based on x's type. if check_steps: @@ -719,7 +726,7 @@ class Model(Network): is_x_eager_iterator = isinstance(x, iterator_ops.EagerIterator) is_x_iterator = isinstance(x, iterator_ops.Iterator) - # Validate user inputs when data is given as a dataset iterator. + # Validate user inputs when data is given as a dataset or dataset iterator. if is_x_iterator or is_x_eager_iterator: training_utils.validate_iterator_input(x, y, sample_weight, validation_split) @@ -1130,19 +1137,19 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset iterator, - `y` should not be specified + tensor targets, or inversely). If `x` is a dataset or dataset + iterator, `y` should not be specified (since targets will be obtained from the iterator). 
batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` if your data is in the - form of symbolic tensors or dataset iterators (since they generate - batches). + form of symbolic tensors, datasets, or dataset iterators + (since they generate batches). epochs: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. @@ -1164,7 +1171,7 @@ class Model(Network): on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is - not supported when `x` is a dataset iterator. + not supported when `x` is a dataset or a dataset iterator. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. @@ -1172,7 +1179,7 @@ class Model(Network): `validation_data` could be: - tuple `(x_val, y_val)` of Numpy arrays or tensors - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays - - dataset iterator + - dataset or a dataset iterator shuffle: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch'). 'batch' is a special option for dealing with the @@ -1195,7 +1202,7 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset iterator. + supported when `x` is a dataset or a dataset iterator. initial_epoch: Integer. Epoch at which to start training (useful for resuming a previous training run). @@ -1252,7 +1259,8 @@ class Model(Network): # Prepare validation data. 
if validation_data: if (isinstance(validation_data, iterator_ops.Iterator) or - isinstance(validation_data, iterator_ops.EagerIterator)): + isinstance(validation_data, iterator_ops.EagerIterator) or + isinstance(validation_data, dataset_ops.Dataset)): val_x = validation_data val_y = None val_sample_weight = None @@ -1266,8 +1274,9 @@ class Model(Network): 'When passing a `validation_data` argument, ' 'it must contain either 2 items (x_val, y_val), ' 'or 3 items (x_val, y_val, val_sample_weights), ' - 'or alternatively it could be a dataset iterator. However we ' - 'received `validation_data=%s`' % validation_data) + 'or alternatively it could be a dataset or a ' + 'dataset or a dataset iterator. ' + 'However we received `validation_data=%s`' % validation_data) # Validate and standardize validation data. val_x, val_y, val_sample_weights = self._standardize_user_data( @@ -1351,19 +1360,19 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset iterator, - `y` should not be specified - (since targets will be obtained from the iterator). + tensor targets, or inversely). + If `x` is a dataset or a dataset iterator, `y` should not be specified + (since targets will be obtained from the iterator/dataset). batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` is your data is in the - form of symbolic tensors or dataset iterators (since they generate - batches). + form of symbolic tensors, datasets, or dataset iterators + (since they generate batches). verbose: 0 or 1. 
Verbosity mode. 0 = silent, 1 = progress bar. sample_weight: Optional Numpy array of weights for @@ -1377,7 +1386,7 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset iterator. + supported when `x` is a dataset or a dataset iterator. steps: Integer or `None`. Total number of steps (batches of samples) before declaring the evaluation round finished. @@ -1426,13 +1435,13 @@ class Model(Network): (in case the model has multiple inputs). - A TensorFlow tensor, or a list of tensors (in case the model has multiple inputs). - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` is your data is in the - form of symbolic tensors or dataset iterators (since they generate - batches). + form of symbolic tensors, dataset, or dataset iterators + (since they generate batches). verbose: Verbosity mode, 0 or 1. steps: Total number of steps (batches of samples) before declaring the prediction round finished. @@ -1473,12 +1482,12 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset iterator, - `y` should not be specified + tensor targets, or inversely). If `x` is a dataset or a + dataset iterator, `y` should not be specified (since targets will be obtained from the iterator). 
sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. @@ -1487,8 +1496,7 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). This argument is not - supported when `x` is a dataset iterator. - + supported when `x` is a dataset or a dataset iterator. class_weight: Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples @@ -1537,12 +1545,12 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset iterator. + - A `tf.data` dataset or a dataset iterator. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset iterator, - `y` should not be specified + tensor targets, or inversely). If `x` is a dataset or a + dataset iterator, `y` should not be specified (since targets will be obtained from the iterator). sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. @@ -1551,7 +1559,7 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). This argument is not - supported when `x` is a dataset iterator. + supported when `x` is a dataset or a dataset iterator. Returns: Scalar test loss (if the model has a single output and no metrics) @@ -1590,7 +1598,7 @@ class Model(Network): (in case the model has multiple inputs). - A TensorFlow tensor, or a list of tensors (in case the model has multiple inputs). - - A `tf.data` dataset iterator. 
+ - A `tf.data` dataset or a dataset iterator. Returns: Numpy array(s) of predictions. diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 7dec0bbf8a..222e3496c1 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1742,7 +1742,7 @@ class TestTrainingWithDatasetIterators(test.TestCase): # Test with validation split with self.assertRaisesRegexp( ValueError, '`validation_split` argument is not supported ' - 'when input `x` is a dataset iterator'): + 'when input `x` is a dataset or a dataset iterator'): model.fit(iterator, epochs=1, steps_per_epoch=2, verbose=0, validation_split=0.5, validation_steps=2) @@ -1751,7 +1751,7 @@ class TestTrainingWithDatasetIterators(test.TestCase): sample_weight = np.random.random((10,)) with self.assertRaisesRegexp( ValueError, '`sample_weight` argument is not supported ' - 'when input `x` is a dataset iterator'): + 'when input `x` is a dataset or a dataset iterator'): model.fit( iterator, epochs=1, @@ -1760,10 +1760,6 @@ class TestTrainingWithDatasetIterators(test.TestCase): sample_weight=sample_weight) # Test invalid usage - with self.assertRaisesRegexp(ValueError, - 'Instead, pass an `Iterator`'): - model.fit(dataset, - epochs=1, steps_per_epoch=2, verbose=0) with self.assertRaisesRegexp(ValueError, 'you should not specify a target'): model.fit(iterator, iterator, @@ -1829,5 +1825,98 @@ class TestTrainingWithDatasetIterators(test.TestCase): 'dataset iterator ran out of data') +class TestTrainingWithDataset(test.TestCase): + + def test_calling_model_on_same_dataset(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3), dtype=np.float32) 
+ targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + # Call fit with validation data + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + validation_data=dataset, validation_steps=2) + # Finalize the graph to make sure new ops aren't added when calling on the + # same dataset + ops.get_default_graph().finalize() + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + validation_data=dataset, validation_steps=2) + + @tf_test_util.run_in_graph_and_eager_modes() + def test_training_and_eval_methods_on_dataset(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(dataset, steps=2) + model.train_on_batch(dataset) + model.predict_on_batch(dataset) + + # Test with validation data + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + validation_data=dataset, validation_steps=2) + + # Test with validation split + with self.assertRaisesRegexp( + ValueError, '`validation_split` argument is not supported ' + 'when input `x` is a dataset or a dataset iterator'): + model.fit(dataset, + epochs=1, steps_per_epoch=2, verbose=0, + validation_split=0.5, validation_steps=2) + + # Test with sample weight. 
+ sample_weight = np.random.random((10,)) + with self.assertRaisesRegexp( + ValueError, '`sample_weight` argument is not supported ' + 'when input `x` is a dataset or a dataset iterator'): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + sample_weight=sample_weight) + + # Test invalid usage + with self.assertRaisesRegexp(ValueError, + 'you should not specify a target'): + model.fit(dataset, dataset, + epochs=1, steps_per_epoch=2, verbose=0) + + with self.assertRaisesRegexp( + ValueError, 'you should specify the `steps_per_epoch` argument'): + model.fit(dataset, epochs=1, verbose=0) + with self.assertRaisesRegexp(ValueError, + 'you should specify the `steps` argument'): + model.evaluate(dataset, verbose=0) + with self.assertRaisesRegexp(ValueError, + 'you should specify the `steps` argument'): + model.predict(dataset, verbose=0) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 7d214d61a4..c53948b902 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -632,19 +632,20 @@ def validate_iterator_input(x, y, sample_weight, validation_split=None): provided by user. """ if y is not None: - raise ValueError('You passed a dataset iterator (%s) as input `x` to ' - 'your model. In that case, you should not specify ' - 'a target (`y`) argument, since the dataset iterator ' - 'generates both input data and target data. ' + raise ValueError('You passed a dataset or dataset iterator (%s) as ' + 'input `x` to your model. In that case, you should ' + 'not specify a target (`y`) argument, since the dataset ' + 'or dataset iterator generates both input data and ' + 'target data. ' 'Received: %s' % (x, y)) if sample_weight is not None: - raise ValueError('`sample_weight` argument is not supported when input' - ' `x` is a dataset iterator. 
' + raise ValueError('`sample_weight` argument is not supported when input ' + '`x` is a dataset or a dataset iterator. ' 'Received: x=%s, sample_weight=%s' % (x, sample_weight)) if validation_split is not None and validation_split != 0.0: raise ValueError( '`validation_split` argument is not supported when ' - 'input `x` is a dataset iterator. ' + 'input `x` is a dataset or a dataset iterator. ' 'Received: x=%s, validation_split=%f' % (x, validation_split)) -- GitLab From b23009234feddb0eba7870adc9334dfe60308e7c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 23 May 2018 16:06:03 -0700 Subject: [PATCH 062/902] Run only small and medium tests in CI builds. PiperOrigin-RevId: 197806292 --- tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh | 2 +- tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh | 2 +- tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh | 2 +- tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh | 2 +- tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh | 1 + tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh | 1 + tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh | 1 + 7 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh index 51e10f81f8..8eeddcdb82 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh @@ -34,5 +34,5 @@ yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test --test_lang_filters=cc,java -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --config=opt \ - --test_output=errors -- \ + --test_output=errors --test_size_filters=small,medium -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... 
diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh index ea14848b1a..8eca1987f0 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh @@ -33,5 +33,5 @@ yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only --config=opt \ - --test_output=errors -- \ + --test_output=errors --test_size_filters=small,medium -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh index c798081250..2b68de3c5b 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh @@ -33,7 +33,7 @@ yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --config=opt \ - --test_output=errors -- \ + --test_size_filters=small,medium --test_output=errors -- \ //tensorflow/contrib/... \ -//tensorflow/contrib/lite/... \ //tensorflow/contrib/lite:context_test \ diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh index a9accb9dd5..51eb2cd7e6 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh @@ -33,5 +33,5 @@ yes "" | $PYTHON_BIN_PATH configure.py # Run bazel test command. Double test timeouts to avoid flakes. 
bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only --config=opt \ - --test_output=errors -- \ + --test_output=errors --test_size_filters=small,medium -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh index 02224d8e9d..9d2c8383fa 100755 --- a/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/gpu/run_cc_core.sh @@ -37,5 +37,6 @@ yes "" | $PYTHON_BIN_PATH configure.py bazel test --config=cuda --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ --build_tests_only --test_output=errors --local_test_jobs=8 --config=opt \ + --test_size_filters=small,medium \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh index 0367a53d14..5b3383e105 100755 --- a/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/gpu/run_py3_core.sh @@ -37,5 +37,6 @@ yes "" | $PYTHON_BIN_PATH configure.py bazel test --config=cuda --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ --build_tests_only --test_output=errors --local_test_jobs=8 --config=opt \ + --test_size_filters=small,medium \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... 
diff --git a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh index a410c10b61..d085e21b03 100755 --- a/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh +++ b/tensorflow/tools/ci_build/xla/linux/gpu/run_py3.sh @@ -37,6 +37,7 @@ bazel clean # Run bazel test command. Double test timeouts to avoid flakes. bazel test --config=cuda --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --test_size_filters=small,medium \ --build_tests_only --test_output=errors --local_test_jobs=8 \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ --config=xla -- \ -- GitLab From fa5c52e31f30e8cb88a5452e3b4aefc786fb8852 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Wed, 23 May 2018 16:10:30 -0700 Subject: [PATCH 063/902] Add support for IndexedSlices in Distribution Strategy all reduce. Issue reported in #19069 PiperOrigin-RevId: 197806955 --- tensorflow/contrib/distribute/python/BUILD | 19 +++ .../distribute/python/cross_tower_ops.py | 41 +++-- .../distribute/python/cross_tower_ops_test.py | 135 +++++++++++++--- .../distribute/python/cross_tower_utils.py | 45 ++++++ .../python/cross_tower_utils_test.py | 152 ++++++++++++++++++ tensorflow/python/ops/gradients_impl.py | 30 ++-- tensorflow/python/ops/gradients_test.py | 48 ++++++ 7 files changed, 430 insertions(+), 40 deletions(-) create mode 100644 tensorflow/contrib/distribute/python/cross_tower_utils_test.py diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 00161b2d48..3118deaa47 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -445,6 +445,7 @@ py_library( srcs = ["cross_tower_utils.py"], srcs_version = "PY2AND3", deps = [ + ":values", "//tensorflow/contrib/nccl:nccl_py", "//tensorflow/python:array_ops", "//tensorflow/python:framework_ops", @@ -452,6 +453,24 @@ py_library( ], ) 
+cuda_py_test( + name = "cross_tower_utils_test", + srcs = ["cross_tower_utils_test.py"], + additional_deps = [ + ":combinations", + ":cross_tower_utils", + "@absl_py//absl/testing:parameterized", + "//tensorflow/python:constant_op", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], + tags = [ + "no_pip", + ], +) + py_library( name = "cross_tower_ops", srcs = ["cross_tower_ops.py"], diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops.py b/tensorflow/contrib/distribute/python/cross_tower_ops.py index c6a1bf6a9f..a411b880e8 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_ops.py +++ b/tensorflow/contrib/distribute/python/cross_tower_ops.py @@ -77,12 +77,12 @@ def _all_devices_match(value_destination_pairs): return True -def _simple_broadcast(tensor, destinations): +def _simple_broadcast(value, destinations): index = {} devices = _get_devices_from(destinations) for d in devices: - with ops.device(d): - index[d] = array_ops.identity(tensor) + index[d] = cross_tower_utils.copy_tensor_or_indexed_slices_to_device( + value, d) return value_lib.Mirrored(index) @@ -98,7 +98,9 @@ def _simple_reduce(per_device_value, reduce_to_device, accumulation_fn, continue count += len(v_list) # Sum within each device before aggregating across devices. - v = math_ops.add_n(v_list) + # TODO(yuefengz): Check whether it helps to use accumulation_fn here. 
+ v = cross_tower_utils.aggregate_tensors_or_indexed_slices( + v_list, math_ops.add_n) else: count += 1 all_values.append(v) @@ -107,11 +109,12 @@ def _simple_reduce(per_device_value, reduce_to_device, accumulation_fn, with ops.device(reduce_to_device): with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): - if method_string == "sum": - reduced = accumulation_fn(all_values) - elif method_string == "mean": - reduced = accumulation_fn(all_values) / count - else: + reduced = cross_tower_utils.aggregate_tensors_or_indexed_slices( + all_values, accumulation_fn) + if method_string == "mean": + reduced = cross_tower_utils.divide_by_n_tensors_or_indexed_slices( + reduced, count) + elif method_string != "sum": raise ValueError("`method_string` must be 'sum' or 'mean'") return reduced @@ -444,10 +447,18 @@ class AllReduceCrossTowerOps(CrossTowerOps): super(AllReduceCrossTowerOps, self).__init__() def _reduce(self, method_string, per_device_value, destinations): + contains_indexed_slices = cross_tower_utils.contains_indexed_slices( + per_device_value) if ((destinations is None or _devices_match(per_device_value, destinations)) - and not context.executing_eagerly()): + and not context.executing_eagerly() + and not contains_indexed_slices): return self._batch_all_reduce(method_string, [per_device_value])[0] else: + if contains_indexed_slices: + logging.log_first_n( + logging.WARN, + "Efficient allreduce is not supported for IndexedSlices.", 10) + devices = _get_devices_from(destinations or per_device_value) reduce_to_device = devices[0] reduced = _simple_reduce(per_device_value, reduce_to_device, @@ -455,14 +466,18 @@ class AllReduceCrossTowerOps(CrossTowerOps): return self.broadcast(reduced, devices) def _batch_reduce(self, method_string, value_destination_pairs): - if (_all_devices_match(value_destination_pairs) and - not context.executing_eagerly()): + all_devices_match = _all_devices_match(value_destination_pairs) + contains_indexed_slices = 
cross_tower_utils.contains_indexed_slices( + value_destination_pairs) + if (all_devices_match and not context.executing_eagerly() + and not contains_indexed_slices): return self._batch_all_reduce(method_string, [v[0] for v in value_destination_pairs]) else: - if not context.executing_eagerly(): + if not all_devices_match: logging.warning("Efficient batch_reduce is not supported if " "destinations are different.") + return [ self._reduce(method_string, t, destinations=v) for t, v in value_destination_pairs diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py index 7c7b087088..2a26632608 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py +++ b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.training import device_util def _make_per_device(values, devices): @@ -56,19 +57,46 @@ def _fake_mirrored(value, devices): {d: v for d, v in zip(devices, [value] * len(devices))}) +def _make_indexed_slices(values, indices, dense_shape, device): + with ops.device(device): + tensor = ops.IndexedSlices( + values=constant_op.constant(values), + indices=constant_op.constant(indices), + dense_shape=constant_op.constant(dense_shape)) + return tensor + + +def _make_mirrored_indexed_slices(devices, values, indices, dense_shape): + return value_lib.Mirrored({ + d: _make_indexed_slices(values, indices, dense_shape, d) for d in devices + }) + + _cpu_device = "/device:CPU:0" class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): - def _assert_value_equal(self, left, right): + def _assert_indexed_slices_equal(self, left, right): + self.assertIsInstance(left, ops.IndexedSlices) + self.assertIsInstance(right, ops.IndexedSlices) + 
self.assertEqual(device_util.resolve(left.device), + device_util.resolve(right.device)) + self.assertAllEqual( + self.evaluate(ops.convert_to_tensor(left)), + self.evaluate(ops.convert_to_tensor(right))) + + def _assert_values_equal(self, left, right): if isinstance(left, list): for l, r in zip(left, right): - self._assert_value_equal(l, r) + self._assert_values_equal(l, r) else: self.assertEqual(type(left), type(right)) self.assertEqual(left.devices, right.devices) - if context.executing_eagerly(): + if isinstance(list(left._index.values())[0], ops.IndexedSlices): + for (d, v) in left._index.iteritems(): + self._assert_indexed_slices_equal(v, right._index[d]) + elif context.executing_eagerly(): self.assertEqual([v.numpy() for v in left._index.values()], list(right._index.values())) else: @@ -143,29 +171,29 @@ class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): # test reduce() for destinations in all_destinations: - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.reduce("mean", per_device, destinations=destinations), _fake_mirrored(mean, destinations or per_device)) - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.reduce( "mean", per_device_2, destinations=destinations), _fake_mirrored(mean_2, destinations or per_device)) - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.reduce("sum", per_device, destinations=destinations), _fake_mirrored(mean * len(devices), destinations or per_device)) - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.reduce( "sum", per_device_2, destinations=destinations), _fake_mirrored(mean_2 * len(devices), destinations or per_device)) # test batch_reduce() for d1, d2 in itertools.product(all_destinations, all_destinations): - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.batch_reduce( "mean", [(per_device, d1), (per_device_2, d2)]), [_fake_mirrored(mean, d1 or per_device), _fake_mirrored(mean_2, d2 or 
per_device_2)]) - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.batch_reduce( "sum", [(per_device, d1), (per_device_2, d2)]), [_fake_mirrored(mean * len(devices), d1 or per_device), @@ -176,7 +204,7 @@ class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): if destinations is None: continue else: - self._assert_value_equal( + self._assert_values_equal( cross_tower_ops.broadcast(constant_op.constant(1.), destinations), _fake_mirrored(1., destinations)) @@ -184,16 +212,14 @@ class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6]] result = cross_tower_ops_lib._choose_all_reduce_algorithm(device_links) - self.assertTrue( - isinstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps)) + self.assertIsInstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps) self.assertEqual(result.all_reduce_alg, "hierarchical_copy") self.assertEqual(result.num_packs, 8) # if there are only 4 devices device_links = [[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7]] result = cross_tower_ops_lib._choose_all_reduce_algorithm(device_links) - self.assertTrue( - isinstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps)) + self.assertIsInstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps) self.assertEqual(result.all_reduce_alg, "nccl") self.assertEqual(result.num_packs, 1) @@ -202,8 +228,7 @@ class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): [0, 1, 2, 3, 7], [0, 4, 5, 6, 7], [1, 4, 5, 6, 7], [2, 4, 5, 6, 7], [3, 4, 5, 6, 7]] result = cross_tower_ops_lib._choose_all_reduce_algorithm(device_links) - self.assertTrue( - isinstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps)) + self.assertIsInstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps) self.assertEqual(result.all_reduce_alg, "hierarchical_copy") self.assertEqual(result.num_packs, 8) @@ -211,11 +236,85 @@ class 
CrossTowerOpsTest(test.TestCase, parameterized.TestCase): device_links = [[0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6], [1, 2, 3, 4]] result = cross_tower_ops_lib._choose_all_reduce_algorithm(device_links) - self.assertTrue( - isinstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps)) + self.assertIsInstance(result, cross_tower_ops_lib.AllReduceCrossTowerOps) self.assertEqual(result.all_reduce_alg, "nccl") self.assertEqual(result.num_packs, 1) + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + required_gpus=1)) + def testSimpleReduceWithIndexedSlices(self): + devices = ["/cpu:0", "/gpu:0"] + t0 = _make_indexed_slices([[1., 2.]], [1], [5, 2], devices[0]) + t1 = _make_indexed_slices([[3., 4.], [5., 6.]], [1, 3], [5, 2], devices[1]) + per_device = value_lib.PerDevice({devices[0]: t0, devices[1]: t1}) + result = cross_tower_ops_lib._simple_reduce(per_device, devices[0], + math_ops.add_n, "sum") + + # Test that the result is semantically equal to both the concatenated + # IndexedSlices with and without duplicate indices. 
+ total_with_dups = _make_indexed_slices( + [[1., 2.], [3., 4.], [5., 6.]], [1, 1, 3], [5, 2], devices[0]) + total_without_dups = _make_indexed_slices( + [[4., 6.], [5., 6.]], [1, 3], [5, 2], devices[0]) + self._assert_indexed_slices_equal(total_with_dups, result) + self._assert_indexed_slices_equal(total_without_dups, result) + + @combinations.generate(combinations.combine( + cross_tower_ops_instance=[ + combinations.NamedObject( + "ReductionToOneDeviceCrossTowerOps", + cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps()), + combinations.NamedObject( + "AllReduceCrossTowerOps", + cross_tower_ops_lib.AllReduceCrossTowerOps()) + ], + method_string=["sum", "mean"], + batch_reduce=[True, False], + mode=["graph", "eager"], + required_gpus=1)) + def testIndexedSlicesAllReduce(self, cross_tower_ops_instance, + method_string, batch_reduce): + devices = ["/cpu:0", "/gpu:0"] + dense_shape = [5, 2] + t0 = _make_indexed_slices([[1., 2.]], [1], dense_shape, devices[0]) + t1 = _make_indexed_slices( + [[3., 4.], [5., 6.]], [1, 3], dense_shape, devices[1]) + per_device = value_lib.PerDevice({devices[0]: t0, devices[1]: t1}) + + if batch_reduce: + result = cross_tower_ops_instance.batch_reduce(method_string, + [(per_device, devices)]) + else: + result = cross_tower_ops_instance.reduce(method_string, per_device, + devices) + + total_indices_with_dups = [1, 1, 3] + total_indices_without_dups = [1, 3] + + if method_string == "sum": + total_values_with_dups = [[1., 2.], [3., 4.], [5., 6.]] + total_values_without_dups = [[4., 6.], [5., 6.]] + else: + assert method_string == "mean" + total_values_with_dups = [[0.5, 1.], [1.5, 2.], [2.5, 3.]] + total_values_without_dups = [[2., 3.], [2.5, 3.]] + + total_mirrored_with_dups = _make_mirrored_indexed_slices( + devices, total_values_with_dups, total_indices_with_dups, dense_shape) + total_mirrored_without_dups = _make_mirrored_indexed_slices( + devices, total_values_without_dups, total_indices_without_dups, + dense_shape) + + # Test that 
the result is semantically equal to both the concatenated + # IndexedSlices, as well as when the duplicate indices are summed up. + if batch_reduce: + total_mirrored_with_dups = [total_mirrored_with_dups] + total_mirrored_without_dups = [total_mirrored_without_dups] + + self._assert_values_equal(total_mirrored_with_dups, result) + self._assert_values_equal(total_mirrored_without_dups, result) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils.py b/tensorflow/contrib/distribute/python/cross_tower_utils.py index fc04e2195f..8dd7831c2b 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_utils.py +++ b/tensorflow/contrib/distribute/python/cross_tower_utils.py @@ -21,9 +21,11 @@ from __future__ import print_function import collections as pycoll from tensorflow.contrib import nccl +from tensorflow.contrib.distribute.python import values as value_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops @@ -337,3 +339,46 @@ def unpack_small_tensors(tower_grads, packing): new_gv_list.insert(idx, gv[gi]) new_tower_grads.append(new_gv_list) return new_tower_grads + + +def aggregate_tensors_or_indexed_slices(values, accumulation_fn=math_ops.add_n): + """Aggregate tensors using `accumulation_fn` and IndexedSlices via concat.""" + if isinstance(values[0], ops.IndexedSlices): + return gradients_impl._AggregateIndexedSlicesGradients(values) # pylint: disable=protected-access + else: + return accumulation_fn(values) + + +def divide_by_n_tensors_or_indexed_slices(value, n): + if isinstance(value, ops.IndexedSlices): + value = gradients_impl._HandleNestedIndexedSlices(value) # pylint: disable=protected-access + return ops.IndexedSlices( + value.values / n, value.indices, value.dense_shape) + else: + return value / n + + +def 
copy_tensor_or_indexed_slices_to_device(value, device): + with ops.device(device): + if isinstance(value, ops.IndexedSlices): + copied_values = array_ops.identity(value.values) + copied_indices = array_ops.identity(value.indices) + copied_shape = array_ops.identity(value.dense_shape) + result = ops.IndexedSlices(copied_values, copied_indices, copied_shape) + else: + result = array_ops.identity(value) + return result + + +def contains_indexed_slices(value): + """Check whether the value is `IndexedSlices` or contains `IndexedSlices`.""" + if isinstance(value, ops.IndexedSlices): + return True + elif isinstance(value, (list, tuple, pycoll.Sequence)) and value: + return any(contains_indexed_slices(v) for v in value) + elif isinstance(value, value_lib.DistributedValues): + return contains_indexed_slices(list(value._index.values())) # pylint: disable=protected-access + elif isinstance(value, value_lib.MapOutput): + return contains_indexed_slices(value.get()) + else: + return False diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils_test.py b/tensorflow/contrib/distribute/python/cross_tower_utils_test.py new file mode 100644 index 0000000000..4ef8db6815 --- /dev/null +++ b/tensorflow/contrib/distribute/python/cross_tower_utils_test.py @@ -0,0 +1,152 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for cross_tower_utils.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import cross_tower_utils +from tensorflow.contrib.distribute.python import values as value_lib +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops +from tensorflow.python.training import device_util + + +class IndexedSlicesUtilsTest(test.TestCase, parameterized.TestCase): + + def _assert_values_equal(self, left, right): + self.assertAllEqual( + self.evaluate(ops.convert_to_tensor(left)), + self.evaluate(ops.convert_to_tensor(right))) + + @test_util.run_in_graph_and_eager_modes() + def testAggregateTensors(self): + t0 = constant_op.constant([[1., 2.], [0, 0], [3., 4.]]) + t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]]) + total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) + result = cross_tower_utils.aggregate_tensors_or_indexed_slices([t0, t1]) + self._assert_values_equal(total, result) + + @test_util.run_in_graph_and_eager_modes() + def testAggregateIndexedSlices(self): + t0 = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices( + constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) + total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) + result = cross_tower_utils.aggregate_tensors_or_indexed_slices([t0, t1]) + self.assertIsInstance(result, ops.IndexedSlices) + self._assert_values_equal(total, result) + + @test_util.run_in_graph_and_eager_modes() + def testDivideTensor(self): + t = constant_op.constant([[1., 2.], [0, 0], 
[3., 4.]]) + n = 2 + expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]]) + result = cross_tower_utils.divide_by_n_tensors_or_indexed_slices(t, n) + self._assert_values_equal(expected, result) + + @test_util.run_in_graph_and_eager_modes() + def testDivideIndexedSlices(self): + t = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + n = 2 + expected = constant_op.constant([[0.5, 1.], [0, 0], [1.5, 2.]]) + result = cross_tower_utils.divide_by_n_tensors_or_indexed_slices(t, n) + self.assertIsInstance(result, ops.IndexedSlices) + self._assert_values_equal(expected, result) + + @test_util.run_in_graph_and_eager_modes() + def testIsIndexedSlices(self): + t = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + self.assertTrue(cross_tower_utils.contains_indexed_slices(t)) + + @test_util.run_in_graph_and_eager_modes() + def testContainsIndexedSlices_List(self): + t0 = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices( + constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) + self.assertTrue(cross_tower_utils.contains_indexed_slices([t0, t1])) + + @test_util.run_in_graph_and_eager_modes() + def testContainsIndexedSlices_Tuple(self): + t0 = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices( + constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) + self.assertTrue(cross_tower_utils.contains_indexed_slices((t0, t1))) + + @test_util.run_in_graph_and_eager_modes() + def testContainsIndexedSlices_PerDevice(self): + t0 = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices( + constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) + per_device = value_lib.PerDevice({"/gpu:0": t0, "/cpu:0": t1}) + self.assertTrue(cross_tower_utils.contains_indexed_slices(per_device)) + + @test_util.run_in_graph_and_eager_modes() + def 
testContainsIndexedSlices_PerDeviceMapOutput(self): + t0 = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices( + constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) + per_device = value_lib.PerDevice({ + "/gpu:0": value_lib.MapOutput([t0]), + "/cpu:0": value_lib.MapOutput([t1])}) + self.assertTrue(cross_tower_utils.contains_indexed_slices(per_device)) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + required_gpus=1)) + def testCopyTensor(self): + with ops.device("/cpu:0"): + t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]]) + destination = "/gpu:0" + result = cross_tower_utils.copy_tensor_or_indexed_slices_to_device( + t, destination) + + self._assert_values_equal(t, result) + self.assertEqual(device_util.resolve(destination), + device_util.resolve(result.device)) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + required_gpus=1)) + def testCopyIndexedSlices(self): + with ops.device("/cpu:0"): + t = math_ops._as_indexed_slices( + constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) + destination = "/gpu:0" + result = cross_tower_utils.copy_tensor_or_indexed_slices_to_device( + t, destination) + + self.assertIsInstance(result, ops.IndexedSlices) + self._assert_values_equal(t, result) + self.assertEqual(device_util.resolve(destination), + device_util.resolve(result.device)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 716b54f07c..1e808fddb5 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -1006,21 +1006,33 @@ def _AggregatedGrads(grads, logging.vlog(2, " _AggregatedGrads %d x %s using %s", len(out_grad), tensor_shape, used) else: - out_grad = math_ops._as_indexed_slices_list( - [g for g in out_grad if g is not None]) - out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad] - # Form 
IndexedSlices out of the concatenated values and - # indices. - out_grads[i] = ops.IndexedSlices( - array_ops.concat([x.values for x in out_grad], 0), - array_ops.concat([x.indices for x in out_grad], 0), - out_grad[0].dense_shape) + out_grads[i] = _AggregateIndexedSlicesGradients(out_grad) else: # not out_grad # out_grads[i] is [], thus its aggregation is simply None. out_grads[i] = None return out_grads +def _AggregateIndexedSlicesGradients(grads): + """Aggregates gradients of type `IndexedSlices` by concatenation.""" + if len(grads) < 1: + return None + elif len(grads) == 1: + return grads[0] + else: + assert isinstance(grads[0], ops.IndexedSlices) + grads = math_ops._as_indexed_slices_list( # pylint: disable=protected-access + [g for g in grads if g is not None]) + grads = [_HandleNestedIndexedSlices(x) for x in grads] # pylint: disable=protected-access + # Form IndexedSlices out of the concatenated values and indices. + concat_grad = ops.IndexedSlices( + array_ops.concat([x.values for x in grads], axis=0), + array_ops.concat([x.indices for x in grads], axis=0), + grads[0].dense_shape) + + return concat_grad + + # TODO(vrv): Make this available when we want to make it public. def _hessian_vector_product(ys, xs, v): """Multiply the Hessian of `ys` wrt `xs` by `v`. 
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 70d500a108..6891501ae1 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -946,5 +946,53 @@ class CustomGradientTest(test_util.TensorFlowTestCase): self.assertAllEqual(g.eval(feed_dict={conditional: False}), [3.0]) +class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase): + + def _assert_indexed_slices_equal(self, left, right): + self.assertAllEqual( + self.evaluate(ops.convert_to_tensor(left)), + self.evaluate(ops.convert_to_tensor(right))) + + def testNoGradients(self): + self.assertIsNone(gradients_impl._AggregateIndexedSlicesGradients([])) + + def testOneGradient(self): + t = math_ops._as_indexed_slices(constant_op.constant( + [[1., 2.], [0, 0], [3., 4.]])) + result = gradients_impl._AggregateIndexedSlicesGradients([t]) + self._assert_indexed_slices_equal(t, result) + + def testMultipleGradients(self): + t0 = math_ops._as_indexed_slices(constant_op.constant( + [[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices(constant_op.constant( + [[0., 0.], [5, 6], [7., 8.]])) + total = constant_op.constant( + [[1., 2.], [5, 6], [10., 12.]]) + result = gradients_impl._AggregateIndexedSlicesGradients([t0, t1]) + self._assert_indexed_slices_equal(total, result) + + def testMultipleGradientsWithNones(self): + t0 = math_ops._as_indexed_slices(constant_op.constant( + [[1., 2.], [0, 0], [3., 4.]])) + t1 = math_ops._as_indexed_slices(constant_op.constant( + [[0., 0.], [5, 6], [7., 8.]])) + t3 = None + total = constant_op.constant( + [[1., 2.], [5, 6], [10., 12.]]) + result = gradients_impl._AggregateIndexedSlicesGradients([t0, t1, t3]) + self._assert_indexed_slices_equal(total, result) + + def testMixedTensorAndIndexedSlices(self): + t0 = math_ops._as_indexed_slices(constant_op.constant( + [[1., 2.], [0, 0], [3., 4.]])) + t1 = constant_op.constant( + [[0., 0.], [5, 6], [7., 8.]]) + total = 
constant_op.constant( + [[1., 2.], [5, 6], [10., 12.]]) + result = gradients_impl._AggregateIndexedSlicesGradients([t0, t1]) + self._assert_indexed_slices_equal(total, result) + + if __name__ == "__main__": googletest.main() -- GitLab From 3a17101171d3e51fcba2189d09416c5106bfe4ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 16:33:27 -0700 Subject: [PATCH 064/902] Make depthwiseconv handler handle filter ranges beyond 255 PiperOrigin-RevId: 197810361 --- tensorflow/contrib/lite/kernels/conv.cc | 1 + .../contrib/lite/kernels/depthwise_conv.cc | 5 +- .../lite/kernels/depthwise_conv_test.cc | 43 ++++++++++-- .../contrib/lite/kernels/fully_connected.cc | 1 + .../internal/optimized/depthwiseconv_uint8.h | 67 +++++++++++++------ .../depthwiseconv_uint8_3x3_filter.h | 22 +++--- .../internal/reference/depthwiseconv_uint8.h | 4 +- .../contrib/lite/kernels/kernel_util.cc | 1 - 8 files changed, 100 insertions(+), 44 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index 0b35a220e7..ee42e5cdc8 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -254,6 +254,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { double real_multiplier = 0.0; TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); + TF_LITE_ENSURE(context, real_multiplier < 1.0); QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, &data->output_shift); CalculateActivationRangeUint8(params->activation, output, diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index abb2549f85..a308de055f 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -151,8 +151,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { double real_multiplier = 
0.0; TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); - QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, - &data->output_shift); + int exponent; + QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); + data->output_shift = -exponent; CalculateActivationRangeUint8(params->activation, output, &data->output_activation_min, &data->output_activation_max); diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc index 1439c8bce1..c00cafb9fb 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc @@ -47,12 +47,6 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel { } output_ = AddOutput(output); - if (input.type != TensorType_FLOAT32) { - // The following is required by quantized inference. It is the unittest's - // responsibility to make sure the output scale falls into the correct - // range. 
- CHECK_LT(GetScale(input_) * GetScale(filter_), GetScale(output_)); - } int input_depth = GetShape(input_)[3]; int output_depth = GetShape(filter_)[3]; @@ -176,6 +170,43 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) { })); } +TEST(QuantizedDepthwiseConvolutionOpTest, + SimpleTestQuantizedFilterMultiplierGreaterThan1) { + QuantizedDepthwiseConvolutionOpModel quant_op( + {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, + {TensorType_UINT8, {1, 2, 2, 4}, -128.5, 128}, + {TensorType_UINT8, {}, -127, 128}); + DepthwiseConvolutionOpModel float_op({TensorType_FLOAT32, {1, 3, 2, 2}}, + {TensorType_FLOAT32, {1, 2, 2, 4}}, + {TensorType_FLOAT32, {}}); + + std::initializer_list input = { + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }; + std::initializer_list filter = { + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::initializer_list bias = {1, 2, 3, 4}; + + quant_op.SetInput(input); + quant_op.SetFilter(filter); + quant_op.SetBias(bias); + quant_op.Invoke(); + + float_op.SetInput(input); + float_op.SetFilter(filter); + float_op.SetBias(bias); + float_op.Invoke(); + + EXPECT_THAT(quant_op.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1))); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index 1b942a1910..989920622d 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -117,6 +117,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { double real_multiplier = 0.0; TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); + TF_LITE_ENSURE(context, real_multiplier < 1.0); QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, &data->output_shift); 
CalculateActivationRangeUint8(params->activation, output, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index dd6932ffe7..75cf987be6 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1691,14 +1691,16 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, const int filter_width = ArraySize(filter_dims, 1); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); + const bool shift_left = (output_shift <= 0); + const int32 multiplier_power_of_two = shift_left ? (1 << -output_shift) : 1; TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); #ifdef __aarch64__ // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. - if (Fast3x3FilterKernelSupported(input_dims, filter_dims, stride_width, - stride_height, pad_width, pad_height, - depth_multiplier, output_dims)) { + if (Fast3x3FilterKernelSupported( + input_dims, filter_dims, stride_width, stride_height, pad_width, + pad_height, depth_multiplier, output_dims, output_shift)) { DepthwiseConv3x3Filter(input_data, input_dims, input_offset, filter_data, filter_dims, filter_offset, bias_data, bias_dims, stride_width, stride_height, pad_width, pad_height, @@ -1833,12 +1835,20 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, acc[j] = vld1q_s32(acc_buffer + i + 4 * j); } - // Fixed-point multiplication. - for (int j = 0; j < 4; j++) { - acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier); - } - for (int j = 0; j < 4; j++) { - acc[j] = RoundingDivideByPOT(acc[j], output_shift); + if (!shift_left) { + // Fixed-point multiplication. 
+ for (int j = 0; j < 4; j++) { + acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier); + } + for (int j = 0; j < 4; j++) { + acc[j] = RoundingDivideByPOT(acc[j], output_shift); + } + } else { + // Fixed-point multiplication. + for (int j = 0; j < 4; j++) { + acc[j] = vmulq_n_s32(acc[j], multiplier_power_of_two); + acc[j] = vqrdmulhq_n_s32(acc[j], output_multiplier); + } } // Add the output offset. for (int j = 0; j < 4; j++) { @@ -1870,12 +1880,21 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, for (; i <= num_output_values - 8; i += 8) { int32x4_t acc0 = vld1q_s32(acc_buffer + i); int32x4_t acc1 = vld1q_s32(acc_buffer + i + 4); - // Fixed-point multiplication. - acc0 = vqrdmulhq_n_s32(acc0, output_multiplier); - acc1 = vqrdmulhq_n_s32(acc1, output_multiplier); - // Rounding right shift. - acc0 = RoundingDivideByPOT(acc0, output_shift); - acc1 = RoundingDivideByPOT(acc1, output_shift); + if (!shift_left) { + // Fixed-point multiplication. + acc0 = vqrdmulhq_n_s32(acc0, output_multiplier); + acc1 = vqrdmulhq_n_s32(acc1, output_multiplier); + // Rounding right shift. + acc0 = RoundingDivideByPOT(acc0, output_shift); + acc1 = RoundingDivideByPOT(acc1, output_shift); + } else { + // Fixed-point multiplication. + acc0 = vmulq_n_s32(acc0, multiplier_power_of_two); + acc0 = vqrdmulhq_n_s32(acc0, output_multiplier); + + acc1 = vmulq_n_s32(acc1, multiplier_power_of_two); + acc1 = vqrdmulhq_n_s32(acc1, output_multiplier); + } // Add the output offset. acc0 = vaddq_s32(acc0, output_offset_vec); acc1 = vaddq_s32(acc1, output_offset_vec); @@ -1899,10 +1918,16 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, // that will have to go through the very slow scalar code. for (; i <= num_output_values - 4; i += 4) { int32x4_t acc = vld1q_s32(acc_buffer + i); - // Fixed-point multiplication. - acc = vqrdmulhq_n_s32(acc, output_multiplier); - // Rounding right shift. 
- acc = RoundingDivideByPOT(acc, output_shift); + if (!shift_left) { + // Fixed-point multiplication. + acc = vqrdmulhq_n_s32(acc, output_multiplier); + // Rounding right shift. + acc = RoundingDivideByPOT(acc, output_shift); + } else { + // Fixed-point multiplication. + acc = vmulq_n_s32(acc, multiplier_power_of_two); + acc = vqrdmulhq_n_s32(acc, output_multiplier); + } // Add the output offset. acc = vaddq_s32(acc, output_offset_vec); // Apply the activation function. @@ -1923,8 +1948,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, // Handle leftover values, one by one. This is very slow. for (; i < num_output_values; i++) { int32 acc = acc_buffer[i]; - acc = MultiplyByQuantizedMultiplierSmallerThanOne( - acc, output_multiplier, output_shift); + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 55e0d5c3aa..9b1a45ebdf 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -4408,12 +4408,10 @@ struct ConvRow3x3FilterDepth8<8, 1, 1> { } }; -inline bool Fast3x3FilterKernelSupported(const Dims<4>& input_dims, - const Dims<4>& filter_dims, - int stride_width, int stride_height, - int pad_width, int pad_height, - int depth_multiplier, - const Dims<4>& output_dims) { +inline bool Fast3x3FilterKernelSupported( + const Dims<4>& input_dims, const Dims<4>& filter_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int depth_multiplier, + const Dims<4>& output_dims, int output_shift) { const int input_height = ArraySize(input_dims, 2); const int input_width = 
ArraySize(input_dims, 1); const int input_depth = ArraySize(input_dims, 0); @@ -4422,12 +4420,12 @@ inline bool Fast3x3FilterKernelSupported(const Dims<4>& input_dims, const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); - bool supported = filter_width == 3 && filter_height == 3 && - depth_multiplier == 1 && - (stride_width == 1 || stride_width == 2) && - (stride_height == 1 || stride_height == 2) && - (stride_width == stride_height) && pad_width == 0 && - pad_height == 0 && (input_depth % 8) == 0; + bool supported = + filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && + (stride_width == 1 || stride_width == 2) && + (stride_height == 1 || stride_height == 2) && + (stride_width == stride_height) && pad_width == 0 && pad_height == 0 && + (input_depth % 8) == 0 && (output_shift > 0); if (!supported) { return false; diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h index e9b6baeaee..d57739279f 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -76,8 +76,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, if (bias_data) { acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)]; } - acc = MultiplyByQuantizedMultiplierSmallerThanOne( - acc, output_multiplier, output_shift); + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + -output_shift); acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); diff --git a/tensorflow/contrib/lite/kernels/kernel_util.cc b/tensorflow/contrib/lite/kernels/kernel_util.cc index 239b533a17..184028427f 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.cc +++ b/tensorflow/contrib/lite/kernels/kernel_util.cc @@ -37,7 +37,6 @@ TfLiteStatus 
GetQuantizedConvolutionMultipler(TfLiteContext* context, TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 1e-6 * std::min(input_product_scale, bias_scale)); TF_LITE_ENSURE(context, input_product_scale >= 0); - TF_LITE_ENSURE(context, input_product_scale < output_scale); *multiplier = input_product_scale / output_scale; -- GitLab From 4ba9e8eed9dfe0727db000bdd8be5384f39e6bd9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 16:34:00 -0700 Subject: [PATCH 065/902] Open source rewrite_for_inference(). PiperOrigin-RevId: 197810460 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 150 +++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index e2f57ce9c5..f531ae5fad 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -21,6 +21,7 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu_function @@ -867,3 +868,152 @@ def rewrite(computation, device_assignment=device_assignment, name=name)[0] # pylint: enable=indexing-exception + + # Operations that indicate some error in the user's inference graph. +_BLACKLISTED_INFERENCE_OPS = set([ + "ReadVariableOp", + "AssignVariableOp", + "AssignAddVariableOp", + "AssignSubVariableOp", + "VarHandleOp", + "Variable", + "VariableV2", +]) + + +class _TPUInferenceContext(control_flow_ops.XLAControlFlowContext): + """A `ControlFlowContext` for nodes inside a TPU inference computation. + + The primary role of `_TPUInferenceContext` is to sanity check operators inside + a tpu.rewrite_for_inference() computation.
+ """ + + def __init__(self, name): + super(_TPUInferenceContext, self).__init__() + self._name = name + + def AddOp(self, op): + self._AddOpInternal(op) + + def _AddOpInternal(self, op): + # pylint: disable=protected-access + if op.type in _BLACKLISTED_INFERENCE_OPS: + raise NotImplementedError( + "Operation of type %s (%s) is not supported on the TPU for inference." + " Execution will fail if this op is used in the graph. Make sure your" + " variables are using variable_scope." % (op.type, op.name)) + if self._outer_context: + self._outer_context.AddInnerOp(op) + + def AddValue(self, val): + result = val + if self._outer_context: + result = self._outer_context.AddValue(val) + return result + + def AddInnerOp(self, op): + self._AddOpInternal(op) + + @property + def grad_state(self): + return None + + +@experimental +def validate_inference_rewrite_for_variables(graph): + """Validates whether rewrite_for_inference() 'worked' for variables. + + The rewrite_for_inference() method is supposed to append + GuaranteeConstOps after ReadVariableOps, but this mechanism works only + if you are using tf.get_variable() to create and access variables in your + tpu computation. This validation method can be called immediately after + calling tpu.rewrite_for_inference() to check whether GuaranteeConstOps + were added to the graph. + + Typical usages: + tpu.validate_inference_rewrite_for_variables(tf.get_default_graph()) + + tpu.validate_inference_rewrite_for_variables(sess.graph) + + Args: + graph: The graph which needs to be validated. + Raises: + RuntimeError: if validation failed. + """ + if not any([x.type == "GuaranteeConst" for x in graph.get_operations()]): + raise RuntimeError( + "No GuaranteeConst ops found in the graph after " + "running tpu.rewrite_for_inference(...). 
Please " + "check that you are using tf.get_variable() to " + "create and access variables in your tpu " + "computation.") + + +@experimental +def rewrite_for_inference(computation, + inputs=None, + infeed_queue=None, + device_assignment=None, + name=None): + """Rewrites `computation` for inference on a TPU system. + + Other than 'rewriting' the computation to run on a TPU, if using variables + in your computation, it moves the ReadVariableOps outside the TPU + computation, and adds GuaranteeConst ops just after the ReadVariableOps. + This mechanism works only if you are using tf.get_variable() to create and + access variables in your tpu computation. You can validate whether + this worked, by calling validate_inference_rewrite_for_variables() method + immediately after this method to check whether GuaranteeConstOps were + added to the graph. + + Args: + computation: A Python function that builds a computation to apply + to the input. If the function takes n inputs, 'inputs' should be + a list of n tensors. If the function returns m outputs, rewrite + will return a list of m tensors. + inputs: A list of input tensors or `None` (equivalent to an empty list). + infeed_queue: If not `None`, the `InfeedQueue` from which to append a tuple + of arguments as inputs to `computation`. + device_assignment: if not `None`, a `DeviceAssignment` describing the + mapping between logical cores in the computation with physical cores in + the TPU topology. May be omitted for a single-core computation, in which + case the core attached to task 0, TPU device 0 is used. + name: The name of the operator. + Returns: + A list of output tensors.
+ """ + + def guarantee_const_getter(getter, name, *args, **kwargs): + with ops.control_dependencies(None): + return array_ops.guarantee_const( + getter(name, *args, **kwargs), name=name + "/GuaranteeConst") + + def wrapped_computation(*args, **kwargs): + """Execute computation under `_TPUInferenceContext`.""" + context = _TPUInferenceContext( + name=ops.get_default_graph().unique_name("rewrite_for_inference")) + try: + context.Enter() + + vscope = variable_scope.get_variable_scope() + prev_custom_getter = vscope.custom_getter + prev_caching_device = vscope.caching_device + vscope.set_custom_getter(guarantee_const_getter) + vscope.set_caching_device(lambda op: op.device) + + result = computation(*args, **kwargs) + + vscope.set_custom_getter(prev_custom_getter) + vscope.set_caching_device(prev_caching_device) + finally: + context.Exit() + return result + + # pylint: disable=undefined-variable + return rewrite( + wrapped_computation, + inputs=inputs, + infeed_queue=infeed_queue, + device_assignment=device_assignment, + name=name) + # pylint: enable=undefined-variable -- GitLab From dac1f124020234fe24e8893a981b15395d0c6de8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 23 May 2018 16:45:26 -0700 Subject: [PATCH 066/902] Simplify the remapper code and added support for non scalar mean, variance, scale and offset. 
PiperOrigin-RevId: 197812268 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../core/grappler/optimizers/remapper.cc | 87 ++++++++++++++++--- .../core/grappler/optimizers/remapper_test.cc | 37 ++++++++ .../fold_old_batch_norms_test.cc | 2 +- 4 files changed, 112 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 104a0428ce..f6860695ec 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -695,6 +695,7 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/grappler:devices", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 2a62871293..efd870b118 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -28,10 +28,71 @@ namespace grappler { void AddBatchNormNodes(GraphDef* optimized_graph, const NodeDef& fused_node) { const string& x = fused_node.input(0); - const string& scale = fused_node.input(1); - const string& offset = fused_node.input(2); - const string& mean = fused_node.input(3); - const string& variance = fused_node.input(4); + string scale = fused_node.input(1); + string offset = fused_node.input(2); + string mean = fused_node.input(3); + string variance = fused_node.input(4); + + if (fused_node.attr().at("data_format").s() == "NCHW") { + // Need to reshape the last 4 inputs + NodeDef* new_shape = optimized_graph->add_node(); + new_shape->set_name(AddPrefixToNodeName("NCHWShape", fused_node.name())); + new_shape->set_op("Const"); + new_shape->set_device(fused_node.device()); + *new_shape->add_input() = AsControlDependency(scale); + 
(*new_shape->mutable_attr())["dtype"].set_type(DT_INT32); + Tensor t(DT_INT32, {4}); + t.flat()(0) = 1; + t.flat()(1) = -1; + t.flat()(2) = 1; + t.flat()(3) = 1; + t.AsProtoTensorContent( + (*new_shape->mutable_attr())["value"].mutable_tensor()); + + NodeDef* reshaped_scale = optimized_graph->add_node(); + reshaped_scale->set_name( + AddPrefixToNodeName("NCHWShapedScale", fused_node.name())); + reshaped_scale->set_op("Reshape"); + reshaped_scale->set_device(fused_node.device()); + *reshaped_scale->add_input() = scale; + *reshaped_scale->add_input() = new_shape->name(); + (*reshaped_scale->mutable_attr())["T"] = fused_node.attr().at("T"); + (*reshaped_scale->mutable_attr())["Tshape"].set_type(DT_INT32); + scale = reshaped_scale->name(); + + NodeDef* reshaped_offset = optimized_graph->add_node(); + reshaped_offset->set_name( + AddPrefixToNodeName("NCHWShapedOffset", fused_node.name())); + reshaped_offset->set_op("Reshape"); + reshaped_offset->set_device(fused_node.device()); + *reshaped_offset->add_input() = offset; + *reshaped_offset->add_input() = new_shape->name(); + (*reshaped_offset->mutable_attr())["T"] = fused_node.attr().at("T"); + (*reshaped_offset->mutable_attr())["Tshape"].set_type(DT_INT32); + offset = reshaped_offset->name(); + + NodeDef* reshaped_mean = optimized_graph->add_node(); + reshaped_mean->set_name( + AddPrefixToNodeName("NCHWShapedMean", fused_node.name())); + reshaped_mean->set_op("Reshape"); + reshaped_mean->set_device(fused_node.device()); + *reshaped_mean->add_input() = mean; + *reshaped_mean->add_input() = new_shape->name(); + (*reshaped_mean->mutable_attr())["T"] = fused_node.attr().at("T"); + (*reshaped_mean->mutable_attr())["Tshape"].set_type(DT_INT32); + mean = reshaped_mean->name(); + + NodeDef* reshaped_variance = optimized_graph->add_node(); + reshaped_variance->set_name( + AddPrefixToNodeName("NCHWShapedVariance", fused_node.name())); + reshaped_variance->set_op("Reshape"); + reshaped_variance->set_device(fused_node.device()); + 
*reshaped_variance->add_input() = variance; + *reshaped_variance->add_input() = new_shape->name(); + (*reshaped_variance->mutable_attr())["T"] = fused_node.attr().at("T"); + (*reshaped_variance->mutable_attr())["Tshape"].set_type(DT_INT32); + variance = reshaped_variance->name(); + } float epsilon = 0.0f; if (fused_node.attr().count("epsilon")) { @@ -118,20 +179,16 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, optimizable &= (node.attr().count("is_training") == 0 || !node.attr().at("is_training").b()); if (optimizable) { - std::unordered_set const_inputs; - for (const string& input : node.input()) { - int pos; - const string input_node = ParseNodeName(input, &pos); - if (properties.HasInputProperties(input_node)) { - const auto& props = properties.GetInputProperties(input_node); - if (props.size() > pos && props[pos].has_value()) { - const_inputs.insert(pos); - } + int const_inputs = 0; + const auto& props = properties.GetInputProperties(node.name()); + for (const auto& prop : props) { + if (prop.has_value()) { + const_inputs += 1; } } // TODO(bsteiner): use the cost model to compare the cost of fused batch // norm against that of the optimized form. - optimizable = (const_inputs.size() >= 4); + optimizable = (const_inputs >= 4); } if (optimizable) { for (GraphView::Edge edge : graph.GetFanoutEdges(node, false)) { @@ -143,6 +200,8 @@ Status Remapper::Optimize(Cluster* /*cluster*/, const GrapplerItem& item, } } if (optimizable) { + std::cout << "Optimizing fused batch norm node " << node.DebugString() + << std::endl; AddBatchNormNodes(optimized_graph, node); continue; } diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index 291585c538..4cbf0d8d6f 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/remapper.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/platform/test.h" @@ -54,5 +55,41 @@ TEST_F(RemapperTest, FusedBatchNorm) { test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } +TEST_F(RemapperTest, FusedBatchNormNCHW) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output dflt = + ops::Const(s.WithOpName("dflt"), {3.14f, 2.7f, 1.0f, 2.0f, 3.0f, 100.0f}, + {1, 3, 1, 2}); + Output x = ops::PlaceholderWithDefault(s.WithOpName("x"), dflt, {1, 3, 1, 2}); + Output scale = ops::Const(s.WithOpName("scale"), {0.3f, 7.0f, 123.0f}, {3}); + Output offset = + ops::Const(s.WithOpName("offset"), {0.123f, 2.1f, 0.55f}, {3}); + Output mean = ops::Const(s.WithOpName("mean"), {7.3f, 8.3f, 3.1f}, {3}); + Output variance = + ops::Const(s.WithOpName("variance"), {0.57f, 1.0f, 2.0f}, {3}); + ops::FusedBatchNorm::Attrs attr; + attr = attr.IsTraining(false); + attr = attr.DataFormat("NCHW"); + ops::FusedBatchNorm bn(s.WithOpName("batch_norm").WithDevice("/device:GPU:0"), + x, scale, offset, mean, variance, attr); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"batch_norm"}; + + Remapper optimizer(RewriterConfig::ON); + GraphDef output; + TF_CHECK_OK(optimizer.Optimize(nullptr, item, &output)); + + if (GetNumAvailableGPUs() > 0) { + // NCHW batch norm is only supported on GPU. 
+ auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + EXPECT_EQ(1, tensors_expected.size()); + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); + } +} + } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 7651a03fe5..435f46c107 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -191,7 +191,7 @@ class FoldOldBatchNormsTest : public ::testing::Test { std::vector fused_outputs; TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); - test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 2e-5); for (const NodeDef& node : fused_graph_def.node()) { EXPECT_NE("FusedBatchNorm", node.op()); -- GitLab From 2307db76a2a07c7af6581e0ef4c6a5a0b83921f4 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 23 May 2018 17:14:39 -0700 Subject: [PATCH 067/902] Refactor StatSummarizer extract common functionality without proto dependencies. 
PiperOrigin-RevId: 197816405 --- .../contrib/android/jni/run_stats_jni.cc | 4 +- tensorflow/contrib/lite/profiling/BUILD | 27 ++ .../lite/profiling/profile_summarizer.cc | 140 ++++++++ .../lite/profiling/profile_summarizer.h | 58 ++++ .../lite/profiling/profile_summarizer_test.cc | 116 +++++++ tensorflow/core/BUILD | 12 + tensorflow/core/util/stat_summarizer.cc | 300 ++---------------- tensorflow/core/util/stat_summarizer.h | 188 +++-------- .../core/util/stat_summarizer_options.h | 43 +++ tensorflow/core/util/stats_calculator.cc | 289 +++++++++++++++++ tensorflow/core/util/stats_calculator.h | 189 +++++++++++ tensorflow/python/util/stat_summarizer.i | 2 +- tensorflow/tools/benchmark/benchmark_model.cc | 10 +- 13 files changed, 943 insertions(+), 435 deletions(-) create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer.cc create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer.h create mode 100644 tensorflow/contrib/lite/profiling/profile_summarizer_test.cc create mode 100644 tensorflow/core/util/stat_summarizer_options.h create mode 100644 tensorflow/core/util/stats_calculator.cc create mode 100644 tensorflow/core/util/stats_calculator.h diff --git a/tensorflow/contrib/android/jni/run_stats_jni.cc b/tensorflow/contrib/android/jni/run_stats_jni.cc index 707853b59b..30de7b59af 100644 --- a/tensorflow/contrib/android/jni/run_stats_jni.cc +++ b/tensorflow/contrib/android/jni/run_stats_jni.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/contrib/android/jni/run_stats_jni.h" #include + #include #include "tensorflow/core/protobuf/config.pb.h" @@ -73,7 +74,8 @@ JNIEXPORT jstring RUN_STATS_METHOD(summary)(JNIEnv* env, jclass clazz, StatSummarizer* s = requireHandle(env, handle); if (s == nullptr) return nullptr; std::stringstream ret; - ret << s->GetStatsByMetric("Top 10 CPU", StatSummarizer::BY_TIME, 10) + ret << s->GetStatsByMetric("Top 10 CPU", tensorflow::StatsCalculator::BY_TIME, + 10) << s->GetStatsByNodeType() << s->ShortSummary(); return env->NewStringUTF(ret.str().c_str()); } diff --git a/tensorflow/contrib/lite/profiling/BUILD b/tensorflow/contrib/lite/profiling/BUILD index 15999e5d41..c86be65ca7 100644 --- a/tensorflow/contrib/lite/profiling/BUILD +++ b/tensorflow/contrib/lite/profiling/BUILD @@ -31,6 +31,33 @@ cc_library( copts = common_copts, ) +cc_library( + name = "profile_summarizer", + srcs = ["profile_summarizer.cc"], + hdrs = ["profile_summarizer.h"], + deps = [ + ":profiler", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/schema:schema_fbs", + "//tensorflow/core:stats_calculator_portable", + ], +) + +cc_test( + name = "profile_summarizer_test", + srcs = ["profile_summarizer_test.cc"], + deps = [ + ":profile_summarizer", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/kernels:kernel_util", + "//tensorflow/contrib/lite/kernels:test_util", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "profile_buffer_test", srcs = ["profile_buffer_test.cc"], diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.cc b/tensorflow/contrib/lite/profiling/profile_summarizer.cc new file mode 100644 index 0000000000..788f6922d2 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.cc @@ -0,0 +1,140 @@ +/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" + +#include + +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace profiling { +namespace { + +using Detail = tensorflow::StatsCalculator::Detail; + +struct OperatorDetails { + string name; + std::vector inputs; + std::vector outputs; +}; + +string GetTensorName(const tflite::Interpreter& interpreter, int tensor_index) { + const auto tensor = interpreter.tensor(tensor_index); + if (tensor == nullptr || tensor->name == nullptr) { + return "Unknown"; + } + return tensor->name; +} +std::vector GetTensorNames(const tflite::Interpreter& interpreter, + const TfLiteIntArray* tensor_indices) { + std::vector tensors; + tensors.reserve(tensor_indices->size); + for (int i = 0; i < tensor_indices->size; i++) { + tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i])); + } + return tensors; +} + +string ToString(const std::vector& str_vector) { + std::stringstream stream; + stream << "["; + bool first = true; + for (const auto& s : str_vector) { + if (!first) { + stream << ", "; + } else { + first = false; + } + stream << s; + } + stream << "]"; + return stream.str(); +} + +OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, + int node_index) { + auto node_reg = 
interpreter.node_and_registration(node_index); + auto inputs = node_reg->first.inputs; + auto outputs = node_reg->first.outputs; + int code = node_reg->second.builtin_code; + const char* op_name = nullptr; + if (code == tflite::BuiltinOperator_CUSTOM) { + const char* custom_name = node_reg->second.custom_name; + op_name = custom_name ? custom_name : "UnknownCustomOp"; + } else { + op_name = tflite::EnumNamesBuiltinOperator()[code]; + } + OperatorDetails details; + details.name = op_name; + details.inputs = GetTensorNames(interpreter, inputs); + details.outputs = GetTensorNames(interpreter, outputs); + return details; +} + +} // namespace + +ProfileSummarizer::ProfileSummarizer() + : stats_calculator_(new ::tensorflow::StatsCalculator( + tensorflow::StatSummarizerOptions())) {} + +void ProfileSummarizer::ProcessProfiles( + const std::vector& profile_stats, + const tflite::Interpreter& interpreter) { + std::vector events; + std::copy_if(profile_stats.begin(), profile_stats.end(), + std::back_inserter(events), [](const ProfileEvent* e) { + return e->event_type == + ProfileEvent::EventType::OPERATOR_INVOKE_EVENT && + e->end_timestamp_us >= e->begin_timestamp_us; + }); + // Sort with begin_time. 
+ std::sort(events.begin(), events.end(), + [](const ProfileEvent* const& a, const ProfileEvent* const& b) { + return a->begin_timestamp_us < b->begin_timestamp_us; + }); + if (events.empty()) { + return; + } + + int64_t base_start_us = events[0]->begin_timestamp_us; + int node_num = 0; + int64_t curr_total_us = 0; + std::map details; + for (auto event : events) { + auto op_details = GetOperatorDetails(interpreter, event->event_metadata); + auto node_name = ToString(op_details.outputs); + auto result = details.emplace(node_name, Detail()); + Detail* detail = &(result.first->second); + detail->start_us.UpdateStat(event->begin_timestamp_us - base_start_us); + int64_t node_exec_time = + event->end_timestamp_us - event->begin_timestamp_us; + detail->rel_end_us.UpdateStat(node_exec_time); + curr_total_us += node_exec_time; + ++node_num; + + if (result.second) { + detail->name = node_name; + detail->type = op_details.name; + detail->run_order = node_num; + detail->times_called = 0; + } + ++detail->times_called; + } + stats_calculator_->UpdateDetails(details); + stats_calculator_->UpdateRunTotalUs(curr_total_us); +} +} // namespace profiling +} // namespace tflite diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.h b/tensorflow/contrib/lite/profiling/profile_summarizer.h new file mode 100644 index 0000000000..6fe6ca04f5 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.h @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ + +#include + +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/profiling/profiler.h" +#include "tensorflow/core/util/stats_calculator.h" + +namespace tflite { +namespace profiling { + +// Creates a summary of operator invocations in the interpreter. +class ProfileSummarizer { + public: + ProfileSummarizer(); + virtual ~ProfileSummarizer() {} + + // Process profile events to update statistics for operator invocations. + void ProcessProfiles(const std::vector& profile_stats, + const tflite::Interpreter& interpreter); + + // Returns a string detailing the accumulated runtime stats in a tab-separated + // format which can be pasted into a spreadsheet for further analysis. + std::string GetOutputString() const { + return stats_calculator_->GetOutputString(); + } + + std::string GetShortSummary() const { + return stats_calculator_->GetShortSummary(); + } + + // Prints the string returned by GetOutputString(). + void PrintStepStats() const { stats_calculator_->PrintStepStats(); } + + private: + std::unique_ptr stats_calculator_; +}; + +} // namespace profiling +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_SUMMARIZER_H_ diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc new file mode 100644 index 0000000000..35cf780713 --- /dev/null +++ b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc @@ -0,0 +1,116 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" +#include "tensorflow/contrib/lite/testing/util.h" +#include "tensorflow/contrib/lite/version.h" + +namespace tflite { +namespace profiling { + +namespace { + +TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input1 = tflite::GetInput(context, node, /*index=*/0); + const TfLiteTensor* input2 = tflite::GetInput(context, node, /*index=*/1); + + TfLiteTensor* output = GetOutput(context, node, /*index=*/0); + + int32_t* output_data = output->data.i32; + *output_data = *(input1->data.i32) + *(input2->data.i32); + return kTfLiteOk; +} + +TfLiteRegistration* RegisterSimpleOp() { + static TfLiteRegistration registration = {nullptr, + nullptr, + nullptr, + SimpleOpEval, + tflite::BuiltinOperator_CUSTOM, + "SimpleOpEval", + 1}; + return ®istration; +} + +class SimpleOpModel : public SingleOpModel { + public: + void Init(); + tflite::Interpreter* GetInterpreter() { return interpreter_.get(); } + void SetInputs(int32_t x, int32_t y) { + PopulateTensor(inputs_[0], {x}); + PopulateTensor(inputs_[1], {y}); + } + int32_t GetOutput() { return ExtractVector(output_)[0]; } + + private: + int inputs_[2]; + int output_; +}; + +void SimpleOpModel::Init() { + 
inputs_[0] = AddInput({TensorType_INT32, {1}}); + inputs_[1] = AddInput({TensorType_INT32, {1}}); + output_ = AddOutput({TensorType_INT32, {}}); + SetCustomOp("SimpleAdd", {}, RegisterSimpleOp); + BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])}); +} + +TEST(ProfileSummarizerTest, Empty) { + ProfileSummarizer summarizer; + std::string output = summarizer.GetOutputString(); + EXPECT_GT(output.size(), 0); +} + +#ifdef TFLITE_PROFILING_ENABLED +TEST(ProfileSummarizerTest, Interpreter) { + Profiler profiler; + SimpleOpModel m; + m.Init(); + auto interpreter = m.GetInterpreter(); + interpreter->SetProfiler(&profiler); + profiler.StartProfiling(); + m.SetInputs(1, 2); + m.Invoke(); + // 3 = 1 + 2 + EXPECT_EQ(m.GetOutput(), 3); + profiler.StopProfiling(); + ProfileSummarizer summarizer; + auto events = profiler.GetProfileEvents(); + EXPECT_EQ(1, events.size()); + summarizer.ProcessProfiles(profiler.GetProfileEvents(), *interpreter); + auto output = summarizer.GetOutputString(); + // TODO(shashishekhar): Add a better test here. 
+ ASSERT_TRUE(output.find("SimpleOp") != std::string::npos) << output; +} +#endif + +} // namespace +} // namespace profiling +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 19e88d6ff1..ce68ee174d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -827,6 +827,8 @@ tf_cuda_library( "util/sparse/group_iterator.h", "util/sparse/sparse_tensor.h", "util/stat_summarizer.h", + "util/stat_summarizer_options.h", + "util/stats_calculator.h", "util/stream_executor_util.h", "util/strided_slice_op.h", "util/tensor_format.h", @@ -851,6 +853,16 @@ tf_cuda_library( deps = [":framework_internal"], ) +cc_library( + name = "stats_calculator_portable", + srcs = ["util/stats_calculator.cc"], + hdrs = [ + "util/stat_summarizer_options.h", + "util/stats_calculator.h", + ], + deps = [":platform_base"], +) + cc_library( name = "overflow", hdrs = ["util/overflow.h"], diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 8447028e38..42a4801dcb 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -31,26 +31,22 @@ limitations under the License. 
namespace tensorflow { +using Detail = StatsCalculator::Detail; + StatSummarizer::StatSummarizer(const StatSummarizerOptions& options) - : options_(options) {} + : stats_calculator_(new StatsCalculator(options)) {} StatSummarizer::StatSummarizer(const tensorflow::GraphDef& tensorflow_graph) - : StatSummarizer(StatSummarizerOptions()) {} + : stats_calculator_(new StatsCalculator(StatSummarizerOptions())) {} StatSummarizer::~StatSummarizer() {} -void StatSummarizer::Reset() { - run_total_us_.Reset(); - memory_.Reset(); - details_.clear(); -} - -void StatSummarizer::Validate(const Detail* detail, +void StatSummarizer::Validate(const std::vector* outputs, const NodeExecStats& ns) const { - if (detail->outputs.size() != ns.output_size()) { + if (outputs->size() != ns.output_size()) { LOG(WARNING) << "Number of outputs changed between runs for '" - << ns.node_name() << "' - was " << detail->outputs.size() - << ", now " << ns.output_size(); + << ns.node_name() << "' - was " << outputs->size() << ", now " + << ns.output_size(); } else { for (const auto& output : ns.output()) { const int32 slot = output.slot(); @@ -58,7 +54,7 @@ void StatSummarizer::Validate(const Detail* detail, // This is not a hard error for Switch ops, so just pass. 
continue; } - const auto& stored = detail->outputs[slot]; + const auto& stored = (*outputs)[slot]; const auto& current = output.tensor_description(); bool do_tensors_match = @@ -129,6 +125,7 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { int64 first_node_start_us = step_stats.dev_stats(0).node_stats(0).all_start_micros(); + std::map details; int node_num = 0; for (const auto& ds : step_stats.dev_stats()) { @@ -172,7 +169,10 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { ++node_num; const int64 curr_time = ns.all_end_rel_micros(); curr_total_us += curr_time; - auto result = details_.emplace(name, Detail()); + auto result = details.emplace(name, Detail()); + auto output_result = + outputs_.emplace(name, std::vector()); + std::vector* outputs = &(output_result.first->second); Detail* detail = &(result.first->second); detail->start_us.UpdateStat(ns.all_start_micros() - first_node_start_us); @@ -185,16 +185,15 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { detail->run_order = node_num; - detail->outputs.resize(ns.output_size()); + outputs->resize(ns.output_size()); for (const auto& output : ns.output()) { const int32 slot = output.slot(); if ((slot < 0) || (slot >= ns.output_size())) { // This is not a hard error for Switch ops, so just pass. 
continue; } - detail->outputs[slot] = output.tensor_description(); + (*outputs)[slot] = output.tensor_description(); } - detail->times_called = 0; } @@ -207,273 +206,22 @@ void StatSummarizer::ProcessStepStats(const StepStats& step_stats) { mem_total += curr_node_mem; ++detail->times_called; + stats_calculator_->UpdateDetails(details); - Validate(detail, ns); - } - } - - run_total_us_.UpdateStat(curr_total_us); - memory_.UpdateStat(mem_total); -} - -std::string StatSummarizer::ShortSummary() const { - std::stringstream stream; - stream << "Timings (microseconds): "; - run_total_us_.OutputToStream(&stream); - stream << std::endl; - - stream << "Memory (bytes): "; - memory_.OutputToStream(&stream); - stream << std::endl; - - stream << details_.size() << " nodes observed" << std::endl; - return stream.str(); -} - -std::ostream& InitField(std::ostream& stream, int width) { - stream << "\t" << std::right << std::setw(width) << std::fixed - << std::setprecision(3); - return stream; -} - -std::string StatSummarizer::HeaderString(const string& title) const { - std::stringstream stream; - - stream << "============================== " << title - << " ==============================" << std::endl; - - InitField(stream, 24) << "[node type]"; - InitField(stream, 9) << "[start]"; - InitField(stream, 9) << "[first]"; - InitField(stream, 9) << "[avg ms]"; - InitField(stream, 8) << "[%]"; - InitField(stream, 8) << "[cdf%]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 9) << "[times called]"; - stream << "\t" - << "[Name]"; - return stream.str(); -} - -std::string StatSummarizer::ColumnString(const Detail& detail, - const int64 cumulative_stat_on_node, - const Stat& stat) const { - const double start_ms = detail.start_us.avg() / 1000.0; - const double first_time_ms = detail.rel_end_us.first() / 1000.0; - const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; - const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); - const double cdf_percentage = 
(cumulative_stat_on_node * 100.0f) / stat.sum(); - const int64 times_called = detail.times_called / num_runs(); - - std::stringstream stream; - InitField(stream, 24) << detail.type; - InitField(stream, 9) << start_ms; - InitField(stream, 9) << first_time_ms; - InitField(stream, 9) << avg_time_ms; - InitField(stream, 7) << percentage << "%"; - InitField(stream, 7) << cdf_percentage << "%"; - InitField(stream, 10) << detail.mem_used.newest() / 1000.0; - InitField(stream, 9) << times_called; - stream << "\t" << detail.name; - - return stream.str(); -} - -void StatSummarizer::OrderNodesByMetric( - SortingMetric metric, std::vector* details) const { - std::priority_queue> sorted_list; - const int num_nodes = details_.size(); - - for (const auto& det : details_) { - const Detail* detail = &(det.second); - std::stringstream stream; - stream << std::setw(20) << std::right << std::setprecision(10) - << std::fixed; - - switch (metric) { - case BY_NAME: - stream << detail->name; - break; - case BY_RUN_ORDER: - stream << num_nodes - detail->run_order; - break; - case BY_TIME: - stream << detail->rel_end_us.avg(); - break; - case BY_MEMORY: - stream << detail->mem_used.avg(); - break; - case BY_TYPE: - stream << detail->type; - break; - default: - stream << ""; - break; + Validate(outputs, ns); } - - sorted_list.emplace(stream.str(), detail); - } - - while (!sorted_list.empty()) { - auto entry = sorted_list.top(); - sorted_list.pop(); - details->push_back(entry.second); } -} - -void StatSummarizer::ComputeStatsByType( - std::map* node_type_map_count, - std::map* node_type_map_time, - std::map* node_type_map_memory, - std::map* node_type_map_times_called, - int64* accumulated_us) const { - int64 run_count = run_total_us_.count(); - - for (const auto& det : details_) { - const string node_name = det.first; - const Detail& detail = det.second; - - int64 curr_time_val = - static_cast(detail.rel_end_us.sum() / run_count); - *accumulated_us += curr_time_val; - int64 curr_memory_val = 
detail.mem_used.newest(); - - const string& node_type = detail.type; - - (*node_type_map_count)[node_type] += 1; - (*node_type_map_time)[node_type] += curr_time_val; - (*node_type_map_memory)[node_type] += curr_memory_val; - (*node_type_map_times_called)[node_type] += detail.times_called / run_count; - } + stats_calculator_->UpdateRunTotalUs(curr_total_us); + stats_calculator_->UpdateMemoryUsed(mem_total); } -std::string StatSummarizer::GetStatsByNodeType() const { - std::stringstream stream; - - stream << "============================== Summary by node type " - "==============================" - << std::endl; - - LOG(INFO) << "Number of nodes executed: " << details_.size(); - - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; - int64 accumulated_us = 0; - - ComputeStatsByType(&node_type_map_count, &node_type_map_time, - &node_type_map_memory, &node_type_map_times_called, - &accumulated_us); - - // Sort them. 
- std::priority_queue>> timings; - for (const auto& node_type : node_type_map_time) { - const int64 mem_used = node_type_map_memory[node_type.first]; - timings.emplace(node_type.second, - std::pair(node_type.first, mem_used)); - } - - InitField(stream, 24) << "[Node type]"; - InitField(stream, 9) << "[count]"; - InitField(stream, 10) << "[avg ms]"; - InitField(stream, 11) << "[avg %]"; - InitField(stream, 11) << "[cdf %]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[times called]"; - stream << std::endl; - - float cdf = 0.0f; - while (!timings.empty()) { - auto entry = timings.top(); - timings.pop(); - - const string node_type = entry.second.first; - const float memory = entry.second.second / 1000.0f; - - const int64 node_type_total_us = entry.first; - const float time_per_run_ms = node_type_total_us / 1000.0f; - - const float percentage = - ((entry.first / static_cast(accumulated_us)) * 100.0f); - cdf += percentage; - - InitField(stream, 24) << node_type; - InitField(stream, 9) << node_type_map_count[node_type]; - InitField(stream, 10) << time_per_run_ms; - InitField(stream, 10) << percentage << "%"; - InitField(stream, 10) << cdf << "%"; - InitField(stream, 10) << memory; - InitField(stream, 9) << node_type_map_times_called[node_type]; - stream << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetStatsByMetric(const string& title, - SortingMetric sorting_metric, - int num_stats) const { - std::vector details; - OrderNodesByMetric(sorting_metric, &details); - - double cumulative_stat_on_node = 0; - - std::stringstream stream; - stream << HeaderString(title) << std::endl; - int stat_num = 0; - for (auto detail : details) { - ++stat_num; - if (num_stats > 0 && stat_num > num_stats) { - break; - } - - // TODO(andrewharp): Make this keep track of the particular metric for cdf. 
- cumulative_stat_on_node += detail->rel_end_us.sum(); - stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) - << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatSummarizer::GetOutputString() const { - std::stringstream stream; - if (options_.show_run_order) { - stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, - options_.run_order_limit); - } - if (options_.show_time) { - stream << GetStatsByMetric("Top by Computation Time", BY_TIME, - options_.time_limit); - } - if (options_.show_memory) { - stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, - options_.memory_limit); - } - if (options_.show_type) { - stream << GetStatsByNodeType(); - } - if (options_.show_summary) { - stream << ShortSummary() << std::endl; - } - return stream.str(); -} - -void StatSummarizer::PrintStepStats() const { - string output = GetOutputString(); - std::istringstream iss(output); - for (std::string line; std::getline(iss, line);) { - LOG(INFO) << line; - } -} void StatSummarizer::PrintOutputs() const { std::priority_queue< std::pair*>> timings; - for (const auto& entry : details_) { + for (const auto& entry : stats_calculator_->GetDetails()) { timings.emplace(-entry.second.start_us.avg(), &entry); } @@ -481,10 +229,10 @@ void StatSummarizer::PrintOutputs() const { while (!timings.empty()) { auto entry = timings.top(); timings.pop(); - const Detail& detail = entry.second->second; std::stringstream stream; - stream << entry.second->first << "\t" << detail.outputs.size(); - for (const auto& tensor : detail.outputs) { + const auto detail_outputs = outputs_.at(entry.second->first); + stream << entry.second->first << "\t" << detail_outputs.size(); + for (const auto& tensor : detail_outputs) { stream << "\t" << DataTypeString(tensor.dtype()); stream << "\t" << tensor.shape().dim_size(); for (const auto& d : tensor.shape().dim()) { diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index 
79fa63723e..39cd948525 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -13,20 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ -#define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ +#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ +#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ #include #include #include #include +#include #include #include #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/stat_summarizer_options.h" +#include "tensorflow/core/util/stats_calculator.h" namespace tensorflow { @@ -34,103 +37,6 @@ class GraphDef; class StepStats; class NodeExecStats; -template -class Stat { - public: - void UpdateStat(ValueType v) { - if (count_ == 0) { - first_ = v; - } - - newest_ = v; - max_ = std::max(v, max_); - min_ = std::min(v, min_); - ++count_; - sum_ += v; - squared_sum_ += static_cast(v) * v; - } - - void Reset() { new (this) Stat(); } - - bool empty() const { return count_ == 0; } - - ValueType first() const { return first_; } - - ValueType newest() const { return newest_; } - - ValueType max() const { return max_; } - - ValueType min() const { return min_; } - - int64 count() const { return count_; } - - ValueType sum() const { return sum_; } - - HighPrecisionValueType squared_sum() const { return squared_sum_; } - - bool all_same() const { return (count_ == 0 || min_ == max_); } - - HighPrecisionValueType avg() const { - return empty() ? std::numeric_limits::quiet_NaN() - : static_cast(sum_) / count_; - } - - ValueType std_deviation() const { - return all_same() ? 
0 : sqrt(squared_sum_ / count_ - avg() * avg()); - } - - void OutputToStream(std::ostream* stream) const { - if (empty()) { - *stream << "count=0"; - } else if (all_same()) { - *stream << "count=" << count_ << " curr=" << newest_; - if (count_ > 1) *stream << "(all same)"; - } else { - *stream << "count=" << count_ << " first=" << first_ - << " curr=" << newest_ << " min=" << min_ << " max=" << max_ - << " avg=" << avg() << " std=" << std_deviation(); - } - } - - friend std::ostream& operator<<(std::ostream& stream, - const Stat& stat) { - stat.OutputToStream(&stream); - return stream; - } - - private: - ValueType first_ = 0; - ValueType newest_ = 0; - ValueType max_ = std::numeric_limits::min(); - ValueType min_ = std::numeric_limits::max(); - int64 count_ = 0; - ValueType sum_ = 0; - HighPrecisionValueType squared_sum_ = 0; -}; - -// Used to control the output of the statistics summarizer; -class StatSummarizerOptions { - public: - StatSummarizerOptions() - : show_run_order(true), - run_order_limit(0), - show_time(true), - time_limit(10), - show_memory(true), - memory_limit(10), - show_type(true), - show_summary(true) {} - - bool show_run_order; - int run_order_limit; - bool show_time; - int time_limit; - bool show_memory; - int memory_limit; - bool show_type; - bool show_summary; -}; - // A StatSummarizer assists in performance analysis of Graph executions. // // It summarizes time spent executing (on GPU/CPU), memory used etc. across @@ -140,14 +46,6 @@ class StatSummarizerOptions { // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. class StatSummarizer { public: - enum SortingMetric { - BY_NAME, - BY_RUN_ORDER, - BY_TIME, - BY_MEMORY, - BY_TYPE, - }; - explicit StatSummarizer(const StatSummarizerOptions& options); // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. 
The @@ -161,65 +59,51 @@ class StatSummarizer { // Returns a string detailing the accumulated runtime stats in a tab-separated // format which can be pasted into a spreadsheet for further analysis. - std::string GetOutputString() const; + std::string GetOutputString() const { + return stats_calculator_->GetOutputString(); + } - std::string ShortSummary() const; + std::string ShortSummary() const { + return stats_calculator_->GetShortSummary(); + } // Prints the string returned by GetOutputString(). - void PrintStepStats() const; + void PrintStepStats() const { stats_calculator_->PrintStepStats(); } // Prints the output tensor sizes and types for each node. void PrintOutputs() const; - void ComputeStatsByType(std::map* node_type_map_count, - std::map* node_type_map_time, - std::map* node_type_map_memory, - std::map* node_type_map_times_called, - int64* accumulated_us) const; + void ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const { + stats_calculator_->ComputeStatsByType( + node_type_map_count, node_type_map_time, node_type_map_memory, + node_type_map_times_called, accumulated_us); + } - std::string GetStatsByNodeType() const; + std::string GetStatsByNodeType() const { + return stats_calculator_->GetStatsByNodeType(); + } std::string GetStatsByMetric(const string& title, - SortingMetric sorting_metric, - int num_stats) const; - - void Reset(); + StatsCalculator::SortingMetric sorting_metric, + int num_stats) const { + return stats_calculator_->GetStatsByMetric(title, sorting_metric, + num_stats); + } - // Returns number of runs. - int num_runs() const { return static_cast(run_total_us_.count()); } + private: + void Validate(const std::vector* outputs, + const NodeExecStats& ns) const; - // Returns stats of total microseconds spent by all nodes in each run. 
- const Stat& run_total_us() const { return run_total_us_; } + std::map > outputs_; - private: - struct Detail { - string name; - string type; - int64 run_order; - Stat start_us; - Stat rel_end_us; - Stat mem_used; - std::vector outputs; - int64 times_called; - }; - - void Validate(const Detail* detail, const NodeExecStats& ns) const; - - void OrderNodesByMetric(SortingMetric sorting_metric, - std::vector* details) const; - - std::string HeaderString(const string& title) const; - std::string ColumnString(const Detail& detail, - const int64 cumulative_stat_on_node, - const Stat& stat) const; - - Stat run_total_us_; - Stat memory_; - - std::map details_; - StatSummarizerOptions options_; + std::unique_ptr stats_calculator_; }; } // namespace tensorflow -#endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ +#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_H_ diff --git a/tensorflow/core/util/stat_summarizer_options.h b/tensorflow/core/util/stat_summarizer_options.h new file mode 100644 index 0000000000..578020676b --- /dev/null +++ b/tensorflow/core/util/stat_summarizer_options.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ +#define TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ +namespace tensorflow { +// Used to control the output of the statistics summarizer; +class StatSummarizerOptions { + public: + StatSummarizerOptions() + : show_run_order(true), + run_order_limit(0), + show_time(true), + time_limit(10), + show_memory(true), + memory_limit(10), + show_type(true), + show_summary(true) {} + + bool show_run_order; + int run_order_limit; + bool show_time; + int time_limit; + bool show_memory; + int memory_limit; + bool show_type; + bool show_summary; +}; +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_STAT_SUMMARIZER_OPTIONS_H_ diff --git a/tensorflow/core/util/stats_calculator.cc b/tensorflow/core/util/stats_calculator.cc new file mode 100644 index 0000000000..20353ec76e --- /dev/null +++ b/tensorflow/core/util/stats_calculator.cc @@ -0,0 +1,289 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/stats_calculator.h" + +#include +#include +#include +#include +#include + +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) + : options_(options) {} + +std::string StatsCalculator::GetShortSummary() const { + std::stringstream stream; + stream << "Timings (microseconds): "; + run_total_us_.OutputToStream(&stream); + stream << std::endl; + + stream << "Memory (bytes): "; + memory_.OutputToStream(&stream); + stream << std::endl; + + stream << details_.size() << " nodes observed" << std::endl; + return stream.str(); +} + +std::ostream& InitField(std::ostream& stream, int width) { + stream << "\t" << std::right << std::setw(width) << std::fixed + << std::setprecision(3); + return stream; +} + +std::string StatsCalculator::HeaderString(const std::string& title) const { + std::stringstream stream; + + stream << "============================== " << title + << " ==============================" << std::endl; + + InitField(stream, 24) << "[node type]"; + InitField(stream, 9) << "[start]"; + InitField(stream, 9) << "[first]"; + InitField(stream, 9) << "[avg ms]"; + InitField(stream, 8) << "[%]"; + InitField(stream, 8) << "[cdf%]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 9) << "[times called]"; + stream << "\t" + << "[Name]"; + return stream.str(); +} + +std::string StatsCalculator::ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat& stat) const { + const double start_ms = detail.start_us.avg() / 1000.0; + const double first_time_ms = detail.rel_end_us.first() / 1000.0; + const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; + const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); + const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); + const int64_t times_called 
= detail.times_called / num_runs(); + + std::stringstream stream; + InitField(stream, 24) << detail.type; + InitField(stream, 9) << start_ms; + InitField(stream, 9) << first_time_ms; + InitField(stream, 9) << avg_time_ms; + InitField(stream, 7) << percentage << "%"; + InitField(stream, 7) << cdf_percentage << "%"; + InitField(stream, 10) << detail.mem_used.newest() / 1000.0; + InitField(stream, 9) << times_called; + stream << "\t" << detail.name; + + return stream.str(); +} + +void StatsCalculator::OrderNodesByMetric( + SortingMetric metric, std::vector* details) const { + std::priority_queue> sorted_list; + const int num_nodes = details_.size(); + + for (const auto& det : details_) { + const Detail* detail = &(det.second); + std::stringstream stream; + stream << std::setw(20) << std::right << std::setprecision(10) + << std::fixed; + + switch (metric) { + case BY_NAME: + stream << detail->name; + break; + case BY_RUN_ORDER: + stream << num_nodes - detail->run_order; + break; + case BY_TIME: + stream << detail->rel_end_us.avg(); + break; + case BY_MEMORY: + stream << detail->mem_used.avg(); + break; + case BY_TYPE: + stream << detail->type; + break; + default: + stream << ""; + break; + } + + sorted_list.emplace(stream.str(), detail); + } + + while (!sorted_list.empty()) { + auto entry = sorted_list.top(); + sorted_list.pop(); + details->push_back(entry.second); + } +} + +void StatsCalculator::ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const { + int64_t run_count = run_total_us_.count(); + + for (const auto& det : details_) { + const string node_name = det.first; + const Detail& detail = det.second; + + int64_t curr_time_val = + static_cast(detail.rel_end_us.sum() / run_count); + *accumulated_us += curr_time_val; + + int64_t curr_memory_val = detail.mem_used.newest(); + + const string& node_type = detail.type; + + 
(*node_type_map_count)[node_type] += 1; + (*node_type_map_time)[node_type] += curr_time_val; + (*node_type_map_memory)[node_type] += curr_memory_val; + (*node_type_map_times_called)[node_type] += detail.times_called / run_count; + } +} + +std::string StatsCalculator::GetStatsByNodeType() const { + std::stringstream stream; + + stream << "============================== Summary by node type " + "==============================" + << std::endl; + + LOG(INFO) << "Number of nodes executed: " << details_.size(); + + std::map node_type_map_count; + std::map node_type_map_time; + std::map node_type_map_memory; + std::map node_type_map_times_called; + int64_t accumulated_us = 0; + + ComputeStatsByType(&node_type_map_count, &node_type_map_time, + &node_type_map_memory, &node_type_map_times_called, + &accumulated_us); + + // Sort them. + std::priority_queue>> timings; + for (const auto& node_type : node_type_map_time) { + const int64_t mem_used = node_type_map_memory[node_type.first]; + timings.emplace(node_type.second, + std::pair(node_type.first, mem_used)); + } + + InitField(stream, 24) << "[Node type]"; + InitField(stream, 9) << "[count]"; + InitField(stream, 10) << "[avg ms]"; + InitField(stream, 11) << "[avg %]"; + InitField(stream, 11) << "[cdf %]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 10) << "[times called]"; + stream << std::endl; + + float cdf = 0.0f; + while (!timings.empty()) { + auto entry = timings.top(); + timings.pop(); + + const string node_type = entry.second.first; + const float memory = entry.second.second / 1000.0f; + + const int64_t node_type_total_us = entry.first; + const float time_per_run_ms = node_type_total_us / 1000.0f; + + const float percentage = + ((entry.first / static_cast(accumulated_us)) * 100.0f); + cdf += percentage; + + InitField(stream, 24) << node_type; + InitField(stream, 9) << node_type_map_count[node_type]; + InitField(stream, 10) << time_per_run_ms; + InitField(stream, 10) << percentage << "%"; + 
InitField(stream, 10) << cdf << "%"; + InitField(stream, 10) << memory; + InitField(stream, 9) << node_type_map_times_called[node_type]; + stream << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const { + std::vector details; + OrderNodesByMetric(sorting_metric, &details); + + double cumulative_stat_on_node = 0; + + std::stringstream stream; + stream << HeaderString(title) << std::endl; + int stat_num = 0; + for (auto detail : details) { + ++stat_num; + if (num_stats > 0 && stat_num > num_stats) { + break; + } + + // TODO(andrewharp): Make this keep track of the particular metric for cdf. + cumulative_stat_on_node += detail->rel_end_us.sum(); + stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) + << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetOutputString() const { + std::stringstream stream; + if (options_.show_run_order) { + stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, + options_.run_order_limit); + } + if (options_.show_time) { + stream << GetStatsByMetric("Top by Computation Time", BY_TIME, + options_.time_limit); + } + if (options_.show_memory) { + stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, + options_.memory_limit); + } + if (options_.show_type) { + stream << GetStatsByNodeType(); + } + if (options_.show_summary) { + stream << GetShortSummary() << std::endl; + } + return stream.str(); +} + +void StatsCalculator::PrintStepStats() const { + string output = GetOutputString(); + std::istringstream iss(output); + for (std::string line; std::getline(iss, line);) { + LOG(INFO) << line; + } +} + +void StatsCalculator::UpdateDetails( + const std::map& details) { + details_.insert(details.begin(), details.end()); +} + +} // namespace tensorflow diff --git a/tensorflow/core/util/stats_calculator.h 
b/tensorflow/core/util/stats_calculator.h new file mode 100644 index 0000000000..a1033465fb --- /dev/null +++ b/tensorflow/core/util/stats_calculator.h @@ -0,0 +1,189 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ +#define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ + +#include + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/util/stat_summarizer_options.h" + +namespace tensorflow { + +template +class Stat { + public: + void UpdateStat(ValueType v) { + if (count_ == 0) { + first_ = v; + } + + newest_ = v; + max_ = std::max(v, max_); + min_ = std::min(v, min_); + ++count_; + sum_ += v; + squared_sum_ += static_cast(v) * v; + } + + void Reset() { new (this) Stat(); } + + bool empty() const { return count_ == 0; } + + ValueType first() const { return first_; } + + ValueType newest() const { return newest_; } + + ValueType max() const { return max_; } + + ValueType min() const { return min_; } + + int64_t count() const { return count_; } + + ValueType sum() const { return sum_; } + + HighPrecisionValueType squared_sum() const { return squared_sum_; } + + bool all_same() const { return (count_ == 0 || min_ == max_); } + + HighPrecisionValueType avg() const { + return empty() ? 
std::numeric_limits::quiet_NaN() + : static_cast(sum_) / count_; + } + + ValueType std_deviation() const { + return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg()); + } + + void OutputToStream(std::ostream* stream) const { + if (empty()) { + *stream << "count=0"; + } else if (all_same()) { + *stream << "count=" << count_ << " curr=" << newest_; + if (count_ > 1) *stream << "(all same)"; + } else { + *stream << "count=" << count_ << " first=" << first_ + << " curr=" << newest_ << " min=" << min_ << " max=" << max_ + << " avg=" << avg() << " std=" << std_deviation(); + } + } + + friend std::ostream& operator<<(std::ostream& stream, + const Stat& stat) { + stat.OutputToStream(&stream); + return stream; + } + + private: + ValueType first_ = 0; + ValueType newest_ = 0; + ValueType max_ = std::numeric_limits::min(); + ValueType min_ = std::numeric_limits::max(); + int64_t count_ = 0; + ValueType sum_ = 0; + HighPrecisionValueType squared_sum_ = 0; +}; + +// A StatsCalculator assists in performance analysis of Graph executions. +// +// It summarizes time spent executing (on GPU/CPU), memory used etc for +// graph execution. +// +// For example usage see StatsSummarizer. +class StatsCalculator { + public: + enum SortingMetric { + BY_NAME, + BY_RUN_ORDER, + BY_TIME, + BY_MEMORY, + BY_TYPE, + }; + + explicit StatsCalculator(const StatSummarizerOptions& options); + + // Returns a string detailing the accumulated runtime stats in a tab-separated + // format which can be pasted into a spreadsheet for further analysis. + std::string GetOutputString() const; + + std::string GetShortSummary() const; + + // Prints the string returned by GetOutputString(). 
+ void PrintStepStats() const; + + void ComputeStatsByType( + std::map* node_type_map_count, + std::map* node_type_map_time, + std::map* node_type_map_memory, + std::map* node_type_map_times_called, + int64_t* accumulated_us) const; + + std::string GetStatsByNodeType() const; + + std::string GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const; + + // Returns number of runs. + int num_runs() const { return static_cast(run_total_us_.count()); } + + // Returns stats of total microseconds spent by all nodes in each run. + const Stat& run_total_us() const { return run_total_us_; } + + void UpdateRunTotalUs(int64_t run_total_us) { + run_total_us_.UpdateStat(run_total_us); + } + + void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); } + + struct Detail { + std::string name; + std::string type; + int64_t run_order; + Stat start_us; + Stat rel_end_us; + Stat mem_used; + int64_t times_called; + }; + + const std::map& GetDetails() const { return details_; } + void UpdateDetails(const std::map& details); + + private: + void OrderNodesByMetric(SortingMetric sorting_metric, + std::vector* details) const; + + std::string HeaderString(const std::string& title) const; + std::string ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat& stat) const; + + Stat run_total_us_; + Stat memory_; + + std::map details_; + StatSummarizerOptions options_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_ diff --git a/tensorflow/python/util/stat_summarizer.i b/tensorflow/python/util/stat_summarizer.i index 6aeaa0e31b..f423553faa 100644 --- a/tensorflow/python/util/stat_summarizer.i +++ b/tensorflow/python/util/stat_summarizer.i @@ -73,7 +73,7 @@ void _DeleteStatSummarizer(tensorflow::StatSummarizer* ss); return ss; } } - +%include "tensorflow/core/util/stat_summarizer_options.h" %include "tensorflow/core/util/stat_summarizer.h" %unignoreall diff --git 
a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc index eeb1fab40c..de93b12b97 100644 --- a/tensorflow/tools/benchmark/benchmark_model.cc +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -667,12 +667,12 @@ int Main(int argc, char** argv) { output_prefix, benchmark_name, "meta-init-plus-first-inference", 1, initialization_time_s + (warmup_time_us / 1000000.0) / warmup_runs); - std::map node_type_map_count; - std::map node_type_map_time; - std::map node_type_map_memory; - std::map node_type_map_times_called; + std::map node_type_map_count; + std::map node_type_map_time; + std::map node_type_map_memory; + std::map node_type_map_times_called; - int64 accumulated_us; + int64_t accumulated_us; stats->ComputeStatsByType(&node_type_map_count, &node_type_map_time, &node_type_map_memory, &node_type_map_times_called, &accumulated_us); -- GitLab From 21e6addc0eb2376a7596fa648aaa55761a247170 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 23 May 2018 17:15:54 -0700 Subject: [PATCH 068/902] Internal change. PiperOrigin-RevId: 197816560 --- tensorflow/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 3c944b16e1..72cc357c71 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2323,7 +2323,7 @@ cuda_py_test( ], shard_count = 2, tags = [ - "nogpu", # Flaky: b/80127739 + "no_gpu", # Flaky: b/80127739 ], ) -- GitLab From 437015dba0da9db86b8b97cb12e4fdd055479007 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 17:26:04 -0700 Subject: [PATCH 069/902] Internal cleanup to remove a difference from the code on github. 
PiperOrigin-RevId: 197817738 --- .../lite/python/interpreter_wrapper/BUILD | 4 ++-- tensorflow/contrib/lite/toco/python/BUILD | 4 ++-- .../lite/toco/python/toco_python_api.h | 2 +- tensorflow/contrib/tensorrt/BUILD | 2 +- .../core/platform/default/build_config.bzl | 2 +- tensorflow/python/BUILD | 20 +++++++++---------- tensorflow/python/eager/BUILD | 2 +- tensorflow/python/eager/pywrap_tfe.h | 3 ++- .../python/lib/core/py_exception_registry.cc | 4 ++-- tensorflow/python/lib/core/py_func.cc | 4 ++-- tensorflow/python/lib/core/py_util.cc | 3 ++- tensorflow/python/lib/core/safe_ptr.h | 1 + tensorflow/workspace.bzl | 2 +- .../python_runtime}/BUILD | 2 +- 14 files changed, 29 insertions(+), 26 deletions(-) rename {util/python => third_party/python_runtime}/BUILD (86%) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/BUILD b/tensorflow/contrib/lite/python/interpreter_wrapper/BUILD index 453eda6e73..12ab38847d 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/BUILD +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/BUILD @@ -15,7 +15,7 @@ cc_library( "//tensorflow/contrib/lite/kernels:builtin_ops", "//tensorflow/core:lib", "//tensorflow/python:numpy_lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", "@com_google_absl//absl/memory", ], ) @@ -27,6 +27,6 @@ tf_py_wrap_cc( ], deps = [ ":interpreter_wrapper_lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 6c4f8e12cd..8cac568bd7 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -15,7 +15,7 @@ cc_library( "//tensorflow/contrib/lite/toco:toco_port", "//tensorflow/contrib/lite/toco:toco_tooling", "//tensorflow/core:lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -26,7 +26,7 @@ tf_py_wrap_cc( ":toco_python_api", 
"//tensorflow/contrib/lite/toco:model_flags_proto_cc", "//tensorflow/contrib/lite/toco:toco_flags_proto_cc", - "//util/python:python_headers", + "//third_party/python_runtime:headers", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/contrib/lite/toco/python/toco_python_api.h b/tensorflow/contrib/lite/toco/python/toco_python_api.h index 9af38e937c..7e8ad9c1da 100644 --- a/tensorflow/contrib/lite/toco/python/toco_python_api.h +++ b/tensorflow/contrib/lite/toco/python/toco_python_api.h @@ -15,8 +15,8 @@ limitations under the License. #ifndef _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ #define _THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_TOCO_PYTHON_TOCO_PYTHON_API_H_ -#include #include +#include namespace toco { diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 742be7baf0..6d6feb3c39 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -190,7 +190,7 @@ tf_py_wrap_cc( ":trt_conversion", ":trt_engine_op_kernel", "//tensorflow/core:framework_lite", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 284581b41c..23c594d90d 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -82,7 +82,7 @@ def pyx_library( native.cc_binary( name=shared_object_name, srcs=[stem + ".cpp"], - deps=deps + ["//util/python:python_headers"], + deps=deps + ["//third_party/python_runtime:headers"], linkshared = 1, ) shared_objects.append(shared_object_name) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 539e5d310e..679ef93229 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -256,7 +256,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//third_party/py/numpy:headers", - "//util/python:python_headers", + 
"//third_party/python_runtime:headers", ], ) @@ -269,7 +269,7 @@ cc_library( ":safe_ptr", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -293,7 +293,7 @@ cc_library( deps = [ "//tensorflow/c:c_api", "//tensorflow/core:lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -316,7 +316,7 @@ cc_library( ":safe_ptr", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -338,7 +338,7 @@ cc_library( "//tensorflow/core:script_ops_op_lib", "//tensorflow/python/eager:pywrap_tfe_lib", "//third_party/py/numpy:headers", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -349,7 +349,7 @@ cc_library( deps = [ "//tensorflow/c:c_api", "//tensorflow/c/eager:c_api", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -379,7 +379,7 @@ cc_library( ":safe_ptr", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -390,7 +390,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:script_ops_op_lib", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -3437,7 +3437,7 @@ tf_cuda_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//third_party/py/numpy:headers", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) @@ -3508,6 +3508,7 @@ tf_py_wrap_cc( ":py_record_writer_lib", ":python_op_gen", ":tf_session_helper", + "//third_party/python_runtime:headers", "//tensorflow/c:c_api", "//tensorflow/c:checkpoint_reader", "//tensorflow/c:python_api", @@ -3530,7 +3531,6 @@ tf_py_wrap_cc( "//tensorflow/core/profiler/internal:print_model_analysis", "//tensorflow/tools/graph_transforms:transform_graph_lib", 
"//tensorflow/python/eager:pywrap_tfe_lib", - "//util/python:python_headers", ] + (tf_additional_lib_deps() + tf_additional_plugin_deps() + tf_additional_verbs_deps() + diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 5530193d4e..8dbb53211f 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -31,7 +31,7 @@ cc_library( "//tensorflow/python:numpy_lib", "//tensorflow/python:py_seq_tensor", "//tensorflow/python:safe_ptr", - "//util/python:python_headers", + "//third_party/python_runtime:headers", ], ) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 9bc8b9bc72..626c33be15 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -16,10 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ #define TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ +#include + #include "tensorflow/c/eager/c_api.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" -#include typedef tensorflow::gtl::InlinedVector TFE_InputTensorHandles; diff --git a/tensorflow/python/lib/core/py_exception_registry.cc b/tensorflow/python/lib/core/py_exception_registry.cc index 6637de632b..d03cf8930b 100644 --- a/tensorflow/python/lib/core/py_exception_registry.cc +++ b/tensorflow/python/lib/core/py_exception_registry.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/python/lib/core/py_exception_registry.h" - #include +#include "tensorflow/python/lib/core/py_exception_registry.h" + namespace tensorflow { PyExceptionRegistry* PyExceptionRegistry::singleton_ = nullptr; diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 8c6bb7955a..30c1a9c759 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -17,6 +17,8 @@ limitations under the License. #include +#include + #include "numpy/arrayobject.h" #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/eager/c_api_internal.h" @@ -33,8 +35,6 @@ limitations under the License. #include "tensorflow/python/lib/core/py_util.h" #include "tensorflow/python/lib/core/safe_ptr.h" -#include - namespace tensorflow { namespace { diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc index 00cbf0c532..dcda1f4a44 100644 --- a/tensorflow/python/lib/core/py_util.cc +++ b/tensorflow/python/lib/core/py_util.cc @@ -15,9 +15,10 @@ limitations under the License. #include "tensorflow/python/lib/core/py_util.h" +#include + #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/strcat.h" -#include namespace tensorflow { namespace { diff --git a/tensorflow/python/lib/core/safe_ptr.h b/tensorflow/python/lib/core/safe_ptr.h index 32d2868886..35d71f7629 100644 --- a/tensorflow/python/lib/core/safe_ptr.h +++ b/tensorflow/python/lib/core/safe_ptr.h @@ -19,6 +19,7 @@ limitations under the License. 
#include #include + #include "tensorflow/c/c_api.h" #include "tensorflow/c/eager/c_api.h" diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 758e87b09d..5d6fa6655c 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -821,7 +821,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # Needed by Protobuf native.bind( name = "python_headers", - actual = clean_dep("//util/python:python_headers"), + actual = clean_dep("//third_party/python_runtime:headers"), ) # Needed by Protobuf diff --git a/util/python/BUILD b/third_party/python_runtime/BUILD similarity index 86% rename from util/python/BUILD rename to third_party/python_runtime/BUILD index f5fa0c6d29..2a1609191f 100644 --- a/util/python/BUILD +++ b/third_party/python_runtime/BUILD @@ -3,6 +3,6 @@ licenses(["notice"]) # New BSD, Python Software Foundation package(default_visibility = ["//visibility:public"]) alias( - name = "python_headers", + name = "headers", actual = "@local_config_python//:python_headers", ) -- GitLab From fd839c3980d4fb40bbe92fb8fa3105e2330334fc Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 23 May 2018 17:32:54 -0700 Subject: [PATCH 070/902] [tf.data] Split out the `tf.contrib.data.sample_from_datasets()` tests. These were previously broken and disabled in CI builds; this change also fixes them up. 
PiperOrigin-RevId: 197818554 --- .../contrib/cmake/tf_core_kernels.cmake | 1 + .../contrib/data/python/kernel_tests/BUILD | 17 +++ .../directed_interleave_dataset_test.py | 140 ++++++++++++++++++ .../interleave_dataset_op_test.py | 110 -------------- 4 files changed, 158 insertions(+), 110 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 90c58520a6..2d76bf530a 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -69,6 +69,7 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/csv_dataset_op.cc" + "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/directed_interleave_dataset_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc" diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 53da4940a8..d269b5b69a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -208,6 +208,23 @@ py_test( ], ) +py_test( + name = "directed_interleave_dataset_test", + size = "medium", + srcs = ["directed_interleave_dataset_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:interleave_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python:training", + 
"//tensorflow/python/data/ops:dataset_ops", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "get_single_element_test", size = "small", diff --git a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py new file mode 100644 index 0000000000..d071eb17e0 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py @@ -0,0 +1,140 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import random_seed +from tensorflow.python.platform import test + + +class DirectedInterleaveDatasetTest(test.TestCase): + + def testBasic(self): + selector_dataset = dataset_ops.Dataset.range(10).repeat(100) + input_datasets = [ + dataset_ops.Dataset.from_tensors(i).repeat(100) for i in range(10) + ] + dataset = interleave_ops.DirectedInterleaveDataset(selector_dataset, + input_datasets) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for _ in range(100): + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def _normalize(self, vec): + return vec / vec.sum() + + def _chi2(self, expected, actual): + actual = np.asarray(actual) + expected = np.asarray(expected) + diff = actual - expected + chi2 = np.sum(diff * diff / expected, axis=0) + return chi2 + + def _testSampleFromDatasetsHelper(self, weights, num_datasets, num_samples): + # Create a dataset that samples each integer in `[0, num_datasets)` + # with probability given by `weights[i]`. 
+ dataset = interleave_ops.sample_from_datasets([ + dataset_ops.Dataset.from_tensors(i).repeat(None) + for i in range(num_datasets) + ], weights) + dataset = dataset.take(num_samples) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + freqs = np.zeros([num_datasets]) + for _ in range(num_samples): + freqs[sess.run(next_element)] += 1 + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + return freqs + + def testSampleFromDatasets(self): + random_seed.set_random_seed(1619) + num_samples = 5000 + rand_probs = self._normalize(np.random.random_sample((15,))) + + # Use chi-squared test to assert that the observed distribution matches the + # expected distribution. Based on the implementation in + # "tensorflow/python/kernel_tests/multinomial_op_test.py". + for probs in [[.85, .05, .1], rand_probs]: + probs = np.asarray(probs) + classes = len(probs) + freqs = self._testSampleFromDatasetsHelper(probs, classes, num_samples) + self.assertLess(self._chi2(probs, freqs / num_samples), 1e-2) + + # Also check that `weights` as a dataset samples correctly. 
+ probs_ds = dataset_ops.Dataset.from_tensors(probs).repeat() + freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) + self.assertLess(self._chi2(probs, freqs / num_samples), 1e-2) + + def testErrors(self): + with self.assertRaisesRegexp(ValueError, + r"vector of length `len\(datasets\)`"): + interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.range(10), + dataset_ops.Dataset.range(20)], + weights=[0.25, 0.25, 0.25, 0.25]) + + with self.assertRaisesRegexp(TypeError, "`tf.float32` or `tf.float64`"): + interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.range(10), + dataset_ops.Dataset.range(20)], + weights=[1, 1]) + + with self.assertRaisesRegexp(TypeError, "must have the same type"): + interleave_ops.sample_from_datasets([ + dataset_ops.Dataset.from_tensors(0), + dataset_ops.Dataset.from_tensors(0.0) + ]) + + +class SampleFromDatasetsSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, probs, num_samples): + dataset = interleave_ops.sample_from_datasets( + [ + dataset_ops.Dataset.from_tensors(i).repeat(None) + for i in range(len(probs)) + ], + probs, + seed=1813) + return dataset.take(num_samples) + + def testSerializationCore(self): + self.run_core_tests( + lambda: self._build_dataset([0.5, 0.5], 100), + lambda: self._build_dataset([0.25, 0.25, 0.25, 0.25], 1000), 100) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py index 43aa4b1bd0..bee561e3e2 100644 --- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py @@ -30,7 +30,6 @@ from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from 
tensorflow.python.framework import errors -from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -907,114 +906,5 @@ class ParallelInterleaveDatasetTest(test.TestCase): sess.run(self.next_element) -class DirectedInterleaveDatasetTest(test.TestCase): - - def testBasic(self): - selector_dataset = dataset_ops.Dataset.range(10).repeat(100) - input_datasets = [ - dataset_ops.Dataset.from_tensors(i).repeat(100) for i in range(10) - ] - dataset = interleave_ops.DirectedInterleaveDataset(selector_dataset, - input_datasets) - iterator = dataset.make_initializable_iterator() - next_element = iterator.get_next() - - with self.test_session() as sess: - sess.run(iterator.initializer) - for _ in range(100): - for i in range(10): - self.assertEqual(i, sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def _normalize(self, vec): - return vec / vec.sum() - - def _chi2(self, expected, actual): - actual = np.asarray(actual) - expected = np.asarray(expected) - diff = actual - expected - chi2 = np.sum(diff * diff / expected, axis=0) - return chi2 - - def _testSampleFromDatasetsHelper(self, weights, num_datasets, num_samples): - # Create a dataset that samples each integer in `[0, num_datasets)` - # with probability given by `weights[i]`. 
- dataset = interleave_ops.sample_from_datasets([ - dataset_ops.Dataset.from_tensors(i).repeat(None) - for i in range(num_datasets) - ], weights) - dataset = dataset.take(num_samples) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.test_session() as sess: - freqs = np.zeros([num_datasets]) - for _ in range(num_samples): - freqs[sess.run(next_element)] += 1 - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - return freqs - - def testSampleFromDatasets(self): - random_seed.set_random_seed(1619) - num_samples = 10000 - rand_probs = self._normalize(np.random.random_sample((15,))) - - # Use chi-squared test to assert that the observed distribution matches the - # expected distribution. Based on the implementation in - # "tensorflow/python/kernel_tests/multinomial_op_test.py". - for probs in [[.85, .05, .1], rand_probs]: - probs = np.asarray(probs) - classes = len(probs) - freqs = self._testSampleFromDatasetsHelper(probs, classes, num_samples) - self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) - - # Also check that `weights` as a dataset samples correctly. 
- probs_ds = dataset_ops.Dataset.from_tensors(probs).repeat() - freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) - self.assertLess(self._chi2(probs, freqs / num_samples), 1e-3) - - def testErrors(self): - with self.assertRaisesRegexp(ValueError, - r"vector of length `len\(datasets\)`"): - interleave_ops.sample_from_datasets( - [dataset_ops.Dataset.range(10), - dataset_ops.Dataset.range(20)], - weights=[0.25, 0.25, 0.25, 0.25]) - - with self.assertRaisesRegexp(TypeError, "`tf.float32` or `tf.float64`"): - interleave_ops.sample_from_datasets( - [dataset_ops.Dataset.range(10), - dataset_ops.Dataset.range(20)], - weights=[1, 1]) - - with self.assertRaisesRegexp(TypeError, "must have the same type"): - interleave_ops.sample_from_datasets([ - dataset_ops.Dataset.from_tensors(0), - dataset_ops.Dataset.from_tensors(0.0) - ]) - - -class SampleFromDatasetsSerializationTest( - dataset_serialization_test_base.DatasetSerializationTestBase): - - def _build_dataset(self, probs, num_samples): - dataset = interleave_ops.sample_from_datasets( - [ - dataset_ops.Dataset.from_tensors(i).repeat(None) - for i in range(len(probs)) - ], - probs, - seed=1813) - return dataset.take(num_samples) - - def testSerializationCore(self): - self.run_core_tests( - lambda: self._build_dataset([0.5, 0.5], 100), - lambda: self._build_dataset([0.25, 0.25, 0.25, 0.25], 1000), 100) - - if __name__ == "__main__": test.main() -- GitLab From 0f5f23e876be89dc2a389be078289d3028ae6503 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Wed, 23 May 2018 17:44:32 -0700 Subject: [PATCH 071/902] Add import. PiperOrigin-RevId: 197820050 --- tensorflow/python/util/util.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 386a6fbeb8..2b33d106bc 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/python/util/util.h" +#include #include #include -- GitLab From 335d52c17644f417bc53abe4ef87ead9de01ad6d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 23 May 2018 17:49:42 -0700 Subject: [PATCH 072/902] Cache generated LLVM IR for GEBP After this change all generated GEBPs with the same shape will share a single llvm::Function. This is NFC for any actual workloads because the GEBP emitter isn't exercised by normal code-paths yet. PiperOrigin-RevId: 197820606 --- .../xla/service/cpu/dot_op_emitter.cc | 152 ++++++++++++------ 1 file changed, 102 insertions(+), 50 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 5158779910..3aa436b39a 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -610,16 +610,21 @@ class MatrixMatrixBlockPanelEmitter { int64 k() const { return k_; } int64 n() const { return n_; } + string ToString() const { + return tensorflow::strings::StrCat(m(), "x", k(), "x", n()); + } + private: const int64 m_; const int64 k_; const int64 n_; }; - // Creates an instance of MatrixMatrixBlockPanelEmitter that matrix-multiplies - // `lhs` with `rhs` and stores the result in `result`. + // Represents the configuration of the GEBP emitter. The LLVM IR emitted by + // the emitter, modulo the LLVM values holding the input and output buffers, + // must be a function of the instance of `Config` passed to it. // - // `m`, `k` and `n` are the matrix multiplication dimensions. + // `dims` holds the matrix multiplication dimensions. // // `max_vectorization_width` is the maximum vector width (i.e. the width of // the largest vector register we will use). 
This can be larger than the @@ -630,27 +635,54 @@ class MatrixMatrixBlockPanelEmitter { // // `k_tiling_factor` is the number of elements along the reduction dimensions // that we will attempt to process at once. - explicit MatrixMatrixBlockPanelEmitter( - llvm::Value* lhs, llvm::Value* rhs, llvm::Value* result, Dimensions dims, - int max_vectorization_width, int min_vectorization_width, - int k_tiling_factor, const TargetMachineFeatures& target_machine_features, - llvm::IRBuilder<>* ir_builder, PrimitiveType primitive_type) + class Config { + public: + explicit Config(PrimitiveType scalar_type, Dimensions dims, + int64 max_vectorization_width, + int64 min_vectorization_width, int64 k_tiling_factor) + : scalar_type_(scalar_type), + dims_(dims), + max_vectorization_width_(max_vectorization_width), + min_vectorization_width_(min_vectorization_width), + k_tiling_factor_(k_tiling_factor) {} + + string GetCacheKey() const { + return tensorflow::strings::StrCat( + "gebp_", PrimitiveType_Name(scalar_type()), "_", dims().ToString(), + "_", max_vectorization_width(), "_", min_vectorization_width(), "_", + k_tiling_factor()); + } + + PrimitiveType scalar_type() const { return scalar_type_; } + Dimensions dims() const { return dims_; } + int64 max_vectorization_width() const { return max_vectorization_width_; } + int64 min_vectorization_width() const { return min_vectorization_width_; } + int64 k_tiling_factor() const { return k_tiling_factor_; } + + private: + PrimitiveType scalar_type_; + Dimensions dims_; + int64 max_vectorization_width_; + int64 min_vectorization_width_; + int64 k_tiling_factor_; + }; + + // Creates an instance of MatrixMatrixBlockPanelEmitter that matrix-multiplies + // `lhs` with `rhs` and stores the result in `result`. 
+ explicit MatrixMatrixBlockPanelEmitter(Config config, llvm::Value* lhs, + llvm::Value* rhs, llvm::Value* result, + llvm::IRBuilder<>* ir_builder) : lhs_(lhs), rhs_(rhs), result_(result), - dims_(dims), - max_vectorization_width_(max_vectorization_width), - min_vectorization_width_(min_vectorization_width), - k_tiling_factor_(k_tiling_factor), - target_machine_features_(target_machine_features), + config_(config), ir_builder_(ir_builder), - primitive_type_(primitive_type), ksl_(ir_builder_) { - CHECK(max_vectorization_width > 0 && - IsPowerOfTwo(static_cast(max_vectorization_width))); - CHECK(min_vectorization_width > 0 && - IsPowerOfTwo(static_cast(min_vectorization_width))); - CHECK_GT(k_tiling_factor, 0); + CHECK(max_vectorization_width() > 0 && + IsPowerOfTwo(static_cast(max_vectorization_width()))); + CHECK(min_vectorization_width() > 0 && + IsPowerOfTwo(static_cast(min_vectorization_width()))); + CHECK_GT(k_tiling_factor(), 0); } void Emit(); @@ -677,31 +709,37 @@ class MatrixMatrixBlockPanelEmitter { llvm::Value* getInt64(int64 value) { return ir_builder_->getInt64(value); } + Config config() const { return config_; } + Dimensions dims() const { return config().dims(); } + + int64 max_vectorization_width() const { + return config().max_vectorization_width(); + } + int64 min_vectorization_width() const { + return config().min_vectorization_width(); + } + int64 k_tiling_factor() const { return config().k_tiling_factor(); } + PrimitiveType scalar_type() const { return config().scalar_type(); } + llvm::Value* lhs_; llvm::Value* rhs_; llvm::Value* result_; - Dimensions dims_; - - int64 max_vectorization_width_; - int64 min_vectorization_width_; - int64 k_tiling_factor_; + Config config_; - const TargetMachineFeatures& target_machine_features_; llvm::IRBuilder<>* ir_builder_; - PrimitiveType primitive_type_; KernelSupportLibrary ksl_; }; void MatrixMatrixBlockPanelEmitter::Emit() { EmitChunkedLoopOverN(); } void 
MatrixMatrixBlockPanelEmitter::EmitChunkedLoopOverN() { - int64 current_vectorization_width = max_vectorization_width_; + int64 current_vectorization_width = max_vectorization_width(); int64 n_start = 0; - while (n_start != dims_.n() && - current_vectorization_width >= min_vectorization_width_) { - int64 n_end = dims_.n() - (dims_.n() % current_vectorization_width); + while (n_start != dims().n() && + current_vectorization_width >= min_vectorization_width()) { + int64 n_end = dims().n() - (dims().n() % current_vectorization_width); if (n_start != n_end) { - VectorSupportLibrary vsl(primitive_type_, current_vectorization_width, + VectorSupportLibrary vsl(scalar_type(), current_vectorization_width, ir_builder_, "gebp"); EmitLoopOverK(&vsl, getInt64(n_start), getInt64(n_end)); n_start = n_end; @@ -709,9 +747,9 @@ void MatrixMatrixBlockPanelEmitter::EmitChunkedLoopOverN() { current_vectorization_width /= 2; } - if (n_start != dims_.n()) { - VectorSupportLibrary vsl(primitive_type_, 1, ir_builder_, "gebp"); - ksl_.For("epi.n", n_start, dims_.n(), 1, [&](llvm::Value* n_i) { + if (n_start != dims().n()) { + VectorSupportLibrary vsl(scalar_type(), 1, ir_builder_, "gebp"); + ksl_.For("epi.n", n_start, dims().n(), 1, [&](llvm::Value* n_i) { llvm::Value* n_i_next = ir_builder_->CreateAdd(n_i, ir_builder_->getInt64(1)); EmitLoopOverK(&vsl, n_i, n_i_next); @@ -723,15 +761,15 @@ void MatrixMatrixBlockPanelEmitter::EmitLoopOverK(VectorSupportLibrary* vsl, llvm::Value* n_start, llvm::Value* n_end) { int64 k_start = 0; - int64 k_end = dims_.k() - (dims_.k() % k_tiling_factor_); + int64 k_end = dims().k() - (dims().k() % k_tiling_factor()); if (k_end != k_start) { - EmitInnerLoop(k_tiling_factor_, getInt64(k_start), getInt64(k_end), n_start, - n_end, vsl); + EmitInnerLoop(k_tiling_factor(), getInt64(k_start), getInt64(k_end), + n_start, n_end, vsl); k_start = k_end; } - if (k_start != dims_.k()) { - EmitInnerLoop(dims_.k() - k_start, getInt64(k_start), getInt64(dims_.k()), + if 
(k_start != dims().k()) { + EmitInnerLoop(dims().k() - k_start, getInt64(k_start), getInt64(dims().k()), n_start, n_end, vsl); } } @@ -789,12 +827,12 @@ void MatrixMatrixBlockPanelEmitter::EmitLoopOverK(VectorSupportLibrary* vsl, void MatrixMatrixBlockPanelEmitter::EmitInnerLoop( int64 k_tiling_factor, llvm::Value* k_start, llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end, VectorSupportLibrary* vsl) { - ksl_.For("dot.m", 0, dims_.m(), 1, [&](llvm::Value* m_i) { + ksl_.For("dot.m", 0, dims().m(), 1, [&](llvm::Value* m_i) { // This outer loop iterates over all of the M dimension llvm::Value* result_row_begin = vsl->ComputeOffsetPointer( - result_, /*offset_elements=*/m_i, /*scale=*/dims_.n()); + result_, /*offset_elements=*/m_i, /*scale=*/dims().n()); llvm::Value* lhs_row_begin = vsl->ComputeOffsetPointer( - lhs_, /*offset_elements=*/m_i, /*scale=*/dims_.k()); + lhs_, /*offset_elements=*/m_i, /*scale=*/dims().k()); ksl_.For("dot.k", k_start, k_end, k_tiling_factor, [&](llvm::Value* k_i) { // broadcasted_a is the broadcasted set of vectors denoted as , @@ -808,7 +846,7 @@ void MatrixMatrixBlockPanelEmitter::EmitInnerLoop( // rhs_loader will be used to load the tile off of the RHS, denoted as // <, ...> in the diagram. 
- TileLoader rhs_loader(vsl, ir_builder_, rhs_, dims_.n(), k_i, + TileLoader rhs_loader(vsl, ir_builder_, rhs_, dims().n(), k_i, k_tiling_factor); ksl_.For( "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { @@ -913,14 +951,28 @@ bool DotOpEmitter::EmitExperimentalGebpDotIfEnabled( target, ir_builder_->getInt8(0), size_bytes, target_machine_features_.minimum_alignment_for_allocation(size_bytes)); - MatrixMatrixBlockPanelEmitter::Dimensions gebp_dims(/*m=*/m, /*k=*/k, - /*n=*/n); - MatrixMatrixBlockPanelEmitter gebp_emitter( - /*lhs=*/lhs, /*rhs=*/rhs, /*result=*/target, gebp_dims, + MatrixMatrixBlockPanelEmitter::Config config( + /*scalar_type=*/primitive_type, + MatrixMatrixBlockPanelEmitter::Dimensions{/*m=*/m, /*k=*/k, /*n=*/n}, /*max_vectorization_width=*/8, /*min_vectorization_width=*/4, - /*k_tiling_factor=*/8, target_machine_features_, ir_builder_, - primitive_type); - gebp_emitter.Emit(); + /*k_tiling_factor=*/8); + + const bool enable_fast_math = + hlo_module_config_.debug_options().xla_enable_fast_math(); + const bool optimize_for_size = + options::OptimizeForSizeRequested(hlo_module_config_); + + KernelSupportLibrary::EmitAndCallOutlinedKernel( + /*enable_fast_math=*/enable_fast_math, + /*optimize_for_size=*/optimize_for_size, ir_builder_, + config.GetCacheKey(), lhs, rhs, target, + [this, config](llvm::Value* lhs, llvm::Value* rhs, llvm::Value* target) { + MatrixMatrixBlockPanelEmitter gebp_emitter( + config, /*lhs=*/lhs, /*rhs=*/rhs, + /*result=*/target, ir_builder_); + gebp_emitter.Emit(); + }); + return true; } -- GitLab From c94c8674b214ea9ddd28486b4dacade2d5b0db77 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 23 May 2018 17:52:29 -0700 Subject: [PATCH 073/902] [XLA] Speed up SliceTest. - Use parameters rather than constants, because LLVM and ptxas are slow with large constants. - Use iota rather than filling with random values, because the latter is slow. 
PiperOrigin-RevId: 197820897 --- tensorflow/compiler/xla/tests/slice_test.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index 52195db2aa..5292568abe 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -365,15 +365,18 @@ XLA_TEST_P(SliceR2Test, DoIt) { const R2Spec& spec = GetParam(); Array2D input(spec.input_dim0, spec.input_dim1); input.FillUnique(); + auto literal = Literal::CreateR2FromArray2DWithLayout( + input, LayoutUtil::MakeLayout(spec.layout)); XlaBuilder builder(TestName()); - auto a = builder.ConstantR2FromArray2DWithLayout( - input, LayoutUtil::MakeLayout(spec.layout)); + auto a = builder.Parameter(0, literal->shape(), "p0"); builder.Slice(a, spec.slice_starts, spec.slice_limits, spec.slice_strides); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr arg, + client_->TransferToServer(*literal)); std::unique_ptr> expected = ReferenceUtil::Slice2D( input, spec.slice_starts, spec.slice_limits, spec.slice_strides); - ComputeAndCompareR2(&builder, *expected, {}); + ComputeAndCompareR2(&builder, *expected, {arg.get()}); } INSTANTIATE_TEST_CASE_P( @@ -453,7 +456,7 @@ class SliceR4Test : public ClientLibraryTestBase, void Run(const R4Spec& spec) { Array4D values(spec.input_dims[0], spec.input_dims[1], spec.input_dims[2], spec.input_dims[3]); - values.FillRandom(3.14f); + values.FillIota(3.14159); auto expected = ReferenceUtil::Slice4D( values, spec.slice_starts, spec.slice_limits, spec.slice_strides); XlaBuilder builder(TestName()); -- GitLab From 00b6afc6ddf30fde104c5d2908a6d97ed414a58f Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Wed, 23 May 2018 17:58:42 -0700 Subject: [PATCH 074/902] Aggregating IndexedSlices: Do not require first element to be IndexedSlices. 
PiperOrigin-RevId: 197821479 --- tensorflow/contrib/distribute/python/cross_tower_utils.py | 2 +- tensorflow/python/ops/gradients_impl.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils.py b/tensorflow/contrib/distribute/python/cross_tower_utils.py index 8dd7831c2b..4bff134cad 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_utils.py +++ b/tensorflow/contrib/distribute/python/cross_tower_utils.py @@ -343,7 +343,7 @@ def unpack_small_tensors(tower_grads, packing): def aggregate_tensors_or_indexed_slices(values, accumulation_fn=math_ops.add_n): """Aggregate tensors using `accumulation_fn` and IndexedSlices via concat.""" - if isinstance(values[0], ops.IndexedSlices): + if any(isinstance(v, ops.IndexedSlices) for v in values): return gradients_impl._AggregateIndexedSlicesGradients(values) # pylint: disable=protected-access else: return accumulation_fn(values) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 1e808fddb5..7385cb7585 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -1020,7 +1020,6 @@ def _AggregateIndexedSlicesGradients(grads): elif len(grads) == 1: return grads[0] else: - assert isinstance(grads[0], ops.IndexedSlices) grads = math_ops._as_indexed_slices_list( # pylint: disable=protected-access [g for g in grads if g is not None]) grads = [_HandleNestedIndexedSlices(x) for x in grads] # pylint: disable=protected-access -- GitLab From da07aa28e0eef4aebe4851e9bdfc40e7b098cf04 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 18:13:23 -0700 Subject: [PATCH 075/902] Extracts the SimplifyReduction optimization into its own method. 
PiperOrigin-RevId: 197823183 --- .../grappler/optimizers/constant_folding.cc | 37 ++++++++++++------- .../grappler/optimizers/constant_folding.h | 6 ++- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 4ebe1cabfc..bf606fb8b1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -2133,20 +2133,8 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } } - if (IsSimplifiableReduction(*node, *properties)) { - // Replace the reduction node with an identity node, that can be further - // optimized by the model pruner. - DataType output_type; - if (node->attr().count("T") > 0) { - output_type = node->attr().at("T").type(); - } else { - // This is an 'any' or 'all' reduction. The output is always boolean. - output_type = DT_BOOL; - } - node->set_op("Identity"); - node->clear_attr(); - (*node->mutable_attr())["T"].set_type(output_type); - *node->mutable_input(1) = AsControlDependency(node->input(1)); + + if (SimplifyReduction(*properties, node)) { graph_modified_ = true; return Status::OK(); } @@ -2200,6 +2188,27 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::SimplifyReduction(const GraphProperties& properties, + NodeDef* node) { + if (IsSimplifiableReduction(*node, properties)) { + // Replace the reduction node with an identity node, that can be further + // optimized by the model pruner. + DataType output_type; + if (node->attr().count("T") > 0) { + output_type = node->attr().at("T").type(); + } else { + // This is an 'any' or 'all' reduction. The output is always boolean. 
+ output_type = DT_BOOL; + } + node->set_op("Identity"); + node->clear_attr(); + (*node->mutable_attr())["T"].set_type(output_type); + *node->mutable_input(1) = AsControlDependency(node->input(1)); + return true; + } + return false; +} + bool ConstantFolding::SimplifyReshape(const GraphProperties& properties, bool use_shape_info, NodeDef* node) { if (!use_shape_info) return false; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 3cf379fbc0..07a2e01042 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -139,11 +139,13 @@ class ConstantFolding : public GraphOptimizer { GraphDef* optimized_graph, NodeDef* node, bool* success); - // Simplifies a Reshape operation to an Identity operation if the input node - // to the operation is a constant. + // Simplifies a Reshape operation to an Identity operation if applicable. bool SimplifyReshape(const GraphProperties& properties, bool use_shape_info, NodeDef* node); + // Simplifies a Reduction operation to an Identity operation if applicable. + bool SimplifyReduction(const GraphProperties& properties, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 3dfe81c60fac512703eadf224d0485e17fe7d55a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 18:38:34 -0700 Subject: [PATCH 076/902] HloSharding parsing from string, used by new Sharding HloMatcher for ease of use. 
PiperOrigin-RevId: 197825588 --- tensorflow/compiler/xla/service/BUILD | 3 +- .../compiler/xla/service/hlo_matchers.h | 7 +++ .../compiler/xla/service/hlo_matchers_test.cc | 18 +++++++- .../compiler/xla/service/hlo_sharding_test.cc | 46 ++++++++++++++++++- .../compiler/xla/tools/parser/hlo_parser.cc | 23 ++++++++++ .../compiler/xla/tools/parser/hlo_parser.h | 4 ++ 6 files changed, 97 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index d1722644c7..749873e560 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -376,6 +376,7 @@ cc_library( ":hlo", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -387,7 +388,6 @@ tf_cc_test( ":hlo_matchers", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -431,6 +431,7 @@ tf_cc_test( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index c33bdadf1c..dfefad3634 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/gtl/optional.h" namespace xla { @@ -324,6 +325,12 @@ inline ::testing::Matcher Sharding( return ::testing::MakeMatcher( new ::xla::testing::HloShardingMatcher(sharding)); } +// Matcher for Sharding from sharding string +inline ::testing::Matcher Sharding( + tensorflow::StringPiece sharding) { + return ::testing::MakeMatcher(new ::xla::testing::HloShardingMatcher( + xla::tools::ParseSharding(sharding).ValueOrDie())); +} // Verifies that no HloSharding is set for an HLO instruction. inline ::testing::Matcher NoSharding() { return ::testing::MakeMatcher( diff --git a/tensorflow/compiler/xla/service/hlo_matchers_test.cc b/tensorflow/compiler/xla/service/hlo_matchers_test.cc index 016cc01e33..1d10e3c4fe 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers_test.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers_test.cc @@ -15,7 +15,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace op = xla::testing::opcode_matchers; using ::testing::_; @@ -147,6 +146,18 @@ TEST(HloMatchersTest, ShardingMatcher) { "param.1"); p1->set_sharding(HloSharding::AssignDevice(1)); + auto tuple_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {7}), ShapeUtil::MakeShape(S32, {9}), + ShapeUtil::MakeShape(F32, {11})}); + auto p2 = HloInstruction::CreateParameter(1, tuple_shape, "param.2"); + Array assignment({2}); + assignment.SetValues({0, 1}); + auto sharding = HloSharding::Tuple( + tuple_shape, + {HloSharding::Tile(ShapeUtil::MakeShape(F32, {5}), assignment), + HloSharding::AssignDevice(1), HloSharding::Replicate()}); + p2->set_sharding(sharding); + EXPECT_THAT(p0.get(), op::NoSharding()); EXPECT_THAT(p0.get(), ::testing::Not(op::Sharding(HloSharding::AssignDevice(1)))); @@ -155,6 +166,11 @@ TEST(HloMatchersTest, ShardingMatcher) { ::testing::Not(op::Sharding(HloSharding::AssignDevice(0)))); EXPECT_THAT(p1.get(), op::Sharding(HloSharding::AssignDevice(1))); + EXPECT_THAT( + p2.get(), + op::Sharding( + "{{f32[5] devices=[2]0,1}, {maximal device=1}, {replicated}}")); + EXPECT_THAT(Explain(p0.get(), op::Sharding(HloSharding::AssignDevice(1))), "%param.0 = f32[5]{0} parameter(0) has no sharding (expected: " "{maximal device=1})"); diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 3bf0d25efb..94d1a3226b 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/hlo_sharding.h" - #include #include #include @@ -25,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" namespace xla { @@ -312,5 +311,48 @@ TEST_F(HloShardingTest, OstreamTest) { EXPECT_EQ(oss.str(), "{f32[3,5,7,11] devices=[1,1,2,2]0,1,2,3}"); } +TEST_F(HloShardingTest, Parse) { + auto check = [](const HloSharding& sharding) { + TF_ASSERT_OK_AND_ASSIGN(auto parsed_sharding, + tools::ParseSharding(sharding.ToString())); + EXPECT_EQ(sharding, parsed_sharding); + }; + check(HloSharding::Replicate()); + check(HloSharding::AssignDevice(2)); + check(HloSharding::Tile(ShapeUtil::MakeShape(F32, {3, 1, 3, 7}), + Array4D({{{{0}, {1}}}}))); + // Empty tuple. + check(HloSharding::Tuple(ShapeUtil::MakeTupleShape({}), {})); + { + // Non-nested tuple. + auto tuple_shape = + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {3, 1, 5, 7}), + ShapeUtil::MakeShape(F32, {3, 5, 7}), + ShapeUtil::MakeShape(F32, {3, 7})}); + check(HloSharding::Tuple( + tuple_shape, {HloSharding::Tile(ShapeUtil::MakeShape(F32, {3, 1, 3, 7}), + Array4D({{{{0}, {1}}}})), + HloSharding::Replicate(), HloSharding::AssignDevice(1)})); + } + { + // Nested tuple. 
+ auto tuple_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {3, 1, 5, 7}), + ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {3, 5, 7}), + ShapeUtil::MakeShape(F32, {3, 7})})}); + std::vector leaf_shardings = { + HloSharding::Tile(ShapeUtil::MakeShape(F32, {3, 1, 3, 7}), + Array4D({{{{0}, {1}}}})), + HloSharding::Replicate(), HloSharding::AssignDevice(1)}; + ShapeTree sharding_tree(tuple_shape, HloSharding::Replicate()); + // Assign leaf_shardings to sharding_tree leaves. + auto it = leaf_shardings.begin(); + for (auto& index_to_sharding : sharding_tree.leaves()) { + index_to_sharding.second = *it++; + } + check(HloSharding::Tuple(sharding_tree)); + } +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index d0e7af8844..e990b6aba8 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -56,6 +56,11 @@ class HloParser { // Returns the error information. string GetError() const { return Join(error_, "\n"); } + // Stand alone parsing for sharding. The parser string is supposed to + // contain the body of the sharding, i.e. just the rhs of the "sharding={...}" + // attribute string. + StatusOr ParseShardingOnly(); + private: // ParseXXX returns false if an error occurred. 
bool ParseHloModule(); @@ -2673,6 +2678,18 @@ bool HloParser::AddComputation(const string& name, HloComputation* computation, return true; } +StatusOr HloParser::ParseShardingOnly() { + lexer_.Lex(); + OpSharding op_sharding; + if (!ParseSharding(&op_sharding)) { + return InvalidArgument("Syntax error:\n%s", GetError().c_str()); + } + if (lexer_.GetKind() != TokKind::kEof) { + return InvalidArgument("Syntax error:\nExtra content after sharding"); + } + return HloSharding::FromProto(op_sharding); +} + } // namespace StatusOr> Parse(StringPiece str, @@ -2689,5 +2706,11 @@ StatusOr> Parse(StringPiece str) { return Parse(str, config); } +StatusOr ParseSharding(tensorflow::StringPiece str) { + HloModuleConfig config; + HloParser parser(str, config); + return parser.ParseShardingOnly(); +} + } // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.h b/tensorflow/compiler/xla/tools/parser/hlo_parser.h index 2f97a2b9b1..f7854f403e 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.h +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.h @@ -36,6 +36,10 @@ StatusOr> Parse(tensorflow::StringPiece str, // format, parses the string and creates a HloModule with default config. StatusOr> Parse(tensorflow::StringPiece str); +// Parse sharding from str. str is supposed to contain the body of the +// sharding, i.e. just the rhs of the "sharding={...}" attribute string. +StatusOr ParseSharding(tensorflow::StringPiece str); + } // namespace tools } // namespace xla -- GitLab From 9fc9d5bfc460f736befa25f640a8010664945d61 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 23 May 2018 18:45:30 -0700 Subject: [PATCH 077/902] Add back some public interface methods. 
PiperOrigin-RevId: 197826136 --- tensorflow/core/util/stat_summarizer.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index 39cd948525..173ed5cebc 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -95,6 +95,13 @@ class StatSummarizer { num_stats); } + int num_runs() const { return stats_calculator_->num_runs(); } + + // Returns stats of total microseconds spent by all nodes in each run. + const Stat& run_total_us() const { + return stats_calculator_->run_total_us(); + } + private: void Validate(const std::vector* outputs, const NodeExecStats& ns) const; -- GitLab From cd468ceee10646c5e023661537a20915f52677f9 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Wed, 23 May 2018 18:46:20 -0700 Subject: [PATCH 078/902] Moves estimator getting started docs into programmer's guide. Update path references and magic links. Remove getting started with estimators doc. Add redirects. 
PiperOrigin-RevId: 197826223 --- .../estimator/python/estimator/hooks.py | 2 +- .../get_started/datasets_quickstart.md | 4 +- .../get_started/get_started_for_beginners.md | 751 ------------------ tensorflow/docs_src/get_started/index.md | 22 +- tensorflow/docs_src/get_started/leftnav_files | 11 - tensorflow/docs_src/install/install_mac.md | 6 +- .../docs_src/install/install_windows.md | 6 +- .../checkpoints.md | 0 .../custom_estimators.md | 12 +- .../docs_src/programmers_guide/estimators.md | 2 +- .../feature_columns.md | 2 +- .../docs_src/programmers_guide/index.md | 24 +- .../docs_src/programmers_guide/leftnav_files | 16 +- .../programmers_guide/low_level_intro.md | 6 +- .../premade_estimators.md | 7 +- .../docs_src/programmers_guide/using_tpu.md | 4 +- .../docs_src/tutorials/kernel_methods.md | 2 +- tensorflow/docs_src/tutorials/layers.md | 10 +- tensorflow/docs_src/tutorials/linear.md | 2 +- .../docs_src/tutorials/recurrent_quickdraw.md | 2 +- tensorflow/python/estimator/estimator.py | 6 +- tensorflow/python/estimator/training.py | 4 +- 22 files changed, 66 insertions(+), 835 deletions(-) delete mode 100644 tensorflow/docs_src/get_started/get_started_for_beginners.md rename tensorflow/docs_src/{get_started => programmers_guide}/checkpoints.md (100%) rename tensorflow/docs_src/{get_started => programmers_guide}/custom_estimators.md (98%) rename tensorflow/docs_src/{get_started => programmers_guide}/feature_columns.md (99%) rename tensorflow/docs_src/{get_started => programmers_guide}/premade_estimators.md (98%) diff --git a/tensorflow/contrib/estimator/python/estimator/hooks.py b/tensorflow/contrib/estimator/python/estimator/hooks.py index 4808b9ee30..ddd6aa442f 100644 --- a/tensorflow/contrib/estimator/python/estimator/hooks.py +++ b/tensorflow/contrib/estimator/python/estimator/hooks.py @@ -72,7 +72,7 @@ class InMemoryEvaluatorHook(training.SessionRunHook): estimator: A `tf.estimator.Estimator` instance to call evaluate. 
input_fn: Equivalent to the `input_fn` arg to `estimator.evaluate`. A function that constructs the input data for evaluation. - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. The function should construct and return one of the following: diff --git a/tensorflow/docs_src/get_started/datasets_quickstart.md b/tensorflow/docs_src/get_started/datasets_quickstart.md index c972e5e555..020e40dd3b 100644 --- a/tensorflow/docs_src/get_started/datasets_quickstart.md +++ b/tensorflow/docs_src/get_started/datasets_quickstart.md @@ -14,7 +14,7 @@ introduces the API by walking through two simple examples: Taking slices from an array is the simplest way to get started with `tf.data`. -The @{$get_started/premade_estimators$Premade Estimators} chapter describes +The @{$premade_estimators$Premade Estimators} chapter describes the following `train_input_fn`, from [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py), to pipe the data into the Estimator: @@ -377,7 +377,7 @@ Now you have the basic idea of how to efficiently load data into an Estimator. Consider the following documents next: -* @{$get_started/custom_estimators}, which demonstrates how to build your own +* @{$custom_estimators}, which demonstrates how to build your own custom `Estimator` model. * The @{$low_level_intro#datasets$Low Level Introduction}, which demonstrates how to experiment directly with `tf.data.Datasets` using TensorFlow's low diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md deleted file mode 100644 index d5a80e22c5..0000000000 --- a/tensorflow/docs_src/get_started/get_started_for_beginners.md +++ /dev/null @@ -1,751 +0,0 @@ -# Get Started with Graph Execution - -This document explains how to use machine learning to classify (categorize) -Iris flowers by species. 
This document dives deeply into the TensorFlow -code to do exactly that, explaining ML fundamentals along the way. - -If the following list describes you, then you are in the right place: - -* You know little to nothing about machine learning. -* You want to learn how to write TensorFlow programs. -* You can code (at least a little) in Python. - -If you are already familiar with basic machine learning concepts -but are new to TensorFlow, read -@{$premade_estimators$Getting Started with TensorFlow: for ML Experts}. - -If you'd like to learn a lot about the basics of Machine Learning, -consider taking -[Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/). - - -## The Iris classification problem - -Imagine you are a botanist seeking an automated way to classify each -Iris flower you find. Machine learning provides many ways to classify flowers. -For instance, a sophisticated machine learning program could classify flowers -based on photographs. Our ambitions are more modest--we're going to classify -Iris flowers based solely on the length and width of their -[sepals](https://en.wikipedia.org/wiki/Sepal) and -[petals](https://en.wikipedia.org/wiki/Petal). - -The Iris genus entails about 300 species, but our program will classify only -the following three: - -* Iris setosa -* Iris virginica -* Iris versicolor - -
-Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor -
- -**From left to right, -[*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by -[Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), -[*Iris versicolor*](https://commons.wikimedia.org/w/index.php?curid=248095) (by -[Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0), -and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) -(by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA -2.0).** -

 

- -Fortunately, someone has already created [a data set of 120 Iris -flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set) -with the sepal and petal measurements. This data set has become -one of the canonical introductions to machine learning classification problems. -(The [MNIST database](https://en.wikipedia.org/wiki/MNIST_database), -which contains handwritten digits, is another popular classification -problem.) The first 5 entries of the Iris data set -look as follows: - -| Sepal length | sepal width | petal length | petal width | species -| --- | --- | --- | --- | --- -|6.4 | 2.8 | 5.6 | 2.2 | 2 -|5.0 | 2.3 | 3.3 | 1.0 | 1 -|4.9 | 2.5 | 4.5 | 1.7 | 2 -|4.9 | 3.1 | 1.5 | 0.1 | 0 -|5.7 | 3.8 | 1.7 | 0.3 | 0 - -Let's introduce some terms: - -* The last column (species) is called the - [**label**](https://developers.google.com/machine-learning/glossary/#label); - the first four columns are called - [**features**](https://developers.google.com/machine-learning/glossary/#feature). - Features are characteristics of an example, while the label is - the thing we're trying to predict. - -* An [**example**](https://developers.google.com/machine-learning/glossary/#example) - consists of the set of features and the label for one sample - flower. The preceding table shows 5 examples from a data set of - 120 examples. - -Each label is naturally a string (for example, "setosa"), but machine learning -typically relies on numeric values. Therefore, someone mapped each string to -a number. Here's the representation scheme: - -* 0 represents setosa -* 1 represents versicolor -* 2 represents virginica - -For a look at other examples of labels and examples, see the -[ML Terminology section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology). - - -## Models and training - -A **model** is the relationship between features -and the label. 
For the Iris problem, the model defines the relationship -between the sepal and petal measurements and the predicted Iris species. Some -simple models can be described with a few lines of algebra, but complex machine -learning models have a large number of parameters that are difficult to -summarize. - -Could you determine the relationship between the four features and the -Iris species *without* using machine learning? That is, could you use -traditional programming techniques (for example, a lot of conditional -statements) to create a model? Maybe. You could play with the data set -long enough to determine the right relationships of petal and sepal -measurements to particular species. However, a good machine learning -approach *determines the model for you*. That is, if you feed enough -representative examples into the right machine learning model type, the program -will determine the relationship between sepals, petals, and species. - -**Training** is the stage of machine learning in which the model is -gradually optimized (learned). The Iris problem is an example -of [**supervised machine -learning**](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning) -in which a model is trained from examples that contain labels. (In -[**unsupervised machine -learning**](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning), -the examples don't contain labels. Instead, the model typically finds -patterns among the features.) - - - - -## Get the sample program - -Prior to playing with the sample code in this document, do the following: - -1. @{$install$Install TensorFlow}. -2. If you installed TensorFlow with virtualenv or Anaconda, activate your - TensorFlow environment. -3. Install or upgrade pandas by issuing the following command: - - `pip install pandas` - - -Take the following steps to get the sample program: - -1. 
Clone the TensorFlow Models repository from github by entering the following - command: - - `git clone https://github.com/tensorflow/models` - -2. Change directory within that branch to the location containing the examples - used in this document: - - `cd models/samples/core/get_started/` - -In that `get_started` directory, you'll find a program -named `premade_estimator.py`. - - -## Run the sample program - -You run TensorFlow programs as you would run any Python program. Therefore, -issue the following command from a command line to -run `premade_estimators.py`: - -``` bash -python premade_estimator.py -``` - -Running the program should output a whole bunch of information ending with -three prediction lines like the following: - -```None -... -Prediction is "Setosa" (99.6%), expected "Setosa" - -Prediction is "Versicolor" (99.8%), expected "Versicolor" - -Prediction is "Virginica" (97.9%), expected "Virginica" -``` - -If the program generates errors instead of predictions, ask yourself the -following questions: - -* Did you install TensorFlow properly? -* Are you using the correct version of TensorFlow? The `premade_estimators.py` - program requires at least TensorFlow v1.4. -* If you installed TensorFlow with virtualenv or Anaconda, did you activate - the environment? - - - -## The TensorFlow programming stack - -As the following illustration shows, TensorFlow -provides a programming stack consisting of multiple API layers: - -
- -
- -**The TensorFlow Programming Environment.** -

 

- -As you start writing TensorFlow programs, we strongly recommend focusing on -the following two high-level APIs: - -* Estimators -* Datasets - -Although we'll grab an occasional convenience function from other APIs, -this document focuses on the preceding two APIs. - - -## The program itself - -Thanks for your patience; let's dig into the code. -The general outline of `premade_estimator.py`--and many other TensorFlow -programs--is as follows: - -* Import and parse the data sets. -* Create feature columns to describe the data. -* Select the type of model -* Train the model. -* Evaluate the model's effectiveness. -* Let the trained model make predictions. - -The following subsections detail each part. - - -### Import and parse the data sets - -The Iris program requires the data from the following two .csv files: - -* `http://download.tensorflow.org/data/iris_training.csv`, which contains - the training set. -* `http://download.tensorflow.org/data/iris_test.csv`, which contains the - test set. - -The **training set** contains the examples that we'll use to train the model; -the **test set** contains the examples that we'll use to evaluate the trained -model's effectiveness. - -The training set and test set started out as a -single data set. Then, someone split the examples, with the majority going into -the training set and the remainder going into the test set. Adding -examples to the training set usually builds a better model; however, adding -more examples to the test set enables us to better gauge the model's -effectiveness. Regardless of the split, the examples in the test set -must be separate from the examples in the training set. Otherwise, you can't -accurately determine the model's effectiveness. - -The `premade_estimators.py` program relies on the `load_data` function -in the adjacent [`iris_data.py`]( -https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py) -file to read in and parse the training set and test set. 
-Here is a heavily commented version of the function: - -```python -TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv" -TEST_URL = "http://download.tensorflow.org/data/iris_test.csv" - -CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', - 'PetalLength', 'PetalWidth', 'Species'] - -... - -def load_data(label_name='Species'): - """Parses the csv file in TRAIN_URL and TEST_URL.""" - - # Create a local copy of the training set. - train_path = tf.keras.utils.get_file(fname=TRAIN_URL.split('/')[-1], - origin=TRAIN_URL) - # train_path now holds the pathname: ~/.keras/datasets/iris_training.csv - - # Parse the local CSV file. - train = pd.read_csv(filepath_or_buffer=train_path, - names=CSV_COLUMN_NAMES, # list of column names - header=0 # ignore the first row of the CSV file. - ) - # train now holds a pandas DataFrame, which is data structure - # analogous to a table. - - # 1. Assign the DataFrame's labels (the right-most column) to train_label. - # 2. Delete (pop) the labels from the DataFrame. - # 3. Assign the remainder of the DataFrame to train_features - train_features, train_label = train, train.pop(label_name) - - # Apply the preceding logic to the test set. - test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL) - test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0) - test_features, test_label = test, test.pop(label_name) - - # Return four DataFrames. - return (train_features, train_label), (test_features, test_label) -``` - -Keras is an open-sourced machine learning library; `tf.keras` is a TensorFlow -implementation of Keras. The `premade_estimator.py` program only accesses -one `tf.keras` function; namely, the `tf.keras.utils.get_file` convenience -function, which copies a remote CSV file to a local file system. - -The call to `load_data` returns two `(feature,label)` pairs, for the training -and test sets respectively: - -```python - # Call load_data() to parse the CSV file. 
- (train_feature, train_label), (test_feature, test_label) = load_data() -``` - -Pandas is an open-source Python library leveraged by several -TensorFlow functions. A pandas -[**DataFrame**](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html) -is a table with named columns headers and numbered rows. -The features returned by `load_data` are packed in `DataFrames`. -For example, the `test_feature` DataFrame looks as follows: - -```none - SepalLength SepalWidth PetalLength PetalWidth -0 5.9 3.0 4.2 1.5 -1 6.9 3.1 5.4 2.1 -2 5.1 3.3 1.7 0.5 -... -27 6.7 3.1 4.7 1.5 -28 6.7 3.3 5.7 2.5 -29 6.4 2.9 4.3 1.3 -``` - - -### Describe the data - -A **feature column** is a data structure that tells your model -how to interpret the data in each feature. In the Iris problem, -we want the model to interpret the data in each -feature as its literal floating-point value; that is, we want the -model to interpret an input value like 5.4 as, well, 5.4. However, -in other machine learning problems, it is often desirable to interpret -data less literally. Using feature columns to -interpret data is such a rich topic that we devote an entire -@{$feature_columns$document} to it. - -From a code perspective, you build a list of `feature_column` objects by calling -functions from the @{tf.feature_column} module. Each object describes an input -to the model. To tell the model to interpret data as a floating-point value, -call @{tf.feature_column.numeric_column}. In `premade_estimator.py`, all -four features should be interpreted as literal floating-point values, so -the code to create a feature column looks as follows: - -```python -# Create feature columns for all features. 
-my_feature_columns = [] -for key in train_x.keys(): - my_feature_columns.append(tf.feature_column.numeric_column(key=key)) -``` - -Here is a less elegant, but possibly clearer, alternative way to -encode the preceding block: - -```python -my_feature_columns = [ - tf.feature_column.numeric_column(key='SepalLength'), - tf.feature_column.numeric_column(key='SepalWidth'), - tf.feature_column.numeric_column(key='PetalLength'), - tf.feature_column.numeric_column(key='PetalWidth') -] -``` - - -### Select the type of model - -We need to select the kind of model that will be trained. -Lots of model types exist; picking the ideal type takes experience. -We've selected a neural network to solve the Iris problem. [**Neural -networks**](https://developers.google.com/machine-learning/glossary/#neural_network) -can find complex relationships between features and the label. -A neural network is a highly-structured graph, organized into one or more -[**hidden layers**](https://developers.google.com/machine-learning/glossary/#hidden_layer). -Each hidden layer consists of one or more -[**neurons**](https://developers.google.com/machine-learning/glossary/#neuron). -There are several categories of neural networks. -We'll be using a [**fully connected neural -network**](https://developers.google.com/machine-learning/glossary/#fully_connected_layer), -which means that the neurons in one layer take inputs from *every* neuron in -the previous layer. For example, the following figure illustrates a -fully connected neural network consisting of three hidden layers: - -* The first hidden layer contains four neurons. -* The second hidden layer contains three neurons. -* The third hidden layer contains two neurons. - -
- -
- -**A neural network with three hidden layers.** -

 

- -For a more detailed introduction to neural networks, see the -[Introduction to Neural Nets section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/introduction-to-neural-networks/anatomy). - -To specify a model type, instantiate an -[**Estimator**](https://developers.google.com/machine-learning/glossary/#Estimators) -class. TensorFlow provides two categories of Estimators: - -* [**pre-made - Estimators**](https://developers.google.com/machine-learning/glossary/#pre-made_Estimator), - which someone else has already written for you. -* [**custom - Estimators**](https://developers.google.com/machine-learning/glossary/#custom_estimator), - which you must code yourself, at least partially. - -To implement a neural network, the `premade_estimators.py` program uses -a pre-made Estimator named @{tf.estimator.DNNClassifier}. This Estimator -builds a neural network that classifies examples. The following call -instantiates `DNNClassifier`: - -```python - classifier = tf.estimator.DNNClassifier( - feature_columns=my_feature_columns, - hidden_units=[10, 10], - n_classes=3) -``` - -Use the `hidden_units` parameter to define the number of neurons -in each hidden layer of the neural network. Assign this parameter -a list. For example: - -```python - hidden_units=[10, 10], -``` - -The length of the list assigned to `hidden_units` identifies the number of -hidden layers (2, in this case). -Each value in the list represents the number of neurons in a particular -hidden layer (10 in the first hidden layer and 10 in the second hidden layer). -To change the number of hidden layers or neurons, simply assign a different -list to the `hidden_units` parameter. - -The ideal number of hidden layers and neurons depends on the problem -and the data set. Like many aspects of machine learning, -picking the ideal shape of the neural network requires some mixture -of knowledge and experimentation. 
-As a rule of thumb, increasing the number of hidden layers and neurons -*typically* creates a more powerful model, which requires more data to -train effectively. - -The `n_classes` parameter specifies the number of possible values that the -neural network can predict. Since the Iris problem classifies 3 Iris species, -we set `n_classes` to 3. - -The constructor for `tf.Estimator.DNNClassifier` takes an optional argument -named `optimizer`, which our sample code chose not to specify. The -[**optimizer**](https://developers.google.com/machine-learning/glossary/#optimizer) -controls how the model will train. As you develop more expertise in machine -learning, optimizers and -[**learning -rate**](https://developers.google.com/machine-learning/glossary/#learning_rate) -will become very important. - - - -### Train the model - -Instantiating a `tf.Estimator.DNNClassifier` creates a framework for learning -the model. Basically, we've wired a network but haven't yet let data flow -through it. To train the neural network, call the Estimator object's `train` -method. For example: - -```python - classifier.train( - input_fn=lambda:train_input_fn(train_feature, train_label, args.batch_size), - steps=args.train_steps) -``` - -The `steps` argument tells `train` to stop training after the specified -number of iterations. Increasing `steps` increases the amount of time -the model will train. Counter-intuitively, training a model longer -does not guarantee a better model. The default value of `args.train_steps` -is 1000. The number of steps to train is a -[**hyperparameter**](https://developers.google.com/machine-learning/glossary/#hyperparameter) -you can tune. Choosing the right number of steps usually -requires both experience and experimentation. - -The `input_fn` parameter identifies the function that supplies the -training data. The call to the `train` method indicates that the -`train_input_fn` function will supply the training data. 
Here's that -method's signature: - -```python -def train_input_fn(features, labels, batch_size): -``` - -We're passing the following arguments to `train_input_fn`: - -* `train_feature` is a Python dictionary in which: - * Each key is the name of a feature. - * Each value is an array containing the values for each example in the - training set. -* `train_label` is an array containing the values of the label for every - example in the training set. -* `args.batch_size` is an integer defining the [**batch - size**](https://developers.google.com/machine-learning/glossary/#batch_size). - -The `train_input_fn` function relies on the **Dataset API**. This is a -high-level TensorFlow API for reading data and transforming it into a form -that the `train` method requires. The following call converts the -input features and labels into a `tf.data.Dataset` object, which is the base -class of the Dataset API: - -```python - dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)) -``` - -The `tf.dataset` class provides many useful functions for preparing examples -for training. The following line calls three of those functions: - -```python - dataset = dataset.shuffle(buffer_size=1000).repeat(count=None).batch(batch_size) -``` - -Training works best if the training examples are in -random order. To randomize the examples, call -`tf.data.Dataset.shuffle`. Setting the `buffer_size` to a value -larger than the number of examples (120) ensures that the data will -be well shuffled. - -During training, the `train` method typically processes the -examples multiple times. Calling the -`tf.data.Dataset.repeat` method without any arguments ensures -that the `train` method has an infinite supply of (now shuffled) -training set examples. - -The `train` method processes a -[**batch**](https://developers.google.com/machine-learning/glossary/#batch) -of examples at a time. -The `tf.data.Dataset.batch` method creates a batch by -concatenating multiple examples. 
-This program sets the default [**batch -size**](https://developers.google.com/machine-learning/glossary/#batch_size) -to 100, meaning that the `batch` method will concatenate groups of -100 examples. The ideal batch size depends on the problem. As a rule -of thumb, smaller batch sizes usually enable the `train` method to train -the model faster at the expense (sometimes) of accuracy. - -The following `return` statement passes a batch of examples back to -the caller (the `train` method). - -```python - return dataset.make_one_shot_iterator().get_next() -``` - - -### Evaluate the model - -**Evaluating** means determining how effectively the model makes -predictions. To determine the Iris classification model's effectiveness, -pass some sepal and petal measurements to the model and ask the model -to predict what Iris species they represent. Then compare the model's -prediction against the actual label. For example, a model that picked -the correct species on half the input examples would have an -[accuracy](https://developers.google.com/machine-learning/glossary/#accuracy) -of 0.5. The following suggests a more effective model: - - - - - - - - - - - - - - - - - - - - - -
- Test Set
FeaturesLabelPrediction
5.9 3.0 4.3 1.5 11
6.9 3.1 5.4 2.1 22
5.1 3.3 1.7 0.5 00
6.0 3.4 4.5 1.6 12
5.5 2.5 4.0 1.3 11
- -**A model that is 80% accurate.** -

 

- -To evaluate a model's effectiveness, each Estimator provides an `evaluate` -method. The `premade_estimator.py` program calls `evaluate` as follows: - -```python -# Evaluate the model. -eval_result = classifier.evaluate( - input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size)) - -print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result)) -``` - -The call to `classifier.evaluate` is similar to the call to `classifier.train`. -The biggest difference is that `classifier.evaluate` must get its examples -from the test set rather than the training set. In other words, to -fairly assess a model's effectiveness, the examples used to -*evaluate* a model must be different from the examples used to *train* -the model. The `eval_input_fn` function serves a batch of examples from -the test set. Here's the `eval_input_fn` method: - -```python -def eval_input_fn(features, labels=None, batch_size=None): - """An input function for evaluation or prediction""" - if labels is None: - # No labels, use only features. - inputs = features - else: - inputs = (features, labels) - - # Convert inputs to a tf.dataset object. - dataset = tf.data.Dataset.from_tensor_slices(inputs) - - # Batch the examples - assert batch_size is not None, "batch_size must not be None" - dataset = dataset.batch(batch_size) - - # Return the read end of the pipeline. - return dataset.make_one_shot_iterator().get_next() -``` - -In brief, `eval_input_fn` does the following when called by -`classifier.evaluate`: - -1. Converts the features and labels from the test set to a `tf.dataset` - object. -2. Creates a batch of test set examples. (There's no need to shuffle - or repeat the test set examples.) -3. Returns that batch of test set examples to `classifier.evaluate`. 
- -Running this code yields the following output (or something close to it): - -```none -Test set accuracy: 0.967 -``` - -An accuracy of 0.967 implies that our trained model correctly classified 29 -out of the 30 Iris species in the test set. - -To get a deeper understanding of different metrics for evaluating -models, see the -[Classification section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/classification). - - -### Predicting - -We've now trained a model and "proven" that it is good--but not -perfect--at classifying Iris species. Now let's use the trained -model to make some predictions on [**unlabeled -examples**](https://developers.google.com/machine-learning/glossary/#unlabeled_example); -that is, on examples that contain features but not a label. - -In real-life, the unlabeled examples could come from lots of different -sources including apps, CSV files, and data feeds. For now, we're simply -going to manually provide the following three unlabeled examples: - -```python - predict_x = { - 'SepalLength': [5.1, 5.9, 6.9], - 'SepalWidth': [3.3, 3.0, 3.1], - 'PetalLength': [1.7, 4.2, 5.4], - 'PetalWidth': [0.5, 1.5, 2.1], - } -``` - -Every Estimator provides a `predict` method, which `premade_estimator.py` -calls as follows: - -```python -predictions = classifier.predict( - input_fn=lambda:eval_input_fn(predict_x, - labels=None, - batch_size=args.batch_size)) -``` - -As with the `evaluate` method, our `predict` method also gathers examples -from the `eval_input_fn` method. - -When doing predictions, we're *not* passing labels to `eval_input_fn`. -Therefore, `eval_input_fn` does the following: - -1. Converts the features from the 3-element manual set we just created. -2. Creates a batch of 3 examples from that manual set. -3. Returns that batch of examples to `classifier.predict`. - -The `predict` method returns a python iterable, yielding a dictionary of -prediction results for each example. 
This dictionary contains several keys. -The `probabilities` key holds a list of three floating-point values, -each representing the probability that the input example is a particular -Iris species. For example, consider the following `probabilities` list: - -```none -'probabilities': array([ 1.19127117e-08, 3.97069454e-02, 9.60292995e-01]) -``` - -The preceding list indicates: - -* A negligible chance of the Iris being Setosa. -* A 3.97% chance of the Iris being Versicolor. -* A 96.0% chance of the Iris being Virginica. - -The `class_ids` key holds a one-element array that identifies the most -probable species. For example: - -```none -'class_ids': array([2]) -``` - -The number `2` corresponds to Virginica. The following code iterates -through the returned `predictions` to report on each prediction: - -``` python -for pred_dict, expec in zip(predictions, expected): - template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') - - class_id = pred_dict['class_ids'][0] - probability = pred_dict['probabilities'][class_id] - print(template.format(iris_data.SPECIES[class_id], 100 * probability, expec)) -``` - -Running the program yields the following output: - - -``` None -... -Prediction is "Setosa" (99.6%), expected "Setosa" - -Prediction is "Versicolor" (99.8%), expected "Versicolor" - -Prediction is "Virginica" (97.9%), expected "Virginica" -``` - - -## Summary - -This document provides a short introduction to machine learning. - -Because `premade_estimators.py` relies on high-level APIs, much of the -mathematical complexity in machine learning is hidden. -If you intend to become more proficient in machine learning, we recommend -ultimately learning more about [**gradient -descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent), -batching, and neural networks. - -We recommend reading the @{$feature_columns$Feature Columns} document next, -which explains how to represent different kinds of data in machine learning. 
diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 746126c720..55579d52fb 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -15,26 +15,8 @@ The easiest way to get started with TensorFlow is using Eager Execution. * @{$get_started/eager}, is for anyone new to machine learning or TensorFlow. TensorFlow provides many APIs. The remainder of this section focuses on the -Estimator API which provide scalable, high-performance models. -To get started with Estimators begin by reading one of the following documents: - - * @{$get_started/get_started_for_beginners}, which is aimed at readers - new to machine learning. - * @{$get_started/premade_estimators}, which is aimed at readers who have - experience in machine learning. - -Then, read the following documents, which demonstrate the key features -in the high-level APIs: - - * @{$get_started/checkpoints}, which explains how to save training progress - and resume where you left off. - * @{$get_started/feature_columns}, which shows how an - Estimator can handle a variety of input data types without changes to the - model. - * @{$get_started/datasets_quickstart}, which introduces TensorFlow's - input pipelines. - * @{$get_started/custom_estimators}, which demonstrates how - to build and train models you design yourself. +Estimator API which provides scalable, high-performance models. See the +@{$estimators} guide.
For more advanced users: diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files index 4c12f0d84b..e6cc8d5658 100644 --- a/tensorflow/docs_src/get_started/leftnav_files +++ b/tensorflow/docs_src/get_started/leftnav_files @@ -1,15 +1,4 @@ index.md -### Beginners eager.md -get_started_for_beginners.md -premade_estimators.md - -### Estimators -get_started_for_beginners.md: For Beginners -premade_estimators.md: Premade Estimators ->>> -checkpoints.md -feature_columns.md datasets_quickstart.md -custom_estimators.md diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 90d9ea0288..0906b55008 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -403,10 +403,8 @@ writing TensorFlow programs: If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). -If you are new to machine learning, we recommend the following: - -* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) -* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} +If you are new to machine learning, we recommend the +[Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course). If you are experienced with machine learning but new to TensorFlow, see @{$get_started/eager}. diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index a139a49661..6c4f5b85ab 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -157,10 +157,8 @@ TensorFlow programs: If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). 
-If you are new to machine learning, we recommend the following: - -* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) -* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} +If you are new to machine learning, we recommend the +[Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course). If you are experienced with machine learning but new to TensorFlow, see @{$get_started/eager}. diff --git a/tensorflow/docs_src/get_started/checkpoints.md b/tensorflow/docs_src/programmers_guide/checkpoints.md similarity index 100% rename from tensorflow/docs_src/get_started/checkpoints.md rename to tensorflow/docs_src/programmers_guide/checkpoints.md diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/programmers_guide/custom_estimators.md similarity index 98% rename from tensorflow/docs_src/get_started/custom_estimators.md rename to tensorflow/docs_src/programmers_guide/custom_estimators.md index 275cda12bc..fb20b35c12 100644 --- a/tensorflow/docs_src/get_started/custom_estimators.md +++ b/tensorflow/docs_src/programmers_guide/custom_estimators.md @@ -5,7 +5,7 @@ This document introduces custom Estimators. In particular, this document demonstrates how to create a custom @{tf.estimator.Estimator$Estimator} that mimics the behavior of the pre-made Estimator @{tf.estimator.DNNClassifier$`DNNClassifier`} in solving the Iris problem. See -the @{$get_started/premade_estimators$Pre-Made Estimators chapter} for details +the @{$premade_estimators$Pre-Made Estimators chapter} for details on the Iris problem. To download and access the example code invoke the following two commands: @@ -84,7 +84,7 @@ and a logits output layer. 
## Write an Input function Our custom Estimator implementation uses the same input function as our -@{$get_started/premade_estimators$pre-made Estimator implementation}, from +@{$premade_estimators$pre-made Estimator implementation}, from [`iris_data.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py). Namely: @@ -106,8 +106,8 @@ This input function builds an input pipeline that yields batches of ## Create feature columns -As detailed in the @{$get_started/premade_estimators$Premade Estimators} and -@{$get_started/feature_columns$Feature Columns} chapters, you must define +As detailed in the @{$premade_estimators$Premade Estimators} and +@{$feature_columns$Feature Columns} chapters, you must define your model's feature columns to specify how the model should use each feature. Whether working with pre-made Estimators or custom Estimators, you define feature columns in the same fashion. @@ -145,7 +145,7 @@ to the constructor are in turn passed on to the `model_fn`. In [`custom_estimator.py`](https://github.com/tensorflow/models/blob/master/samples/core/get_started/custom_estimator.py) the following lines create the estimator and set the params to configure the model. This configuration step is similar to how we configured the @{tf.estimator.DNNClassifier} in -@{$get_started/premade_estimators}. +@{$premade_estimators}. ```python classifier = tf.estimator.Estimator( @@ -489,7 +489,7 @@ configure your Estimator without modifying the code in the `model_fn`. The rest of the code to train, evaluate, and generate predictions using our Estimator is the same as in the -@{$get_started/premade_estimators$Premade Estimators} chapter. For +@{$premade_estimators$Premade Estimators} chapter. 
For example, the following line will train the model: ```python diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index ffadf29ad7..c4aae1d9d6 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -134,7 +134,7 @@ The heart of every Estimator--whether pre-made or custom--is its evaluation, and prediction. When you are using a pre-made Estimator, someone else has already implemented the model function. When relying on a custom Estimator, you must write the model function yourself. A -@{$get_started/custom_estimators$companion document} +@{$custom_estimators$companion document} explains how to write the model function. diff --git a/tensorflow/docs_src/get_started/feature_columns.md b/tensorflow/docs_src/programmers_guide/feature_columns.md similarity index 99% rename from tensorflow/docs_src/get_started/feature_columns.md rename to tensorflow/docs_src/programmers_guide/feature_columns.md index 79c2667979..845194fe0e 100644 --- a/tensorflow/docs_src/get_started/feature_columns.md +++ b/tensorflow/docs_src/programmers_guide/feature_columns.md @@ -5,7 +5,7 @@ intermediaries between raw data and Estimators. Feature columns are very rich, enabling you to transform a diverse range of raw data into formats that Estimators can use, allowing easy experimentation. -In @{$get_started/premade_estimators$Premade Estimators}, we used the premade +In @{$premade_estimators$Premade Estimators}, we used the premade Estimator, @{tf.estimator.DNNClassifier$`DNNClassifier`} to train a model to predict different types of Iris flowers from four input features. 
That example created only numerical feature columns (of type diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md index 648d001bd3..9ebfd39c56 100644 --- a/tensorflow/docs_src/programmers_guide/index.md +++ b/tensorflow/docs_src/programmers_guide/index.md @@ -11,6 +11,23 @@ works. The units are as follows: * @{$programmers_guide/datasets}, which explains how to set up data pipelines to read data sets into your TensorFlow program. +## Estimators + +* @{$estimators} provides an introduction. +* @{$premade_estimators}, which introduces Estimators for machine learning. +* @{$custom_estimators}, which demonstrates how to build and train models you + design yourself. +* @{$feature_columns}, which shows how an Estimator can handle a variety of input + data types without changes to the model. +* @{$checkpoints}, which explains how to save training progress and resume where + you left off. + +## Accelerators + + * @{$using_gpu} explains how TensorFlow assigns operations to + devices and how you can change the arrangement manually. + * @{$using_tpu} explains how to modify `Estimator` programs to run on a TPU. + ## Low Level APIs * @{$programmers_guide/low_level_intro}, which introduces the @@ -32,13 +49,6 @@ works. The units are as follows: * @{$programmers_guide/saved_model}, which explains how to save and restore variables and models. -## Accelerators - - * @{$using_gpu} explains how TensorFlow assigns operations to - devices and how you can change the arrangement manually. - * @{$using_tpu} explains how to modify `Estimator` programs to run on a TPU.
- - ## ML Concepts * @{$programmers_guide/embedding}, which introduces the concept diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 7ac63bf2e0..331317446a 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -3,7 +3,17 @@ index.md ### High Level APIs eager.md datasets.md -estimators.md + +### Estimators +estimators.md: Introduction to Estimators +premade_estimators.md +custom_estimators.md +feature_columns.md +checkpoints.md + +### Accelerators +using_gpu.md +using_tpu.md ### Low Level APIs low_level_intro.md @@ -12,10 +22,6 @@ variables.md graphs.md saved_model.md -### Accelerators -using_gpu.md -using_tpu.md - ### ML Concepts embedding.md diff --git a/tensorflow/docs_src/programmers_guide/low_level_intro.md b/tensorflow/docs_src/programmers_guide/low_level_intro.md index 05709ad10a..478e2bb70b 100644 --- a/tensorflow/docs_src/programmers_guide/low_level_intro.md +++ b/tensorflow/docs_src/programmers_guide/low_level_intro.md @@ -9,7 +9,7 @@ This guide gets you started programming in the low-level TensorFlow APIs * Use high level components ([datasets](#datasets), [layers](#layers), and [feature_columns](#feature_columns)) in this low level environment. * Build your own training loop, instead of using the one - @{$get_started/premade_estimators$provided by Estimators}. + @{$premade_estimators$provided by Estimators}. We recommend using the higher level APIs to build models when possible. Knowing TensorFlow Core is valuable for the following reasons: @@ -398,7 +398,7 @@ and layer reuse impossible. The easiest way to experiment with feature columns is using the @{tf.feature_column.input_layer} function. 
This function only accepts -@{$get_started/feature_columns$dense columns} as inputs, so to view the result +@{$feature_columns$dense columns} as inputs, so to view the result of a categorical column you must wrap it in an @{tf.feature_column.indicator_column}. For example: @@ -589,7 +589,7 @@ print(sess.run(y_pred)) To learn more about building models with TensorFlow consider the following: -* @{$get_started/custom_estimators$Custom Estimators}, to learn how to build +* @{$custom_estimators$Custom Estimators}, to learn how to build customized models with TensorFlow. Your knowledge of TensorFlow Core will help you understand and debug your own models. diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/programmers_guide/premade_estimators.md similarity index 98% rename from tensorflow/docs_src/get_started/premade_estimators.md rename to tensorflow/docs_src/programmers_guide/premade_estimators.md index 4be7e508f9..e5eca44297 100644 --- a/tensorflow/docs_src/get_started/premade_estimators.md +++ b/tensorflow/docs_src/programmers_guide/premade_estimators.md @@ -289,7 +289,7 @@ for key in train_x.keys(): ``` Feature columns can be far more sophisticated than those we're showing here. We -detail feature columns @{$get_started/feature_columns$later on} in our Getting +detail feature columns @{$feature_columns$later on} in our Getting Started guide. Now that we have the description of how we want the model to represent the raw @@ -425,11 +425,10 @@ Pre-made Estimators are an effective way to quickly create standard models. Now that you've gotten started writing TensorFlow programs, consider the following material: -* @{$get_started/checkpoints$Checkpoints} to learn how to save and restore - models. +* @{$checkpoints$Checkpoints} to learn how to save and restore models. * @{$get_started/datasets_quickstart$Datasets} to learn more about importing data into your model. 
-* @{$get_started/custom_estimators$Creating Custom Estimators} to learn how to +* @{$custom_estimators$Creating Custom Estimators} to learn how to write your own Estimator, customized for a particular problem. diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index 5e3e49d434..44aabf0557 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -22,8 +22,8 @@ Standard `Estimators` can drive models on CPU and GPUs. You must use @{tf.contrib.tpu.TPUEstimator} to drive a model on TPUs. Refer to TensorFlow's Getting Started section for an introduction to the basics -of using a @{$get_started/premade_estimators$pre-made `Estimator`}, and -@{$get_started/custom_estimators$custom `Estimator`s}. +of using a @{$premade_estimators$pre-made `Estimator`}, and +@{$custom_estimators$custom `Estimator`s}. The `TPUEstimator` class differs somewhat from the `Estimator` class. diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index 73e5c51057..205e2a2d2c 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -53,7 +53,7 @@ In order to feed data to a `tf.contrib.learn Estimator`, it is helpful to conver it to Tensors. For this, we will use an `input function` which adds Ops to the TensorFlow graph that, when executed, create mini-batches of Tensors to be used downstream. For more background on input functions, check -@{$get_started/premade_estimators#create_input_functions$this section on input functions}. +@{$premade_estimators#create_input_functions$this section on input functions}. 
In this example, we will use the `tf.train.shuffle_batch` Op which, besides converting numpy arrays to Tensors, allows us to specify the batch_size and whether to randomize the input every time the input_fn Ops are executed diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 37cd2bb139..ead5a636b9 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -190,7 +190,7 @@ def cnn_model_fn(features, labels, mode): The following sections (with headings corresponding to each code block above) dive deeper into the `tf.layers` code used to create each layer, as well as how to calculate loss, configure the training op, and generate predictions. If -you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, +you're already experienced with CNNs and @{$custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#train_eval_mnist). @@ -535,8 +535,8 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} -> in the @{$get_started/custom_estimators$"Creating Estimations in tf.estimator"} tutorial. +> functions, see @{$custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} +> in the @{$custom_estimators$"Creating Estimators in tf.estimator"} tutorial. ### Add evaluation metrics @@ -601,7 +601,7 @@ be saved (here, we specify the temp directory `/tmp/mnist_convnet_model`, but feel free to change to another directory of your choice).
> Note: For an in-depth walkthrough of the TensorFlow `Estimator` API, see the -> tutorial @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} +> tutorial @{$custom_estimators$"Creating Estimators in tf.estimator."} ### Set Up a Logging Hook {#set_up_a_logging_hook} @@ -720,7 +720,7 @@ Here, we've achieved an accuracy of 97.3% on our test data set. To learn more about TensorFlow Estimators and CNNs in TensorFlow, see the following resources: -* @{$get_started/custom_estimators$Creating Estimators in tf.estimator} +* @{$custom_estimators$Creating Estimators in tf.estimator} provides an introduction to the TensorFlow Estimator API. It walks through configuring an Estimator, writing a model function, calculating loss, and defining a training op. diff --git a/tensorflow/docs_src/tutorials/linear.md b/tensorflow/docs_src/tutorials/linear.md index 265ded877d..3f247ade26 100644 --- a/tensorflow/docs_src/tutorials/linear.md +++ b/tensorflow/docs_src/tutorials/linear.md @@ -17,7 +17,7 @@ tutorial walks through the code in greater detail. To understand this overview it will help to have some familiarity with basic machine learning concepts, and also with -@{$get_started/premade_estimators$Estimators}. +@{$premade_estimators$Estimators}. [TOC] diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index 5d83fbe2a3..1afd861738 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -220,7 +220,7 @@ length 2. ### Defining the model To define the model we create a new `Estimator`. If you want to read more about -estimators, we recommend @{$get_started/custom_estimators$this tutorial}. +estimators, we recommend @{$custom_estimators$this tutorial}. 
To build the model, we: diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index ecb5659716..9b4b866697 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -302,7 +302,7 @@ class Estimator(object): Args: input_fn: A function that provides input data for training as minibatches. - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. The function should construct and return one of the following: @@ -398,7 +398,7 @@ class Estimator(object): Args: input_fn: A function that constructs the input data for evaluation. - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. The function should construct and return one of the following: @@ -477,7 +477,7 @@ class Estimator(object): input_fn: A function that constructs the features. Prediction continues until `input_fn` raises an end-of-input exception (`OutOfRangeError` or `StopIteration`). - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. The function should construct and return one of the following: diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 4f90bcf3a8..08fff3ba64 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -129,7 +129,7 @@ class TrainSpec( Args: input_fn: A function that provides input data for training as minibatches. - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. 
The function should construct and return one of the following: * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a @@ -193,7 +193,7 @@ class EvalSpec( Args: input_fn: A function that constructs the input data for evaluation. - See @{$get_started/premade_estimators#create_input_functions} for more + See @{$premade_estimators#create_input_functions} for more information. The function should construct and return one of the following: * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a -- GitLab From 42e50daa384183d2f64e0ab5ae3f9bed07128e07 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 20:03:20 -0700 Subject: [PATCH 079/902] Set the correct shape in transformed distribution. Also add distribution_util.maybe_get_static_event_ndims to be reused in bijector and transformed distribution classes. PiperOrigin-RevId: 197831651 --- .../bijectors/conditional_bijector_test.py | 2 +- .../python/ops/bijectors/chain.py | 30 +++++++-------- .../conditional_transformed_distribution.py | 13 +++---- .../distributions/bijector_test.py | 32 +++++++++++++++- .../python/ops/distributions/bijector_impl.py | 37 +++++++++++++++---- .../distributions/transformed_distribution.py | 25 +++++++------ tensorflow/python/ops/distributions/util.py | 1 + 7 files changed, 94 insertions(+), 46 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py index 8b279ebcd9..f8a52615b0 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/conditional_bijector_test.py @@ -59,7 +59,7 @@ class ConditionalBijectorTest(test.TestCase): for name in ["inverse_log_det_jacobian", "forward_log_det_jacobian"]: method = getattr(b, name) with self.assertRaisesRegexp(ValueError, name + 
".*b1.*b2"): - method(1., event_ndims=0., arg1="b1", arg2="b2") + method(1., event_ndims=0, arg1="b1", arg2="b2") if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py index b158a51bb0..16f959560c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/chain.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/chain.py @@ -234,7 +234,7 @@ class Chain(bijector.Bijector): if not self.bijectors: return ildj - event_ndims = self._maybe_get_event_ndims_statically( + event_ndims = self._maybe_get_static_event_ndims( self.inverse_min_event_ndims) if _use_static_shape(y, event_ndims): @@ -248,12 +248,15 @@ class Chain(bijector.Bijector): if _use_static_shape(y, event_ndims): event_shape = b.inverse_event_shape(event_shape) - event_ndims = self._maybe_get_event_ndims_statically( + event_ndims = self._maybe_get_static_event_ndims( event_shape.ndims) else: event_shape = b.inverse_event_shape_tensor(event_shape) - event_ndims = self._maybe_get_event_ndims_statically( - array_ops.size(event_shape)) + event_ndims = array_ops.size(event_shape) + event_ndims_ = self._maybe_get_static_event_ndims(event_ndims) + if event_ndims_ is not None: + event_ndims = event_ndims_ + y = b.inverse(y, **kwargs.get(b.name, {})) return ildj @@ -270,7 +273,7 @@ class Chain(bijector.Bijector): if not self.bijectors: return fldj - event_ndims = self._maybe_get_event_ndims_statically( + event_ndims = self._maybe_get_static_event_ndims( self.forward_min_event_ndims) if _use_static_shape(x, event_ndims): @@ -283,21 +286,14 @@ class Chain(bijector.Bijector): x, event_ndims=event_ndims, **kwargs.get(b.name, {})) if _use_static_shape(x, event_ndims): event_shape = b.forward_event_shape(event_shape) - event_ndims = self._maybe_get_event_ndims_statically(event_shape.ndims) + event_ndims = self._maybe_get_static_event_ndims(event_shape.ndims) else: event_shape = 
b.forward_event_shape_tensor(event_shape) - event_ndims = self._maybe_get_event_ndims_statically( - array_ops.size(event_shape)) + event_ndims = array_ops.size(event_shape) + event_ndims_ = self._maybe_get_static_event_ndims(event_ndims) + if event_ndims_ is not None: + event_ndims = event_ndims_ x = b.forward(x, **kwargs.get(b.name, {})) return fldj - - def _maybe_get_event_ndims_statically(self, event_ndims): - event_ndims_ = super(Chain, self)._maybe_get_event_ndims_statically( - event_ndims) - if event_ndims_ is None: - return event_ndims - return event_ndims_ - - diff --git a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py index 10b4536135..3598c8d23e 100644 --- a/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/conditional_transformed_distribution.py @@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import conditional_distribution from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import transformed_distribution @@ -106,7 +105,7 @@ class ConditionalTransformedDistribution( bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} x = self.bijector.inverse(y, **bijector_kwargs) - event_ndims = self._maybe_get_event_ndims_statically() + event_ndims = self._maybe_get_static_event_ndims() ildj = self.bijector.inverse_log_det_jacobian( y, event_ndims=event_ndims, **bijector_kwargs) if self.bijector._is_injective: # pylint: disable=protected-access @@ -131,7 +130,7 @@ class ConditionalTransformedDistribution( bijector_kwargs = bijector_kwargs or {} 
distribution_kwargs = distribution_kwargs or {} x = self.bijector.inverse(y, **bijector_kwargs) - event_ndims = self._maybe_get_event_ndims_statically() + event_ndims = self._maybe_get_static_event_ndims() ildj = self.bijector.inverse_log_det_jacobian( y, event_ndims=event_ndims, **bijector_kwargs) if self.bijector._is_injective: # pylint: disable=protected-access @@ -220,14 +219,14 @@ class ConditionalTransformedDistribution( inv_cdf = self.distribution.quantile(value, **distribution_kwargs) return self.bijector.forward(inv_cdf, **bijector_kwargs) - def _maybe_get_event_ndims_statically(self): + def _maybe_get_static_event_ndims(self): if self.event_shape.ndims is not None: return self.event_shape.ndims event_ndims = array_ops.size(self.event_shape_tensor()) - static_event_ndims = tensor_util.constant_value(event_ndims) + event_ndims_ = distribution_util.maybe_get_static_value(event_ndims) - if static_event_ndims is not None: - return static_event_ndims + if event_ndims_ is not None: + return event_ndims_ return event_ndims diff --git a/tensorflow/python/kernel_tests/distributions/bijector_test.py b/tensorflow/python/kernel_tests/distributions/bijector_test.py index a7fe336e6a..8b11556330 100644 --- a/tensorflow/python/kernel_tests/distributions/bijector_test.py +++ b/tensorflow/python/kernel_tests/distributions/bijector_test.py @@ -90,9 +90,10 @@ class IntentionallyMissingError(Exception): class BrokenBijector(bijector.Bijector): """Forward and inverse are not inverses of each other.""" - def __init__(self, forward_missing=False, inverse_missing=False): + def __init__( + self, forward_missing=False, inverse_missing=False, validate_args=False): super(BrokenBijector, self).__init__( - validate_args=False, forward_min_event_ndims=0, name="broken") + validate_args=validate_args, forward_min_event_ndims=0, name="broken") self._forward_missing = forward_missing self._inverse_missing = inverse_missing @@ -116,6 +117,33 @@ class BrokenBijector(bijector.Bijector): raise 
IntentionallyMissingError return math_ops.log(2.) +class BijectorTestEventNdims(test.TestCase): + + def testBijectorNonIntegerEventNdims(self): + bij = BrokenBijector() + with self.assertRaisesRegexp(ValueError, "Expected integer"): + bij.forward_log_det_jacobian(1., event_ndims=1.5) + with self.assertRaisesRegexp(ValueError, "Expected integer"): + bij.inverse_log_det_jacobian(1., event_ndims=1.5) + + def testBijectorArrayEventNdims(self): + bij = BrokenBijector() + with self.assertRaisesRegexp(ValueError, "Expected scalar"): + bij.forward_log_det_jacobian(1., event_ndims=(1, 2)) + with self.assertRaisesRegexp(ValueError, "Expected scalar"): + bij.inverse_log_det_jacobian(1., event_ndims=(1, 2)) + + def testBijectorDynamicEventNdims(self): + bij = BrokenBijector(validate_args=True) + event_ndims = array_ops.placeholder(dtype=np.int32, shape=None) + with self.test_session(): + with self.assertRaisesOpError("Expected scalar"): + bij.forward_log_det_jacobian(1., event_ndims=event_ndims).eval({ + event_ndims: (1, 2)}) + with self.assertRaisesOpError("Expected scalar"): + bij.inverse_log_det_jacobian(1., event_ndims=event_ndims).eval({ + event_ndims: (1, 2)}) + @six.add_metaclass(abc.ABCMeta) class BijectorCachingTestBase(object): diff --git a/tensorflow/python/ops/distributions/bijector_impl.py b/tensorflow/python/ops/distributions/bijector_impl.py index caceadf53a..969553bedd 100644 --- a/tensorflow/python/ops/distributions/bijector_impl.py +++ b/tensorflow/python/ops/distributions/bijector_impl.py @@ -1021,7 +1021,7 @@ class Bijector(object): axis=self._get_event_reduce_dims(min_event_ndims, event_ndims)) # The multiplication by ones can change the inferred static shape so we try # to recover as much as possible. 
- event_ndims_ = self._maybe_get_event_ndims_statically(event_ndims) + event_ndims_ = self._maybe_get_static_event_ndims(event_ndims) if (event_ndims_ is not None and y.shape.ndims is not None and ildj.shape.ndims is not None): @@ -1036,7 +1036,7 @@ class Bijector(object): def _get_event_reduce_dims(self, min_event_ndims, event_ndims): """Compute the reduction dimensions given event_ndims.""" - event_ndims_ = self._maybe_get_event_ndims_statically(event_ndims) + event_ndims_ = self._maybe_get_static_event_ndims(event_ndims) if event_ndims_ is not None: return [-index for index in range(1, event_ndims_ - min_event_ndims + 1)] @@ -1046,9 +1046,18 @@ class Bijector(object): def _check_valid_event_ndims(self, min_event_ndims, event_ndims): """Check whether event_ndims is atleast min_event_ndims.""" - event_ndims_ = self._maybe_get_event_ndims_statically(event_ndims) + event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") + event_ndims_ = tensor_util.constant_value(event_ndims) assertions = [] + + if not event_ndims.dtype.is_integer: + raise ValueError("Expected integer dtype, got dtype {}".format( + event_ndims.dtype)) + if event_ndims_ is not None: + if event_ndims.shape.ndims != 0: + raise ValueError("Expected scalar event_ndims, got shape {}".format( + event_ndims.shape)) if min_event_ndims > event_ndims_: raise ValueError("event_ndims ({}) must be larger than " "min_event_ndims ({})".format( @@ -1056,17 +1065,29 @@ class Bijector(object): elif self.validate_args: assertions += [ check_ops.assert_greater_equal(event_ndims, min_event_ndims)] + + if event_ndims.shape.is_fully_defined(): + if event_ndims.shape.ndims != 0: + raise ValueError("Expected scalar shape, got ndims {}".format( + event_ndims.shape.ndims)) + + elif self.validate_args: + assertions += [ + check_ops.assert_rank(event_ndims, 0, message="Expected scalar.")] return assertions - def _maybe_get_event_ndims_statically(self, event_ndims): + def _maybe_get_static_event_ndims(self, 
event_ndims): """Helper which returns tries to return an integer static value.""" event_ndims_ = distribution_util.maybe_get_static_value(event_ndims) - if isinstance(event_ndims_, np.ndarray): - if (event_ndims_.dtype not in (np.int32, np.int64) or - len(event_ndims_.shape)): + if isinstance(event_ndims_, (np.generic, np.ndarray)): + if event_ndims_.dtype not in (np.int32, np.int64): + raise ValueError("Expected integer dtype, got dtype {}".format( + event_ndims_.dtype)) + + if isinstance(event_ndims_, np.ndarray) and len(event_ndims_.shape): raise ValueError("Expected a scalar integer, got {}".format( event_ndims_)) - event_ndims_ = event_ndims_.tolist() + event_ndims_ = int(event_ndims_) return event_ndims_ diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py index 9392464ec1..c2674bd6e5 100644 --- a/tensorflow/python/ops/distributions/transformed_distribution.py +++ b/tensorflow/python/ops/distributions/transformed_distribution.py @@ -416,7 +416,7 @@ class TransformedDistribution(distribution_lib.Distribution): # For caching to work, it is imperative that the bijector is the first to # modify the input. 
x = self.bijector.inverse(y) - event_ndims = self._maybe_get_event_ndims_statically() + event_ndims = self._maybe_get_static_event_ndims() ildj = self.bijector.inverse_log_det_jacobian(y, event_ndims=event_ndims) if self.bijector._is_injective: # pylint: disable=protected-access @@ -435,13 +435,15 @@ class TransformedDistribution(distribution_lib.Distribution): log_prob = math_ops.reduce_sum(log_prob, self._reduce_event_indices) log_prob += math_ops.cast(ildj, log_prob.dtype) if self._is_maybe_event_override and isinstance(event_ndims, int): - log_prob.set_shape(array_ops.broadcast_static_shape( - x.get_shape().with_rank_at_least(1)[:-event_ndims], self.batch_shape)) + log_prob.set_shape( + array_ops.broadcast_static_shape( + y.get_shape().with_rank_at_least(1)[:-event_ndims], + self.batch_shape)) return log_prob def _prob(self, y): x = self.bijector.inverse(y) - event_ndims = self._maybe_get_event_ndims_statically() + event_ndims = self._maybe_get_static_event_ndims() ildj = self.bijector.inverse_log_det_jacobian(y, event_ndims=event_ndims) if self.bijector._is_injective: # pylint: disable=protected-access return self._finish_prob_for_one_fiber(y, x, ildj, event_ndims) @@ -459,8 +461,10 @@ class TransformedDistribution(distribution_lib.Distribution): prob = math_ops.reduce_prod(prob, self._reduce_event_indices) prob *= math_ops.exp(math_ops.cast(ildj, prob.dtype)) if self._is_maybe_event_override and isinstance(event_ndims, int): - prob.set_shape(array_ops.broadcast_static_shape( - y.get_shape().with_rank_at_least(1)[:-event_ndims], self.batch_shape)) + prob.set_shape( + array_ops.broadcast_static_shape( + y.get_shape().with_rank_at_least(1)[:-event_ndims], + self.batch_shape)) return prob def _log_cdf(self, y): @@ -618,15 +622,14 @@ class TransformedDistribution(distribution_lib.Distribution): return array_ops.transpose( x, _concat_vectors(math_ops.range(n, ndims), math_ops.range(0, n))) - def _maybe_get_event_ndims_statically(self): + def 
_maybe_get_static_event_ndims(self): if self.event_shape.ndims is not None: return self.event_shape.ndims event_ndims = array_ops.size(self.event_shape_tensor()) + event_ndims_ = distribution_util.maybe_get_static_value(event_ndims) - static_event_ndims = tensor_util.constant_value(event_ndims) - - if static_event_ndims is not None: - return static_event_ndims + if event_ndims_ is not None: + return event_ndims_ return event_ndims diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py index 59c89d21f9..728fda28c2 100644 --- a/tensorflow/python/ops/distributions/util.py +++ b/tensorflow/python/ops/distributions/util.py @@ -179,6 +179,7 @@ def maybe_get_static_value(x, dtype=None): if x is None: return x try: + # This returns an np.ndarray. x_ = tensor_util.constant_value(x) except TypeError: x_ = x -- GitLab From 8f863f3d71542c47390f2d40348b72296ed5c4be Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 20:39:31 -0700 Subject: [PATCH 080/902] Add support for is_recompute optional kwarg to functions decorated with recompute_grad PiperOrigin-RevId: 197834316 --- .../layers/python/layers/rev_block_lib.py | 21 +++++++++++-- .../python/layers/rev_block_lib_test.py | 30 +++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 8ed9f446bc..0e35b1aa8b 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -46,6 +46,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest +from tensorflow.python.util import tf_inspect __all__ = ["rev_block", "RevBlock", "recompute_grad"] @@ -449,6 +450,15 @@ def recompute_grad(fn, 
use_data_dep=_USE_DEFAULT, tupleize_grads=False): `variable_scope(name, use_resource=True), which are the default in Eager mode and when running on TPU. + Warning: Because the function will be called again on the backwards pass, the + user should be careful to not use ops in their function that mutate state or + have randomness (for example, batch normalization or dropout). If the function + does have such operations, it is recommended that the function take the + `is_recomputing` keyword argument which will be `False` on the forward pass + and `True` on the backwards pass so that it can disable state changes when + `is_recomputing=True` (for example, not updating the moving averages in batch + normalization). + Args: fn: a function that takes Tensors (all as positional arguments) and returns a tuple of Tensors. @@ -482,6 +492,7 @@ def _is_on_tpu(): def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" + has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError("All inputs to function must be Tensors") @@ -496,7 +507,10 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): vs = variable_scope.get_variable_scope() arg_scope = contrib_framework_ops.current_arg_scope() with backprop.GradientTape() as tape: - outputs = fn(*args) + fn_kwargs = {} + if has_is_recompute_kwarg: + fn_kwargs["is_recomputing"] = False + outputs = fn(*args, **fn_kwargs) original_vars = set(tape.watched_variables()) # Backward pass @@ -516,7 +530,10 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): with contrib_framework_ops.arg_scope(arg_scope): with variable_scope.variable_scope(vs, reuse=True): with backprop.GradientTape() as tape: - outputs = fn(*inputs) + fn_kwargs = {} + if has_is_recompute_kwarg: + fn_kwargs["is_recomputing"] = True + outputs = fn(*inputs, **fn_kwargs) 
recompute_vars = set(tape.watched_variables()) if original_vars != recompute_vars: raise ValueError(_WRONG_VARS_ERR) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index 997f53b9e1..bc09ba8d43 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -21,9 +21,11 @@ from __future__ import print_function from tensorflow.contrib.layers.python.layers import layers from tensorflow.contrib.layers.python.layers import rev_block_lib from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.layers import convolutional from tensorflow.python.layers import core as core_layers +from tensorflow.python.layers import normalization as normalization_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops @@ -342,6 +344,34 @@ class RecomputeTest(test.TestCase): for grad in grads: self.assertTrue(grad is not None) + def testWithIsRecomputeKwarg(self): + + kwarg_values = [] + + @rev_block_lib.recompute_grad + def layer_with_recompute(inputs, is_recomputing=False): + kwarg_values.append(is_recomputing) + out = core_layers.dense(inputs, 2) + out = normalization_layers.batch_normalization(out, training=True) + if is_recomputing: + # Ensure that the updates are not duplicated by popping off the latest + # 2 additions. 
+ update_ops = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS) + update_ops.pop() + update_ops.pop() + return out + + x = array_ops.ones((2, 4), dtypes.float32) + with variable_scope.variable_scope("layer1", use_resource=True): + y = layer_with_recompute(x) + loss = math_ops.reduce_sum(y) + tvars = variables.trainable_variables() + gradients_impl.gradients(loss, [x] + tvars) + + update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS) + self.assertEqual(2, len(update_ops)) + self.assertEqual([False, True], kwarg_values) + if __name__ == "__main__": test.main() -- GitLab From 81ef70a0bc22163d34f1e0425122d6a93bf02eac Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Wed, 23 May 2018 20:53:15 -0700 Subject: [PATCH 081/902] Resolve name collisions with assets in SavedModels by deduplicating names that point to distinct files. PiperOrigin-RevId: 197835288 --- tensorflow/python/lib/io/file_io.py | 58 +++++++ tensorflow/python/lib/io/file_io_test.py | 91 +++++++++++ tensorflow/python/saved_model/builder_impl.py | 81 ++++++++-- .../python/saved_model/saved_model_test.py | 147 +++++++++++++++++- 4 files changed, 360 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index 59f5075f17..f22fb253e4 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -21,6 +21,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import binascii import os import uuid @@ -33,6 +34,10 @@ from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export +# A good default block size depends on the system in question. +# A somewhat conservative default chosen here. +_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024 + class FileIO(object): """FileIO class that exposes methods to read / write to / from files. 
@@ -551,3 +556,56 @@ def stat(filename): with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status) return file_statistics + + +def filecmp(filename_a, filename_b): + """Compare two files, returning True if they are the same, False otherwise. + + We check size first and return False quickly if the files are different sizes. + If they are the same size, we continue to generating a crc for the whole file. + + You might wonder: why not use Python's filecmp.cmp() instead? The answer is + that the builtin library is not robust to the many different filesystems + TensorFlow runs on, and so we here perform a similar comparison with + the more robust FileIO. + + Args: + filename_a: string path to the first file. + filename_b: string path to the second file. + + Returns: + True if the files are the same, False otherwise. + """ + size_a = FileIO(filename_a, "rb").size() + size_b = FileIO(filename_b, "rb").size() + if size_a != size_b: + return False + + # Size is the same. Do a full check. + crc_a = file_crc32(filename_a) + crc_b = file_crc32(filename_b) + return crc_a == crc_b + + +def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE): + """Get the crc32 of the passed file. + + The crc32 of a file can be used for error checking; two files with the same + crc32 are considered equivalent. Note that the entire file must be read + to produce the crc32. + + Args: + filename: string, path to a file + block_size: Integer, process the files by reading blocks of `block_size` + bytes. Use -1 to read the file as once. + + Returns: + hexadecimal as string, the crc32 of the passed file. 
+ """ + crc = 0 + with FileIO(filename, mode="rb") as f: + chunk = f.read(n=block_size) + while chunk: + crc = binascii.crc32(chunk, crc) + chunk = f.read(n=block_size) + return hex(crc & 0xFFFFFFFF) diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index 223858edfa..c21eb93103 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -491,5 +491,96 @@ class FileIoTest(test.TestCase): v = file_io.file_exists(file_path) self.assertEqual(v, True) + def testFilecmp(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.write_string_to_file(file1, "This is a sentence\n" * 100) + + file2 = os.path.join(self._base_dir, "file2") + file_io.write_string_to_file(file2, "This is another sentence\n" * 100) + + file3 = os.path.join(self._base_dir, "file3") + file_io.write_string_to_file(file3, u"This is another sentence\n" * 100) + + self.assertFalse(file_io.filecmp(file1, file2)) + self.assertTrue(file_io.filecmp(file2, file3)) + + def testFilecmpSameSize(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.write_string_to_file(file1, "This is a sentence\n" * 100) + + file2 = os.path.join(self._base_dir, "file2") + file_io.write_string_to_file(file2, "This is b sentence\n" * 100) + + file3 = os.path.join(self._base_dir, "file3") + file_io.write_string_to_file(file3, u"This is b sentence\n" * 100) + + self.assertFalse(file_io.filecmp(file1, file2)) + self.assertTrue(file_io.filecmp(file2, file3)) + + def testFilecmpBinary(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.FileIO(file1, "wb").write("testing\n\na") + + file2 = os.path.join(self._base_dir, "file2") + file_io.FileIO(file2, "wb").write("testing\n\nb") + + file3 = os.path.join(self._base_dir, "file3") + file_io.FileIO(file3, "wb").write("testing\n\nb") + + file4 = os.path.join(self._base_dir, "file4") + file_io.FileIO(file4, "wb").write("testing\n\ntesting") + + 
self.assertFalse(file_io.filecmp(file1, file2)) + self.assertFalse(file_io.filecmp(file1, file4)) + self.assertTrue(file_io.filecmp(file2, file3)) + + def testFileCrc32(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.write_string_to_file(file1, "This is a sentence\n" * 100) + crc1 = file_io.file_crc32(file1) + + file2 = os.path.join(self._base_dir, "file2") + file_io.write_string_to_file(file2, "This is another sentence\n" * 100) + crc2 = file_io.file_crc32(file2) + + file3 = os.path.join(self._base_dir, "file3") + file_io.write_string_to_file(file3, "This is another sentence\n" * 100) + crc3 = file_io.file_crc32(file3) + + self.assertTrue(crc1 != crc2) + self.assertEqual(crc2, crc3) + + def testFileCrc32WithBytes(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.write_string_to_file(file1, "This is a sentence\n" * 100) + crc1 = file_io.file_crc32(file1, block_size=24) + + file2 = os.path.join(self._base_dir, "file2") + file_io.write_string_to_file(file2, "This is another sentence\n" * 100) + crc2 = file_io.file_crc32(file2, block_size=24) + + file3 = os.path.join(self._base_dir, "file3") + file_io.write_string_to_file(file3, "This is another sentence\n" * 100) + crc3 = file_io.file_crc32(file3, block_size=-1) + + self.assertTrue(crc1 != crc2) + self.assertEqual(crc2, crc3) + + def testFileCrc32Binary(self): + file1 = os.path.join(self._base_dir, "file1") + file_io.FileIO(file1, "wb").write("testing\n\n") + crc1 = file_io.file_crc32(file1) + + file2 = os.path.join(self._base_dir, "file2") + file_io.FileIO(file2, "wb").write("testing\n\n\n") + crc2 = file_io.file_crc32(file2) + + file3 = os.path.join(self._base_dir, "file3") + file_io.FileIO(file3, "wb").write("testing\n\n\n") + crc3 = file_io.file_crc32(file3) + + self.assertTrue(crc1 != crc2) + self.assertEqual(crc2, crc3) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 
071033b066..4b3982677f 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -104,10 +104,10 @@ class SavedModelBuilder(object): Args: assets_collection_to_add: The collection where the asset paths are setup. """ - asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add) + asset_filename_map = _maybe_save_assets(assets_collection_to_add) # Return if there are no assets to write. - if len(asset_source_filepath_list) is 0: + if not asset_filename_map: tf_logging.info("No assets to write.") return @@ -119,12 +119,10 @@ class SavedModelBuilder(object): file_io.recursive_create_dir(assets_destination_dir) # Copy each asset from source path to destination path. - for asset_source_filepath in asset_source_filepath_list: - asset_source_filename = os.path.basename(asset_source_filepath) - + for asset_basename, asset_source_filepath in asset_filename_map.items(): asset_destination_filepath = os.path.join( compat.as_bytes(assets_destination_dir), - compat.as_bytes(asset_source_filename)) + compat.as_bytes(asset_basename)) # Only copy the asset file to the destination if it does not already # exist. This is to ensure that an asset with the same name defined as @@ -475,16 +473,17 @@ def _maybe_save_assets(assets_collection_to_add=None): assets_collection_to_add: The collection where the asset paths are setup. Returns: - The list of filepaths to the assets in the assets collection. + A dict of asset basenames for saving to the original full path to the asset. Raises: ValueError: Indicating an invalid filepath tensor. 
""" - asset_source_filepath_list = [] + # Map of target file names to original filenames + asset_filename_map = {} if assets_collection_to_add is None: tf_logging.info("No assets to save.") - return asset_source_filepath_list + return asset_filename_map # Iterate over the supplied asset collection, build the `AssetFile` proto # and add them to the collection with key `constants.ASSETS_KEY`, in the @@ -494,15 +493,71 @@ def _maybe_save_assets(assets_collection_to_add=None): if not asset_source_filepath: raise ValueError("Invalid asset filepath tensor %s" % asset_tensor) - asset_source_filename = os.path.basename(asset_source_filepath) + asset_filename = _get_asset_filename_to_add( + asset_source_filepath, asset_filename_map) # Build `AssetFile` proto and add it to the asset collection in the graph. - _add_asset_to_collection(asset_source_filename, asset_tensor) + # Note that this should be done even when the file is a duplicate of an + # already-added file, as the tensor reference should still exist. + _add_asset_to_collection(asset_filename, asset_tensor) - asset_source_filepath_list.append(asset_source_filepath) + # In the cases where we are adding a duplicate, this will result in the + # last of the filepaths being the one used for copying the file to the + # SavedModel. Since the files in question are the same, it doesn't matter + # either way. + asset_filename_map[asset_filename] = asset_source_filepath tf_logging.info("Assets added to graph.") - return asset_source_filepath_list + return asset_filename_map + + +def _get_asset_filename_to_add(asset_filepath, asset_filename_map): + """Get a unique basename to add to the SavedModel if this file is unseen. + + Assets come from users as full paths, and we save them out to the + SavedModel as basenames. In some cases, the basenames collide. 
Here, + we dedupe asset basenames by first checking if the file is the same, + and, if different, generate and return an index-suffixed basename + that can be used to add the asset to the SavedModel. + + Args: + asset_filepath: the full path to the asset that is being saved + asset_filename_map: a dict of filenames used for saving the asset in + the SavedModel to full paths from which the filenames were derived. + + Returns: + Uniquified filename string if the file is not a duplicate, or the original + filename if the file has already been seen and saved. + """ + asset_filename = os.path.basename(asset_filepath) + + if asset_filename not in asset_filename_map: + # This is an unseen asset. Safe to add. + return asset_filename + + other_asset_filepath = asset_filename_map[asset_filename] + if other_asset_filepath == asset_filepath: + # This is the same file, stored twice in the collection list. No need + # to make unique. + return asset_filename + + # Else, asset_filename is in the map, and the filepath is different. Dedupe. + if not file_io.filecmp(asset_filepath, other_asset_filepath): + # Files are different; dedupe filenames. + return _get_unique_asset_filename(asset_filename, asset_filename_map) + + # Files are the same; don't make unique. 
+ return asset_filename + + +def _get_unique_asset_filename(asset_filename, asset_filename_map): + i = 1 + unique_filename = asset_filename + while unique_filename in asset_filename_map: + unique_filename = compat.as_bytes("_").join( + [compat.as_bytes(asset_filename), compat.as_bytes(str(i))]) + i += 1 + return unique_filename def _asset_path_from_tensor(path_tensor): diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index 1b83d60df9..7302c77ad5 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -64,9 +64,12 @@ class SavedModelTest(test.TestCase): self.assertEqual(variable_value, v.eval()) def _build_asset_collection(self, asset_file_name, asset_file_contents, - asset_file_tensor_name): + asset_file_tensor_name, asset_subdir=""): + parent_dir = os.path.join( + compat.as_bytes(test.get_temp_dir()), compat.as_bytes(asset_subdir)) + file_io.recursive_create_dir(parent_dir) asset_filepath = os.path.join( - compat.as_bytes(test.get_temp_dir()), compat.as_bytes(asset_file_name)) + compat.as_bytes(parent_dir), compat.as_bytes(asset_file_name)) file_io.write_string_to_file(asset_filepath, asset_file_contents) asset_file_tensor = constant_op.constant( asset_filepath, name=asset_file_tensor_name) @@ -77,10 +80,11 @@ class SavedModelTest(test.TestCase): def _validate_asset_collection(self, export_dir, graph_collection_def, expected_asset_file_name, expected_asset_file_contents, - expected_asset_tensor_name): + expected_asset_tensor_name, + asset_id=0): assets_any = graph_collection_def[constants.ASSETS_KEY].any_list.value asset = meta_graph_pb2.AssetFileDef() - assets_any[0].Unpack(asset) + assets_any[asset_id].Unpack(asset) assets_path = os.path.join( compat.as_bytes(export_dir), compat.as_bytes(constants.ASSETS_DIRECTORY), @@ -634,6 +638,141 @@ class SavedModelTest(test.TestCase): compat.as_bytes("ignored.txt")) 
self.assertFalse(file_io.file_exists(ignored_asset_path)) + def testAssetsNameCollisionDiffFile(self): + export_dir = self._get_export_dir("test_assets_name_collision_diff_file") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar bak", "asset_file_tensor", + asset_subdir="1") + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor_1", + asset_subdir="2") + + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + # Save the SavedModel to disk. + builder.save() + + with self.test_session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar bak", + "asset_file_tensor:0") + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt_1", "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) + + def testAssetsNameCollisionSameFilepath(self): + export_dir = self._get_export_dir("test_assets_name_collision_same_path") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor") + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor_1") + + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + # Save the SavedModel to disk. 
+ builder.save() + + with self.test_session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + # The second tensor should be recorded, but the same. + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("hello42.txt_1")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) + + def testAssetsNameCollisionSameFile(self): + export_dir = self._get_export_dir("test_assets_name_collision_same_file") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor", + asset_subdir="1") + + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor_1", + asset_subdir="2") + + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + # Save the SavedModel to disk. + builder.save() + + with self.test_session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + # The second tensor should be recorded, but the same. 
+ self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("hello42.txt_1")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) + + def testAssetsNameCollisionManyFiles(self): + export_dir = self._get_export_dir("test_assets_name_collision_many_files") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + for i in range(5): + idx = str(i) + asset_collection = self._build_asset_collection( + "hello42.txt", "foo bar baz " + idx, "asset_file_tensor_" + idx, + asset_subdir=idx) + + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + # Save the SavedModel to disk. + builder.save() + + with self.test_session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + for i in range(1, 5): + idx = str(i) + self._validate_asset_collection( + export_dir, foo_graph.collection_def, "hello42.txt_" + idx, + "foo bar baz " + idx, "asset_file_tensor_{}:0".format(idx), + asset_id=i) + + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz 0", + "asset_file_tensor_0:0") + def testCustomMainOp(self): export_dir = self._get_export_dir("test_main_op") builder = saved_model_builder.SavedModelBuilder(export_dir) -- GitLab From 9f332ea94b21aff6a73089db4d8e147748fb6ff6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 23 May 2018 22:33:53 -0700 Subject: [PATCH 082/902] Add unit tests to tflite kernels PiperOrigin-RevId: 197842122 --- .../contrib/lite/kernels/internal/BUILD | 86 +++++ .../internal/depthwiseconv_float_test.cc | 162 +++++++++ .../internal/depthwiseconv_quantized_test.cc | 330 +++++++++++++++++ .../kernels/internal/log_quantized_test.cc | 333 ++++++++++++++++++ .../internal/logsoftmax_quantized_test.cc | 241 +++++++++++++ .../internal/resize_bilinear_float_test.cc | 102 ++++++ .../internal/softmax_quantized_test.cc | 227 ++++++++++++ .../lite/kernels/internal/test_util.cc | 121 +++++++ .../contrib/lite/kernels/internal/test_util.h | 104 ++++++ .../contrib/lite/kernels/internal/types.h | 1 + 10 files changed, 1707 insertions(+) create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index aabbb0685c..0a5223b235 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -420,6 +420,15 @@ cc_library( }), ) +cc_library( + name = "test_util", + srcs = ["test_util.cc"], + hdrs = ["test_util.h"], + deps = [ + ":types", + ], +) + cc_test( name = "tensor_utils_test", srcs = ["tensor_utils_test.cc"], @@ -440,6 +449,83 @@ cc_test( ], ) +cc_test( + name = 
"depthwiseconv_float_test", + srcs = ["depthwiseconv_float_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "depthwiseconv_quantized_test", + srcs = ["depthwiseconv_quantized_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "resize_bilinear_float_test", + srcs = ["resize_bilinear_float_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "softmax_quantized_test", + timeout = "long", + srcs = [ + "softmax_quantized_test.cc", + ], + deps = [ + ":optimized_base", + ":quantization_util", + ":reference_base", + ":test_util", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "logsoftmax_quantized_test", + timeout = "long", + srcs = [ + "logsoftmax_quantized_test.cc", + ], + tags = ["tflite_not_portable"], + deps = [ + ":optimized_base", + ":quantization_util", + ":reference_base", + ":test_util", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "log_quantized_test", + srcs = ["log_quantized_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cpu_check", hdrs = [ diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc new file mode 100644 index 0000000000..844ee6a53d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc @@ -0,0 +1,162 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" + +namespace tflite { +namespace { + +// Runs the DepthwiseConv and compares against the reference implementation. +template +void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride, int pad_width, int pad_height, + int depth_multiplier, const Dims<4>& output_dims) { + const int output_buffer_size = RequiredBufferSizeForDims(output_dims); + std::vector output_data(output_buffer_size); + std::vector reference_output_data(output_buffer_size); + reference_ops::DepthwiseConv(input_data, input_dims, filter_data, + filter_dims, bias_data, bias_dims, stride, + pad_width, pad_height, depth_multiplier, + reference_output_data.data(), output_dims); + optimized_ops::DepthwiseConv(input_data, input_dims, filter_data, + filter_dims, bias_data, bias_dims, stride, + pad_width, pad_height, depth_multiplier, + output_data.data(), output_dims); + double sum_abs_diff = 0; + float max_abs_val = 0; + for (int i = 0; i < output_buffer_size; i++) { + sum_abs_diff += std::abs(output_data[i] 
- reference_output_data[i]); + max_abs_val = std::max(max_abs_val, std::abs(reference_output_data[i])); + } + if (sum_abs_diff != 0.f) { + const float mean_diff = + static_cast(sum_abs_diff / output_buffer_size); + const float relative_error = std::abs(mean_diff) / max_abs_val; + ASSERT_LT(relative_error, 1e-5f); + } +} + +void TestOneDepthwiseConv(FusedActivationFunctionType Ac, + const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride, int pad_width, int pad_height, + int depth_multiplier, const Dims<4>& output_dims) { +#define TOCO_HANDLE_CASE(AC_TYPE) \ + if (AC_TYPE == Ac) { \ + TestOneDepthwiseConv(input_data, input_dims, filter_data, \ + filter_dims, bias_data, bias_dims, stride, \ + pad_width, pad_height, depth_multiplier, \ + output_dims); \ + return; \ + } + TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6) +#undef TOCO_HANDLE_CASE +} + +// This function picks some random DepthwiseConv params, which may or may not +// be legal. If they're not legal, it returns false. If they're legal, +// it runs the DepthwiseConv test and returns true. This allows the caller +// to loop until a test has been run. +bool TryTestOneDepthwiseConv() { + // We have to pick a lot of positive values, where we are particularly + // interested in small values because they are most likely to be special + // cases in optimized implementations, and secondarily because they allow + // tests to run fast, which means we can run more tests and get more + // coverage. 
+ const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const int output_depth = input_depth * depth_multiplier; + // The optimized DepthwiseConv implementation currently uses a fixed-size + // accumulator buffer on the stack, with that size. This currently means + // that it does not support larger output depths. It CHECK's for it, + // so it's safe in the sense that if a larger output depth was encountered, + // it would explicitly fail. We just need to adjust our testing to that + // constraint. + const int kMaxSupportedOutputDepth = 1024; + if (output_depth > kMaxSupportedOutputDepth) { + return false; + } + const auto ac = RandomElement(std::vector( + {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu, + FusedActivationFunctionType::kRelu6, + FusedActivationFunctionType::kRelu1})); + Dims<4> input_dims_inference = + MakeDimsForInference(input_depth, input_width, input_height, batch); + Dims<4> output_dims_inference; + int pad_width, pad_height; + const auto padding_type = + UniformRandomInt(0, 1) ? 
PaddingType::kSame : PaddingType::kValid; + if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, + filter_height, stride, padding_type, + &output_dims_inference, &pad_width, &pad_height)) { + return false; + } + Dims<4> filter_dims_inference = + MakeDimsForInference(output_depth, filter_width, filter_height, 1); + Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1); + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int filter_buffer_size = + RequiredBufferSizeForDims(filter_dims_inference); + std::vector input_data(input_buffer_size); + std::vector filter_data(filter_buffer_size); + std::vector bias_data(output_depth); + const float input_amplitude = 1.f; + const float filter_amplitude = 1.f; + const float bias_amplitude = + filter_width * filter_height * input_amplitude * filter_amplitude; + FillRandom(&input_data, -input_amplitude, input_amplitude); + FillRandom(&filter_data, -filter_amplitude, filter_amplitude); + FillRandom(&bias_data, -bias_amplitude, bias_amplitude); + TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference, + filter_data.data(), filter_dims_inference, + bias_data.data(), bias_dims_inference, stride, pad_width, + pad_height, depth_multiplier, output_dims_inference); + return true; +} + +void TestOneDepthwiseConv() { + while (!TryTestOneDepthwiseConv()) { + } +} + +TEST(TestDepthwiseConv, TestDepthwiseConv) { + const int kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv(); + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc new file mode 100644 index 0000000000..2c0fc8433e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -0,0 +1,330 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" + +namespace tflite { +namespace { + +// Runs the DepthwiseConv and compares against the reference implementation. 
+template +int TestOneDepthwiseConvWithGivenOutputShift( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + int output_shift, std::int32_t output_activation_min, + std::int32_t output_activation_max, const Dims<4>& output_dims) { + const int output_buffer_size = RequiredBufferSizeForDims(output_dims); + std::vector output_data(output_buffer_size); + std::vector reference_output_data(output_buffer_size); + reference_ops::DepthwiseConv( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, + reference_output_data.data(), output_dims); + optimized_ops::DepthwiseConv( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data.data(), + output_dims); + int saturated_min = 0; + int saturated_max = 0; + std::vector diff(output_buffer_size); + std::int64_t sum_diff = 0; + std::int64_t sum_abs_diff = 0; + for (int i = 0; i < output_buffer_size; i++) { + diff[i] = static_cast(output_data[i]) - + static_cast(reference_output_data[i]); + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + saturated_min += output_data[i] == output_activation_min; + saturated_max += output_data[i] == output_activation_max; + } + // These stats help understand test failures. 
+ std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / output_buffer_size; + const float mean_abs_diff = + static_cast(sum_abs_diff) / output_buffer_size; + // Normally we should require bit-for-bit exact results. Unfortunately a bug + // in the Intel arm_neon_sse.h translation header that we use for x86 tests + // causes 1-bit inaccuracy in + // the vqrdmulh_n_s32 intrinsic, which causes off-by-1 errors in quantized + // DepthwiseConv ops. So we have to live with a few off-by-one errors for now, + // yet still ensure that no more than a small minority of values are wrong. + EXPECT_TRUE(std::abs(mean_diff) < 1e-5f && mean_abs_diff < 1e-5f && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1); + if (saturated_min > 2 * saturated_max) { + return -1; + } + if (saturated_max > 2 * saturated_min) { + return 1; + } + return 0; +} + +// The point of this function is that we can't practically know which +// output_shift value to pass to test DepthwiseConv. It's not easy to guess (we +// could do some +// statistics for large size, but they would be fragile at smaller sizes), and +// guessing wrong would mean that all the values get saturated so the test +// becomes +// vacuous. So we just bisect our way to reasonable output_shift values. 
+template +void TestOneDepthwiseConvBisectOutputShift( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + int output_activation_bisect_start, int output_activation_bisect_end, + std::int32_t output_activation_min, std::int32_t output_activation_max, + const Dims<4>& output_dims) { + ASSERT_LT(output_activation_bisect_start, output_activation_bisect_end) + << "Bisection failed ?!?!"; + int output_shift_bisect_midpoint = + (output_activation_bisect_start + output_activation_bisect_end) / 2; + int bisect_result = TestOneDepthwiseConvWithGivenOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, + output_shift_bisect_midpoint, output_activation_min, + output_activation_max, output_dims); + // At this point we know that the test succeeded (otherwise it would have + // aborted). + if (bisect_result == 0) { + // The result isn't particularly saturated on one or the other side. + // All good, we're done. + return; + } + if (output_activation_bisect_start == output_activation_bisect_end - 1) { + // There is still some saturation on one side, but the bisection is + // finished anyways. We're done; nothing more we can do about it. This + // happens + // in particular when using an activation with a narrow range. + return; + } + // Continue the bisection based on the present result. + int new_output_activation_bisect_start = bisect_result == 1 + ? output_shift_bisect_midpoint + : output_activation_bisect_start; + int new_output_activation_bisect_end = bisect_result == 1 + ? 
output_activation_bisect_end + : output_shift_bisect_midpoint; + TestOneDepthwiseConvBisectOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, + new_output_activation_bisect_start, new_output_activation_bisect_end, + output_activation_min, output_activation_max, output_dims); +} + +template +void TestOneDepthwiseConv( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + std::int32_t output_activation_min, std::int32_t output_activation_max, + const Dims<4>& output_dims) { + TestOneDepthwiseConvBisectOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, 0, 32, + output_activation_min, output_activation_max, output_dims); +} + +void TestOneDepthwiseConv( + FusedActivationFunctionType Ac, const std::uint8_t* input_data, + const Dims<4>& input_dims, std::int32_t input_offset, + const std::uint8_t* filter_data, const Dims<4>& filter_dims, + std::int32_t filter_offset, const std::int32_t* bias_data, + const Dims<4>& bias_dims, int stride, int pad_width, int pad_height, + int depth_multiplier, std::int32_t output_offset, + std::int32_t output_multiplier, std::int32_t output_activation_min, + std::int32_t output_activation_max, const Dims<4>& output_dims) { +#define TOCO_HANDLE_CASE(AC_TYPE) \ + if (AC_TYPE == Ac) { \ + TestOneDepthwiseConv( \ + input_data, input_dims, input_offset, filter_data, filter_dims, \ + filter_offset, bias_data, bias_dims, stride, pad_width, 
pad_height, \ + depth_multiplier, output_offset, output_multiplier, \ + output_activation_min, output_activation_max, output_dims); \ + return; \ + } + TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6) +#undef TOCO_HANDLE_CASE +} + +bool TryTestDepthwiseConv(int batch, int input_depth, int input_width, + int input_height, int filter_width, int filter_height, + int depth_multiplier, int stride, + PaddingType padding_type) { + const int output_depth = input_depth * depth_multiplier; + // The optimized DepthwiseConv implementation currently uses a fixed-size + // accumulator buffer on the stack, with that size. This currently means + // that it does not support larger output depths. It CHECK's for it, + // so it's safe in the sense that if a larger output depth was encountered, + // it would explicitly fail. We just need to adjust our testing to that + // constraint. 
+ const int kMaxSupportedOutputDepth = 1024; + if (output_depth > kMaxSupportedOutputDepth) { + return false; + } + const auto ac = RandomElement(std::vector( + {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu, + FusedActivationFunctionType::kRelu6, + FusedActivationFunctionType::kRelu1})); + int output_activation_min = 0; + int output_activation_max = 255; + if (ac != FusedActivationFunctionType::kNone && UniformRandomInt(0, 1)) { + output_activation_min = UniformRandomInt(0, 50); + output_activation_max = UniformRandomInt(200, 255); + } + const std::int32_t output_multiplier = + UniformRandomInt(1 << 29, std::numeric_limits::max()); + const std::int32_t input_offset = UniformRandomInt(-256, 0); + const std::int32_t filter_offset = UniformRandomInt(-256, 0); + const std::int32_t output_offset = UniformRandomInt(-256, 0); + Dims<4> input_dims_inference = + MakeDimsForInference(input_depth, input_width, input_height, batch); + Dims<4> output_dims_inference; + int pad_width, pad_height; + if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, + filter_height, stride, padding_type, + &output_dims_inference, &pad_width, &pad_height)) { + return false; + } + Dims<4> filter_dims_inference = + MakeDimsForInference(output_depth, filter_width, filter_height, 1); + Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1); + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int filter_buffer_size = + RequiredBufferSizeForDims(filter_dims_inference); + std::vector input_data(input_buffer_size); + std::vector filter_data(filter_buffer_size); + std::vector bias_data(output_depth); + FillRandom(&input_data); + FillRandom(&filter_data); + FillRandom(&bias_data, -10000, 10000); + TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference, + input_offset, filter_data.data(), filter_dims_inference, + filter_offset, bias_data.data(), bias_dims_inference, + stride, pad_width, pad_height, 
depth_multiplier, + output_offset, output_multiplier, output_activation_min, + output_activation_max, output_dims_inference); + return true; +} + +// This function picks some random DepthwiseConv params, which may or may not +// be legal. If they're not legal, it returns false. If they're legal, +// it runs the DepthwiseConv test and returns true. This allows the caller +// to loop until a test has been run. +bool TryTestOneDepthwiseConv() { + // We have to pick a lot of positive values, where we are particularly + // interested in small values because they are most likely to be special + // cases in optimized implementations, and secondarily because they allow + // tests to run fast, which means we can run more tests and get more + // coverage. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const auto padding_type = + UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; + + return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, + filter_width, filter_height, depth_multiplier, + stride, padding_type); +} + +// Tests parameters for the 3x3 filter kernel. 
+bool TryTestOneDepthwiseConv3x3Filter() { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = 8 * ExponentialRandomPositiveInt(0.9f, 10, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = 3; + const int filter_height = 3; + const int depth_multiplier = 1; + const int stride = UniformRandomInt(1, 2); + // Although the kernel supports only kValid padding, we test that kSame + // is using the correct code path. + const auto padding_type = + UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; + + return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, + filter_width, filter_height, depth_multiplier, + stride, padding_type); +} + +void TestOneDepthwiseConv() { + while (!TryTestOneDepthwiseConv()) { + } +} + +void TestOneDepthwiseConv3x3Filter() { + while (!TryTestOneDepthwiseConv3x3Filter()) { + } +} + +TEST(TestDepthwiseConv, TestDepthwiseConv) { + const int kTestsToRun = 10 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv(); + } +} + +TEST(TestDepthwiseConv3x3Filter, TestDepthwiseConv) { + const int kTestsToRun = 3 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv3x3Filter(); + } +} + +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc new file mode 100644 index 0000000000..7e9ff5242a --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc @@ -0,0 +1,333 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS + +#include +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" + +namespace { + +class NumberGenerator { + public: + std::vector RandomIntVector(int n, int min_val, int max_val) { + std::vector vec(n); + double scale = static_cast(max_val + 1 - min_val) / engine_.max(); + for (auto& it : vec) { + it = min_val + std::floor(engine_() * scale); + } + return vec; + } + + std::mt19937 engine_; +}; + +class LogQuantizedTest : public ::testing::Test { + public: + NumberGenerator generator_; +}; + +// input_integer_bits <= 30. output_integer_bits > 0. 
+inline int32 LogPositiveValuesViaFloat(int32 input_val, int input_integer_bits, + int output_integer_bits) { + const double float_log_sum_of_exps = std::log( + static_cast(input_val) * 0.5 / (1 << (30 - input_integer_bits))); + static constexpr double min_int = + static_cast(std::numeric_limits::min()); + static constexpr double max_int = + static_cast(std::numeric_limits::max()); + double double_result = tflite::TfLiteRound(float_log_sum_of_exps * + (1 << (31 - output_integer_bits))); + return static_cast( + std::min(max_int, std::max(min_int, double_result))); +} + +void CheckOutputData(const std::vector& test_output, + const std::vector& reference_output, + const std::vector& test_input, + const string& check_label, int input_integer_bits, + int output_integer_bits, int tolerance) { + // In the special case of small input, specifically raw value of 5, a rounding + // up leads to difference in the output. We do not aim to be accurate for + // very small input values, and there should be sufficient input fractional + // bits that this is a small input. + static constexpr double error_from_rounding_up = 0.0224585; + const int n = test_output.size(); + ASSERT_EQ(n, reference_output.size()); + for (int i = 0; i < n; ++i) { + // Adjust tolerance when input <= 5*2^-(31-input_integer_bits). + const int adjusted_tolerance = + test_input[i] > 5 + ? 
tolerance + : std::max(tolerance, static_cast(std::ceil( + error_from_rounding_up * + (1 << (31 - output_integer_bits))))); + ASSERT_LE(std::abs(test_output[i] - reference_output[i]), + adjusted_tolerance) + << "Failure in \"" << check_label << "\" at i=" << i + << ", test_input[i]=" << test_input[i] << "=" + << static_cast(test_input[i]) / (1 << (31 - input_integer_bits)) + << ", test_output[i]=" << test_output[i] << "=" + << static_cast(test_output[i]) / + (1 << (31 - output_integer_bits)) + << ", reference_output[i]=" << reference_output[i] << "=" + << static_cast(reference_output[i]) / + (1 << (31 - output_integer_bits)) + << ", difference[i]=" << std::abs(reference_output[i] - test_output[i]) + << "=" + << static_cast(std::abs(reference_output[i] - test_output[i])) / + (1 << (31 - output_integer_bits)) + << "; tolerance=" << tolerance + << ", adj tolerance=" << adjusted_tolerance; + } +} + +void RightShiftVector(const std::vector& shifts, + std::vector* vec) { + const int n = vec->size(); + ASSERT_EQ(n, shifts.size()); + for (int i = 0; i < n; ++i) { + vec->at(i) = std::max(1, vec->at(i) >> shifts[i]); + } +} + +template +void RunSingleTest(const std::vector& test_input, + const string& check_label, int tolerance) { + const int n = test_input.size(); + std::vector float_gen_output(n, 0); + std::vector reference_output(n, 0); + std::vector optimized_output(n, 0); + + // Workaround the stupid things that intelligent humans do. + // Consequence of __builtin_clz(0u) may equal 31 instead of 32. 
+ std::vector fudged_input(n, 0); + for (int i = 0; i < n; ++i) { + fudged_input[i] = std::max(test_input[i], 2); + } + + for (int i = 0; i < n; ++i) { + reference_output[i] = + tflite::reference_ops::log_x_for_x_greater_than_or_equal_to_1_impl< + OutputIntegerBits, InputIntegerBits>( + gemmlowp::FixedPoint::FromRaw( + fudged_input[i])) + .raw(); + optimized_output[i] = + tflite::optimized_ops::log_x_for_x_greater_than_or_equal_to_1_impl< + OutputIntegerBits, InputIntegerBits>( + gemmlowp::FixedPoint::FromRaw( + fudged_input[i])) + .raw(); + float_gen_output[i] = LogPositiveValuesViaFloat( + fudged_input[i], InputIntegerBits, OutputIntegerBits); + } + // Note that first check is intolerant. + { + std::ostringstream label; + label << check_label << " / optimized vs reference / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + optimized_output, reference_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, 0); + } + { + std::ostringstream label; + label << check_label << " / reference vs float-gen / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + reference_output, float_gen_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, tolerance); + } + { + std::ostringstream label; + label << check_label << " optimized vs float-gen / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + optimized_output, float_gen_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, tolerance); + } +} + +template +void RunSingleTest(const std::vector& test_input, int input_integer_bits, + const string& check_label, int tolerance) { +#define INPUT_CASE(K) \ + case K: \ + return RunSingleTest(test_input, check_label, \ + tolerance) + switch (input_integer_bits) { + INPUT_CASE(0); + INPUT_CASE(1); + INPUT_CASE(2); + INPUT_CASE(3); + INPUT_CASE(4); + INPUT_CASE(5); 
+ INPUT_CASE(6); + INPUT_CASE(7); + INPUT_CASE(8); + INPUT_CASE(9); + INPUT_CASE(10); + INPUT_CASE(11); + INPUT_CASE(12); + INPUT_CASE(13); + INPUT_CASE(14); + INPUT_CASE(15); + INPUT_CASE(16); + INPUT_CASE(17); + INPUT_CASE(18); + INPUT_CASE(19); + INPUT_CASE(20); + INPUT_CASE(21); + INPUT_CASE(22); + INPUT_CASE(23); + INPUT_CASE(24); + INPUT_CASE(25); + INPUT_CASE(26); + INPUT_CASE(27); + INPUT_CASE(28); + INPUT_CASE(29); + default: + ASSERT_LE(input_integer_bits, 30) + << "Input integer bits not handled: " << input_integer_bits; + } +#undef INPUT_CASE +} + +void RunSingleTest(const std::vector& test_input, int input_integer_bits, + int output_integer_bits, const string& check_label, + int tolerance) { +#define OUTPUT_CASE(K) \ + case K: \ + return RunSingleTest(test_input, input_integer_bits, check_label, \ + tolerance) + switch (output_integer_bits) { + OUTPUT_CASE(0); + OUTPUT_CASE(1); + OUTPUT_CASE(2); + OUTPUT_CASE(3); + OUTPUT_CASE(4); + OUTPUT_CASE(5); + OUTPUT_CASE(6); + OUTPUT_CASE(7); + OUTPUT_CASE(8); + OUTPUT_CASE(9); + OUTPUT_CASE(10); + OUTPUT_CASE(11); + OUTPUT_CASE(12); + OUTPUT_CASE(13); + OUTPUT_CASE(14); + OUTPUT_CASE(15); + OUTPUT_CASE(16); + OUTPUT_CASE(17); + OUTPUT_CASE(18); + OUTPUT_CASE(19); + OUTPUT_CASE(20); + OUTPUT_CASE(21); + OUTPUT_CASE(22); + OUTPUT_CASE(23); + OUTPUT_CASE(24); + OUTPUT_CASE(25); + OUTPUT_CASE(26); + OUTPUT_CASE(27); + OUTPUT_CASE(28); + OUTPUT_CASE(29); + default: + ASSERT_LE(input_integer_bits, 30) + << "Input integer bits not handled: " << input_integer_bits; + } +#undef OUTPUT_CASE +} + +void RunUniformTest(int test_size, int input_integer_bits, + int output_integer_bits, const string& check_label, + int tolerance, NumberGenerator* generator) { + std::vector test_data = generator->RandomIntVector( + test_size, 2, std::numeric_limits::max() - 1); + test_data[0] = 2; + test_data[1] = 3; + test_data[2] = 4; + test_data[3] = std::numeric_limits::max() - 2; + test_data[4] = std::numeric_limits::max() - 1; + 
test_data[5] = std::numeric_limits::max(); + + RunSingleTest(test_data, input_integer_bits, output_integer_bits, + check_label + " / uniform test", tolerance); +} + +void RunUniformShiftUniformTest(int test_size, int input_integer_bits, + int output_integer_bits, + const string& check_label, int tolerance, + NumberGenerator* generator) { + std::vector test_data = generator->RandomIntVector( + test_size, 2, std::numeric_limits::max() - 1); + std::vector shifts = generator->RandomIntVector(test_size, 0, 29); + RightShiftVector(shifts, &test_data); + + RunSingleTest(test_data, input_integer_bits, output_integer_bits, + check_label + " / shifted test", tolerance); +} + +TEST_F(LogQuantizedTest, VariedIntegerBits) { + static constexpr int kVariations = 250; + static constexpr int kRunSize = 250; + static constexpr int kIntegerTolerance = 8; + static constexpr double kOutputFloatTolerance = 7.0e-7; + + std::vector input_integer_bits = + generator_.RandomIntVector(kVariations, 0, 24); + std::vector output_integer_bits = + generator_.RandomIntVector(kVariations, 1, 10); + + for (int i = 0; i < kVariations; ++i) { + int var_output_integer_bits = output_integer_bits[i]; + int tolerance = + std::max(1.0 * kIntegerTolerance, + (1 << (31 - var_output_integer_bits)) * kOutputFloatTolerance); + + RunUniformTest(kRunSize, input_integer_bits[i], var_output_integer_bits, + "VariedIntegerBits", tolerance, &generator_); + RunUniformShiftUniformTest(kRunSize, input_integer_bits[i], + var_output_integer_bits, "VariedIntegerBits", + tolerance, &generator_); + } +} + +TEST_F(LogQuantizedTest, SelectedIntegerBits) { + static constexpr int kInputBits = 12; + static constexpr int kOutputBits = 5; + static constexpr int kRunSize = 100000; + static constexpr int kIntegerTolerance = 4; + + RunUniformTest(kRunSize, kInputBits, kOutputBits, "SelectedIntegerBits", + kIntegerTolerance, &generator_); + RunUniformShiftUniformTest(kRunSize, kInputBits, kOutputBits, + "SelectedIntegerBits", 
kIntegerTolerance, + &generator_); +} + +} // namespace diff --git a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc new file mode 100644 index 0000000000..b7531ea2e2 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc @@ -0,0 +1,241 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +namespace tflite { +namespace { + +void RunLogSoftmaxFloatReference(const uint8* input_data, + const Dims<4>& dims_common, int32 input_offset, + const double input_scale, int stride, + float beta, uint8* reference_output_data) { + const int ref_buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector reference_dequant_data(ref_buffer_size); + std::vector reference_output_float_data(ref_buffer_size); + + // Reference data generated via Dequant of input into float, and then applying + // float LogSoftmax. 
+ reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale, + reference_dequant_data.data(), dims_common); + optimized_ops::LogSoftmax(reference_dequant_data.data(), dims_common, + reference_output_float_data.data(), dims_common); + // Work with quantized scaling for LogSoftmax, under which 255 represents 0, + // and -16 gets nudged up to 0. + for (int i = 0; i < ref_buffer_size; i++) { + reference_output_data[i] = std::max( + 0, static_cast( + 255 + std::round(16.0f * reference_output_float_data[i]))); + } +} + +void CheckOutputData(const uint8* test_output, const uint8* reference_output, + const Dims<4>& dims_common, const string& check_label, + bool be_exacting) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + // While calculating some metrics in floating point, we work with quantized + // scaling. + std::vector diff(buffer_size); + int64_t sum_diff = 0; + int64_t sum_abs_diff = 0; + for (int i = 0; i < buffer_size; i++) { + diff[i] = static_cast(test_output[i]) - reference_output[i]; + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + } + // These stats help understand test failures. + std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / buffer_size; + const float mean_abs_diff = static_cast(sum_abs_diff) / buffer_size; + // We either check for bit exactness (against the reference quantized version) + // or for general accuracy, allowing off-by-one (against the float reference). + if (be_exacting) { + ASSERT_TRUE(std::abs(min_diff) == 0 && std::abs(max_diff) == 0) + << check_label << ": " + << "std::abs(min_diff)=" << std::abs(min_diff) + << ", std::abs(max_diff)=" << std::abs(max_diff); + } else { + // For small numbers of samples, the estimates of the means vary more. + // Rather than widen the tolerances, we skip the smaller tests. 
+ ASSERT_TRUE(((std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f) || + buffer_size < 10000) && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1) + << check_label << ": " + << "buffer_size=" << buffer_size << ", mean_diff=" << mean_diff + << ", mean_abs_diff=" << mean_abs_diff + << ", median_diff=" << median_diff << ", min_diff=" << min_diff + << ", max_diff=" << max_diff; + } +} + +// Runs the LogSoftmax and compares against the float reference implementation +// and the quantized reference implementation. +void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, + int stride, float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector optimized_logsoftmax_output(buffer_size); + std::vector reference_float_logsoftmax_output(buffer_size); + std::vector reference_quant_logsoftmax_output(buffer_size); + + RunLogSoftmaxFloatReference(input_data, dims_common, input_offset, + input_scale, stride, beta, + reference_float_logsoftmax_output.data()); + + int32 input_beta_multiplier; + int input_beta_left_shift; + int32 reverse_scaling_divisor; + int reverse_scaling_right_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessLogSoftmaxScaling( + beta, input_scale, kScaledDiffIntegerBits, &input_beta_multiplier, + &input_beta_left_shift, &reverse_scaling_divisor, + &reverse_scaling_right_shift); + // diff_min has a negative value, and is used to limit the maximum magnitude + // of the diffs, which are <= 0. 
+ const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, + input_beta_left_shift); + + optimized_ops::LogSoftmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, reverse_scaling_divisor, + reverse_scaling_right_shift, diff_min, + optimized_logsoftmax_output.data(), dims_common); + reference_ops::LogSoftmax( + input_data, dims_common, input_beta_multiplier, input_beta_left_shift, + reverse_scaling_divisor, reverse_scaling_right_shift, diff_min, + reference_quant_logsoftmax_output.data(), dims_common); + + CheckOutputData(optimized_logsoftmax_output.data(), + reference_float_logsoftmax_output.data(), dims_common, + "Optimized vs float reference", false); + CheckOutputData(optimized_logsoftmax_output.data(), + reference_quant_logsoftmax_output.data(), dims_common, + "Optimized vs quant reference", true); + CheckOutputData(reference_quant_logsoftmax_output.data(), + reference_float_logsoftmax_output.data(), dims_common, + "Quant reference vs float reference", false); +} + +// This function picks some random LogSoftmax params, which are checked for +// desirability. If not acceptable, it returns false. If they're OK, +// it runs the LogSoftmax test and returns true. This allows the caller +// to loop until a test has been run. +// +// Currently we do not reject for any reason. +bool TryOneUniformLogSoftmax() { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // LogSoftmax, the width and height really just create test repetitions. 
+ const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + static constexpr float beta = 1.0f; + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandom(&input_data); + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, + input_scale, stride, beta); + return true; +} + +// See TryOneUniformLogSoftmax() for a general description. +// +// Tests with "skyscraper" input patterns are included for two reasons. (a) +// Bimodal distributions are potentially challenging and perhaps more +// realistic than simple uniform random inputs. (b) Some implementations of +// LogSoftmax may adapt as they traverse the depth, and so we test handling of +// cases where relatively small values are encountered at the beginning and end. +bool TryOneSkyscraperLogSoftmax(bool small_depth) { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // LogSoftmax, the width and height really just create test repetitions. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = small_depth + ? 
ExponentialRandomPositiveInt(0.75f, 40, 500) + : ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + static constexpr float beta = 1.0f; + // Extra parameters for skyscraper input patterns. + const double middle_proportion = + ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0); + const int middle_min = UniformRandomInt(0, 255); + const int sides_max = UniformRandomInt(0, middle_min); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, + sides_max); + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, + input_scale, stride, beta); + return true; +} + +TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneUniformLogSoftmax()) { + } + } +} + +TEST(TestQuantizedLogSoftmax, SkyscraperLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperLogSoftmax(false)) { + } + } +} + +TEST(TestQuantizedLogSoftmax, SmallSkyscraperLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperLogSoftmax(true)) { + } + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc new file mode 100644 index 0000000000..c1c50dff4d --- /dev/null +++ 
b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace { +void TestOneResizeBilinear(int batch, int depth, int input_width, + int input_height, int output_width, + int output_height) { + Dims<4> input_dims_inference = + MakeDimsForInference(depth, input_width, input_height, batch); + Dims<4> output_dims_inference = + MakeDimsForInference(depth, output_width, output_height, batch); + + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int output_buffer_size = + RequiredBufferSizeForDims(output_dims_inference); + + std::vector input_data(input_buffer_size, 0); + std::vector reference_output_data(output_buffer_size, 0); + // Initialize the output data with something other than zero, so we can catch + // issue with kernels failing to initialize the output. 
+ std::vector output_data(output_buffer_size, 3.1415); + + const float input_amplitude = 1.f; + FillRandom(&input_data, -input_amplitude, input_amplitude); + + Dims<4> output_size_dims = MakeDimsForInference(2, 1, 1, 1); + std::vector output_size_data = {output_height, output_width}; + + reference_ops::ResizeBilinear( + input_data.data(), input_dims_inference, output_size_data.data(), + output_size_dims, reference_output_data.data(), output_dims_inference); + optimized_ops::ResizeBilinear(input_data.data(), input_dims_inference, + output_size_data.data(), output_size_dims, + output_data.data(), output_dims_inference); + + double sum_diff = 0; + float max_abs_val = 0; + for (int i = 0; i < output_buffer_size; i++) { + sum_diff += std::abs(output_data[i] - reference_output_data[i]); + max_abs_val = std::max(max_abs_val, std::abs(reference_output_data[i])); + } + + if (sum_diff != 0.f) { + const float mean_diff = static_cast(sum_diff / output_buffer_size); + const float relative_error = std::abs(mean_diff) / max_abs_val; + ASSERT_LT(relative_error, 1e-5f); + } +} + +TEST(ResizeBilinear, TestResizeBilinear) { + const int kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + + TestOneResizeBilinear(batch, depth, input_width, input_height, output_width, + output_height); + } +} + +TEST(ResizeBilinear2x2, TestResizeBilinear) { + const int kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int 
input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_width = input_width * 2; + const int output_height = input_height * 2; + + TestOneResizeBilinear(batch, depth, input_width, input_height, output_width, + output_height); + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc new file mode 100644 index 0000000000..d781a7b642 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc @@ -0,0 +1,227 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +namespace tflite { +namespace { + +void RunSoftmaxFloatReference(const uint8* input_data, + const Dims<4>& dims_common, int32 input_offset, + const double input_scale, int stride, float beta, + uint8* reference_output_data) { + const int ref_buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector reference_dequant_data(ref_buffer_size); + std::vector reference_output_float_data(ref_buffer_size); + + // Reference data generated via Dequant of input into float, and then applying + // float Softmax. + reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale, + reference_dequant_data.data(), dims_common); + optimized_ops::Softmax(reference_dequant_data.data(), dims_common, beta, + reference_output_float_data.data(), dims_common); + // Work with quantized scaling for Softmax, under which 256 represents 1, but + // we limit this to 255. + for (int i = 0; i < ref_buffer_size; i++) { + reference_output_data[i] = std::min( + 255, + static_cast(std::round(256.0f * reference_output_float_data[i]))); + } +} + +void CheckOutputData(const uint8* test_output, const uint8* reference_output, + const Dims<4>& dims_common, const string& check_label, + bool be_exacting) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + // While calculating some metrics in floating point, we work with quantized + // scaling. 
+ std::vector diff(buffer_size); + int64_t sum_diff = 0; + int64_t sum_abs_diff = 0; + for (int i = 0; i < buffer_size; i++) { + diff[i] = static_cast(test_output[i]) - reference_output[i]; + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + } + // These stats help understand test failures. + std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / buffer_size; + const float mean_abs_diff = static_cast(sum_abs_diff) / buffer_size; + // We either check for bit exactness (against the reference quantized version) + // or for general accuracy, allowing off-by-one (against the float reference). + if (be_exacting) { + ASSERT_TRUE(std::abs(min_diff) == 0 && std::abs(max_diff) == 0); + } else { + // For small numbers of samples, the estimates of the means vary more. + // Rather than widen the tolerances, we skip the smaller tests. + ASSERT_TRUE(((std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f) || + buffer_size < 10000) && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1); + } +} + +// Runs the Softmax and compares against the float reference implementation and +// the quantized reference implementation. 
+void RunOneSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, int stride, + float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector optimized_softmax_output(buffer_size); + std::vector reference_float_softmax_output(buffer_size); + std::vector reference_quant_softmax_output(buffer_size); + + RunSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale, + stride, beta, reference_float_softmax_output.data()); + + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, + &input_beta_left_shift); + // diff_min has a negative value, and is used to limit the maximum magnitude + // of the diffs, which are <= 0. + const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, + input_beta_left_shift); + + optimized_ops::Softmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, diff_min, + optimized_softmax_output.data(), dims_common); + reference_ops::Softmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, diff_min, + reference_quant_softmax_output.data(), dims_common); + + CheckOutputData(optimized_softmax_output.data(), + reference_float_softmax_output.data(), dims_common, + "Optimized vs float reference", false); + CheckOutputData(optimized_softmax_output.data(), + reference_quant_softmax_output.data(), dims_common, + "Optimized vs quant reference", true); + CheckOutputData(reference_quant_softmax_output.data(), + reference_float_softmax_output.data(), dims_common, + "Quant reference vs float reference", false); +} + +// This function picks some random Softmax params, which are checked for +// desirability. If not acceptable, it returns false. If they're OK, +// it runs the Softmax test and returns true. 
This allows the caller +// to loop until a test has been run. +// +// Currently we do not reject for any reason. +bool TryOneUniformSoftmax() { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // Softmax, the width and height really just create test repetitions. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandom(&input_data); + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, + stride, beta); + return true; +} + +// See TryOneUniformSoftmax() for a general description. +// +// Tests with "skyscraper" input patterns are included for two reasons. (a) +// Bimodal distributions are potentially challenging and perhaps more +// realistic than simple uniform random inputs. (b) Some implementations of +// Softmax may adapt as they traverse the depth, and so we test handling of +// cases where relatively small values are encountered at the beginning and end. +bool TryOneSkyscraperSoftmax(bool small_depth) { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // Softmax, the width and height really just create test repetitions. 
+ const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = small_depth + ? ExponentialRandomPositiveInt(0.75f, 40, 500) + : ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10); + // Extra parameters for skyscraper input patterns. + const double middle_proportion = + ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0); + const int middle_min = UniformRandomInt(0, 255); + const int sides_max = UniformRandomInt(0, middle_min); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, + sides_max); + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, + stride, beta); + return true; +} + +TEST(TestQuantizedSoftmax, UniformSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneUniformSoftmax()) { + } + } +} + +TEST(TestQuantizedSoftmax, SkyscraperSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperSoftmax(false)) { + } + } +} + +TEST(TestQuantizedSoftmax, SmallSkyscraperSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperSoftmax(true)) { + } + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc new file mode 
100644 index 0000000000..9b1fd9b344 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc @@ -0,0 +1,121 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +#include +#include + +namespace tflite { + +Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) { + Dims<4> result; + int cum_prod = 1; + + result.sizes[0] = depth; + result.strides[0] = cum_prod; + cum_prod *= result.sizes[0]; + + result.sizes[1] = width; + result.strides[1] = cum_prod; + cum_prod *= result.sizes[1]; + + result.sizes[2] = height; + result.strides[2] = cum_prod; + cum_prod *= result.sizes[2]; + + result.sizes[3] = batch; + result.strides[3] = cum_prod; + + return result; +} + +// this is a copied from an internal function in propagate_fixed_sizes.cc +bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, + int filter_height, int stride, PaddingType padding_type, + Dims<4>* output_dims, int* pad_width, int* pad_height) { + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int batch = ArraySize(input_dims, 3); + + int output_height = 0; + int output_width = 0; + if (padding_type == PaddingType::kValid) { + output_height = (input_height + stride - filter_height) / stride; + output_width = 
(input_width + stride - filter_width) / stride; + } else if (padding_type == PaddingType::kSame) { + output_height = (input_height + stride - 1) / stride; + output_width = (input_width + stride - 1) / stride; + } else { + return false; + } + + if (output_width <= 0 || output_height <= 0) { + return false; + } + + *pad_height = + ((output_height - 1) * stride + filter_height - input_height) / 2; + *pad_width = ((output_width - 1) * stride + filter_width - input_width) / 2; + *output_dims = + MakeDimsForInference(output_depth, output_width, output_height, batch); + return true; +} + +std::mt19937& RandomEngine() { + static std::mt19937 engine; + return engine; +} + +int UniformRandomInt(int min, int max) { + std::uniform_int_distribution dist(min, max); + return dist(RandomEngine()); +} + +float UniformRandomFloat(float min, float max) { + std::uniform_real_distribution dist(min, max); + return dist(RandomEngine()); +} + +int ExponentialRandomPositiveInt(float percentile, int percentile_val, + int max_val) { + const float lambda = + -std::log(1.f - percentile) / static_cast(percentile_val); + std::exponential_distribution dist(lambda); + float val; + do { + val = dist(RandomEngine()); + } while (!val || !std::isfinite(val) || val > max_val); + return static_cast(std::ceil(val)); +} + +float ExponentialRandomPositiveFloat(float percentile, float percentile_val, + float max_val) { + const float lambda = + -std::log(1.f - percentile) / static_cast(percentile_val); + std::exponential_distribution dist(lambda); + float val; + do { + val = dist(RandomEngine()); + } while (!std::isfinite(val) || val > max_val); + return val; +} + +void FillRandom(std::vector* vec, float min, float max) { + std::uniform_real_distribution dist(min, max); + auto gen = std::bind(dist, RandomEngine()); + std::generate(std::begin(*vec), std::end(*vec), gen); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h 
b/tensorflow/contrib/lite/kernels/internal/test_util.h new file mode 100644 index 0000000000..26078cef49 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/test_util.h @@ -0,0 +1,104 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { + +// Creates a Dims struct from a set of dimensions. +Dims<4> MakeDimsForInference(int depth, int width, int height, int batch); + +// Computes output and padding dimensions. +bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, + int filter_height, int stride, PaddingType padding_type, + Dims<4>* output_dims, int* pad_width, int* pad_height); + +// Returns a mt19937 random engine. +std::mt19937& RandomEngine(); + +// Returns a random integer uniformly distributed between |min| and |max|. +int UniformRandomInt(int min, int max); + +// Returns a random float uniformly distributed between |min| and |max|. +float UniformRandomFloat(float min, float max); + +// Returns a random element in |v|. 
+template +const T& RandomElement(const std::vector& v) { + return v[UniformRandomInt(0, v.size() - 1)]; +} + +// Returns a random exponentially distributed integer. +int ExponentialRandomPositiveInt(float percentile, int percentile_val, + int max_val); + +// Returns a random exponentially distributed float. +float ExponentialRandomPositiveFloat(float percentile, float percentile_val, + float max_val); + +// Fills a vector with random floats between |min| and |max|. +void FillRandom(std::vector* vec, float min, float max); + +// Fills a vector with random numbers between |min| and |max|. +template +void FillRandom(std::vector* vec, T min, T max) { + std::uniform_int_distribution dist(min, max); + auto gen = std::bind(dist, RandomEngine()); + std::generate(std::begin(*vec), std::end(*vec), gen); +} + +// Fills a vector with random numbers. +template +void FillRandom(std::vector* vec) { + FillRandom(vec, std::numeric_limits::min(), std::numeric_limits::max()); +} + +template +void FillRandom(typename std::vector::iterator begin_it, + typename std::vector::iterator end_it, T min, T max) { + std::uniform_int_distribution dist(min, max); + auto gen = std::bind(dist, RandomEngine()); + std::generate(begin_it, end_it, gen); +} + +// Fill with a "skyscraper" pattern, in which there is a central section (across +// the depth) with higher values than the surround. 
+template +void FillRandomSkyscraper(std::vector* vec, int depth, + double middle_proportion, uint8 middle_min, + uint8 sides_max) { + for (auto base_it = std::begin(*vec); base_it != std::end(*vec); + base_it += depth) { + auto left_it = base_it + std::ceil(0.5 * depth * (1.0 - middle_proportion)); + auto right_it = + base_it + std::ceil(0.5 * depth * (1.0 + middle_proportion)); + FillRandom(base_it, left_it, std::numeric_limits::min(), sides_max); + FillRandom(left_it, right_it, middle_min, std::numeric_limits::max()); + FillRandom(right_it, base_it + depth, std::numeric_limits::min(), + sides_max); + } +} + +} // namespace tflite +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 43c6883278..d5293edd56 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -20,6 +20,7 @@ limitations under the License. namespace tflite { enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu }; +enum class PaddingType { kNone, kSame, kValid }; // Quantization parameters, determining the mapping of quantized values // to real values (i.e. 
determining how quantized values are mathematically -- GitLab From 52c1423d88454958c4693453a76a05c18a191b63 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 23 May 2018 22:59:22 -0700 Subject: [PATCH 083/902] Implement support for reshape in IndexedArrayAnalysis PiperOrigin-RevId: 197843589 --- .../xla/service/indexed_array_analysis.cc | 301 +++++++++++++++++- .../xla/service/indexed_array_analysis.h | 19 +- .../service/indexed_array_analysis_test.cc | 111 +++++++ tensorflow/compiler/xla/util.h | 11 + 4 files changed, 438 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 15b2d8f499..b74f05e080 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -28,6 +28,8 @@ using Analysis = IndexedArrayAnalysis; using UnknownArray = Analysis::UnknownArray; using ConstantArray = Analysis::ConstantArray; using ScalarIndexedArray = Analysis::ScalarIndexedArray; +using tensorflow::gtl::ArraySlice; +using tensorflow::str_util::Join; } // namespace string IndexedArrayAnalysis::ToString(Array* root) { @@ -52,8 +54,7 @@ string IndexedArrayAnalysis::ToString(Array* root) { return tensorflow::strings::StrCat( "(", name, " ", ToString(indexed_array->source()), " ", ToString(indexed_array->indices()), " ", indexed_array->source_dim(), - "->[", tensorflow::str_util::Join(indexed_array->output_dims(), ","), - "])"); + "->[", Join(indexed_array->output_dims(), ","), "])"); } } } @@ -127,6 +128,10 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayFor( instr->gather_window_bounds(), FindOrDie(cache_, instr->operand(0)), FindOrDie(cache_, instr->operand(1))); break; + case HloOpcode::kReshape: + computed_array = ComputeArrayForReshape( + instr->shape(), FindOrDie(cache_, instr->operand(0))); + break; } if (!computed_array) { @@ -244,6 +249,298 @@ Analysis::Array* 
IndexedArrayAnalysis::ComputeArrayForGather( shape); } +namespace { +// Returns an index into `values` such that the product of the range +// [values.begin()+index, values.end()) is equal to `product`. If there is no +// such index, return -1. All integers in `values` must be positive. +int64 FindSuffixWithProduct(ArraySlice values, int64 product) { + DCHECK(c_all_of(values, [](int64 value) { return value > 0; })); + + int64 current_product = 1; + int64 i; + for (i = values.size() - 1; i >= 0 && product > current_product; --i) { + current_product *= values[i]; + } + + if (product == current_product) { + return i + 1; + } + + return -1; +} + +struct ReshapePassthroughDimPair { + int64 result_dim; + int64 operand_dim; +}; + +// Returns a set of dimension pairs such for all (result_dim, operand_dim) in +// the set: +// +// output_index[result_dim] = SourceIndexOfReshape(output_index)[operand_dim] +// +// The returned vector of pairs is sorted in both the result_dim and the +// operand_dim components. +std::vector ComputeReshapePassthroughDimPairs( + ArraySlice operand_shape, ArraySlice result_shape) { + // A reshape can be seen as an index mapping from output index to input index: + // + // (i_0, ..., i_n) = f(o_0, ..., o_m) + // + // This function returns the pairs (j, k) for which the following invariant + // holds for all indices in the shape: + // + // o_j == i_k + // + // And this occurs when: + // + // O_{j+1} * ... * O_n == I_{k+1} * ... * I_m + // + // (where O_x are the sizes of the output shape and I_x are the sizes of the + // input shape) and the size of the dimension j of the result is the same as + // the size of dimension k in the operand. + // + // These conditions are sufficient because the Reshape HLO is spec'ed such + // that the rightmost dimensions are always minor in the flattening and refine + // operation. 
+ + std::vector result; + int64 result_subarray_size = 1; + for (int64 result_dim = result_shape.size() - 1; result_dim >= 0; + --result_dim) { + int64 candidate_operand_dim = + FindSuffixWithProduct(operand_shape, result_subarray_size); + + // result_subarray_size does not include the elements in the current + // `result_dim` dimension (we multiply in result_shape[result_dim] at the + // end of loop body) so candidate_operand_dim can never be zero. + CHECK_NE(candidate_operand_dim, 0); + + if (candidate_operand_dim != -1 && + result_shape[result_dim] == operand_shape[candidate_operand_dim - 1]) { + result.push_back({/*result_dim=*/result_dim, + /*operand_dim=*/candidate_operand_dim - 1}); + } + result_subarray_size *= result_shape[result_dim]; + } + + c_reverse(result); + + if (VLOG_IS_ON(3)) { + std::vector result_strings; + c_transform(result, std::back_inserter(result_strings), + [](ReshapePassthroughDimPair value) { + return tensorflow::strings::StrCat(value.result_dim, "->", + value.operand_dim); + }); + VLOG(3) << "For a reshape from [" << Join(operand_shape, ",") << "] to [" + << Join(result_shape, ",") << "] passthrough indices are [" + << Join(result_strings, ",") << "]"; + } + + DCHECK(c_is_sorted( + result, [](ReshapePassthroughDimPair lhs, ReshapePassthroughDimPair rhs) { + return lhs.result_dim < rhs.result_dim; + })); + + DCHECK(c_is_sorted( + result, [](ReshapePassthroughDimPair lhs, ReshapePassthroughDimPair rhs) { + return lhs.operand_dim < rhs.operand_dim; + })); + + return result; +} + +// Return true if `dim` is stated as an passthrough operand dim in +// `passthrough_dims`. 
+bool IsReshapePassthroughOperandDim( + ArraySlice passthrough_dims, int64 dim) { + return c_any_of(passthrough_dims, + [&](ReshapePassthroughDimPair passthrough_dim_pair) { + return passthrough_dim_pair.operand_dim == dim; + }); +} + +// Maps `operand_dim` which must be an passthrough operand dimension to its +// corresponding passthrough result dimension based on `passthrough_dims`. +int64 MapPassthroughOperandDimToResultDim( + ArraySlice passthrough_dims, int64 operand_dim) { + auto it = c_find_if(passthrough_dims, + [&](ReshapePassthroughDimPair passthrough_dim_pair) { + return passthrough_dim_pair.operand_dim == operand_dim; + }); + CHECK(it != passthrough_dims.end()); + return it->result_dim; +} + +int64 FindSourcePositionForPassthroughResultDim(ArraySlice operand_shape, + ArraySlice result_shape, + int64 source_passthrough_dim) { + int64 indexed_source_subarray_size = + std::accumulate(operand_shape.begin() + source_passthrough_dim + 1, + operand_shape.end(), 1, std::multiplies()); + + return FindSuffixWithProduct(result_shape, indexed_source_subarray_size); +} + +}; // namespace + +Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( + const Shape& shape, Array* operand) { + auto* scalar_indexed = dynamic_cast(operand); + if (!scalar_indexed) { + return nullptr; + } + + // Try to fold Reshape(ScalarIndexed(Const, Indices)) + // => ScalarIndexed(Const', Indices) + // + // We can view the reshape and the scalar-indexed operations as functions that + // map an output index (i.e. an index into the result) to an input index + // (i.e. an index into the operand). The key idea used here is that the + // output-to-input mapping for some reshape operations may "pass through" some + // output dimensions into the input space unchanged -- i.e. there may exist + // output dimension "O" and input dimension "I" such that OutputIndex[O] is + // always == InputIndexForReshape(OutputIndex)[I]. 
If these pass-through + // dimensions in the input space of the reshape happen to include all the + // output dimensions for the scalar-indexed node then, roughly, the following + // holds: + // + // SourceIndexOfScalarIndexed(SourceIndexOfReshape(Idx)) + // == SourceIndexOfScalarIndexed(SourceIndexOfReshape(Ps ++ Qs)) + // + // Where Ps are the set of the pass-through components of Idx that are + // also the output dims of the scalar-indexed node, and Qs are the rest. + // For brevity, we're playing fast and loose with the notation here -- we + // don't literally require Idx to be a concatenation of Ps and Qs, as + // suggested by the "++". + // + // == SourceIndexOfScalarIndexed(Ps ++ SourceIndexOfReshape(Qs)) + // + // Again, we're playing fast and loose with the notation around "++". + // Generally this ++ will be a different function than the ++ in the + // previous step. + // + // If the scalar-indexed node has a constant as the source then the + // SourceIndexOfReshape function can be "folded into" the constant itself by + // reshaping it, leaving us with: + // + // == SourceIndexOfScalarIndexed(Ps ++ Qs) + // == SourceIndexOfScalarIndexed(Idx) + // + // which is just a scalar-indexed node (with parameters different from the + // scalar-indexed node we started with) with a reshaped constant as the + // source. + // + // We can't fold SourceIndexOfReshape into the constant without introducing + // another precondition: since the new scalar-indexed node will have a + // reshaped (constant) array as its source it will, in general, have a + // different source dimension than the original scalar-indexed node. This + // source dimension will have to be a passthrough dimension of the + // SourceIndexOfReshape indexing function that is folded into the source. And + // such a dimension need not exist so this is a non-trivial precondition.
+ + std::vector reshape_passthrough_dims = + ComputeReshapePassthroughDimPairs( + /*operand_shape=*/AsInt64Slice(operand->shape().dimensions()), + /*result_shape=*/AsInt64Slice(shape.dimensions())); + + auto is_reshape_passthrough_operand_dim = [&](int64 operand_dim) { + return IsReshapePassthroughOperandDim(reshape_passthrough_dims, + operand_dim); + }; + + if (!c_all_of(scalar_indexed->output_dims(), + is_reshape_passthrough_operand_dim)) { + return nullptr; + } + + // To compute the shape of the source for the new scalar-indexed node we're + // going to create, we first "undo" the scalar-indexed operation. + std::vector new_scalar_indexed_source_shape(shape.dimensions().begin(), + shape.dimensions().end()); + for (int64 i = scalar_indexed->output_dims().size() - 1; i >= 0; i--) { + int64 output_dim = scalar_indexed->output_dims()[i]; + int64 output_dim_after_reshape = MapPassthroughOperandDimToResultDim( + reshape_passthrough_dims, output_dim); + new_scalar_indexed_source_shape.erase( + new_scalar_indexed_source_shape.begin() + output_dim_after_reshape); + } + + // After this, we need to add in the dimension that will be the source + // dimension for the new scalar-indexed node. A scalar-indexed node "removes" + // the source dimensions and "adds" the output dimensions, so to get back to + // the shape for the *source* of the scalar-indexed node we need to remove the + // output dims (which we did above) and then add back the source dim (which we + // are about to do below): + + const Shape& scalar_indexed_source_shape = scalar_indexed->source()->shape(); + + int64 source_dim_for_new_scalar_indexed_node = + FindSourcePositionForPassthroughResultDim( + /*operand_shape=*/AsInt64Slice( + scalar_indexed_source_shape.dimensions()), + /*result_shape=*/new_scalar_indexed_source_shape, + scalar_indexed->source_dim()); + + // We may not be able to find a source dim for the new scalar-indexed node. 
+ // For instance consider: + // + // operand = s32[3,5,2] constant({...}) + // indices = s32[7] parameter(0) + // gather = s32[3,2,7] gather(operand, indices), + // output_window_dims={0,1}, + // elided_window_dims={1}, + // gather_dims_to_operand_dims={1}, + // index_vector_dim=1, + // window_bounds={3,1,2} + // reshape = s32[6,7] reshape(gather) + // + // In this case the gather maps to: + // (scalar-indexed-const (constant s32[3,5,2]) %indices 1->[2]) + // + // and the reshape passes through dimension 2 from its input into dimension 1 + // in its output. However, we can't rewrite the reshape as a scalar-indexed + // node because then we'd have to reshape the [3,5,2] `operand` array to + // [6,5], but then dimension 1 of the reshaped [6,5] array indexes differently + // (a.k.a. isn't pass-through) than the [3,5,2] array. + + if (source_dim_for_new_scalar_indexed_node == -1) { + return nullptr; + } + + InsertAt( + &new_scalar_indexed_source_shape, source_dim_for_new_scalar_indexed_node, + scalar_indexed_source_shape.dimensions(scalar_indexed->source_dim())); + + CHECK(IsReshapePassthroughOperandDim( + ComputeReshapePassthroughDimPairs( + /*operand_shape=*/AsInt64Slice( + scalar_indexed_source_shape.dimensions()), + /*result_shape=*/new_scalar_indexed_source_shape), + scalar_indexed->source_dim())); + + auto map_passthrough_operand_dim_to_result_dim = [&](int64 result_dim) { + return MapPassthroughOperandDimToResultDim(reshape_passthrough_dims, + result_dim); + }; + + std::vector output_dims_for_new_scalar_indexed_node; + c_transform(scalar_indexed->output_dims(), + std::back_inserter(output_dims_for_new_scalar_indexed_node), + map_passthrough_operand_dim_to_result_dim); + + Array* new_scalar_indexed_source = ComputeArrayForConstant( + *TakeOwnership(scalar_indexed->literal() + .Reshape(new_scalar_indexed_source_shape) + .ValueOrDie())); + + return ConstructScalarIndexedArray( + new_scalar_indexed_source, scalar_indexed->indices(), + 
source_dim_for_new_scalar_indexed_node, + output_dims_for_new_scalar_indexed_node, shape); +} + tensorflow::StringPiece IndexedArrayAnalysisPrinterPass::name() const { return "indexed-array-analysis-printer-pass"; } diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index b132a8f251..35d454ab77 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -143,8 +143,8 @@ class IndexedArrayAnalysis { // // For example, if source is of shape [11,13,17,19], indices is of shape // [23,29], output_dims is [0,2] and source_dim is 2 then the output is of - // shape [23,11,29,19] and the output index [A,B,C,D,E] is mapped to the input - // index [B,D,indices[A,C],E]. + // shape [23,11,29,13,19] and the output index [A,B,C,D,E] is mapped to the + // input index [B,D,indices[A,C],E]. class ScalarIndexedArray : public Array { public: Kind kind() const override { return kScalarIndexed; } @@ -152,7 +152,15 @@ class IndexedArrayAnalysis { Array* source() const { return source_; } Array* indices() const { return indices_; } + + // `source_dim` is the dimension in the source array that is being indexed + // over using indices from the `indices` array. See the class documentation + // and the overview for more details. int64 source_dim() const { return source_dim_; } + + // `output_dims` are the dimensions in the output array that are being used + // to compute an index into the `indices` array. See the class + // documentation and the overview for more details. tensorflow::gtl::ArraySlice output_dims() const { return output_dims_; } @@ -258,6 +266,8 @@ class IndexedArrayAnalysis { ScalarIndexedArray* source, Array* indices, int64 source_dim, tensorflow::gtl::ArraySlice output_dims, Shape shape); + Array* ComputeArrayForReshape(const Shape& shape, Array* operand); + template T* Construct(Args&&... 
args) { T* new_tensor = new T(std::forward(args)...); @@ -279,6 +289,11 @@ class IndexedArrayAnalysis { } } + Literal* TakeOwnership(std::unique_ptr literal) { + owned_literals_.push_back(std::move(literal)); + return owned_literals_.back().get(); + } + std::vector> owned_tensors_; std::vector> owned_literals_; tensorflow::gtl::FlatMap cache_; diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index b2731b7c51..e1090df942 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -187,5 +187,116 @@ ENTRY main { "(scalar-indexed %operand (scalar-indexed %indices_a %indices_b " "1->[0,2]) 1->[0,1,3])"); } + +TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather0) { + string hlo_text = R"( +HloModule ReshapeOfGather + +ENTRY main { + operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}}) + indices = s32[5] parameter(0) + gather = s32[5,4] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT reshape = s32[5,2,2] reshape(gather) +} +)"; + + AssertArrayForRootExpressionIs( + hlo_text, "(scalar-indexed-const (constant s32[3,2,2]) %indices 0->[0])"); +} + +TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather1) { + string hlo_text = R"( +HloModule ReshapeOfGather + +ENTRY main { + operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}}) + indices = s32[5,7] parameter(0) + gather = s32[5,4,7] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=2, + window_bounds={1,4} + ROOT reshape = s32[5,2,2,7] reshape(gather) +} +)"; + + AssertArrayForRootExpressionIs( + hlo_text, + "(scalar-indexed-const (constant s32[3,2,2]) %indices 0->[0,3])"); +} + +TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather2) { + 
string hlo_text = R"( +HloModule ReshapeOfGather + +ENTRY main { + operand = s32[3,2,6] constant(s32[3,2,6]{ + {{1,2,3,4,5,6},{1,2,3,4,5,6}}, + {{1,2,3,4,5,6},{1,2,3,4,5,6}}, + {{1,2,3,4,5,6},{1,2,3,4,5,6}}}) + indices = s32[5,7] parameter(0) + gather = s32[5,2,6,7] gather(operand, indices), + output_window_dims={1,2}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=2, + window_bounds={1,2,6} + ROOT reshape = s32[5,3,4,7] reshape(gather) +} +)"; + + AssertArrayForRootExpressionIs( + hlo_text, + "(scalar-indexed-const (constant s32[3,3,4]) %indices 0->[0,3])"); +} + +TEST_F(IndexedArrayAnalysisTest, ReshapeOfGatherNegative0) { + string hlo_text = R"( +HloModule ReshapeOfGather + +ENTRY main { + operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}}) + indices = s32[5,6] parameter(0) + gather = s32[5,4,6] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=2, + window_bounds={1,4} + ROOT reshape = s32[5,2,2,2,3] reshape(gather) +} +)"; + + AssertArrayForRootExpressionIs(hlo_text, "%reshape"); +} + +TEST_F(IndexedArrayAnalysisTest, ReshapeOfGatherNegative1) { + string hlo_text = R"( +HloModule ReshapeOfGather + +ENTRY main { + operand = s32[3,5,2] constant(s32[3,5,2]{ + {{1,2},{3,4},{5,6},{7,8},{9,10}}, + {{1,2},{3,4},{5,6},{7,8},{9,10}}, + {{1,2},{3,4},{5,6},{7,8},{9,10}}}) + indices = s32[7] parameter(0) + gather = s32[3,2,7] gather(operand, indices), + output_window_dims={0,1}, + elided_window_dims={1}, + gather_dims_to_operand_dims={1}, + index_vector_dim=1, + window_bounds={3,1,2} + ROOT reshape = s32[6,7] reshape(gather) +} +)"; + + AssertArrayForRootExpressionIs(hlo_text, "%reshape"); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index b7309885b4..6ca0c02c66 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -492,6 +492,12 @@ bool 
c_is_sorted(const C& c) { return std::is_sorted(std::begin(c), std::end(c)); } +template +bool c_is_sorted(const C& c, Compare&& comp) { + return std::is_sorted(std::begin(c), std::end(c), + std::forward(comp)); +} + template auto c_adjacent_find(const C& c) -> decltype(std::begin(c)) { return std::adjacent_find(std::begin(c), std::end(c)); @@ -526,6 +532,11 @@ int64 FindIndex(const C& c, Value&& value) { return std::distance(c.begin(), it); } +template +void InsertAt(C* c, int64 index, Value&& value) { + c->insert(c->begin() + index, std::forward(value)); +} + // Returns true if `x` fits in 32-bits. template bool IsInt32(T x) { -- GitLab From 767d6c8cfd02e65b592e01f7c8ff6713915135a3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 01:03:36 -0700 Subject: [PATCH 084/902] [XLA:GPU] Basic multi-output fusion for GPU. Take a conservative approach and attempt multi-output fusion in cases where "regular" fusion is not an option. PiperOrigin-RevId: 197852598 --- tensorflow/compiler/xla/service/gpu/BUILD | 3 + .../xla/service/gpu/instruction_fusion.cc | 20 ++ .../xla/service/gpu/instruction_fusion.h | 3 + .../service/gpu/instruction_fusion_test.cc | 248 +++++++++++++++++- .../xla/service/instruction_fusion.cc | 76 ++++-- .../compiler/xla/service/instruction_fusion.h | 21 +- 6 files changed, 342 insertions(+), 29 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 4012f87f2b..aafb61b583 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -401,6 +401,9 @@ tf_cc_test( srcs = ["instruction_fusion_test.cc"], deps = [ ":instruction_fusion", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git 
a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 5d5bef6b57..36a1b82a26 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -177,6 +177,26 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, InstructionFusion::ShouldFuse(consumer, operand_index); } +bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer, + int64 operand_index) { + const HloInstruction* producer = consumer->operand(operand_index); + // The IR emitter has limited support for non-loop fusions with multi output + // at present. + // TODO(tjoerg): Relax this constraint to allow for arbitrary kinds of fusion. + if (consumer->opcode() == HloOpcode::kFusion && + consumer->fusion_kind() != HloInstruction::FusionKind::kLoop) { + return false; + } + // Multi-output fusion requires instructions with compatible shapes. + if (!ShapeUtil::Compatible(producer->shape(), consumer->shape())) { + return false; + } + // TODO(tjoerg): Stop calling `ShouldFuse` to relax the criteria for + // multi-output fusion. In particular, do not check whether an instruction is + // expensive to duplicate, since this doesn't matter here.
+ return GpuInstructionFusion::ShouldFuse(consumer, operand_index); +} + HloInstruction::FusionKind GpuInstructionFusion::ChooseKind( const HloInstruction* producer, const HloInstruction* consumer) { if (IsReductionToVector(*consumer)) { diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h index 9fb06b0a24..f629d9ff2c 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h @@ -31,6 +31,9 @@ class GpuInstructionFusion : public InstructionFusion { bool ShouldFuse(HloInstruction* consumer, int64 operand_index) override; + bool ShouldFuseIntoMultiOutput(HloInstruction* consumer, + int64 operand_index) override; + HloInstruction::FusionKind ChooseKind( const HloInstruction* producer, const HloInstruction* consumer) override; }; diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 760e0e90f5..ec60f3a167 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -15,9 +15,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/util.h" namespace op = xla::testing::opcode_matchers; @@ -281,7 +284,8 @@ TEST_F(InstructionFusionTest, FloatingPointDivIsCheap) { .ValueOrDie()); HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, op::Tuple(op::Fusion(), op::Fusion())); + EXPECT_THAT(root, op::Tuple(op::Fusion(), op::Fusion())) + << module->ToString(); } // Compute sum(100/p0), where p0 has type s32, twice. Check that the division @@ -308,7 +312,8 @@ TEST_F(InstructionFusionTest, IntegerDivIsNotCheap) { EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) .Run(module.get()) - .ValueOrDie()); + .ValueOrDie()) + << module->ToString(); } TEST_F(InstructionFusionTest, DotOutputFusionImpossible) { @@ -337,5 +342,244 @@ TEST_F(InstructionFusionTest, DotOutputFusionImpossible) { op::Broadcast(op::Parameter()))); } +// Counts the HLO ops with a given op code in the specified module. +static int Count(const HloModule& module, HloOpcode op) { + int count = 0; + for (const auto* computation : module.computations()) { + for (const auto* instruction : computation->instructions()) { + if (instruction->opcode() == op) { + ++count; + } + } + } + return count; +} + +// Returns an HLO instruction from the given computation with the op code. 
+static StatusOr FindHloInstruction( + const HloComputation& computation, HloOpcode op) { + for (const auto* instruction : computation.instructions()) { + if (instruction->opcode() == op) { + return instruction; + } + } + return NotFound( + "Computation '%s' does not contain an instruction with op code '%s'.", + computation.name().c_str(), HloOpcodeString(op).c_str()); +} + +TEST_F(InstructionFusionTest, MultiOutputFusion) { + // sub --> add --> tuple + // \---------------/ + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + p0 = f32[4,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + p2 = f32[4,3]{1,0} parameter(2) + sub = f32[4,3]{1,0} subtract(p0, p2) + add = f32[4,3]{1,0} add(sub, p1) + ROOT tuple = (f32[4,3]{1,0}, f32[4,3]{1,0}) tuple(sub, add) + })") + .ValueOrDie(); + + ASSERT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + SCOPED_TRACE(module->ToString()); + + // Expect that there is one multi-output fusion and subtract has not been + // duplicated. + EXPECT_EQ(Count(*module, HloOpcode::kFusion), 1); + EXPECT_EQ(Count(*module, HloOpcode::kSubtract), 1); + TF_ASSERT_OK_AND_ASSIGN( + const HloInstruction* fusion, + FindHloInstruction(*module->entry_computation(), HloOpcode::kFusion)); + EXPECT_THAT( + fusion->fused_expression_root(), + op::Tuple(op::Add(op::Subtract(), op::Parameter()), op::Subtract())); +} + +TEST_F(InstructionFusionTest, MultiOutputFusionExpensiveOp) { + // tanh --> add --> tuple + // \---------------/ + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + p0 = f32[4,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + tanh = f32[4,3]{1,0} tanh(p0) + add = f32[4,3]{1,0} add(tanh, p1) + ROOT tuple = (f32[4,3]{1,0}, f32[4,3]{1,0}) tuple(tanh, add) + })") + .ValueOrDie(); + + // TODO(tjoerg): Allow multi-output fusion for expensive operations like tanh. 
+ ASSERT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()) + << module->ToString(); +} + +TEST_F(InstructionFusionTest, MultiOutputFusion2) { + // sub --> add1 --\--------\ + // \----------> add2 --> tuple + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + p0 = f32[4,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + p2 = f32[4,3]{1,0} parameter(2) + sub = f32[4,3]{1,0} subtract(p0, p2) + add1 = f32[4,3]{1,0} add(sub, p1) + add2 = f32[4,3]{1,0} add(sub, add1) + ROOT tuple = (f32[4,3]{1,0}) tuple(add1, add2) + })") + .ValueOrDie(); + + ASSERT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + SCOPED_TRACE(module->ToString()); + + // Expect that there is one multi-output fusion and subtract has not been + // duplicated. + EXPECT_EQ(Count(*module, HloOpcode::kFusion), 1); + EXPECT_EQ(Count(*module, HloOpcode::kSubtract), 1); + TF_ASSERT_OK_AND_ASSIGN( + const HloInstruction* fusion, + FindHloInstruction(*module->entry_computation(), HloOpcode::kFusion)); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Add(op::Subtract(), op::Add()), + op::Add(op::Subtract(), op::Parameter()))); +} + +TEST_F(InstructionFusionTest, MultiOutputFusion3) { + // sub --> add1 ----\--------\ + // \ --> add2 --> add3 --> tuple + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + p0 = f32[4,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + p2 = f32[4,3]{1,0} parameter(2) + p3 = f32[4,3]{1,0} parameter(3) + sub = f32[4,3]{1,0} subtract(p0, p2) + add1 = f32[4,3]{1,0} add(sub, p1) + add2 = f32[4,3]{1,0} add(p2, sub) + add3 = f32[4,3]{1,0} add(add1, add2) + ROOT tuple = (f32[4,3]{1,0}) tuple(add3, add2) + })") + .ValueOrDie(); + + ASSERT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + SCOPED_TRACE(module->ToString()); + + // Expect that there is one multi-output fusion and subtract has not 
been + // duplicated. + EXPECT_EQ(Count(*module, HloOpcode::kFusion), 1); + EXPECT_EQ(Count(*module, HloOpcode::kSubtract), 1); + TF_ASSERT_OK_AND_ASSIGN( + const HloInstruction* fusion, + FindHloInstruction(*module->entry_computation(), HloOpcode::kFusion)); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Add(op::Add(), op::Add()), + op::Add(op::Parameter(), op::Subtract()))); +} + +TEST_F(InstructionFusionTest, NoCyclesDueToMultiOutputFusion) { + // sub --> mul ---\ + // \--> call --> add --> tuple + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + c = f32[] constant(42) + p0 = f32[4,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + sub = f32[4,3]{1,0} subtract(p0, p1) + mul = f32[4,3]{1,0} multiply(sub, c) + call = f32[4,3]{1,0} custom-call(sub), custom_call_target="foo" + add = f32[4,3]{1,0} add(mul, call) + ROOT tuple = (f32[4,3]{1,0}) tuple(add) + })") + .ValueOrDie(); + + ASSERT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + // Visit instructions in post order to detect cycles. + // TODO(tjoerg): Add cycle detection to the HloVerifier. + class DummyVisitor : public DfsHloVisitorWithDefault { + public: + DummyVisitor() {} + Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { + return Status::OK(); + } + } visitor; + for (const HloComputation* computation : module->MakeComputationPostOrder()) { + // Accept will return a FailedPrecondition when a cycle is detected. 
+ EXPECT_TRUE(computation->root_instruction()->Accept(&visitor).ok()); + } +} + +TEST_F(InstructionFusionTest, NoMultiOutputFusionWithIncompatibleShapes) { + // sub[2,3] --> add[4,3] --> tuple([2,3], [4,3]) + // \-------------------------/ + auto module = tools::Parse(R"( + HloModule test_module + ENTRY OutputFusion { + p0 = f32[2,3]{1,0} parameter(0) + p1 = f32[4,3]{1,0} parameter(1) + p2 = f32[2,3]{1,0} parameter(2) + sub = f32[2,3]{1,0} subtract(p0, p2) + add = f32[4,3]{1,0} add(sub, p1) + ROOT tuple = (f32[2,3]{1,0}, f32[4,3]{1,0}) tuple(sub, add) + })") + .ValueOrDie(); + + // Multi-output fusion requires shapes to be compatible. Since `sub` and `add` + // have incompatible shapes, expect that no multi-output fusion happens. + ASSERT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()) + << module->ToString(); +} + +TEST_F(InstructionFusionTest, FuseIntoInputFusionInstruction) { + auto module = tools::Parse(R"( + HloModule test_module + + add_computation { + add_lhs = f32[] parameter(0) + add_rhs = f32[] parameter(1) + ROOT add_root = f32[] add(add_lhs, add_rhs) + } + + fused_computation { + p1 = f32[10] parameter(0) + zero = f32[] constant(0) + ROOT f2_root = f32[] reduce(p1, zero), dimensions={0}, + to_apply=add_computation + } + + ENTRY entry { + p0 = f32[10] parameter(0) + mul = f32[10] multiply(p0, p0) + fusion = f32[] fusion(mul), kind=kInput, calls=fused_computation + ROOT tuple = (f32[10], f32[]) tuple(fusion, mul) + })") + .ValueOrDie(); + + // Multi-output fusion is not supported for non-loop fusions at present. Since + // `fused_computation` is a input fusion, expect no multi-output fusion to + // happen. 
+ ASSERT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()) + << module->ToString(); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index cb6c98c481..1912b8f2c7 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -178,8 +178,7 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) { bool InstructionFusion::CanFuseOnAllPaths( HloInstruction* producer, HloInstruction* consumer, - const HloReachabilityMap& reachability_map, - const DoNotFuseSet& do_not_fuse) { + const HloInstructionSet& do_not_duplicate) { if (consumer == producer) { return true; } @@ -190,10 +189,11 @@ bool InstructionFusion::CanFuseOnAllPaths( auto* consumer_operand = consumer->mutable_operand(i); // If the operand is not on a path to the producer, it doesn't matter // whether it's fusable. - if (!reachability_map.IsReachable(producer, consumer_operand)) { + if (!reachability_->IsReachable(producer, consumer_operand)) { continue; } - if (do_not_fuse.count(consumer_operand) > 0 || !ShouldFuse(consumer, i)) { + if (do_not_duplicate.count(consumer_operand) > 0 || + !ShouldFuse(consumer, i)) { return false; } // The producer is reachable from consumer_operand which means we need @@ -201,18 +201,16 @@ bool InstructionFusion::CanFuseOnAllPaths( // producer to be fusable into consumer on all paths. // Perform the recursive step: make sure producer can be fused into // consumer_operand on all paths. 
- if (!CanFuseOnAllPaths(producer, consumer_operand, reachability_map, - do_not_fuse)) { + if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_duplicate)) { return false; } } return true; } -InstructionFusion::DoNotFuseSet InstructionFusion::ComputeGloballyUnfusable( +InstructionFusion::HloInstructionSet +InstructionFusion::ComputeGloballyUnfusable( tensorflow::gtl::ArraySlice post_order) { - auto reachability = computation_->ComputeReachability(); - // Forbid fusion of producers that: // a) Need to be duplicated, unless they can be fused into all consumers // via all paths. @@ -222,10 +220,10 @@ InstructionFusion::DoNotFuseSet InstructionFusion::ComputeGloballyUnfusable( // Note that if we allow fusion by these global rules, we may still forbid // fusing operations that require duplication later depending on // is_expensive_(). - DoNotFuseSet do_not_fuse; + HloInstructionSet do_not_duplicate; for (HloInstruction* consumer : post_order) { for (HloInstruction* producer : consumer->operands()) { - if (do_not_fuse.count(producer) > 0) { + if (do_not_duplicate.count(producer) > 0) { continue; } @@ -254,14 +252,14 @@ InstructionFusion::DoNotFuseSet InstructionFusion::ComputeGloballyUnfusable( // A will be not allowed to be fused into B, as it cannot be fused via // all paths. 
if (producer->IsFusable() && - CanFuseOnAllPaths(producer, consumer, *reachability, do_not_fuse)) { + CanFuseOnAllPaths(producer, consumer, do_not_duplicate)) { continue; } - do_not_fuse.insert(producer); + do_not_duplicate.insert(producer); } } - return do_not_fuse; + return do_not_duplicate; } StatusOr InstructionFusion::Run(HloModule* module) { @@ -273,6 +271,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { for (auto* computation : module->MakeNonfusionComputations()) { CHECK(!computation->IsFusionComputation()); computation_ = computation; + reachability_ = computation_->ComputeReachability(); // We want to be able to remove arbitrary instructions from the post order // and also compare positions of instructions in the post order. To make @@ -290,7 +289,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { InsertOrDie(&post_order_index, post_order[i], i); } - DoNotFuseSet do_not_fuse = ComputeGloballyUnfusable(post_order); + HloInstructionSet do_not_duplicate = ComputeGloballyUnfusable(post_order); // Instruction fusion effectively fuses edges in the computation graph // (producer instruction -> consumer instruction) so we iterate over all @@ -358,9 +357,20 @@ StatusOr InstructionFusion::Run(HloModule* module) { // ensures that B will be considered before A. // // We store the original indices of the operands to pass to ShouldFuse. - std::vector sorted_operand_numbers(instruction->operands().size()); - std::iota(std::begin(sorted_operand_numbers), - std::end(sorted_operand_numbers), 0); + std::vector sorted_operand_numbers; + sorted_operand_numbers.reserve(instruction->operands().size()); + for (int i = 0; i < instruction->operands().size(); ++i) { + // This will happen if we have two possible instructions to fuse the + // same operand into; once the operand is fused into one instruction, + // the other instruction will get a new get-tuple-element as its + // operand, which is not in the post-order index. 
+ // TODO(tjoerg): Look into fusing past these multi-output fuse points. + if (post_order_index.find(instruction->mutable_operand(i)) == + post_order_index.end()) { + continue; + } + sorted_operand_numbers.push_back(i); + } std::sort( sorted_operand_numbers.begin(), sorted_operand_numbers.end(), [&](int64 i, int64 j) { @@ -377,13 +387,20 @@ StatusOr InstructionFusion::Run(HloModule* module) { if (!operand->IsFusable()) { continue; } - if (!ShouldFuse(instruction, i)) { - continue; - } - if (do_not_fuse.count(operand) > 0) { + + HloInstruction* fusion_instruction; + // Try "regular" fusion if the operand may be duplicated. Otherwise, + // perform multi-output fusion, unless this creates a cycle. + // TODO(tjoerg): Consider making multi-output fusion the default. + if (ShouldFuse(instruction, i) && + do_not_duplicate.count(operand) == 0) { + fusion_instruction = Fuse(operand, instruction); + } else if (ShouldFuseIntoMultiOutput(instruction, i) && + !MultiOutputFusionCreatesCycle(operand, instruction)) { + fusion_instruction = FuseIntoMultiOutput(operand, instruction); + } else { continue; } - HloInstruction* fusion_instruction = Fuse(operand, instruction); // Fusing an instruction into a fusion instruction can change the // operand set of the fusion instruction. For simplicity just push the @@ -449,6 +466,19 @@ HloInstruction* InstructionFusion::FuseIntoMultiOutput( return fusion_instruction; } +bool InstructionFusion::MultiOutputFusionCreatesCycle( + HloInstruction* producer, HloInstruction* consumer) { + return c_any_of( + consumer->operands(), [&](const HloInstruction* consumer_operand) { + // The fusion algorithm traverses the HLO graph in reverse post order. + // Thus `consumer` is visited before its operands (including + // `producer`). Therefore, consumer operands cannot have been fused yet. + // It is thus safe to use the pre-computed reachability map.
+ return consumer_operand != producer && + reachability_->IsReachable(producer, consumer_operand); + }); +} + bool InstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h index c3c2ed0aaa..f73ca9adf7 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/instruction_fusion.h @@ -61,6 +61,14 @@ class InstructionFusion : public HloPassInterface { // Subtypes can override this with target-specific heuristics. virtual bool ShouldFuse(HloInstruction* consumer, int64 operand_index); + // Returns whether multi-output fusion can be applied to fuse `producer` into + // `consumer`. In contrast to "regular" fusion, the `producer` is not + // duplicated by multi-output fusion. + virtual bool ShouldFuseIntoMultiOutput(HloInstruction* consumer, + int64 operand_index) { + return false; + } + // Chooses a fusion kind for `producer` and `consumer`. // Default method chooses `kLoop`. virtual HloInstruction::FusionKind ChooseKind(const HloInstruction* producer, @@ -97,10 +105,12 @@ class InstructionFusion : public HloPassInterface { // Current HloComputation instance the loop fuser is traversing. HloComputation* computation_; HloModule* module_; + // Reachability information for the current computation. + std::unique_ptr reachability_; private: // The set of producers whose consumers we cannot fuse into. - using DoNotFuseSet = std::unordered_set; + using HloInstructionSet = std::unordered_set; HloInstruction* AddFusionInstruction(HloInstruction* producer, HloInstruction* consumer); @@ -108,18 +118,21 @@ class InstructionFusion : public HloPassInterface { // Whether or not we can fuse producer into consumer on all paths // from the producer to the consumer where nodes are HLOs and edges are uses. 
bool CanFuseOnAllPaths(HloInstruction* producer, HloInstruction* consumer, - const HloReachabilityMap& reachability_map, - const DoNotFuseSet& do_not_fuse); + const HloInstructionSet& do_not_fuse); // Computes the set of nodes that we do not want to fuse into any of their // consumers based on a global analysis of the HLO graph. - DoNotFuseSet ComputeGloballyUnfusable( + HloInstructionSet ComputeGloballyUnfusable( tensorflow::gtl::ArraySlice post_order); // Used to determine if an HLO is expensive. Expensive operations will not be // duplicated. std::function is_expensive_; + // Whether multi-output fusion would introduce a cycle into the HLO graph. + bool MultiOutputFusionCreatesCycle(HloInstruction* producer, + HloInstruction* consumer); + // Returns whether we may duplicate an instruction if we want to fuse it. bool may_duplicate_; -- GitLab From 3a4da8ce522366b0bbc91a28e37a3a9f4252888f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 02:54:37 -0700 Subject: [PATCH 085/902] Allow to generate fake infeed buffers with shapes derived from the computation. When replaying a computation from a HloSnapshot, we want to be able to provide fake infeed data. This was already possible when the infeed shape is known by providing it with the --fake_infeed_shape flag. With this CL, we add the option to derive it from the provided HloSnapshot. Also, we transfer the infeed shape a fixed number of times instead of infinitely many times (configurable with a flag). Otherwise we will definitely run out of memory at some point. 
PiperOrigin-RevId: 197863412 --- .../compiler/xla/tools/replay_computation.cc | 61 +++++++++++++++---- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index df0501386c..d641ddffac 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -63,6 +63,8 @@ namespace { // fields. struct Options { string fake_infeed_shape; + bool generate_fake_infeed = false; + int num_infeeds = 10; bool use_fake_data = false; bool print_result = true; int num_runs = 1; @@ -72,8 +74,12 @@ struct Options { // Invokes the given computation passing arbitrary data for every (unbound) // parameter if use_fake_data, Otherwise use recorded data if available. // -// Similarly, infeeds fake data of shape fake_infeed_shape if it is provided; -// otherwise, no infeed is performed. +// Similarly, infeeds fake data of shape fake_infeed_shape if it is provided. +// If generate_fake_infeed is true, the required infeed shape is derived from +// the computation and then used to provide a fake infeed shape. +// +// If neither generate_fake_infeed is true nor a fake_infeed_shape is provided, +// no infeed is performed. 
StatusOr> ReplayComputation(const HloSnapshot& module, Client* client, const Options& opts) { @@ -92,22 +98,48 @@ StatusOr> ReplayComputation(const HloSnapshot& module, } } + bool provide_infeed = false; + Shape infeed_shape; + if (!opts.fake_infeed_shape.empty()) { + StatusOr shape_status = + ShapeUtil::ParseShapeString(opts.fake_infeed_shape); + TF_CHECK_OK(shape_status.status()); + infeed_shape = std::move(shape_status).ValueOrDie(); + provide_infeed = true; + } else if (opts.generate_fake_infeed) { + for (const auto& comp : computation.proto().computations()) { + for (const auto& instruction : comp.instructions()) { + if (instruction.opcode() == HloOpcodeString(HloOpcode::kInfeed)) { + CHECK(!provide_infeed) + << "--generate_fake_infeed only works if the model has 0 or 1 " + "infeed ops, but this one has >= 2."; + provide_infeed = true; + infeed_shape = instruction.shape(); + LOG(INFO) << "Generating fake infeed shape for inferred shape: " + << ShapeUtil::HumanString(infeed_shape); + } + } + } + } // We only instantiate the thread pool if the user has requested that a - // concurrent infeed occur via the fake_infeed_shape. + // concurrent infeed occur via the fake_infeed_shape, or when + // --generate_fake_infeed is passed and there exists an infeed operation in + // the HloSnapshot. 
tensorflow::gtl::optional pool; - - if (!opts.fake_infeed_shape.empty()) { + if (provide_infeed) { pool.emplace(tensorflow::Env::Default(), "infeed", /*num_threads=*/1); - pool->Schedule([opts, client]() { - StatusOr shape_status = - ShapeUtil::ParseShapeString(opts.fake_infeed_shape); - TF_CHECK_OK(shape_status.status()); - Shape shape = std::move(shape_status).ValueOrDie(); - StatusOr> data_status = MakeFakeLiteral(shape); + pool->Schedule([opts, infeed_shape, client]() { + StatusOr> data_status = + MakeFakeLiteral(infeed_shape); TF_CHECK_OK(data_status.status()); std::unique_ptr data = std::move(data_status).ValueOrDie(); - while (true) { + // There may be several infeed buffers needed, however we don't know how + // many. If we proactively transfer too many infeed buffers, we may run + // out of memory. If we transfer too few infeed buffers, the program will + // hang. + // TODO(akuegel): Figure out a better way to handle this. + for (int i = 0; i < opts.num_infeeds; ++i) { TF_CHECK_OK(client->TransferToInfeed(*data)); } }); @@ -202,8 +234,13 @@ int main(int argc, char** argv) { "Print the result of the computation to stdout"), tensorflow::Flag("num_runs", &opts.num_runs, "Number of times to run each computation"), + tensorflow::Flag("num_infeeds", &opts.num_infeeds, + "Number of times we transfer the fake infeed data"), tensorflow::Flag("fake_infeed_shape", &opts.fake_infeed_shape, "Shape of fake data to construct for (infinite) infeed"), + tensorflow::Flag("generate_fake_infeed", &opts.generate_fake_infeed, + "Whether a fake infeed shape should be generated " + "derived from the computation"), tensorflow::Flag( "xla_hlo_profile_last_run", &opts.xla_hlo_profile_last_run, "Pass --xla_hlo_profile the last time we run the computation."), -- GitLab From a2048b8ce0e8ab37c5cf75bc21b503093091673b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 03:48:24 -0700 Subject: [PATCH 086/902] Automated g4 rollback of changelist 197477959 PiperOrigin-RevId: 197868028 --- .../optimizers/arithmetic_optimizer.cc | 43 -------- .../optimizers/arithmetic_optimizer.h | 1 - .../optimizers/arithmetic_optimizer_test.cc | 103 ------------------ 3 files changed, 147 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 060e4200af..e7f70c6657 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1380,47 +1380,6 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { } }; -class RemoveLogicalNotStage : public ArithmeticOptimizerStage { - public: - explicit RemoveLogicalNotStage(const GraphOptimizerContext& ctx, - const ArithmeticOptimizerContext& ctx_ext) - : ArithmeticOptimizerStage("RemoveLogicalNot", ctx, ctx_ext) {} - ~RemoveLogicalNotStage() override = default; - - bool IsSupported(const NodeDef* node) const override { - return IsLogicalNot(*node) && !IsInPreserveSet(*node); - } - - Status TrySimplify(NodeDef* node, string* simplified_node_name) override { - const string node_name = node->name(); - NodeDef* input; - TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &input)); - if (IsInPreserveSet(*input) || - NumNonControlOutputs(*input, *ctx().node_map) > 1) { - return Status::OK(); - } - string new_op; - if (IsEqual(*input)) { - new_op = "NotEqual"; - } else if (IsNotEqual(*input)) { - new_op = "Equal"; - } else if (IsLess(*input)) { - new_op = "GreaterEqual"; - } else if (IsLessEqual(*input)) { - new_op = "Greater"; - } else if (IsGreater(*input)) { - new_op = "LessEqual"; - } else if (IsGreaterEqual(*input)) { - new_op = "Less"; - } - if (!new_op.empty()) { - input->set_op(new_op); - *simplified_node_name = input->name(); - } - return Status::OK(); - } -}; - // This optimization 
hoists the common prefix of unary ops of the inputs to // concat out of the concat, for example: // Concat([Exp(Sin(x)), Exp(Sin(y)), Exp(Sin(z))]) @@ -2470,8 +2429,6 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.remove_negation) pipeline.AddStage(ctx, ctx_ext); - if (options_.remove_logical_not) - pipeline.AddStage(ctx, ctx_ext); if (options_.hoist_cwise_unary_chains) pipeline.AddStage(ctx, ctx_ext); if (options_.convert_sqrt_div_to_rsqrt_mul) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 8e1b3eda3b..1f6f563687 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -68,7 +68,6 @@ class ArithmeticOptimizer : public GraphOptimizer { bool hoist_cwise_unary_chains = false; bool convert_sqrt_div_to_rsqrt_mul = false; bool remove_idempotent = true; - bool remove_logical_not = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 64fdc8a83b..99f93e6eec 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -177,11 +177,6 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.remove_idempotent = true; } - - void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_logical_not = true; - } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -2742,103 +2737,5 @@ TEST_F(ArithmeticOptimizerTest, RemoveIdempotent) { } } -TEST_F(ArithmeticOptimizerTest, RemoveLogicalNot) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Const(s.WithOpName("a"), 3.14f, {32}); - Output b = ops::Const(s.WithOpName("b"), -3.14f, {32}); - Output eq = ops::Equal(s.WithOpName("eq"), a, b); - Output neq = ops::NotEqual(s.WithOpName("neq"), a, b); - Output lt = ops::Less(s.WithOpName("lt"), a, b); - Output le = ops::LessEqual(s.WithOpName("le"), a, b); - Output gt = ops::Greater(s.WithOpName("gt"), a, b); - Output ge = ops::GreaterEqual(s.WithOpName("ge"), a, b); - // not_eq is reserved - Output not_eq1 = ops::LogicalNot(s.WithOpName("not_eq1"), eq); - Output not_neq = ops::LogicalNot(s.WithOpName("not_neq"), neq); - Output not_lt = ops::LogicalNot(s.WithOpName("not_lt"), lt); - Output not_le = ops::LogicalNot(s.WithOpName("not_le"), le); - Output not_gt = ops::LogicalNot(s.WithOpName("not_gt"), gt); - Output not_ge = ops::LogicalNot(s.WithOpName("not_ge"), ge); - Output id_not_eq = ops::Identity(s.WithOpName("id_not_eq"), not_eq1); - Output id_not_neq = ops::Identity(s.WithOpName("id_not_neq"), not_neq); - Output id_not_lt = ops::Identity(s.WithOpName("id_not_lt"), not_lt); - Output id_not_le = ops::Identity(s.WithOpName("id_not_le"), 
not_le); - Output id_not_gt = ops::Identity(s.WithOpName("id_not_gt"), not_gt); - Output id_not_ge = ops::Identity(s.WithOpName("id_not_ge"), not_ge); - - GrapplerItem item; - item.fetch = {"id_not_eq", "id_not_neq", "id_not_lt", - "id_not_le", "id_not_gt", "id_not_ge"}; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - - auto tensors_expected = EvaluateNodes(item.graph, item.fetch); - - GraphDef output; - ArithmeticOptimizer optimizer; - EnableOnlyRemoveLogicalNot(&optimizer); - OptimizeTwice(&optimizer, &item, &output); - LOG(INFO) << output.DebugString(); - int found = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "id_not_eq") { - EXPECT_EQ("eq", node.input(0)); - ++found; - } - if (node.name() == "id_not_neq") { - EXPECT_EQ("neq", node.input(0)); - ++found; - } - if (node.name() == "id_not_lt") { - EXPECT_EQ("lt", node.input(0)); - ++found; - } - if (node.name() == "id_not_le") { - EXPECT_EQ("le", node.input(0)); - ++found; - } - if (node.name() == "id_not_gt") { - EXPECT_EQ("gt", node.input(0)); - ++found; - } - if (node.name() == "id_not_ge") { - EXPECT_EQ("ge", node.input(0)); - ++found; - } - - if (node.name() == "eq") { - EXPECT_EQ("NotEqual", node.op()); - ++found; - } - if (node.name() == "neq") { - EXPECT_EQ("Equal", node.op()); - ++found; - } - if (node.name() == "lt") { - EXPECT_EQ("GreaterEqual", node.op()); - ++found; - } - if (node.name() == "le") { - EXPECT_EQ("Greater", node.op()); - ++found; - } - if (node.name() == "gt") { - EXPECT_EQ("LessEqual", node.op()); - ++found; - } - if (node.name() == "ge") { - EXPECT_EQ("Less", node.op()); - ++found; - } - } - EXPECT_EQ(12, found); - - auto tensors = EvaluateNodes(output, item.fetch); - EXPECT_EQ(tensors.size(), tensors_expected.size()); - EXPECT_EQ(tensors.size(), item.fetch.size()); - for (int i = 0; i < item.fetch.size(); ++i) { - test::ExpectTensorEqual(tensors_expected[i], tensors[i]); - } -} - } // namespace grappler } // namespace tensorflow -- GitLab From 
afab4eaa372b5b4bd1d46a5b2ec13f61840cbf84 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 24 May 2018 06:20:04 -0700 Subject: [PATCH 087/902] Style guide edits: refer to the broader Google style guide, which is what was actually used in the code, to replace some of the rules that were spelled out explicitly. Use AutoGraph, rather than TensorFlow AutoGraph for name. PiperOrigin-RevId: 197881802 --- tensorflow/contrib/autograph/CONTRIBUTING.md | 5 +- tensorflow/contrib/autograph/STYLE_GUIDE.md | 76 ++++---------------- 2 files changed, 17 insertions(+), 64 deletions(-) diff --git a/tensorflow/contrib/autograph/CONTRIBUTING.md b/tensorflow/contrib/autograph/CONTRIBUTING.md index a7a3fe1452..a4aec8c74a 100644 --- a/tensorflow/contrib/autograph/CONTRIBUTING.md +++ b/tensorflow/contrib/autograph/CONTRIBUTING.md @@ -2,6 +2,9 @@ We'd love to have your patches and contributions! Here are some guidelines. In general, we follow the [TensorFlow contributing guidelines](../../CONTRIBUTING.md), but have some [AutoGraph-specific style guidelines](STYLE_GUIDE.md). More details below. +## TensorFlow Code of Conduct +Please review and follow the [TensorFlow Code of Conduct](../../CODE_OF_CONDUCT.md). + ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License @@ -28,7 +31,7 @@ repository (with credit to the original author) and closes the pull request. ## Style -See the [TensorFlow AutoGraph style guide](STYLE_GUIDE.md). +See the [AutoGraph style guide](STYLE_GUIDE.md). 
## Unit tests diff --git a/tensorflow/contrib/autograph/STYLE_GUIDE.md b/tensorflow/contrib/autograph/STYLE_GUIDE.md index 5618ec3e34..866e5f583a 100644 --- a/tensorflow/contrib/autograph/STYLE_GUIDE.md +++ b/tensorflow/contrib/autograph/STYLE_GUIDE.md @@ -1,43 +1,26 @@ -# TensorFlow AutoGraph Style Guide +# AutoGraph Style Guide -This page contains style decisions that both developers and users of TensorFlow -AutoGraph should follow to increase the readability of their code, reduce the -number of errors, and promote consistency. We borrow many style principles from the TensorFlow Probability style guide. +This page contains style decisions that developers should follow when +contributing code to AutoGraph. ## TensorFlow Style Follow the [TensorFlow style -guide](https://www.tensorflow.org/community/style_guide) and [documentation -guide](https://www.tensorflow.org/community/documentation). Below are additional -TensorFlow conventions not noted in those guides. In the future, these noted -conventions may be moved upstream. +guide](https://www.tensorflow.org/community/style_guide), the [documentation +guide](https://www.tensorflow.org/community/documentation) and the +[Google Python style guide](https://google.github.io/styleguide/pyguide.html). + +Naming conventions: 1. The name is TensorFlow, not Tensorflow. 2. The name is AutoGraph, not Autograph. -## TensorFlow Code of Conduct -Please review and follow the [TensorFlow Code of Conduct](../../CODE_OF_CONDUCT.md). - -## TensorFlow AutoGraph Style +## AutoGraph Style -Below are TensorFlow AutoGraph-specific conventions. In the event of conflict, +Below are AutoGraph-specific conventions. In the event of conflict, it supercedes all previous conventions. -1. __Importing submodule aliases.__ Use the Pythonic style -`from tensorflow.contrib.autograph.converters import ifexp` and `from tensorflow.contrib import autograph as ag`. - -2. 
__Examples in Docstrings.__ Write a `#### Examples` subsection below `Args`, - `Returns`, `Raises`, etc. to illustrate examples. If the docstring's last - line is a fence bracket (\`\`\`) closing a code snippet, add an empty line - before closing the docstring with \"\"\". This properly displays the code - snippet. - - Justification: Users regularly need to remind themselves of args and - semantics. But rarely look at examples more than the first time. But since - examples are usually long (which is great!) it means they have to do a lot - of annoying scrolling ...unless Examples follow Args/Returns/Raises. - -3. __Citations in Docstrings.__ Write a `#### References` subsection at the +1. __Citations in Docstrings.__ Write a `#### References` subsection at the bottom of any docstring with citations. Use ICLR’s bibliography style to write references; for example, order entries by the first author's last name. Add a link to the paper if the publication is open source (ideally, @@ -77,21 +60,12 @@ it supercedes all previous conventions. https://arxiv.org/abs/1803.04386 ``` -4. When doing float math over literals eg use `1.` instead of `1` or `1.0`. - - * Using `1.` is another line of defense against an automatic casting - mistake. (Using `1.0` is also such a defense but is not minimal.) - -5. Prefer using named args for functions' 2nd args onward. - - * Definitely use named args for 2nd args onward in docstrings. - -9. Avoid LaTeX in docstrings. +2. Avoid LaTeX in docstrings. * It is not rendered in many (if not most) editors and can be hard to read for both LaTeX experts and non-experts. -10. Write docstring and comment math using ASCII friendly notation; python using +3. Write docstring and comment math using ASCII friendly notation; python using operators. E.g., `x**2` better than `x^2`, `x[i, j]` better than `x_{i,j}`, `sum{ f(x[i]) : i=1...n }` better than `\sum_{i=1}^n f(x_i)` `int{sin(x) dx: x in [0, 2 pi]}` better than `\int_0^{2\pi} sin(x) dx`. 
@@ -99,27 +73,3 @@ it supercedes all previous conventions. * The more we stick to python style, the more someone can copy/paste/execute. * Python style is usually easier to read as ASCII. - -11. All public functions require docstrings with: one line description, Args, - Returns, Raises (if raises exceptions). - - * Returns docstrings should be in the same format as Args, eg, of the form - "name: Description." Part of the rationale is that we are suggesting a - reasonable variable name for the returned object(s). - -12. Regard `*args` and/or `**kwargs` as features of last resort. - - * Keyword arguments make the intention of a function call more clear. - * [Possible exceptions for - `kwargs`](https://stackoverflow.com/questions/1415812/why-use-kwargs-in-python-what-are-some-real-world-advantages-over-using-named). - -18. The `__init__.py` file for modules should use TensorFlow's - `remove_undocumented` feature, which seals the module's methods. - -21. Use `"{}".format()` rather than `"" %` for string formatting. - - Justification: [PEP 3101](https://www.python.org/dev/peps/pep-3101/) and - [Python official - tutorials](https://docs.python.org/3.2/tutorial/inputoutput.html#old-string-formatting): - "...this old style of formatting will eventually be removed from the - language, str.format() should generally be used." -- GitLab From 22443c0f157658e04b96cbc06904b32486584055 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 07:00:24 -0700 Subject: [PATCH 088/902] When using fake infeed data, fill the infeed when it is empty. This makes sure we avoid OOM when there is too much infeed data to send it at once, and we also don't need the magic "num_infeeds" parameter anymore. 
PiperOrigin-RevId: 197886121 --- .../xla/service/gpu/infeed_manager.cc | 28 +++++++++++++++---- .../compiler/xla/service/gpu/infeed_manager.h | 9 ++++++ tensorflow/compiler/xla/tools/BUILD | 1 + .../compiler/xla/tools/replay_computation.cc | 28 ++++++++++--------- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/infeed_manager.cc b/tensorflow/compiler/xla/service/gpu/infeed_manager.cc index 3ddc1c0789..ae310beefa 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/infeed_manager.cc @@ -49,13 +49,25 @@ void InfeedManager::EnqueueBuffers(const std::vector& buffers) { } InfeedBuffer* InfeedManager::BlockingDequeueBuffer() { - tensorflow::mutex_lock l(mu_); - while (enqueued_buffer_.empty()) { - cv_.wait(l); + bool became_empty = false; + InfeedBuffer* current_buffer; + { + tensorflow::mutex_lock l(mu_); + while (enqueued_buffer_.empty()) { + cv_.wait(l); + } + current_buffer = enqueued_buffer_.front(); + enqueued_buffer_.pop_front(); + dequeued_buffer_.insert(current_buffer); + if (enqueued_buffer_.empty()) { + became_empty = true; + } + } + if (became_empty) { + for (const auto& callback : on_empty_callbacks_) { + callback(); + } } - InfeedBuffer* current_buffer = enqueued_buffer_.front(); - enqueued_buffer_.pop_front(); - dequeued_buffer_.insert(current_buffer); return current_buffer; } @@ -88,6 +100,10 @@ se::Stream* InfeedManager::GetStream(se::StreamExecutor* executor) { return host_to_device_stream_.get(); } +void InfeedManager::RegisterOnEmptyCallback(std::function callback) { + on_empty_callbacks_.push_back(std::move(callback)); +} + InfeedManager* GetOrCreateInfeedManager() { static InfeedManager* manager = new InfeedManager; return manager; diff --git a/tensorflow/compiler/xla/service/gpu/infeed_manager.h b/tensorflow/compiler/xla/service/gpu/infeed_manager.h index d5f2216d46..a3fc15cfe3 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_manager.h 
+++ b/tensorflow/compiler/xla/service/gpu/infeed_manager.h @@ -21,6 +21,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_INFEED_MANAGER_H_ #include +#include #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/gtl/flatset.h" @@ -100,6 +101,10 @@ class InfeedManager { // returns null. se::Stream* GetStream(se::StreamExecutor* executor); + // Registers a callback that will be called when 'enqueued_buffer_' becomes + // empty. + void RegisterOnEmptyCallback(std::function callback); + private: // TODO(b/30467474): Revisit if this mutex becomes a point of // contention. @@ -122,6 +127,10 @@ class InfeedManager { // Executor that the host_to_device_stream belongs to. Not owned. se::StreamExecutor* host_to_device_executor_; + + // List of callbacks which will be called when 'enqueued_buffer_' becomes + // empty. + std::vector> on_empty_callbacks_; }; // Singleton creator-or-accessor: Returns the GPU infeed manager. diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 415cf9c16a..15b9cd4265 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -86,6 +86,7 @@ cc_library( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/lib:testing", "//tensorflow/compiler/xla/service:hlo_proto", + "//tensorflow/compiler/xla/service/gpu:infeed_manager", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index d641ddffac..2349fa919e 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -41,6 +41,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/gpu/infeed_manager.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -64,7 +65,6 @@ namespace { struct Options { string fake_infeed_shape; bool generate_fake_infeed = false; - int num_infeeds = 10; bool use_fake_data = false; bool print_result = true; int num_runs = 1; @@ -126,22 +126,26 @@ StatusOr> ReplayComputation(const HloSnapshot& module, // --generate_fake_infeed is passed and there exists an infeed operation in // the HloSnapshot. tensorflow::gtl::optional pool; + std::unique_ptr data; + if (provide_infeed) { + data = std::move(MakeFakeLiteral(infeed_shape)).ValueOrDie(); + } + auto transfer_infeed = [&data, client]() { + TF_CHECK_OK(client->TransferToInfeed(*data)); + }; if (provide_infeed) { pool.emplace(tensorflow::Env::Default(), "infeed", /*num_threads=*/1); - pool->Schedule([opts, infeed_shape, client]() { - StatusOr> data_status = - MakeFakeLiteral(infeed_shape); - TF_CHECK_OK(data_status.status()); - std::unique_ptr data = std::move(data_status).ValueOrDie(); + pool->Schedule([transfer_infeed]() { // There may be several infeed buffers needed, however we don't know how // many. If we proactively transfer too many infeed buffers, we may run // out of memory. If we transfer too few infeed buffers, the program will - // hang. - // TODO(akuegel): Figure out a better way to handle this. - for (int i = 0; i < opts.num_infeeds; ++i) { - TF_CHECK_OK(client->TransferToInfeed(*data)); - } + // hang. Therefore, we register a callback that is called when the infeed + // becomes empty, and in this callback we will transfer another fake + // infeed. 
+ auto infeed_manager = xla::gpu::GetOrCreateInfeedManager(); + infeed_manager->RegisterOnEmptyCallback(transfer_infeed); + transfer_infeed(); }); } @@ -234,8 +238,6 @@ int main(int argc, char** argv) { "Print the result of the computation to stdout"), tensorflow::Flag("num_runs", &opts.num_runs, "Number of times to run each computation"), - tensorflow::Flag("num_infeeds", &opts.num_infeeds, - "Number of times we transfer the fake infeed data"), tensorflow::Flag("fake_infeed_shape", &opts.fake_infeed_shape, "Shape of fake data to construct for (infinite) infeed"), tensorflow::Flag("generate_fake_infeed", &opts.generate_fake_infeed, -- GitLab From 74eb0c4ca289e9dfb6dfeca47696fe3186956d50 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 09:05:41 -0700 Subject: [PATCH 089/902] Extracts the 'switch with same input' optimization into its own method. PiperOrigin-RevId: 197900929 --- .../grappler/optimizers/constant_folding.cc | 137 ++++++++---------- .../grappler/optimizers/constant_folding.h | 18 +++ 2 files changed, 82 insertions(+), 73 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index bf606fb8b1..a71f83b871 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -2035,22 +2035,66 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - // Switch(x, x) will always feed false to its false branch and true to - // its true branch. By rewriting the graph a bit, we can propagate these - // constants down the two output branches, and just use control dependencies - // to trigger the selected one at runtime. For example, - // - // +------+ - // x-->|Switch|-->a (in practice there may be multiple consumers of each - // x-->| |-->b output branch.) 
- // +------+ - // - // Is rewritten as - // - // +------+ - // x-->|Switch|-->Identity--^>Const(false)-->a - // x-->| |-->Identity--^>Const(true)-->b - // +------+ + if (SimplifySwitch(optimized_graph, node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (SimplifyReduction(*properties, node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (SimplifyReshape(*properties, use_shape_info, node)) { + graph_modified_ = true; + return Status::OK(); + } + + bool arithmetic_simplification_succeed = false; + Status simplify_arithmetic_status = + SimplifyArithmeticOperations(*properties, use_shape_info, optimized_graph, + node, &arithmetic_simplification_succeed); + if (!simplify_arithmetic_status.ok()) { + return simplify_arithmetic_status; + } else if (arithmetic_simplification_succeed) { + graph_modified_ = true; + return Status::OK(); + } + + if (ReduceDivToReciprocalMul(optimized_graph, node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (ConstantPushDown(node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (MulConvPushDown(node, *properties)) { + graph_modified_ = true; + return Status::OK(); + } + + if (PartialConstPropThroughIdentityN(node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (PartialAssocOpConstFolding(optimized_graph, properties, node)) { + graph_modified_ = true; + return Status::OK(); + } + + if (PartialConcatConstFolding(optimized_graph, properties, node)) { + graph_modified_ = true; + return Status::OK(); + } + + return Status::OK(); +} + +bool ConstantFolding::SimplifySwitch(GraphDef* optimized_graph, NodeDef* node) { if (node->op() == "Switch" && node->input(0) == node->input(1) && !OptimizedNodeExists(*node, "_const_false") && !OptimizedNodeExists(*node, "_const_true")) { @@ -2087,7 +2131,7 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, false_node->set_name(OptimizedNodeName(*node, "_const_false")); if (!CreateNodeDef(false_node->name(), 
TensorValue(&false_t), false_node) .ok()) { - return Status::OK(); + return false; } false_node->set_device(node->device()); @@ -2095,7 +2139,7 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, true_node->set_name(OptimizedNodeName(*node, "_const_true")); if (!CreateNodeDef(true_node->name(), TensorValue(&true_t), true_node) .ok()) { - return Status::OK(); + return false; } true_node->set_device(node->device()); @@ -2129,63 +2173,10 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } } - graph_modified_ = true; - return Status::OK(); + return true; } } - - if (SimplifyReduction(*properties, node)) { - graph_modified_ = true; - return Status::OK(); - } - - if (SimplifyReshape(*properties, use_shape_info, node)) { - graph_modified_ = true; - return Status::OK(); - } - - bool arithmetic_simplification_succeed = false; - Status simplify_arithmetic_status = - SimplifyArithmeticOperations(*properties, use_shape_info, optimized_graph, - node, &arithmetic_simplification_succeed); - if (!simplify_arithmetic_status.ok()) { - return simplify_arithmetic_status; - } else if (arithmetic_simplification_succeed) { - graph_modified_ = true; - return Status::OK(); - } - - if (ReduceDivToReciprocalMul(optimized_graph, node)) { - graph_modified_ = true; - return Status::OK(); - } - - if (ConstantPushDown(node)) { - graph_modified_ = true; - return Status::OK(); - } - - if (MulConvPushDown(node, *properties)) { - graph_modified_ = true; - return Status::OK(); - } - - if (PartialConstPropThroughIdentityN(node)) { - graph_modified_ = true; - return Status::OK(); - } - - if (PartialAssocOpConstFolding(optimized_graph, properties, node)) { - graph_modified_ = true; - return Status::OK(); - } - - if (PartialConcatConstFolding(optimized_graph, properties, node)) { - graph_modified_ = true; - return Status::OK(); - } - - return Status::OK(); + return false; } bool ConstantFolding::SimplifyReduction(const GraphProperties& properties, diff 
--git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 07a2e01042..88f03b3931 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -146,6 +146,24 @@ class ConstantFolding : public GraphOptimizer { // Simplifies a Reduction operation to an Identity operation if applicable. bool SimplifyReduction(const GraphProperties& properties, NodeDef* node); + // Switch(x, x) will always feed false to its false branch and true to + // its true branch. By rewriting the graph a bit, we can propagate these + // constants down the two output branches, and just use control dependencies + // to trigger the selected one at runtime. For example, + // + // +------+ + // x-->|Switch|-->a (in practice there may be multiple consumers of each + // x-->| |-->b output branch.) + // +------+ + // + // Is rewritten as + // + // +------+ + // x-->|Switch|-->Identity--^>Const(false)-->a + // x-->| |-->Identity--^>Const(true)-->b + // +------+ + bool SimplifySwitch(GraphDef* optimized_graph, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From e47e08ee25b2d9cef724b85f0a24050bca3389c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 09:15:17 -0700 Subject: [PATCH 090/902] Internal change. 
PiperOrigin-RevId: 197902509 --- .../kernels/internal/optimized/optimized_ops.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 025e2825c6..f23b90d9dc 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2018,11 +2018,23 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, } const int gemm_input_rows = gemm_input_dims->sizes[0]; - const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_dims, 0); + // Using FlatSizeSkipDim causes segfault in some contexts (see b/79927784). + // The root cause has not yet been identified though. Same applies below for + // the other calls commented out. This is a partial rollback of cl/196819423. + // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_dims, 0); + const int gemm_input_cols = gemm_input_dims->sizes[1] * + gemm_input_dims->sizes[2] * + gemm_input_dims->sizes[3]; const int filter_rows = filter_dims.sizes[3]; - const int filter_cols = FlatSizeSkipDim(filter_dims, 3); + // See b/79927784. + // const int filter_cols = FlatSizeSkipDim(filter_dims, 3); + const int filter_cols = + filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2]; const int output_rows = output_dims.sizes[0]; - const int output_cols = FlatSizeSkipDim(output_dims, 0); + // See b/79927784. + // const int output_cols = FlatSizeSkipDim(output_dims, 0); + const int output_cols = + output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3]; TFLITE_DCHECK_EQ(output_rows, filter_rows); TFLITE_DCHECK_EQ(output_cols, gemm_input_cols); TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows); -- GitLab From b947cb1c69177d79a9982a00896321bc8d3767d5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 09:28:43 -0700 Subject: [PATCH 091/902] Fix a bug in BestExporter - estimator.model_dir should be property instead of a function. PiperOrigin-RevId: 197904351 --- tensorflow/python/estimator/exporter.py | 4 ++-- tensorflow/python/estimator/exporter_test.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index ced7930671..f49ed05f57 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -287,11 +287,11 @@ class BestExporter(Exporter): is_the_final_export): export_result = None - if self._model_dir != estimator.model_dir() and self._event_file_pattern: + if self._model_dir != estimator.model_dir and self._event_file_pattern: # Loads best metric from event files. tf_logging.info('Loading best metric from event files.') - self._model_dir = estimator.model_dir() + self._model_dir = estimator.model_dir full_event_file_pattern = os.path.join(self._model_dir, self._event_file_pattern) self._best_eval_result = self._get_best_eval_result( diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 053c549071..4cb4bffc8d 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -62,7 +62,7 @@ class BestExporterTest(test.TestCase): exports_to_keep=5) estimator = test.mock.Mock(spec=estimator_lib.Estimator) estimator.export_savedmodel.return_value = "export_result_path" - estimator.model_dir.return_value = export_dir_base + estimator.model_dir = export_dir_base export_result = exporter.export(estimator, export_dir_base, "checkpoint_path", {}, False) @@ -94,7 +94,7 @@ class BestExporterTest(test.TestCase): exports_to_keep=1) estimator = test.mock.Mock(spec=estimator_lib.Estimator) estimator.export_savedmodel.return_value = "export_result_path" - estimator.model_dir.return_value = 
export_dir_base + estimator.model_dir = export_dir_base export_result = exporter.export(estimator, export_dir_base, "checkpoint_path", {"loss": 0.5}, False) @@ -133,7 +133,7 @@ class BestExporterTest(test.TestCase): exports_to_keep=1) estimator = test.mock.Mock(spec=estimator_lib.Estimator) - estimator.model_dir.return_value = export_dir_base + estimator.model_dir = export_dir_base estimator.export_savedmodel.return_value = "export_result_path" export_result = exporter.export(estimator, export_dir_base, @@ -172,7 +172,7 @@ class BestExporterTest(test.TestCase): serving_input_receiver_fn=_serving_input_receiver_fn, exports_to_keep=2) estimator = test.mock.Mock(spec=estimator_lib.Estimator) - estimator.model_dir.return_value = export_dir_base + estimator.model_dir = export_dir_base # Garbage collect all but the most recent 2 exports, # where recency is determined based on the timestamp directory names. exporter.export(estimator, export_dir_base, None, None, False) -- GitLab From 7b77cd5e40a9440d9192f907cdb7f798dc6d58c8 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 24 May 2018 09:50:19 -0700 Subject: [PATCH 092/902] [XLA] Devectorize constant-sized arrays A sufficiently smart compiler could promote these from heap to stack, in practice no compiler does that. Remove the superfluous heap allocations manually. 
PiperOrigin-RevId: 197907388 --- tensorflow/compiler/xla/reference_util.h | 37 +++++++++---------- .../xla/service/hlo_dataflow_analysis.cc | 4 +- .../xla/service/hlo_evaluator_typed_visitor.h | 14 ++++--- .../compiler/xla/service/llvm_ir/tuple_ops.cc | 24 +++++------- .../compiler/xla/service/llvm_ir/tuple_ops.h | 8 ++-- 5 files changed, 42 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 2698ba7d79..8fa6961d19 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -265,9 +265,9 @@ class ReferenceUtil { const Array3D& rhs, int concatenate_dimension) { CHECK(0 <= concatenate_dimension && concatenate_dimension < 3); - std::vector lhs_dims = {lhs.n1(), lhs.n2(), lhs.n3()}; - std::vector rhs_dims = {rhs.n1(), rhs.n2(), rhs.n3()}; - std::vector out_dims = {rhs.n1(), rhs.n2(), rhs.n3()}; + const int64 lhs_dims[] = {lhs.n1(), lhs.n2(), lhs.n3()}; + const int64 rhs_dims[] = {rhs.n1(), rhs.n2(), rhs.n3()}; + int64 out_dims[] = {rhs.n1(), rhs.n2(), rhs.n3()}; for (int i = 0; i < 3; ++i) { if (i != concatenate_dimension) { out_dims[i] = lhs_dims[i]; @@ -299,9 +299,9 @@ class ReferenceUtil { const Array4D& rhs, int concatenate_dimension) { CHECK(0 <= concatenate_dimension && concatenate_dimension < 4); - std::vector lhs_dims = {lhs.n1(), lhs.n2(), lhs.n3(), lhs.n4()}; - std::vector rhs_dims = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()}; - std::vector out_dims = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()}; + const int64 lhs_dims[] = {lhs.n1(), lhs.n2(), lhs.n3(), lhs.n4()}; + const int64 rhs_dims[] = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()}; + int64 out_dims[] = {rhs.n1(), rhs.n2(), rhs.n3(), rhs.n4()}; for (int i = 0; i < 4; ++i) { if (i != concatenate_dimension) { out_dims[i] = lhs_dims[i]; @@ -553,12 +553,11 @@ class ReferenceUtil { const NativeT pad) { CHECK_EQ(padding.dimensions_size(), 3); - const std::vector input_bounds = {operand.n1(), 
operand.n2(), - operand.n3()}; - std::vector pad_low(3); - std::vector pad_high(3); - std::vector pad_interior(3); - std::vector output_bounds(3); + const int64 input_bounds[] = {operand.n1(), operand.n2(), operand.n3()}; + int64 pad_low[3]; + int64 pad_high[3]; + int64 pad_interior[3]; + int64 output_bounds[3]; for (int64 i = 0; i < 3; ++i) { pad_low[i] = padding.dimensions(i).edge_padding_low(); pad_high[i] = padding.dimensions(i).edge_padding_high(); @@ -574,7 +573,7 @@ class ReferenceUtil { Array3D result(output_bounds[0], output_bounds[1], output_bounds[2]); - std::vector indices = {0, 0, 0}; + int indices[] = {0, 0, 0}; for (indices[0] = 0; indices[0] < output_bounds[0]; ++indices[0]) { for (indices[1] = 0; indices[1] < output_bounds[1]; ++indices[1]) { for (indices[2] = 0; indices[2] < output_bounds[2]; ++indices[2]) { @@ -612,12 +611,12 @@ class ReferenceUtil { const NativeT pad) { CHECK_EQ(padding.dimensions_size(), 4); - const std::vector input_bounds = {operand.n1(), operand.n2(), - operand.n3(), operand.n4()}; - std::vector pad_low(4); - std::vector pad_high(4); - std::vector pad_interior(4); - std::vector output_bounds(4); + const int64 input_bounds[] = {operand.n1(), operand.n2(), operand.n3(), + operand.n4()}; + int64 pad_low[4]; + int64 pad_high[4]; + int64 pad_interior[4]; + int64 output_bounds[4]; for (int64 i = 0; i < 4; ++i) { pad_low[i] = padding.dimensions(i).edge_padding_low(); pad_high[i] = padding.dimensions(i).edge_padding_high(); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index b06e6c9f3e..cc130a4900 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -363,7 +363,7 @@ bool HloDataflowAnalysis::UpdateCallValueSet(HloInstruction* call) { bool HloDataflowAnalysis::UpdateConditionalValueSet( HloInstruction* conditional) { CHECK_EQ(conditional->opcode(), 
HloOpcode::kConditional); - std::vector inputs = { + const InstructionValueSet* const inputs[] = { &GetInstructionValueSet( conditional->true_computation()->root_instruction()), &GetInstructionValueSet( @@ -538,7 +538,7 @@ bool HloDataflowAnalysis::UpdateTupleValueSet(HloInstruction* tuple) { bool HloDataflowAnalysis::UpdateWhileValueSet(HloInstruction* xla_while) { CHECK_EQ(xla_while->opcode(), HloOpcode::kWhile); - std::vector inputs = { + const InstructionValueSet* const inputs[] = { &GetInstructionValueSet(xla_while->while_body()->root_instruction()), &GetInstructionValueSet(xla_while->operand(0))}; if (ssa_form_) { diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 024e8751f7..e37d651c95 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1482,11 +1482,12 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Evaluate computation with specified literal operands. auto curr_val_literal = Literal::CreateR0(curr_val); auto result_val_literal = Literal::CreateR0(result_val); - std::vector args = {result_val_literal.get(), - curr_val_literal.get()}; std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*function, args) + embedded_evaluator + .Evaluate( + *function, + {result_val_literal.get(), curr_val_literal.get()}) .ConsumeValueOrDie(); // Clear visit states so that we can use the evaluator again on // the same computation. 
@@ -1685,10 +1686,11 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { Literal::CreateR0(curr_val); const auto result_val_literal = Literal::CreateR0(result_val); - const std::vector args = { - result_val_literal.get(), curr_val_literal.get()}; std::unique_ptr computed_result = - embedded_evaluator.Evaluate(*function, args) + embedded_evaluator + .Evaluate( + *function, + {result_val_literal.get(), curr_val_literal.get()}) .ConsumeValueOrDie(); // Clear visit states so that the we can use the evaluate again diff --git a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc index 3a21eda357..5fc08aab91 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc @@ -24,15 +24,14 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" namespace xla { namespace llvm_ir { -void EmitTupleSelect(IrArray select, IrArray pred, llvm::Value* on_true, - llvm::Value* on_false, llvm::IRBuilder<>* ir_builder, - llvm::Module* module) { +void EmitTupleSelect(const IrArray& select, const IrArray& pred, + llvm::Value* on_true, llvm::Value* on_false, + llvm::IRBuilder<>* ir_builder, llvm::Module* module) { CHECK(ShapeUtil::IsScalar(pred.GetShape())); llvm::LoadInst* pred_value = @@ -47,30 +46,27 @@ void EmitTupleSelect(IrArray select, IrArray pred, llvm::Value* on_true, VLOG(2) << " pred_cond: " << DumpToString(*pred_cond); for (int i = 0; i < ShapeUtil::TupleElementCount(select.GetShape()); ++i) { - std::vector element_index = {ir_builder->getInt64(0), - ir_builder->getInt64(i)}; + llvm::Value* const element_index[] = {ir_builder->getInt64(0), + ir_builder->getInt64(i)}; llvm::Value* on_true_element_address = 
ir_builder->CreateInBoundsGEP(on_true, element_index); llvm::Value* on_true_element = ir_builder->CreateLoad( - on_true_element_address, - tensorflow::strings::Printf("on_true_element_%d", i).c_str()); + on_true_element_address, "on_true_element_" + llvm::Twine(i)); llvm::Value* on_false_element_address = ir_builder->CreateInBoundsGEP(on_false, element_index); llvm::Value* on_false_element = ir_builder->CreateLoad( - on_false_element_address, - tensorflow::strings::Printf("on_false_element_%d", i).c_str()); + on_false_element_address, "on_false_element_" + llvm::Twine(i)); llvm::Value* output_element_address = ir_builder->CreateInBoundsGEP(select.GetBasePointer(), element_index); ir_builder->CreateStore( - ir_builder->CreateSelect( - pred_cond, on_true_element, on_false_element, - tensorflow::strings::Printf("select_output_element_%d", i).c_str()), + ir_builder->CreateSelect(pred_cond, on_true_element, on_false_element, + "select_output_element_" + llvm::Twine(i)), output_element_address); } } -void EmitTuple(IrArray tuple, +void EmitTuple(const IrArray& tuple, tensorflow::gtl::ArraySlice operands, llvm::IRBuilder<>* ir_builder, llvm::Module* module) { for (size_t i = 0; i < operands.size(); ++i) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h index dbf9a14006..352d34ebf8 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h @@ -59,13 +59,13 @@ namespace llvm_ir { // of the address from the corresponding element in either // tuple_on_true or tuple_on_false: // output[i] = pred ? 
tuple_on_true[i] : tuple_on_false[i] -void EmitTupleSelect(IrArray select, IrArray pred, llvm::Value* on_true, - llvm::Value* on_false, llvm::IRBuilder<>* ir_builder, - llvm::Module* module); +void EmitTupleSelect(const IrArray& select, const IrArray& pred, + llvm::Value* on_true, llvm::Value* on_false, + llvm::IRBuilder<>* ir_builder, llvm::Module* module); // A tuple is an array of pointers, one for each operand. Each pointer points to // the output buffer of its corresponding operand. -void EmitTuple(IrArray tuple, +void EmitTuple(const IrArray& tuple, tensorflow::gtl::ArraySlice operands, llvm::IRBuilder<>* ir_builder, llvm::Module* module); -- GitLab From 1403625e860ffb8fb9af1bc75c1cea8f73e0478e Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 24 May 2018 10:05:10 -0700 Subject: [PATCH 093/902] [XLA] Speed up slice_test again. Previous patch missed one instance of creating a constant inside of slice_test. PiperOrigin-RevId: 197909685 --- tensorflow/compiler/xla/tests/slice_test.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index 5292568abe..5653bf11a7 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -197,9 +197,10 @@ class SliceR1Test : public ClientLibraryTestBase, // vector. 
tensorflow::gtl::InlinedVector input(spec.input_dim0); std::iota(input.begin(), input.end(), NativeT()); + auto literal = Literal::CreateR1(input); XlaBuilder builder(TestName()); - auto original = builder.ConstantR1(input); + auto original = builder.Parameter(0, literal->shape(), "p0"); builder.Slice(original, {spec.slice_start}, {spec.slice_limit}, {spec.slice_stride}); @@ -210,7 +211,9 @@ class SliceR1Test : public ClientLibraryTestBase, expected.push_back(i); } - ComputeAndCompareR1(&builder, expected, {}); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr arg, + client_->TransferToServer(*literal)); + ComputeAndCompareR1(&builder, expected, {arg.get()}); } }; -- GitLab From 53cd5c01407451cf918c1d1c1f5ca640b7d5dbc8 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 24 May 2018 10:30:41 -0700 Subject: [PATCH 094/902] Add a checkpointable map data structure PiperOrigin-RevId: 197913890 --- tensorflow/contrib/checkpoint/__init__.py | 3 + tensorflow/contrib/checkpoint/python/BUILD | 5 +- .../checkpointable/data_structures.py | 67 ++++++++++++---- .../checkpointable/data_structures_test.py | 77 +++++++++++++++++++ 4 files changed, 134 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py index bd0bc9e56b..8ae493ba99 100644 --- a/tensorflow/contrib/checkpoint/__init__.py +++ b/tensorflow/contrib/checkpoint/__init__.py @@ -26,6 +26,7 @@ Managing dependencies: Checkpointable data structures: @@List +@@Mapping @@UniqueNameTracker """ @@ -40,8 +41,10 @@ from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import Checkpointa from tensorflow.python.training.checkpointable.base import Checkpointable from tensorflow.python.training.checkpointable.base import NoDependency from tensorflow.python.training.checkpointable.data_structures import List +from tensorflow.python.training.checkpointable.data_structures import Mapping from tensorflow.python.training.checkpointable.util import object_metadata 
from tensorflow.python.util.all_util import remove_undocumented remove_undocumented(module_name=__name__) + diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD index 0b67619c11..7b200a29bf 100644 --- a/tensorflow/contrib/checkpoint/python/BUILD +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -20,7 +20,10 @@ py_library( srcs = ["containers.py"], srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], - deps = ["//tensorflow/python/training/checkpointable:base"], + deps = [ + "//tensorflow/python/training/checkpointable:base", + "//tensorflow/python/training/checkpointable:data_structures", + ], ) py_test( diff --git a/tensorflow/python/training/checkpointable/data_structures.py b/tensorflow/python/training/checkpointable/data_structures.py index b514f7bdb2..62cefa4f20 100644 --- a/tensorflow/python/training/checkpointable/data_structures.py +++ b/tensorflow/python/training/checkpointable/data_structures.py @@ -19,6 +19,8 @@ from __future__ import print_function import collections +import six + from tensorflow.python.keras.engine import base_layer from tensorflow.python.training.checkpointable import base as checkpointable_lib from tensorflow.python.training.checkpointable import data_structures_base @@ -198,21 +200,52 @@ class List(CheckpointableDataStructure, collections.Sequence): def __repr__(self): return "List(%s)" % (repr(self._storage),) - @property - def updates(self): - """Aggregate updates from any `Layer` instances.""" - # Updates and conditional losses are forwarded as-is rather than being - # filtered based on inputs, since this is just a container and won't ever - # have any inputs. 
- aggregated = [] - for layer in self.layers: - aggregated += layer.updates - return aggregated - @property - def losses(self): - """Aggregate losses from any `Layer` instances.""" - aggregated = [] - for layer in self.layers: - aggregated += layer.losses - return aggregated +class Mapping(CheckpointableDataStructure, collections.Mapping): + """An append-only checkpointable mapping data structure with string keys. + + Maintains checkpoint dependencies on its contents (which must also be + checkpointable), named based on its keys. + + Note that once a key has been added, it may not be deleted or replaced. If + names may not be unique, see `tf.contrib.checkpoint.UniqueNameTracker`. + """ + + def __init__(self, *args, **kwargs): + """Construct a new sequence. Arguments are passed to `dict()`.""" + super(Mapping, self).__init__() + self._storage = dict(*args, **kwargs) + for key, value in self._storage.items(): + self._track_value(value, name=self._name_element(key)) + + def _name_element(self, key): + if not isinstance(key, six.string_types): + raise TypeError( + "Mapping accepts only string keys, but got a key %s." + % repr(key)) + return str(key) + + def __setitem__(self, key, value): + current_value = self._storage.setdefault(key, value) + if current_value is not value: + raise ValueError( + ("Mappings are an append-only data structure. 
Tried to overwrite the " + "key '%s' with value %s, but it already contains %s") + % (key, value, current_value)) + self._track_value(value, name=self._name_element(key)) + + def update(self, *args, **kwargs): + for key, value in dict(*args, **kwargs).items(): + self[key] = value + + def __getitem__(self, key): + return self._storage[key] + + def __len__(self): + return len(self._storage) + + def __repr__(self): + return "Mapping(%s)" % (repr(self._storage),) + + def __iter__(self): + return iter(self._storage) diff --git a/tensorflow/python/training/checkpointable/data_structures_test.py b/tensorflow/python/training/checkpointable/data_structures_test.py index 6cabbea771..31a0e8b622 100644 --- a/tensorflow/python/training/checkpointable/data_structures_test.py +++ b/tensorflow/python/training/checkpointable/data_structures_test.py @@ -18,6 +18,8 @@ from __future__ import print_function import os +import numpy + from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import test_util @@ -137,6 +139,81 @@ class ListTests(test.TestCase): outer.variables[0], resource_variable_ops.ResourceVariable) + def testHashing(self): + has_sequences = set([data_structures.List(), + data_structures.List()]) + self.assertEqual(2, len(has_sequences)) + self.assertNotIn(data_structures.List(), has_sequences) + + +class HasMapping(training.Model): + + def __init__(self): + super(HasMapping, self).__init__() + self.layer_dict = data_structures.Mapping(output=core.Dense(7)) + self.layer_dict["norm"] = data_structures.List() + self.layer_dict["dense"] = data_structures.List() + self.layer_dict["dense"].extend( + [core.Dense(5), + core.Dense(6, kernel_regularizer=math_ops.reduce_sum)]) + self.layer_dict["norm"].append( + normalization.BatchNormalization()) + self.layer_dict["norm"].append( + normalization.BatchNormalization()) + + def call(self, x): + aggregation = 0. 
+ for norm, dense in zip(self.layer_dict["norm"], self.layer_dict["dense"]): + x = norm(dense(x)) + aggregation += math_ops.reduce_sum(x) + return self.layer_dict["output"](x) / aggregation + + +class MappingTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testTracking(self): + model = HasMapping() + output = model(array_ops.ones([32, 2])) + self.assertAllEqual([32, 7], output.shape) + self.assertEqual(1, len(model.layers)) + self.assertIs(model.layer_dict, model.layers[0]) + self.assertEqual(3, len(model.layers[0].layers)) + self.assertEqual(1, len(model._checkpoint_dependencies)) + self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref) + self.evaluate([v.initializer for v in model.variables]) + test_var = model.layer_dict["output"].kernel + self.evaluate(test_var.assign(array_ops.ones([6, 7]))) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(test_var.assign(array_ops.zeros([6, 7]))) + model.load_weights(save_path) + self.assertAllEqual(numpy.ones([6, 7]), + self.evaluate(test_var)) + + def testNoOverwrite(self): + mapping = data_structures.Mapping() + original = data_structures.List() + mapping["a"] = original + with self.assertRaises(ValueError): + mapping["a"] = data_structures.List() + self.assertIs(original, mapping["a"]) + with self.assertRaises(AttributeError): + del mapping["a"] + mapping.update(b=data_structures.Mapping()) + with self.assertRaises(ValueError): + mapping.update({"b": data_structures.Mapping()}) + + def testNonStringKeys(self): + mapping = data_structures.Mapping() + with self.assertRaises(TypeError): + mapping[1] = data_structures.List() + + def testHashing(self): + has_mappings = set([data_structures.Mapping(), + data_structures.Mapping()]) + self.assertEqual(2, len(has_mappings)) + self.assertNotIn(data_structures.Mapping(), has_mappings) if __name__ == "__main__": test.main() -- GitLab From a213d2abb422b6be825f1b8055190a3c65670311 Mon Sep 17 
00:00:00 2001 From: Alexandre Passos Date: Thu, 24 May 2018 10:38:48 -0700 Subject: [PATCH 095/902] Warn about tf.Variable semantics PiperOrigin-RevId: 197915380 --- tensorflow/python/ops/variables.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 294ee0e328..959ae08ee4 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -123,6 +123,30 @@ class Variable(checkpointable.CheckpointableBase): various `Optimizer` classes use this collection as the default list of variables to optimize. + WARNING: tf.Variable objects have a non-intuitive memory model. A Variable is + represented internally as a mutable Tensor which can non-deterministically + alias other Tensors in a graph. The set of operations which consume a Variable + and can lead to aliasing is undetermined and can change across TensorFlow + versions. Avoid writing code which relies on the value of a Variable either + changing or not changing as other operations happen. For example, using + Variable objects or simple functions thereof as predicates in a `tf.cond` is + dangerous and error-prone: + + ``` + v = tf.Variable(True) + tf.cond(v, lambda: v.assign(False), my_false_fn) # Note: this is broken. + ``` + + Here replacing tf.Variable with tf.contrib.eager.Variable will fix any + nondeterminism issues. + + To use the replacement for variables which does + not have these issues: + + * Replace `tf.Variable` with `tf.contrib.eager.Variable`; + * Call `tf.get_variable_scope().set_use_resource(True)` inside a + `tf.variable_scope` before the `tf.get_variable()` call. + @compatibility(eager) `tf.Variable` is not compatible with eager execution. Use `tf.contrib.eager.Variable` instead which is compatible with both eager -- GitLab From f286fb4557ab48f38882bc643ccc9a2c85677c63 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 10:52:18 -0700 Subject: [PATCH 096/902] Fix build failure introduced by cl/197457316 PiperOrigin-RevId: 197917867 --- .../lite/kernels/internal/optimized/depthwiseconv_uint8.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 75cf987be6..b85e6c49e0 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1691,8 +1691,10 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, const int filter_width = ArraySize(filter_dims, 1); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); +#ifdef USE_NEON const bool shift_left = (output_shift <= 0); const int32 multiplier_power_of_two = shift_left ? (1 << -output_shift) : 1; +#endif TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); #ifdef __aarch64__ -- GitLab From d9b764d72aa8e1f7959c396762d2054ee9d87cab Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 24 May 2018 10:53:28 -0700 Subject: [PATCH 097/902] Improve TOCO Python API. 
PiperOrigin-RevId: 197918102 --- tensorflow/contrib/lite/python/BUILD | 26 +- .../lite/python/convert_saved_model.py | 162 +-------- .../lite/python/convert_saved_model_test.py | 284 +++++++-------- .../convert_saved_model_to_frozen_graph.py | 106 ------ .../interpreter_wrapper.cc | 2 + tensorflow/contrib/lite/python/lite.py | 192 ++++++++++- tensorflow/contrib/lite/python/lite_test.py | 323 ++++++++++++++++++ .../contrib/lite/toco/g3doc/python_api.md | 191 +++++++++-- 8 files changed, 836 insertions(+), 450 deletions(-) delete mode 100644 tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py create mode 100644 tensorflow/contrib/lite/python/lite_test.py diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 4920e83970..17c11ab0f9 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -45,7 +45,21 @@ py_library( ":convert", ":convert_saved_model", ":interpreter", + ":lite_constants", ":op_hint", + "//tensorflow/contrib/saved_model:saved_model_py", + "//tensorflow/python:graph_util", + "//tensorflow/python/tools:freeze_graph_lib", + ], +) + +py_test( + name = "lite_test", + srcs = ["lite_test.py"], + data = [":interpreter_test_data"], + srcs_version = "PY2AND3", + deps = [ + ":lite", ], ) @@ -110,10 +124,9 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - ":convert", - ":lite_constants", "//tensorflow/contrib/saved_model:saved_model_py", "//tensorflow/python:graph_util", + "//tensorflow/python:platform", "//tensorflow/python/tools:freeze_graph_lib", ], ) @@ -151,15 +164,6 @@ py_test( ], ) -py_binary( - name = "convert_saved_model_to_frozen_graph", - srcs = ["convert_saved_model_to_frozen_graph.py"], - srcs_version = "PY2AND3", - deps = [ - ":convert_saved_model", - ], -) - # Transitive dependencies of this target will be included in the pip package. 
py_library( name = "tf_lite_py_pip", diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py index a7eddf3408..54fec9d61f 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -18,9 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.lite.python import convert -from tensorflow.contrib.lite.python import lite_constants -from tensorflow.contrib.lite.toco import model_flags_pb2 from tensorflow.contrib.saved_model.python.saved_model import reader from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils from tensorflow.core.framework import types_pb2 @@ -110,12 +107,12 @@ def _get_signature_def(meta_graph, signature_key): signature_def_map = meta_graph.signature_def signature_def_keys = set(signature_def_map.keys()) logging.info( - "The given saved_model MetaGraphDef contains SignatureDefs with the " + "The given SavedModel MetaGraphDef contains SignatureDefs with the " "following keys: %s", signature_def_keys) if signature_key not in signature_def_keys: - raise ValueError("No '{}' in the saved_model\'s SignatureDefs. Possible " - "values are '{}'. ".format(signature_key, - signature_def_keys)) + raise ValueError("No '{}' in the SavedModel\'s SignatureDefs. Possible " + "values are '{}'.".format(signature_key, + ",".join(signature_def_keys))) signature_def = signature_def_utils.get_signature_def_by_key( meta_graph, signature_key) return signature_def @@ -207,8 +204,8 @@ def _get_tensors(graph, signature_def_tensor_names=None, return tensors -def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, - output_arrays, tag_set, signature_key, batch_size): +def freeze_saved_model(saved_model_dir, input_arrays, input_shapes, + output_arrays, tag_set, signature_key): """Converts a SavedModel to a frozen graph. 
Args: @@ -224,8 +221,6 @@ def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to analyze. All tags in the tag set must be present. (default "serve") signature_key: Key identifying SignatureDef containing inputs and outputs. - batch_size: Batch size for the model. Replaces the first dimension of an - input size array if undefined. (default 1) Returns: frozen_graph_def: Frozen GraphDef. @@ -237,7 +232,6 @@ def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, SavedModel doesn't contain a MetaGraphDef identified by tag_set. signature_key is not in the MetaGraphDef. input_shapes does not match the length of input_arrays. - input_shapes has a None value after the 1st dimension. input_arrays or output_arrays are not valid. Unable to load Session. """ @@ -246,8 +240,6 @@ def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY if tag_set is None: tag_set = set([tag_constants.SERVING]) - if batch_size is None: - batch_size = 1 # Read SignatureDef. meta_graph = _get_meta_graph_def(saved_model_dir, tag_set) @@ -264,23 +256,13 @@ def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, in_tensors = _get_tensors(graph, inputs, input_arrays) out_tensors = _get_tensors(graph, outputs, output_arrays) - # Gets fully defined tensor shape. An input tensor with None in the first - # dimension, e.g. (None, 224, 224, 3), is replaced with the batch_size. - # Shapes with None after the first dimension result in a ValueError. - # TODO(zhixianyan): Add supports for input tensor with more None in shape. + # Gets fully defined tensor shape. 
for tensor in in_tensors: if (input_shapes and tensor.name in input_shapes and input_shapes[tensor.name] is not None): shape = input_shapes[tensor.name] else: shape = tensor.get_shape().as_list() - - if None in shape[1:]: - raise ValueError( - "None is only supported in the 1st dimension. Tensor '{0}' has " - "invalid shape '{1}'.".format(tensor.name, shape)) - elif shape[0] is None: - shape[0] = batch_size tensor.set_shape(shape) output_names = [node.split(":")[0] for node in outputs] @@ -289,133 +271,3 @@ def _freeze_saved_model(saved_model_dir, input_arrays, input_shapes, return frozen_graph_def, in_tensors, out_tensors raise ValueError("Unable to load Session.") - - -def saved_model_to_frozen_graphdef( - saved_model_dir, - output_file_model, - output_file_flags, - input_arrays=None, - input_shapes=None, - output_arrays=None, - tag_set=None, - signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - batch_size=1): - """Converts a SavedModel to a frozen graph. Writes graph to tmp directory. - - Stores frozen graph and command line flags in the tmp directory. - - Args: - saved_model_dir: SavedModel directory to convert. - output_file_model: Full file path to save frozen graph. - output_file_flags: Full file path to save ModelFlags. - input_arrays: List of input tensors to freeze graph with. Uses input arrays - from SignatureDef when none are provided. (default None) - input_shapes: Map of strings representing input tensor names to list of - integers representing input shapes (e.g., {"foo": : [1, 16, 16, 3]}). - Automatically determined when input shapes is None (e.g., {"foo" : None}). - (default None) - output_arrays: List of output tensors to freeze graph with. Uses output - arrays from SignatureDef when none are provided. (default None) - tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to - analyze. All tags in the tag set must be present. 
(default "serve") - signature_key: Key identifying SignatureDef containing inputs and outputs. - batch_size: Batch size for the model. Replaces the first dimension of an - input size array if undefined. (default 1) - - Returns: None. - - Raises: - ValueError: Unable to convert to frozen graph. - """ - frozen_graph_def, in_tensors, out_tensors = _freeze_saved_model( - saved_model_dir, input_arrays, input_shapes, output_arrays, tag_set, - signature_key, batch_size) - - # Initialize model flags. - model = model_flags_pb2.ModelFlags() - - for input_tensor in in_tensors: - input_array = model.input_arrays.add() - input_array.name = convert.tensor_name(input_tensor) - input_array.shape.dims.extend(map(int, input_tensor.get_shape())) - - for output_tensor in out_tensors: - model.output_arrays.append(convert.tensor_name(output_tensor)) - - # Write model and ModelFlags to file. ModelFlags contain input array and - # output array information that is parsed from the SignatureDef and used for - # analysis by TOCO. - _write_and_flush_file(output_file_model, frozen_graph_def.SerializeToString()) - _write_and_flush_file(output_file_flags, model.SerializeToString()) - - -def tflite_from_saved_model( - saved_model_dir, - output_file=None, - input_arrays=None, - input_shapes=None, - output_arrays=None, - tag_set=None, - signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - batch_size=1, - inference_type=lite_constants.FLOAT, - input_format=lite_constants.TENSORFLOW_GRAPHDEF, - output_format=lite_constants.TFLITE, - quantized_input_stats=None, - drop_control_dependency=True): - """Converts a SavedModel to TFLite FlatBuffer. - - Args: - saved_model_dir: SavedModel directory to convert. - output_file: File path to write result TFLite FlatBuffer. - input_arrays: List of input tensors to freeze graph with. Uses input arrays - from SignatureDef when none are provided. 
(default None) - input_shapes: Map of strings representing input tensor names to list of - integers representing input shapes (e.g., {"foo": : [1, 16, 16, 3]}). - Automatically determined when input shapes is None (e.g., {"foo" : None}). - (default None) - output_arrays: List of output tensors to freeze graph with. Uses output - arrays from SignatureDef when none are provided. (default None) - tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to - analyze. All tags in the tag set must be present. (default "serve") - signature_key: Key identifying SignatureDef containing inputs and outputs. - batch_size: Batch size for the model. Replaces the first dimension of an - input size array if undefined. (default 1) - inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. - input_format: Type of data to read (currently must be TENSORFLOW_GRAPHDEF). - output_format: Type of data to write (currently must be TFLITE or - GRAPHVIZ_DOT) - quantized_input_stats: For each member of input_tensors the mean and - std deviation of training data. Only needed if `inference_type` is - `QUANTIZED_UINT8`. - drop_control_dependency: Drops control dependencies silently. This is due - to tf lite not supporting control dependencies. - - Returns: - The converted data. For example if tflite was the destination, then - this will be a tflite flatbuffer in a bytes array. - - Raises: - ValueError: Unable to convert to frozen graph. 
- """ - frozen_graph_def, in_tensors, out_tensors = _freeze_saved_model( - saved_model_dir, input_arrays, input_shapes, output_arrays, tag_set, - signature_key, batch_size) - - result = convert.toco_convert( - input_data=frozen_graph_def, - input_tensors=in_tensors, - output_tensors=out_tensors, - inference_type=inference_type, - input_format=input_format, - output_format=output_format, - quantized_input_stats=quantized_input_stats, - drop_control_dependency=drop_control_dependency) - - if output_file is not None: - with gfile.Open(output_file, "wb") as f: - f.write(result) - logging.info("Successfully converted to: %s", output_file) - - return result diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index db95fc8ad7..f69381d0e6 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -25,12 +25,12 @@ from __future__ import print_function import os from tensorflow.contrib.lite.python import convert_saved_model -from tensorflow.contrib.lite.toco import model_flags_pb2 as _model_flags_pb2 from tensorflow.python import keras from tensorflow.python.client import session from tensorflow.python.estimator import estimator_lib as estimator from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.layers import layers from tensorflow.python.ops import array_ops @@ -38,13 +38,13 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops.losses import losses -from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.saved_model import saved_model +from tensorflow.python.saved_model import signature_constants 
from tensorflow.python.training import training as train -class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): +class FreezeSavedModelTest(test_util.TensorFlowTestCase): def _createSimpleSavedModel(self, shape): """Create a simple SavedModel on the fly.""" @@ -57,82 +57,163 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): saved_model.simple_save(sess, saved_model_dir, inputs, outputs) return saved_model_dir + def _createSavedModelTwoInputArrays(self, shape): + """Create a simple SavedModel.""" + saved_model_dir = os.path.join(self.get_temp_dir(), "simple_savedmodel") + with session.Session() as sess: + in_tensor_1 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name="inputB") + in_tensor_2 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name="inputA") + out_tensor = in_tensor_1 + in_tensor_2 + inputs = {"x": in_tensor_1, "y": in_tensor_2} + outputs = {"z": out_tensor} + saved_model.simple_save(sess, saved_model_dir, inputs, outputs) + return saved_model_dir + + def _getArrayNames(self, tensors): + return [tensor.name for tensor in tensors] + + def _getArrayShapes(self, tensors): + dims = [] + for tensor in tensors: + dim_tensor = [] + for dim in tensor.shape: + if isinstance(dim, tensor_shape.Dimension): + dim_tensor.append(dim.value) + else: + dim_tensor.append(dim) + dims.append(dim_tensor) + return dims + + def _convertSavedModel(self, + saved_model_dir, + input_arrays=None, + input_shapes=None, + output_arrays=None, + tag_set=None, + signature_key=None): + graph_def, in_tensors, out_tensors = convert_saved_model.freeze_saved_model( + saved_model_dir=saved_model_dir, + input_arrays=input_arrays, + input_shapes=input_shapes, + output_arrays=output_arrays, + tag_set=tag_set, + signature_key=signature_key) + return graph_def, in_tensors, out_tensors + def testSimpleSavedModel(self): - """Test a simple SavedModel created on the fly.""" - # Create a simple SavedModel + """Test a SavedModel.""" 
saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - # Convert to tflite - result = convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir) - self.assertTrue(result) + _, in_tensors, out_tensors = self._convertSavedModel(saved_model_dir) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["Placeholder:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[1, 16, 16, 3]]) def testSimpleSavedModelWithNoneBatchSizeInShape(self): - """Test a simple SavedModel, with None in input tensor's shape.""" + """Test a SavedModel with None in input tensor's shape.""" saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, 16, 3]) - result = convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir) - self.assertTrue(result) + _, in_tensors, out_tensors = self._convertSavedModel(saved_model_dir) - def testSimpleSavedModelWithMoreNoneInShape(self): - """Test a simple SavedModel, fail as more None in input shape.""" - saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, None, 3]) - # Convert to tflite: this should raise ValueError, as 3rd dim is None. - with self.assertRaises(ValueError): - convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir) + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["Placeholder:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[None, 16, 16, 3]]) - def testSimpleSavedModelWithWrongSignatureKey(self): - """Test a simple SavedModel, fail as given signature is invalid.""" + def testSimpleSavedModelWithInvalidSignatureKey(self): + """Test a SavedModel that fails due to an invalid signature_key.""" saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - # Convert to tflite: this should raise ValueError, as - # signature_key does not exit in the saved_model. 
- with self.assertRaises(ValueError): - convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir, signature_key="wrong-key") - - def testSimpleSavedModelWithWrongOutputArray(self): - """Test a simple SavedModel, fail as given output_arrays is invalid.""" - # Create a simple SavedModel + with self.assertRaises(ValueError) as error: + self._convertSavedModel(saved_model_dir, signature_key="invalid-key") + self.assertEqual( + "No 'invalid-key' in the SavedModel's SignatureDefs. " + "Possible values are 'serving_default'.", str(error.exception)) + + def testSimpleSavedModelWithInvalidOutputArray(self): + """Test a SavedModel that fails due to invalid output arrays.""" saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - # Convert to tflite: this should raise ValueError, as - # output_arrays is not valid for the saved_model. - with self.assertRaises(ValueError): - convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir, output_arrays=["wrong-output"]) + with self.assertRaises(ValueError) as error: + self._convertSavedModel(saved_model_dir, output_arrays=["invalid-output"]) + self.assertEqual("Invalid tensors 'invalid-output' were found.", + str(error.exception)) def testSimpleSavedModelWithWrongInputArrays(self): - """Test a simple SavedModel, fail as given input_arrays is invalid.""" + """Test a SavedModel that fails due to invalid input arrays.""" saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - # Checks invalid input_arrays. - with self.assertRaises(ValueError): - convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir, input_arrays=["wrong-input"]) - # Checks valid and invalid input_arrays. - with self.assertRaises(ValueError): - convert_saved_model.tflite_from_saved_model( - saved_model_dir=saved_model_dir, - input_arrays=["Placeholder", "wrong-input"]) + + # Check invalid input_arrays. 
+ with self.assertRaises(ValueError) as error: + self._convertSavedModel(saved_model_dir, input_arrays=["invalid-input"]) + self.assertEqual("Invalid tensors 'invalid-input' were found.", + str(error.exception)) + + # Check valid and invalid input_arrays. + with self.assertRaises(ValueError) as error: + self._convertSavedModel( + saved_model_dir, input_arrays=["Placeholder", "invalid-input"]) + self.assertEqual("Invalid tensors 'invalid-input' were found.", + str(error.exception)) def testSimpleSavedModelWithCorrectArrays(self): - """Test a simple SavedModel, with correct input_arrays and output_arrays.""" + """Test a SavedModel with correct input_arrays and output_arrays.""" saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, 16, 3]) - result = convert_saved_model.tflite_from_saved_model( + _, in_tensors, out_tensors = self._convertSavedModel( saved_model_dir=saved_model_dir, input_arrays=["Placeholder"], output_arrays=["add"]) - self.assertTrue(result) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["Placeholder:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[None, 16, 16, 3]]) def testSimpleSavedModelWithCorrectInputArrays(self): - """Test a simple SavedModel, with correct input_arrays and input_shapes.""" + """Test a SavedModel with correct input_arrays and input_shapes.""" saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - result = convert_saved_model.tflite_from_saved_model( + _, in_tensors, out_tensors = self._convertSavedModel( saved_model_dir=saved_model_dir, input_arrays=["Placeholder"], input_shapes={"Placeholder": [1, 16, 16, 3]}) - self.assertTrue(result) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["Placeholder:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[1, 16, 16, 3]]) + + def testTwoInputArrays(self): + """Test a simple SavedModel.""" + 
saved_model_dir = self._createSavedModelTwoInputArrays(shape=[1, 16, 16, 3]) + + _, in_tensors, out_tensors = self._convertSavedModel( + saved_model_dir=saved_model_dir, input_arrays=["inputB", "inputA"]) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["inputA:0", "inputB:0"]) + self.assertEqual( + self._getArrayShapes(in_tensors), [[1, 16, 16, 3], [1, 16, 16, 3]]) + + def testSubsetInputArrays(self): + """Test a SavedModel with a subset of the input array names of the model.""" + saved_model_dir = self._createSavedModelTwoInputArrays(shape=[1, 16, 16, 3]) + + # Check case where input shape is given. + _, in_tensors, out_tensors = self._convertSavedModel( + saved_model_dir=saved_model_dir, + input_arrays=["inputA"], + input_shapes={"inputA": [1, 16, 16, 3]}) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["inputA:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[1, 16, 16, 3]]) + + # Check case where input shape is None. 
+ _, in_tensors, out_tensors = self._convertSavedModel( + saved_model_dir=saved_model_dir, input_arrays=["inputA"]) + + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["inputA:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[1, 16, 16, 3]]) def testMultipleMetaGraphDef(self): - """Test saved model with multiple MetaGraphDef.""" + """Test saved model with multiple MetaGraphDefs.""" saved_model_dir = os.path.join(self.get_temp_dir(), "savedmodel_two_mgd") builder = saved_model.builder.SavedModelBuilder(saved_model_dir) with session.Session(graph=ops.Graph()) as sess: @@ -161,91 +242,13 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): builder.save(True) # Convert to tflite - convert_saved_model.tflite_from_saved_model( + _, in_tensors, out_tensors = self._convertSavedModel( saved_model_dir=saved_model_dir, tag_set=set([saved_model.tag_constants.SERVING, "additional_test_tag"])) - -class ConvertSavedModelTestBasicGraphToText(test_util.TensorFlowTestCase): - - def _createSimpleSavedModel(self, shape): - """Create a simple SavedModel.""" - saved_model_dir = os.path.join(self.get_temp_dir(), "simple_savedmodel") - with session.Session() as sess: - in_tensor_1 = array_ops.placeholder( - shape=shape, dtype=dtypes.float32, name="inputB") - in_tensor_2 = array_ops.placeholder( - shape=shape, dtype=dtypes.float32, name="inputA") - out_tensor = in_tensor_1 + in_tensor_2 - inputs = {"x": in_tensor_1, "y": in_tensor_2} - outputs = {"z": out_tensor} - saved_model.simple_save(sess, saved_model_dir, inputs, outputs) - return saved_model_dir - - def _getInputArrayNames(self, model_proto): - return [data.name for data in model_proto.input_arrays] - - def _getInputArrayShapes(self, model_proto): - return [ - [dim for dim in data.shape.dims] for data in model_proto.input_arrays - ] - - def _get_model_flags_proto_from_file(self, filename): - proto = _model_flags_pb2.ModelFlags() - with 
gfile.Open(filename, "rb") as output_file: - proto.ParseFromString(output_file.read()) - output_file.close() - return proto - - def testSimpleSavedModel(self): - """Test a simple SavedModel.""" - saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - output_file_model = os.path.join(self.get_temp_dir(), "model.pb") - output_file_flags = os.path.join(self.get_temp_dir(), "model.pbtxt") - - convert_saved_model.saved_model_to_frozen_graphdef( - saved_model_dir=saved_model_dir, - output_file_model=output_file_model, - output_file_flags=output_file_flags, - input_arrays=["inputB", "inputA"]) - - proto = self._get_model_flags_proto_from_file(output_file_flags) - self.assertEqual(proto.output_arrays, ["add"]) - self.assertEqual(self._getInputArrayNames(proto), ["inputA", "inputB"]) - self.assertEqual( - self._getInputArrayShapes(proto), [[1, 16, 16, 3], [1, 16, 16, 3]]) - - def testSimpleSavedModelWithDifferentInputNames(self): - """Test a simple SavedModel.""" - saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) - output_file_model = os.path.join(self.get_temp_dir(), "model.pb") - output_file_flags = os.path.join(self.get_temp_dir(), "model.pbtxt") - - # Check case where input shape is given. - convert_saved_model.saved_model_to_frozen_graphdef( - saved_model_dir=saved_model_dir, - output_file_model=output_file_model, - output_file_flags=output_file_flags, - input_arrays=["inputA"], - input_shapes={"inputA": [1, 16, 16, 3]}) - - proto = self._get_model_flags_proto_from_file(output_file_flags) - self.assertEqual(proto.output_arrays, ["add"]) - self.assertEqual(self._getInputArrayNames(proto), ["inputA"]) - self.assertEqual(self._getInputArrayShapes(proto), [[1, 16, 16, 3]]) - - # Check case where input shape is None. 
- convert_saved_model.saved_model_to_frozen_graphdef( - saved_model_dir=saved_model_dir, - output_file_model=output_file_model, - output_file_flags=output_file_flags, - input_arrays=["inputA"], - input_shapes={"inputA": None}) - - proto = self._get_model_flags_proto_from_file(output_file_flags) - self.assertEqual(proto.output_arrays, ["add"]) - self.assertEqual(self._getInputArrayNames(proto), ["inputA"]) - self.assertEqual(self._getInputArrayShapes(proto), [[1, 16, 16, 3]]) + self.assertEqual(self._getArrayNames(out_tensors), ["add:0"]) + self.assertEqual(self._getArrayNames(in_tensors), ["Placeholder:0"]) + self.assertEqual(self._getArrayShapes(in_tensors), [[1, 28, 28]]) class Model(keras.Model): @@ -354,7 +357,7 @@ def dummy_input_fn(): return image, labels -class ConvertSavedModelTestTrainGraph(test_util.TensorFlowTestCase): +class FreezeSavedModelTestTrainGraph(test_util.TensorFlowTestCase): def testTrainedMnistSavedModel(self): """Test mnist SavedModel, trained with dummy data and small steps.""" @@ -379,13 +382,16 @@ class ConvertSavedModelTestTrainGraph(test_util.TensorFlowTestCase): # Convert to tflite and test output saved_model_name = os.listdir(saved_model_dir)[0] saved_model_final_dir = os.path.join(saved_model_dir, saved_model_name) - output_file = os.path.join(saved_model_dir, saved_model_final_dir + ".lite") + # TODO(zhixianyan): no need to limit output_arrays to `Softmax' # once b/74205001 fixed and argmax implemented in tflite. 
- result = convert_saved_model.tflite_from_saved_model( + result = convert_saved_model.freeze_saved_model( saved_model_dir=saved_model_final_dir, + input_arrays=None, + input_shapes=None, output_arrays=["Softmax"], - output_file=output_file) + tag_set=None, + signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) self.assertTrue(result) diff --git a/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py b/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py deleted file mode 100644 index 4d9782f4a6..0000000000 --- a/tensorflow/contrib/lite/python/convert_saved_model_to_frozen_graph.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Python console command for generating frozen models from SavedModels. - -This exists to add SavedModel compatibility to TOCO. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -from tensorflow.contrib.lite.python.convert_saved_model import saved_model_to_frozen_graphdef -from tensorflow.python.platform import app - -FLAGS = None - - -def execute(unused_args): - """Calls function to convert the SavedModel to a frozen graph.""" - # Error handling. 
- if FLAGS.input_shapes and not FLAGS.input_arrays: - raise ValueError("Input shapes requires input arrays to be specified.") - - # Calls saved_model_to_frozen_graphdef function to generate frozen graph. - input_arrays = (FLAGS.input_arrays.split(",") if FLAGS.input_arrays else None) - input_shapes = None - if FLAGS.input_shapes: - input_shapes = { - input_arrays[idx]: shape.split(",") - for idx, shape in enumerate(FLAGS.input_shapes.split(":")) - } - output_arrays = ( - FLAGS.output_arrays.split(",") if FLAGS.output_arrays else None) - tag_set = set(FLAGS.tag_set.split(",")) if FLAGS.tag_set else None - - saved_model_to_frozen_graphdef( - saved_model_dir=FLAGS.saved_model_directory, - output_file_model=FLAGS.output_file_model, - output_file_flags=FLAGS.output_file_flags, - input_arrays=input_arrays, - input_shapes=input_shapes, - output_arrays=output_arrays, - tag_set=tag_set, - signature_key=FLAGS.signature_key, - batch_size=FLAGS.batch_size) - - -def main(): - global FLAGS - # Parses flags. 
- parser = argparse.ArgumentParser( - description="Invoke SavedModel to frozen model converter.") - parser.add_argument( - "saved_model_directory", - type=str, - help="Full path to directory containing the SavedModel.") - parser.add_argument( - "output_file_model", - type=str, - help="Full file path to save frozen graph.") - parser.add_argument( - "output_file_flags", type=str, help="Full file path to save ModelFlags.") - parser.add_argument( - "--input_arrays", - type=str, - help="Name of the input arrays, comma-separated.") - parser.add_argument( - "--input_shapes", - type=str, - help="Shapes corresponding to --input_arrays, colon-separated.") - parser.add_argument( - "--output_arrays", - type=str, - help="Name of the output arrays, comma-separated.") - parser.add_argument( - "--tag_set", type=str, help="Name of output arrays, comma-separated.") - parser.add_argument( - "--signature_key", - type=str, - help="Key identifying SignatureDef containing inputs and outputs.") - parser.add_argument( - "--batch_size", - type=int, - help="Batch size for the model. 
Replaces the first dimension of an " - "input size array if undefined.") - - FLAGS, unparsed = parser.parse_known_args() - - app.run(main=execute, argv=[sys.argv[0]] + unparsed) - - -if __name__ == "__main__": - main() diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index 16f4f30b94..6b12c91924 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -42,6 +42,8 @@ std::unique_ptr CreateInterpreter( return nullptr; } + tensorflow::ImportNumpy(); + std::unique_ptr interpreter; tflite::InterpreterBuilder(*model, resolver)(&interpreter); if (interpreter) { diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 86b25e68ac..f7f2d40a02 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -16,23 +16,199 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. 
+@@TocoConverter @@toco_convert @@toco_convert_protos -@@tflite_from_saved_model @@Interpreter @@OpHint @@convert_op_hints_to_stubs +@@FLOAT +@@QUANTIZED_UINT8 +@@TFLITE +@@GRAPHVIZ_DOT + """ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import +from tensorflow.contrib.lite.python import lite_constants as constants +from tensorflow.contrib.lite.python.convert import tensor_name from tensorflow.contrib.lite.python.convert import toco_convert -from tensorflow.contrib.lite.python.convert import toco_convert_protos -from tensorflow.contrib.lite.python.convert_saved_model import tflite_from_saved_model -from tensorflow.contrib.lite.python.interpreter import Interpreter -from tensorflow.contrib.lite.python.op_hint import convert_op_hints_to_stubs -from tensorflow.contrib.lite.python.op_hint import OpHint -# pylint: enable=unused-import +from tensorflow.contrib.lite.python.convert import toco_convert_protos # pylint: disable=unused-import +from tensorflow.contrib.lite.python.convert_saved_model import freeze_saved_model +from tensorflow.contrib.lite.python.interpreter import Interpreter # pylint: disable=unused-import +from tensorflow.contrib.lite.python.op_hint import convert_op_hints_to_stubs # pylint: disable=unused-import +from tensorflow.contrib.lite.python.op_hint import OpHint # pylint: disable=unused-import +from tensorflow.python.framework import graph_util as tf_graph_util +from tensorflow.python.ops.variables import global_variables_initializer +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.saved_model import tag_constants + + +class TocoConverter(object): + """Convert a TensorFlow model into `output_format` using TOCO. + + This is used to convert from a TensorFlow GraphDef or SavedModel into either a + TFLite FlatBuffer or graph visualization. + + Attributes: + + inference_type: Currently must be `{FLOAT, QUANTIZED_UINT8}`. 
+ (default FLOAT) + output_format: Type of data to write (currently must be TFLITE or + GRAPHVIZ_DOT). (default TFLITE) + quantized_input_stats: The mean and std deviation of training data for each + input tensor. Only needed if `inference_type` is `QUANTIZED_UINT8`. + (default None) + drop_control_dependency: Boolean indicating whether to drop control + dependencies silently. This is due to TFLite not supporting control + dependencies. (default True) + allow_custom_ops: Boolean indicating whether to allow custom operations. + (default False) + + Example usage: + + # Converting a frozen graph. + converter = lite.TocoConverter.from_session(sess, in_tensors, out_tensors) + tflite_model = converter.convert() + open("converted_model.tflite", "wb").write(tflite_model) + + # Converting a SavedModel. + converter = lite.TocoConverter.from_saved_model(saved_model_dir) + tflite_model = converter.convert() + """ + + def __init__(self, graph_def, input_tensors, output_tensors): + """Constructor for TocoConverter. + + Args: + + graph_def: TensorFlow GraphDef. + input_tensors: List of input tensors. Type and shape are computed using + `foo.get_shape()` and `foo.dtype`. + output_tensors: List of output tensors (only .name is used from this). + """ + self._graph_def = graph_def + self._input_tensors = input_tensors + self._output_tensors = output_tensors + self.inference_type = constants.FLOAT + self.output_format = constants.TFLITE + self.quantized_input_stats = None + self.drop_control_dependency = True + self.allow_custom_ops = False + + @classmethod + def from_session(cls, + sess, + input_tensors, + output_tensors, + freeze_variables=False): + """Creates a TocoConverter class from a TensorFlow Session. + + Args: + sess: TensorFlow Session. + input_tensors: List of input tensors. Type and shape are computed using + `foo.get_shape()` and `foo.dtype`. + output_tensors: List of output tensors (only .name is used from this). 
+ freeze_variables: Boolean indicating whether the variables need to be + converted into constants via the freeze_graph.py script. + (default False) + + Returns: + TocoConverter class. + """ + + # Get GraphDef. + if freeze_variables: + sess.run(global_variables_initializer()) + output_arrays = [tensor_name(tensor) for tensor in output_tensors] + graph_def = tf_graph_util.convert_variables_to_constants( + sess, sess.graph_def, output_arrays) + else: + graph_def = sess.graph_def + + # Create TocoConverter class. + return cls(graph_def, input_tensors, output_tensors) + + @classmethod + def from_saved_model( + cls, + saved_model_dir, + input_arrays=None, + input_shapes=None, + output_arrays=None, + tag_set=None, + signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY): + """Creates a TocoConverter class from a SavedModel. + + Args: + saved_model_dir: SavedModel directory to convert. + input_arrays: List of input tensors to freeze graph with. Uses input + arrays from SignatureDef when none are provided. (default None) + input_shapes: Map of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo": : [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo" : + None}). (default None) + output_arrays: List of output tensors to freeze graph with. Uses output + arrays from SignatureDef when none are provided. (default None) + tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to + analyze. All tags in the tag set must be present. (default "serve") + signature_key: Key identifying SignatureDef containing inputs and outputs. + + Returns: + TocoConverter class. 
+ """ + if tag_set is None: + tag_set = set([tag_constants.SERVING]) + + result = freeze_saved_model(saved_model_dir, input_arrays, input_shapes, + output_arrays, tag_set, signature_key) + return cls( + graph_def=result[0], input_tensors=result[1], output_tensors=result[2]) + + def convert(self): + """Converts a TensorFlow GraphDef based on instance variables. + + Returns: + The converted data in serialized format. Either a TFLite Flatbuffer or a + Graphviz graph depending on value in `output_format`. + + Raises: + ValueError: + None value for dimension in input_tensor. + """ + # Checks dimensions in input tensor. + for tensor in self._input_tensors: + shape = tensor.get_shape().as_list() + if None in shape[1:]: + raise ValueError( + "None is only supported in the 1st dimension. Tensor '{0}' has " + "invalid shape '{1}'.".format(tensor.name, shape)) + elif shape[0] is None: + self._set_batch_size(batch_size=1) + + # Converts model. + result = toco_convert( + input_data=self._graph_def, + input_tensors=self._input_tensors, + output_tensors=self._output_tensors, + inference_type=self.inference_type, + input_format=constants.TENSORFLOW_GRAPHDEF, + output_format=self.output_format, + quantized_input_stats=self.quantized_input_stats, + drop_control_dependency=self.drop_control_dependency) + return result + + def _set_batch_size(self, batch_size): + """Sets the first dimension of the input tensor to `batch_size`. + + Args: + batch_size: Batch size for the model. Replaces the first dimension of an + input size array if undefined. (default 1) + """ + for tensor in self._input_tensors: + shape = tensor.get_shape().as_list() + shape[0] = batch_size + tensor.set_shape(shape) diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py new file mode 100644 index 0000000000..2f3105f3e6 --- /dev/null +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -0,0 +1,323 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for lite.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import numpy as np + +from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import lite_constants +from tensorflow.contrib.lite.python.interpreter import Interpreter +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.platform import test +from tensorflow.python.saved_model import saved_model + + +class FromSessionTest(test_util.TensorFlowTestCase): + + def testFloat(self): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Convert model and ensure model is not None. + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('Placeholder', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testQuantization(self): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='input') + out_tensor = array_ops.fake_quant_with_min_max_args( + in_tensor + in_tensor, min=0., max=1., name='output') + sess = session.Session() + + # Convert model and ensure model is not None. + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + converter.inference_type = lite_constants.QUANTIZED_UINT8 + converter.quantized_input_stats = [(0., 1.)] # mean, std_dev + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('input', input_details[0]['name']) + self.assertEqual(np.uint8, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((1., 0.), + input_details[0]['quantization']) # scale, zero_point + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('output', output_details[0]['name']) + self.assertEqual(np.uint8, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertTrue(output_details[0]['quantization'][0] > 0) # scale + + def testBatchSizeInvalid(self): + in_tensor = array_ops.placeholder( + shape=[None, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Test invalid shape. None after 1st dimension. + in_tensor = array_ops.placeholder( + shape=[1, None, 16, 3], dtype=dtypes.float32) + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + with self.assertRaises(ValueError) as error: + converter.convert() + self.assertEqual( + 'None is only supported in the 1st dimension. Tensor ' + '\'Placeholder_1:0\' has invalid shape \'[1, None, 16, 3]\'.', + str(error.exception)) + + def testBatchSizeValid(self): + in_tensor = array_ops.placeholder( + shape=[None, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Convert model and ensure model is not None. + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('Placeholder', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testFreezeGraph(self): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + var = variable_scope.get_variable( + 'weights', shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + var + sess = session.Session() + + # Convert model and ensure model is not None. + converter = lite.TocoConverter.from_session( + sess, [in_tensor], [out_tensor], freeze_variables=True) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('Placeholder', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testGraphviz(self): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Convert model and ensure model is not None. 
+ converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + converter.output_format = lite_constants.GRAPHVIZ_DOT + graphviz_output = converter.convert() + self.assertTrue(graphviz_output) + + +class FromSavedModelTest(test_util.TensorFlowTestCase): + + def _createSavedModel(self, shape): + """Create a simple SavedModel.""" + saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel') + with session.Session() as sess: + in_tensor_1 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name='inputB') + in_tensor_2 = array_ops.placeholder( + shape=shape, dtype=dtypes.float32, name='inputA') + out_tensor = in_tensor_1 + in_tensor_2 + inputs = {'x': in_tensor_1, 'y': in_tensor_2} + outputs = {'z': out_tensor} + saved_model.simple_save(sess, saved_model_dir, inputs, outputs) + return saved_model_dir + + def testSimpleModel(self): + """Test a SavedModel.""" + saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3]) + + # Convert model and ensure model is not None. 
+ converter = lite.TocoConverter.from_saved_model(saved_model_dir) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(2, len(input_details)) + self.assertEqual('inputA', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + self.assertEqual('inputB', input_details[1]['name']) + self.assertEqual(np.float32, input_details[1]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertEqual((0., 0.), input_details[1]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testNoneBatchSize(self): + """Test a SavedModel, with None in input tensor's shape.""" + saved_model_dir = self._createSavedModel(shape=[None, 16, 16, 3]) + + converter = lite.TocoConverter.from_saved_model(saved_model_dir) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(2, len(input_details)) + self.assertEqual('inputA', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + self.assertEqual('inputB', input_details[1]['name']) + self.assertEqual(np.float32, input_details[1]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertEqual((0., 0.), input_details[1]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testOrderInputArrays(self): + """Test a SavedModel ordering of input arrays.""" + saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3]) + + converter = lite.TocoConverter.from_saved_model( + saved_model_dir, input_arrays=['inputB', 'inputA']) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(2, len(input_details)) + self.assertEqual('inputA', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + self.assertEqual('inputB', input_details[1]['name']) + self.assertEqual(np.float32, input_details[1]['dtype']) + self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertEqual((0., 0.), input_details[1]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + + def testSubsetInputArrays(self): + """Test a SavedModel with a subset of the input array names of the model.""" + saved_model_dir = self._createSavedModel(shape=[1, 16, 16, 3]) + + # Check case where input shape is given. + converter = lite.TocoConverter.from_saved_model( + saved_model_dir, + input_arrays=['inputA'], + input_shapes={'inputA': [1, 16, 16, 3]}) + + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check case where input shape is None. 
+ converter = lite.TocoConverter.from_saved_model( + saved_model_dir, input_arrays=['inputA'], input_shapes={'inputA': None}) + + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md index f0fd638a61..29a83bd26f 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -1,69 +1,198 @@ -# TensorFlow Lite Optimizing Converter (TOCO) Python API reference +# TensorFlow Lite Optimizing Converter & Interpreter Python API reference -This page provides examples on how to use TOCO via the Python API. It is -complemented by the following documents: +This page provides examples on how to use TOCO and the TensorFlow Lite +interpreter via the Python API. It is complemented by the following documents: * [README](../README.md) * [Command-line examples](cmdline_examples.md) * [Command-line glossary](cmdline_reference.md) +Table of contents: + +* [High-level overview](#high-level-overview) +* [API](#api) +* [Basic examples](#basic) + * [Exporting a GraphDef with constants](#basic-graphdef-const) + * [Exporting a GraphDef with variables](#basic-graphdef-var) + * [Exporting a SavedModel](#basic-savedmodel) +* [Complex examples](#complex) + * [Exporting a quantized GraphDef](#complex-quant) +* [TensorFlow Lite Python interpreter](#interpreter) + * [Using the interpreter from a model file](#interpreter-file) + * [Using the interpreter from model data](#interpreter-data) + ## High-level overview While the TensorFlow Lite Optimizing Converter can be used from the command -line, it is often convenient to use it as part of Python model build and +line, it is often convenient to use it as part of a Python model build and training script. This is so that conversion can be part of your model development pipeline. 
This allows you to know early and often that you are designing a model that can be targeted to devices with mobile. ## API -In Python you can run `help(tf.contrib.lite)` to get documentation on functions. -In particular, `tf.contrib.lite.toco_convert` presents a simple API and -`tf.contrib.lite.toco_from_protos` allows more detailed control of TOCO using -the protobuf interface to TOCO. +The API for converting TensorFlow models to TensorFlow Lite is +`tf.contrib.lite.TocoConverter`. The API for calling the Python interpreter is +`tf.contrib.lite.Interpreter`. + +`TocoConverter` provides class methods based on the original format of the +model. `TocoConverter.from_session()` is available for GraphDefs. +`TocoConverter.from_saved_model()` is available for SavedModels. Example usages +for simple floating-point models are shown in [Basic Examples](#basic). Example +usages for more complex models are shown in [Complex Examples](#complex). + +**NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python +interpreter when the conversion fails. This will be remedied as soon as +possible. + +## Basic examples -## Example +The following section shows examples of how to convert a basic floating-point model +from each of the supported data formats into a TensorFlow Lite FlatBuffer. -In particular, here we show creating a simple model and converting it to a -TensorFlow Lite Model. +### Exporting a GraphDef with constants + +The following example shows how to convert a TensorFlow GraphDef with constants +into a TensorFlow Lite FlatBuffer.
```python import tensorflow as tf img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) -val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +const = tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +val = img + const out = tf.identity(val, name="out") + with tf.Session() as sess: - tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) - open("test.tflite", "wb").write(tflite_model) + converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + tflite_model = converter.convert() + open("converted_model.tflite", "wb").write(tflite_model) ``` -**NOTE** Currently, the TOCO command will cause a fatal error to the Python -interpreter when TOCO conversion fails. This will be remedied as soon as -possible. - -## Example 2: Export with variables +### Exporting a GraphDef with variables -If a model has variables, they need to be turned into constants. This process is -known as freezing, and it can actually be accomplished with +If a model has variables, they need to be turned into constants through a +process known as freezing. It can be accomplished by setting `freeze_variables` +to `True` as shown in the example below. ```python import tensorflow as tf img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) -var = tf.get_variable("weights", dtype=tf.float32, shape=(1,64,64,3)) +var = tf.get_variable("weights", dtype=tf.float32, shape=(1, 64, 64, 3)) val = img + var +out = tf.identity(val, name="out") -def canonical_name(x): - return x.name.split(":")[0] +with tf.Session() as sess: + converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out], + freeze_variables=True) + tflite_model = converter.convert() + open("converted_model.tflite", "wb").write(tflite_model) +``` + +### Exporting a SavedModel + +The following example shows how to convert a SavedModel into a TensorFlow Lite +FlatBuffer. 
+ +```python +import tensorflow as tf + +converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir) +tflite_model = converter.convert() +open("converted_model.tflite", "wb").write(tflite_model) +``` + +For more complex SavedModels, the optional parameters that can be passed into +`TocoConverter.from_saved_model()` are `input_arrays`, `input_shapes`, +`output_arrays`, `tag_set` and `signature_key`. Details of each parameter are +available by running `help(tf.contrib.lite.TocoConverter)`. + +## Complex examples + +For models where the default value of the attributes is not sufficient, the +attribute values should be set before calling `convert()`. To reference any +constants, use `tf.contrib.lite.constants.<CONSTANT_NAME>` as seen below with +`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TocoConverter)` in the Python +terminal for detailed documentation on the attributes. + +Although the examples are demonstrated on GraphDefs containing only constants, +the same logic can be applied irrespective of the input data format. + +### Exporting a quantized GraphDef + +The following example shows how to convert a quantized model into a TensorFlow +Lite FlatBuffer.
+ +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +const = tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +val = img + const +out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output") -out = tf.identity(val, name="out") with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - out_tensors = [out] - frozen_graphdef = tf.graph_util.convert_variables_to_constants( - sess, sess.graph_def, map(canonical_name, out_tensors)) - tflite_model = tf.contrib.lite.toco_convert( - frozen_graphdef, [img], out_tensors) + converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8 + converter.quantized_input_stats = [(0., 1.)] # mean, std_dev + tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` + +## TensorFlow Lite Python interpreter + +### Using the interpreter from a model file + +The following example shows how to use the TensorFlow Lite Python interpreter +when provided a TensorFlow Lite FlatBuffer file. The example also demonstrates +how to run inference on random input data. Run +`help(tf.contrib.lite.Interpreter)` in the Python terminal to get detailed +documentation on the interpreter. + +```python +import numpy as np +import tensorflow as tf + +# Load TFLite model and allocate tensors. +interpreter = tf.contrib.lite.Interpreter(model_path="converted_model.tflite") +interpreter.allocate_tensors() + +# Get input and output tensors. +input_details = interpreter.get_input_details() +output_details = interpreter.get_output_details() + +# Test model on random input data. 
+input_shape = input_details[0]['shape'] +input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32) +interpreter.set_tensor(input_details[0]['index'], input_data) + +interpreter.invoke() +output_data = interpreter.get_tensor(output_details[0]['index']) +print(output_data) +``` + +### Using the interpreter from model data + +The following example shows how to use the TensorFlow Lite Python interpreter +when starting with the TensorFlow Lite Flatbuffer model previously loaded. This +example shows an end-to-end use case, starting from building the TensorFlow +model. + +```python +import numpy as np +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +const = tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +val = img + const +out = tf.identity(val, name="out") + +with tf.Session() as sess: + converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + tflite_model = converter.convert() + +# Load TFLite model and allocate tensors. +interpreter = tf.contrib.lite.Interpreter(model_content=tflite_model) +interpreter.allocate_tensors() +``` -- GitLab From 015c1d84f714c651f401a19cdb709ad9c91561e1 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 24 May 2018 10:58:47 -0700 Subject: [PATCH 098/902] Fix convert_to_tensor logic in GradientDescentOptimizer's _prepare method Previously, eagerly executing an optimizer that had been used in a `defun` led to a cryptic error because the learning rate tensor supplied to the update op was in fact a vestigial graph Tensor. 
PiperOrigin-RevId: 197919104 --- .../python/training/gradient_descent.py | 3 ++- .../python/training/gradient_descent_test.py | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/gradient_descent.py b/tensorflow/python/training/gradient_descent.py index 6caf29d83a..a07ad19a6e 100644 --- a/tensorflow/python/training/gradient_descent.py +++ b/tensorflow/python/training/gradient_descent.py @@ -71,6 +71,7 @@ class GradientDescentOptimizer(optimizer.Optimizer): return var.scatter_sub(delta, use_locking=self._use_locking) def _prepare(self): - if not context.executing_eagerly() or self._learning_rate_tensor is None: + if not context.executing_eagerly() or not isinstance( + self._learning_rate_tensor, ops.EagerTensor): self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate, name="learning_rate") diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py index 5370cafbcf..f89a9c5838 100644 --- a/tensorflow/python/training/gradient_descent_test.py +++ b/tensorflow/python/training/gradient_descent_test.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -218,6 +221,26 @@ class GradientDescentOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], var1.eval()) + def testCapturingInDefunWhileExecutingEagerly(self): + with context.eager_mode(): + optimizer = gradient_descent.GradientDescentOptimizer(1.0) + + def step(): + v = resource_variable_ops.ResourceVariable(1.0) + with backprop.GradientTape() as tape: + loss = v ** 2 + grad = tape.gradient(loss, 
v) + optimizer.apply_gradients([(grad, v)]) + return v.read_value() + + compiled_step = function.defun(step) + + self.assertEqual(float(step()), -1.0) + self.assertEqual(float(compiled_step()), -1.0) + # This shouldn't fail; in particular, the learning rate tensor should + # be an EagerTensor once again, not a graph Tensor. + self.assertEqual(float(step()), -1.0) + if __name__ == "__main__": test.main() -- GitLab From 677b4cf7539af0cf5741d12dfe7e142c586d4567 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 24 May 2018 11:11:42 -0700 Subject: [PATCH 099/902] Add shape validation for symbolic tensors passed to fit (only graph mode). PiperOrigin-RevId: 197921675 --- tensorflow/python/keras/engine/training.py | 3 +- .../python/keras/engine/training_test.py | 31 +++++++++++++++++++ .../python/keras/engine/training_utils.py | 14 ++++++--- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 0db805cc84..6d625f16c2 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -846,7 +846,8 @@ class Model(Network): # in the case where all inputs are value arrays. if context.executing_eagerly(): - # In eager mode, do not do shape validation. + # In eager mode, do not do shape validation + # since the network has no input nodes (placeholders) to be fed. 
feed_input_names = self.input_names feed_input_shapes = None elif not self._is_graph_network: diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 222e3496c1..5c02d36382 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1917,6 +1917,37 @@ class TestTrainingWithDataset(test.TestCase): 'you should specify the `steps` argument'): model.predict(dataset, verbose=0) + def test_dataset_input_shape_validation(self): + with self.test_session(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + model.compile(optimizer, loss) + + # User forgets to batch the dataset + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + + with self.assertRaisesRegexp(ValueError, + 'expected input to have 2 dimensions'): + model.train_on_batch(dataset) + + # Wrong input shape + inputs = np.zeros((10, 5), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + with self.assertRaisesRegexp(ValueError, + 'expected input to have shape'): + model.train_on_batch(dataset) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index c53948b902..b93f999444 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -166,10 +166,16 @@ def standardize_input_data(data, # Check shapes compatibility. 
if shapes: for i in range(len(names)): - if shapes[i] is not None and not tensor_util.is_tensor(data[i]): - data_shape = data[i].shape + if shapes[i] is not None: + if tensor_util.is_tensor(data[i]): + tensorshape = data[i].get_shape() + if not tensorshape: + continue + data_shape = tuple(tensorshape.as_list()) + else: + data_shape = data[i].shape shape = shapes[i] - if data[i].ndim != len(shape): + if len(data_shape) != len(shape): raise ValueError('Error when checking ' + exception_prefix + ': expected ' + names[i] + ' to have ' + str(len(shape)) + ' dimensions, but got array ' @@ -178,7 +184,7 @@ def standardize_input_data(data, data_shape = data_shape[1:] shape = shape[1:] for dim, ref_dim in zip(data_shape, shape): - if ref_dim != dim and ref_dim: + if ref_dim != dim and ref_dim is not None and dim is not None: raise ValueError( 'Error when checking ' + exception_prefix + ': expected ' + names[i] + ' to have shape ' + str(shape) + -- GitLab From f8438342466a323add1363bbac85a410b340ac57 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 24 May 2018 11:14:17 -0700 Subject: [PATCH 100/902] Deprecate `DeviceBase::GetStepAllocator()` and replace with calls to `GetAllocator()`. The `GetStepAllocator()` API relied on the existence of a "step resource manager", which is no longer a concept in the runtime (it was replaced by "step containers"). Since the additional flexibility does not appear to be used in the codebase, the `GetScopedAllocator()` seems to provide a similar extension point (based on step IDs), and the `OpKernelContext::get_allocator()` method is called frequently, this change simplifies the implementation somewhat. The `GetStepAllocator()` method is retained as a non-virtual stub that forwards to `GetAllocator()`, because at least one third-party library (libxsmm) calls this interface directly. 
PiperOrigin-RevId: 197922154 --- tensorflow/contrib/nccl/kernels/nccl_manager_test.cc | 3 +-- tensorflow/core/common_runtime/renamed_device.h | 5 ----- tensorflow/core/framework/device_base.h | 11 ++++------- tensorflow/core/framework/op_kernel.cc | 6 +++--- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc index 4d8d922cb4..5144f7c38c 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_manager_test.cc @@ -171,8 +171,7 @@ class NcclManagerTest : public ::testing::Test { private: static Allocator* GpuAllocator(BaseGPUDevice* device) { - return device->GetStepAllocator(AllocatorAttributes(), - nullptr /* step_resource_manager */); + return device->GetAllocator(AllocatorAttributes()); } static se::DeviceMemory AsDeviceMemory(const Scalar* cuda_memory) { diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index fe4df1c106..103eee03b3 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -58,11 +58,6 @@ class RenamedDevice : public Device { return underlying_->GetAllocator(attr); } - Allocator* GetStepAllocator(AllocatorAttributes attr, - ResourceMgr* step_resource_manager) override { - return underlying_->GetStepAllocator(attr, step_resource_manager); - } - const Eigen::ThreadPoolDevice* eigen_cpu_device() override { return underlying_->eigen_cpu_device(); } diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 223b74857d..ec26d92a61 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -169,13 +169,10 @@ class DeviceBase { return nullptr; } - // Return the Allocator implementation to use based on the allocator - // attributes requested and the supplied resource manager. 
By - // default this ignores the resource manager and calls the base - // implementation but devices can override if they want to consult - // the resource manager when choosing the allocator. - virtual Allocator* GetStepAllocator(AllocatorAttributes attr, - ResourceMgr* /*step_resource_manager*/) { + // DEPRECATED: Use `this->GetAllocator()` or `this->GetScopedAllocator()`. + // This method is provided for backwards compatibility, and will be removed + // in a future release. + Allocator* GetStepAllocator(AllocatorAttributes attr, ResourceMgr*) { return GetAllocator(attr); } diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index c71bcb26ab..d240c853eb 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -283,13 +283,13 @@ OpKernelContext::~OpKernelContext() { Allocator* OpKernelContext::get_allocator(AllocatorAttributes attr) { Allocator* allocator = nullptr; - if (attr.scope_id > 0) { + if (TF_PREDICT_FALSE(attr.scope_id > 0)) { allocator = params_->device->GetScopedAllocator(attr, step_id()); CHECK(allocator); } else { - allocator = params_->device->GetStepAllocator(attr, resource_manager()); + allocator = params_->device->GetAllocator(attr); } - if (track_allocations()) { + if (TF_PREDICT_FALSE(track_allocations())) { mutex_lock lock(mu_); for (const auto& wrapped : wrapped_allocators_) { if (wrapped.first == allocator) { -- GitLab From 61dd76952e1e9a312105b7497f34d32d1a00a04b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 11:18:45 -0700 Subject: [PATCH 101/902] boosted_trees: used double precision instead of single precision while accumulating batches within MakeStatsSummary, as float type faces numerical precision problems when batch gets larger and stats gets smaller. 
PiperOrigin-RevId: 197923022 --- .../core/kernels/boosted_trees/stats_ops.cc | 41 +++++++++++++------ .../python/kernel_tests/boosted_trees/BUILD | 2 +- .../boosted_trees/stats_ops_test.py | 38 +++++++++++++++++ 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc index 6dfcd63ab3..53bdd482cb 100644 --- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc @@ -255,7 +255,7 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { // node_ids const Tensor* node_ids_t; OP_REQUIRES_OK(context, context->input("node_ids", &node_ids_t)); - const auto node_ids = node_ids_t->vec(); + const auto node_ids = node_ids_t->flat(); // gradients const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); @@ -268,12 +268,6 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { OpInputList bucketized_features_list; OP_REQUIRES_OK(context, context->input_list("bucketized_features_list", &bucketized_features_list)); - std::vector::ConstVec> bucketized_features; - bucketized_features.reserve(num_features_); - for (const Tensor& tensor : bucketized_features_list) { - bucketized_features.emplace_back(tensor.vec()); - } - // Infer batch size. const int64 batch_size = node_ids_t->dim_size(0); // Allocate output stats tensor (Rank 4). 
@@ -282,18 +276,39 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { "stats_summary", {num_features_, max_splits_, num_buckets_, 2}, &output_stats_summary_t)); - auto output_stats_summary = output_stats_summary_t->tensor(); - output_stats_summary.setZero(); + auto output_stats_summary = output_stats_summary_t->flat(); + EIGEN_STATIC_ASSERT( + (static_cast(decltype(output_stats_summary)::Layout) == + static_cast(Eigen::RowMajor)), + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES); + + const int shift_per_node = num_buckets_ * 2; + const int shift_per_feature = shift_per_node * max_splits_; + const int32 max_index = num_features_ * shift_per_feature; + // We use double to sum the gradients and hessians, due to possible + // precision loss when summing small float values. + std::vector res(max_index, 0); // Partition by node, and then bucketize. - for (int feature_idx = 0; feature_idx < num_features_; ++feature_idx) { - const auto& features = bucketized_features[feature_idx]; + int feature_idx = 0; + int feature_shift = 0; + for (const Tensor& tensor : bucketized_features_list) { + const auto& features = tensor.flat(); for (int i = 0; i < batch_size; ++i) { const int32 node = node_ids(i); const int32 bucket = features(i); - output_stats_summary(feature_idx, node, bucket, 0) += gradients(i, 0); - output_stats_summary(feature_idx, node, bucket, 1) += hessians(i, 0); + // Calculate the index in the flattened vector for + // [feature_idx][node][bucket][0]. + const int index = feature_shift + node * shift_per_node + bucket * 2; + res[index] += gradients(i, 0); + res[index + 1] += hessians(i, 0); } + ++feature_idx; + feature_shift += shift_per_feature; + } + // Copy over the results. 
+ for (int i = 0; i < max_index; ++i) { + output_stats_summary(i) = res[i]; } } diff --git a/tensorflow/python/kernel_tests/boosted_trees/BUILD b/tensorflow/python/kernel_tests/boosted_trees/BUILD index 30e6289420..4f92ab0795 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/BUILD +++ b/tensorflow/python/kernel_tests/boosted_trees/BUILD @@ -52,7 +52,7 @@ tf_py_test( tf_py_test( name = "stats_ops_test", - size = "small", + size = "medium", srcs = ["stats_ops_test.py"], additional_deps = [ "//tensorflow/python:boosted_trees_ops", diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index 5cceb98cff..568e695fd5 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -17,7 +17,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import boosted_trees_ops from tensorflow.python.platform import googletest @@ -388,6 +391,41 @@ class StatsOpsTest(test_util.TensorFlowTestCase): ], result.eval()) + def _verify_precision(self, length): + with self.test_session(): + max_splits = 1 + num_buckets = 1 + node_ids = array_ops.fill([length], 0) + + gradients = constant_op.constant( + 2.0 / length, dtype=dtypes.float32, shape=[length, 1]) + hessians = constant_op.constant( + 0.2 / length, dtype=dtypes.float32, shape=[length, 1]) + + bucketized_features = array_ops.zeros([length], dtype=dtypes.int32) + + result = boosted_trees_ops.make_stats_summary( + node_ids, gradients, hessians, [bucketized_features], max_splits, + num_buckets) # shape=[max_splits, num_buckets, num_features, 2] + + self.assertAllClose([[[[2., 0.2]]]], 
result.eval()) + + def testMakeStatsSummaryNumericalPrecisionSmallBatch(self): + """Tests numeric precision.""" + self._verify_precision(length=2000) + + def testMakeStatsSummaryNumericalPrecisionMediumBatch(self): + """Tests numeric precision.""" + self._verify_precision(length=100000) + + def testMakeStatsSummaryNumericalPrecisionLargeBatch(self): + """Tests numeric precision.""" + self._verify_precision(length=1000000) + + def testMakeStatsSummaryNumericalPrecisionMegaBatch(self): + """Tests numeric precision.""" + self._verify_precision(length=50000000) + if __name__ == '__main__': googletest.main() -- GitLab From cdc1b4756a41dbfa7e7f39c466ff65dd88407cc0 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 24 May 2018 11:23:18 -0700 Subject: [PATCH 102/902] Make the existing checkpointable data structure a CheckpointableDataStructure Gives it better/more consistent handling of Layers. PiperOrigin-RevId: 197923880 --- tensorflow/contrib/checkpoint/python/containers.py | 7 +++++-- tensorflow/contrib/checkpoint/python/containers_test.py | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/checkpoint/python/containers.py b/tensorflow/contrib/checkpoint/python/containers.py index 9807abae1f..4d3d531299 100644 --- a/tensorflow/contrib/checkpoint/python/containers.py +++ b/tensorflow/contrib/checkpoint/python/containers.py @@ -18,9 +18,10 @@ from __future__ import division from __future__ import print_function from tensorflow.python.training.checkpointable import base as checkpointable_lib +from tensorflow.python.training.checkpointable import data_structures -class UniqueNameTracker(checkpointable_lib.CheckpointableBase): +class UniqueNameTracker(data_structures.CheckpointableDataStructure): """Adds dependencies on checkpointable objects with name hints. Useful for creating dependencies with locally unique names. 
@@ -41,6 +42,7 @@ class UniqueNameTracker(checkpointable_lib.CheckpointableBase): """ def __init__(self): + super(UniqueNameTracker, self).__init__() self._maybe_initialize_checkpointable() self._name_counts = {} @@ -74,4 +76,5 @@ class UniqueNameTracker(checkpointable_lib.CheckpointableBase): count += 1 candidate = _format_name(base_name, count) self._name_counts[base_name] = count + 1 - return self._track_checkpointable(checkpointable, name=candidate) + self._track_value(checkpointable, name=candidate) + return checkpointable diff --git a/tensorflow/contrib/checkpoint/python/containers_test.py b/tensorflow/contrib/checkpoint/python/containers_test.py index 851a800588..3717d7f583 100644 --- a/tensorflow/contrib/checkpoint/python/containers_test.py +++ b/tensorflow/contrib/checkpoint/python/containers_test.py @@ -22,6 +22,8 @@ import six from tensorflow.contrib.checkpoint.python import containers from tensorflow.python.framework import test_util +from tensorflow.python.keras import layers +from tensorflow.python.ops import array_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test from tensorflow.python.training.checkpointable import base as checkpointable @@ -95,5 +97,12 @@ class UniqueNameTrackerTests(test.TestCase): dependency_names, ["x", "x_1", "y", "slot_manager", "slotdeps", "save_counter"]) + @test_util.run_in_graph_and_eager_modes() + def testLayers(self): + tracker = containers.UniqueNameTracker() + tracker.track(layers.Dense(3), "dense") + tracker.layers[0](array_ops.zeros([1, 1])) + self.assertEqual(2, len(tracker.trainable_weights)) + if __name__ == "__main__": test.main() -- GitLab From d9c3057d8e2f9f0e8bc5c48522a5176a73af11b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 11:29:34 -0700 Subject: [PATCH 103/902] Extracts the 'Move Constants Past Enter Node' optimization into its own method. 
PiperOrigin-RevId: 197924962 --- .../grappler/optimizers/constant_folding.cc | 97 ++++++++++--------- .../grappler/optimizers/constant_folding.h | 3 + 2 files changed, 55 insertions(+), 45 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a71f83b871..62e1ab0892 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1988,51 +1988,9 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } - // Move constants past Enter. - if (IsEnter(*node) && node->input_size() > 0) { - if (node->attr().count("is_constant") == 0 || - !node->attr().at("is_constant").b()) { - return Status::OK(); - } - const string& node_name = node->name(); - const NodeDef* input = node_map_->GetNode(node->input(0)); - if (input != nullptr && IsReallyConstant(*input) && - !OptimizedNodeExists(*input, "_enter")) { - auto fanouts = node_map_->GetOutputs(node_name); - // Find non-constant nodes that consume the output of *node. 
- std::vector consumers; - for (NodeDef* fanout : fanouts) { - if (!IsConstant(*fanout)) { - for (int i = 0; i < fanout->input_size(); ++i) { - if (fanout->input(i) == node_name) { - consumers.push_back(fanout); - break; - } - } - } - } - if (!consumers.empty()) { - NodeDef* new_node = optimized_graph->add_node(); - *new_node = *input; - new_node->set_name(OptimizedNodeName(*input, "_enter")); - new_node->set_device(node->device()); - new_node->clear_input(); - new_node->add_input(AsControlDependency(node_name)); - node_map_->AddNode(new_node->name(), new_node); - node_map_->AddOutput(node_name, new_node->name()); - for (NodeDef* consumer : consumers) { - for (int i = 0; i < consumer->input_size(); ++i) { - if (NodeName(consumer->input(i)) == node_name) { - node_map_->UpdateInput(consumer->name(), node_name, - new_node->name()); - consumer->set_input(i, new_node->name()); - } - } - } - graph_modified_ = true; - return Status::OK(); - } - } + if (MoveConstantsPastEnter(optimized_graph, node)) { + graph_modified_ = true; + return Status::OK(); } if (SimplifySwitch(optimized_graph, node)) { @@ -2094,6 +2052,55 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::MoveConstantsPastEnter(GraphDef* optimized_graph, + NodeDef* node) { + if (IsEnter(*node) && node->input_size() > 0) { + if (node->attr().count("is_constant") == 0 || + !node->attr().at("is_constant").b()) { + return false; + } + const string& node_name = node->name(); + const NodeDef* input = node_map_->GetNode(node->input(0)); + if (input != nullptr && IsReallyConstant(*input) && + !OptimizedNodeExists(*input, "_enter")) { + auto fanouts = node_map_->GetOutputs(node_name); + // Find non-constant nodes that consume the output of *node. 
+ std::vector consumers; + for (NodeDef* fanout : fanouts) { + if (!IsConstant(*fanout)) { + for (int i = 0; i < fanout->input_size(); ++i) { + if (fanout->input(i) == node_name) { + consumers.push_back(fanout); + break; + } + } + } + } + if (!consumers.empty()) { + NodeDef* new_node = optimized_graph->add_node(); + *new_node = *input; + new_node->set_name(OptimizedNodeName(*input, "_enter")); + new_node->set_device(node->device()); + new_node->clear_input(); + new_node->add_input(AsControlDependency(node_name)); + node_map_->AddNode(new_node->name(), new_node); + node_map_->AddOutput(node_name, new_node->name()); + for (NodeDef* consumer : consumers) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (NodeName(consumer->input(i)) == node_name) { + node_map_->UpdateInput(consumer->name(), node_name, + new_node->name()); + consumer->set_input(i, new_node->name()); + } + } + } + return true; + } + } + } + return false; +} + bool ConstantFolding::SimplifySwitch(GraphDef* optimized_graph, NodeDef* node) { if (node->op() == "Switch" && node->input(0) == node->input(1) && !OptimizedNodeExists(*node, "_const_false") && diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 88f03b3931..9fd4c9c789 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -164,6 +164,9 @@ class ConstantFolding : public GraphOptimizer { // +------+ bool SimplifySwitch(GraphDef* optimized_graph, NodeDef* node); + // Move constants past Enter node if applicable. 
+ bool MoveConstantsPastEnter(GraphDef* optimized_graph, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 6e96bf989014ef3079d668c93f3ebebff30e3e37 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 24 May 2018 11:37:12 -0700 Subject: [PATCH 104/902] [tf.data] Add `tf.contrib.data.choose_from_datasets()`. This is a deterministic counterpart to `tf.contrib.data.sample_from_datasets()`. PiperOrigin-RevId: 197926386 --- tensorflow/contrib/data/__init__.py | 1 + .../directed_interleave_dataset_test.py | 27 +++++++++++ .../contrib/data/python/ops/interleave_ops.py | 45 +++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index a25aa85251..1af1ed08b5 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -30,6 +30,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. 
@@assert_element_shape @@batch_and_drop_remainder @@bucket_by_sequence_length +@@choose_from_datasets @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window diff --git a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py index d071eb17e0..34b6a080c0 100644 --- a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py @@ -96,6 +96,21 @@ class DirectedInterleaveDatasetTest(test.TestCase): freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples) self.assertLess(self._chi2(probs, freqs / num_samples), 1e-2) + def testSelectFromDatasets(self): + words = [b"foo", b"bar", b"baz"] + datasets = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words] + choice_array = np.random.randint(3, size=(15,), dtype=np.int64) + choice_dataset = dataset_ops.Dataset.from_tensor_slices(choice_array) + dataset = interleave_ops.choose_from_datasets(datasets, choice_dataset) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + for i in choice_array: + self.assertEqual(words[i], sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testErrors(self): with self.assertRaisesRegexp(ValueError, r"vector of length `len\(datasets\)`"): @@ -116,6 +131,18 @@ class DirectedInterleaveDatasetTest(test.TestCase): dataset_ops.Dataset.from_tensors(0.0) ]) + with self.assertRaisesRegexp(TypeError, "tf.int64"): + interleave_ops.choose_from_datasets([ + dataset_ops.Dataset.from_tensors(0), + dataset_ops.Dataset.from_tensors(1) + ], choice_dataset=dataset_ops.Dataset.from_tensors(1.0)) + + with self.assertRaisesRegexp(TypeError, "scalar"): + interleave_ops.choose_from_datasets([ + dataset_ops.Dataset.from_tensors(0), + 
dataset_ops.Dataset.from_tensors(1) + ], choice_dataset=dataset_ops.Dataset.from_tensors([1.0])) + class SampleFromDatasetsSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase): diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 812a50ecbf..be66fbac50 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -27,6 +27,7 @@ from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.util import deprecation @@ -240,3 +241,47 @@ def sample_from_datasets(datasets, weights=None, seed=None): (logits_ds, random_ops.RandomDataset(seed).batch(2))).map(select_dataset) return DirectedInterleaveDataset(selector_input, datasets) + + +def choose_from_datasets(datasets, choice_dataset): + """Creates a dataset that deterministically chooses elements from `datasets`. + + For example, given the following datasets: + + ```python + datasets = [tf.data.Dataset.from_tensors("foo").repeat(), + tf.data.Dataset.from_tensors("bar").repeat(), + tf.data.Dataset.from_tensors("baz").repeat()] + + # Define a dataset containing `[0, 1, 2, 0, 1, 2, 0, 1, 2]`. + choice_dataset = tf.data.Dataset.range(3).repeat(3) + + result = tf.contrib.data.choose_from_datasets(datasets, choice_dataset) + ``` + + The elements of `result` will be: + + ``` + "foo", "bar", "baz", "foo", "bar", "baz", "foo", "bar", "baz" + ``` + + Args: + datasets: A list of @{tf.data.Dataset} objects with compatible structure. + choice_dataset: A @{tf.data.Dataset} of scalar `tf.int64` tensors between + `0` and `len(datasets) - 1`. 
+ + Returns: + A dataset that interleaves elements from `datasets` according to the values + of `choice_dataset`. + + Raises: + TypeError: If the `datasets` or `choice_dataset` arguments have the wrong + type. + """ + if not (choice_dataset.output_types == dtypes.int64 + and choice_dataset.output_shapes.is_compatible_with( + tensor_shape.scalar()) + and choice_dataset.output_classes == ops.Tensor): + raise TypeError("`choice_dataset` must be a dataset of scalar " + "`tf.int64` tensors.") + return DirectedInterleaveDataset(choice_dataset, datasets) -- GitLab From 0f12ef65f4ffcd62fcdba93211f83386af19b2a8 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 24 May 2018 11:44:15 -0700 Subject: [PATCH 105/902] Fix `tf_inspect.getargspec` callable objects other than functions. PiperOrigin-RevId: 197927601 --- tensorflow/python/util/tf_inspect.py | 128 ++++++++++++++-------- tensorflow/python/util/tf_inspect_test.py | 46 ++++++++ 2 files changed, 127 insertions(+), 47 deletions(-) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 33b389c8c4..5faf644c91 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -42,32 +42,67 @@ def currentframe(): return _inspect.stack()[1][0] -def getargspec(object): # pylint: disable=redefined-builtin +def getargspec(obj): """TFDecorator-aware replacement for inspect.getargspec. Args: - object: A callable (function or partial function), possibly decorated. + obj: A function, partial function, or callable object, possibly + decorated. Returns: The `ArgSpec` that describes the signature of the outermost decorator that changes the callable's signature. If the callable is not decorated, - `inspect.getargspec()` will be called directly on the callable. + `inspect.getargspec()` will be called directly on the object. Raises: - ValueError: When callable's function signature can not be expressed with - ArgSpec. 
+ ValueError: When callable's signature can not be expressed with + ArgSpec. + TypeError: For objects of unsupported types. """ + if isinstance(obj, functools.partial): + return _get_argspec_for_partial(obj) - def get_argspec_with_decorator(obj): - decorators, target = tf_decorator.unwrap(obj) - return next((d.decorator_argspec - for d in decorators - if d.decorator_argspec is not None), - _inspect.getargspec(target)) + decorators, target = tf_decorator.unwrap(obj) + + spec = next((d.decorator_argspec + for d in decorators + if d.decorator_argspec is not None), None) + if spec: + return spec + + try: + # Python3 will handle most callables here (not partial). + return _inspect.getargspec(target) + except TypeError: + pass + + if isinstance(target, type): + try: + return _inspect.getargspec(target.__init__) + except TypeError: + pass + + try: + return _inspect.getargspec(target.__new__) + except TypeError: + pass - if not isinstance(object, functools.partial): - return get_argspec_with_decorator(object) + # The `type(target)` ensures that if a class is received we don't return + # the signature of it's __call__ method. + return _inspect.getargspec(type(target).__call__) + +def _get_argspec_for_partial(obj): + """Implements `getargspec` for `functools.partial` objects. + + Args: + obj: The `functools.partial` obeject + Returns: + An `inspect.ArgSpec` + Raises: + ValueError: When callable's signature can not be expressed with + ArgSpec. + """ # When callable is a functools.partial object, we construct its ArgSpec with # following strategy: # - If callable partial contains default value for positional arguments (ie. @@ -97,10 +132,10 @@ def getargspec(object): # pylint: disable=redefined-builtin # value and ensures all following arguments also have default values. When # this is not true, a ValueError is raised. 
- n_prune_args = len(object.args) - partial_keywords = object.keywords or {} + n_prune_args = len(obj.args) + partial_keywords = obj.keywords or {} - args, varargs, keywords, defaults = get_argspec_with_decorator(object.func) + args, varargs, keywords, defaults = getargspec(obj.func) # Pruning first n_prune_args arguments. args = args[n_prune_args:] @@ -137,11 +172,34 @@ def getargspec(object): # pylint: disable=redefined-builtin return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:])) -def getfullargspec(obj): # pylint: disable=redefined-builtin - """TFDecorator-aware replacement for `inspect.getfullargspec`/`getargspec`. +if hasattr(_inspect, 'getfullargspec'): + _getfullargspec = _inspect.getfullargspec +else: + + def _getfullargspec(target): + """A python2 version of getfullargspec. + + Args: + target: the target object to inspect. + Returns: + A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations. + """ + argspecs = _inspect.getargspec(target) + fullargspecs = FullArgSpec( + args=argspecs.args, + varargs=argspecs.varargs, + varkw=argspecs.keywords, + defaults=argspecs.defaults, + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + return fullargspecs + - This wrapper uses `inspect.getfullargspec` if available and falls back to - `inspect.getargspec` in Python 2. +def getfullargspec(obj): + """TFDecorator-aware replacement for `inspect.getfullargspec`. + + This wrapper emulates `inspect.getfullargspec` in[^)]* Python2. Args: obj: A callable, possibly decorated. @@ -152,34 +210,10 @@ def getfullargspec(obj): # pylint: disable=redefined-builtin callable is not decorated, `inspect.getfullargspec()` will be called directly on the callable. """ - if hasattr(_inspect, 'getfullargspec'): - spec_fn = _inspect.getfullargspec - else: - def spec_fn(target): - """Spec function that adding default value from FullArgSpec. - - It is used when getfullargspec is not available (eg in PY2). - - Args: - target: the target object to inspect. 
- Returns: - The full argument specs with empty kwonlyargs, kwonlydefaults and - annotations. - """ - argspecs = _inspect.getargspec(target) - fullargspecs = FullArgSpec( - args=argspecs.args, - varargs=argspecs.varargs, - varkw=argspecs.keywords, - defaults=argspecs.defaults, - kwonlyargs=[], - kwonlydefaults=None, - annotations={}) - return fullargspecs - decorators, target = tf_decorator.unwrap(obj) - return next((d.decorator_argspec for d in decorators - if d.decorator_argspec is not None), spec_fn(target)) + return next((d.decorator_argspec + for d in decorators + if d.decorator_argspec is not None), _getfullargspec(target)) def getcallargs(func, *positional, **named): diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 325131c4f4..beaf350de1 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -245,6 +245,52 @@ class TfInspectTest(test.TestCase): self.assertEqual(partial_argspec, tf_inspect.getargspec(partial_with_decorator)) + def testGetArgSpecOnCallableObject(self): + + class Callable(object): + + def __call__(self, a, b=1, c='hello'): + pass + + argspec = tf_inspect.ArgSpec( + args=['self', 'a', 'b', 'c'], + varargs=None, + keywords=None, + defaults=(1, 'hello')) + + test_obj = Callable() + self.assertEqual(argspec, tf_inspect.getargspec(test_obj)) + + def testGetArgSpecOnInitClass(self): + + class InitClass(object): + + def __init__(self, a, b=1, c='hello'): + pass + + argspec = tf_inspect.ArgSpec( + args=['self', 'a', 'b', 'c'], + varargs=None, + keywords=None, + defaults=(1, 'hello')) + + self.assertEqual(argspec, tf_inspect.getargspec(InitClass)) + + def testGetArgSpecOnNewClass(self): + + class NewClass(object): + + def __new__(cls, a, b=1, c='hello'): + pass + + argspec = tf_inspect.ArgSpec( + args=['cls', 'a', 'b', 'c'], + varargs=None, + keywords=None, + defaults=(1, 'hello')) + + self.assertEqual(argspec, tf_inspect.getargspec(NewClass)) + 
def testGetDoc(self): self.assertEqual('Test Decorated Function With Defaults Docstring.', tf_inspect.getdoc(test_decorated_function_with_defaults)) -- GitLab From 4baecc0f1961f340fccec9fac8984add54066a5e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 24 May 2018 11:54:56 -0700 Subject: [PATCH 106/902] [TF:XLA] Bump open source llvm revision to r333167 PiperOrigin-RevId: 197929434 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d6fa6655c..90de15e18e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -453,11 +453,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/638915a37f90f26599941977846408864f70ab35.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/638915a37f90f26599941977846408864f70ab35.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/387dd2c562ef25850ef9229de82b9bcd0959b02c.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/387dd2c562ef25850ef9229de82b9bcd0959b02c.tar.gz", ], - sha256 = "aae3cacefa318cef030b4ca1e81ee9906752bbd89013cf9d47e156b5ad04b3a5", - strip_prefix = "llvm-638915a37f90f26599941977846408864f70ab35", + sha256 = "c94107d335f54a2d9c22dd38c754688f63618c42598bfc2df5bcce9de8363367", + strip_prefix = "llvm-387dd2c562ef25850ef9229de82b9bcd0959b02c", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From b6ef284eb02e3bfee1cf46e725a84e6558ca0c63 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 12:11:41 -0700 Subject: [PATCH 107/902] Fix doc: "--input_arrays" instead of "--input_array". 
PiperOrigin-RevId: 197932202 --- tensorflow/contrib/lite/toco/model_cmdline_flags.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc index f875c85d1a..0f104d5e2d 100644 --- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc @@ -83,7 +83,7 @@ bool ParseModelFlagsFromCommandLineFlags( "Deprecated: use --input_data_types instead. Input array type, if " "not already provided in the graph. " "Typically needs to be specified when passing arbitrary arrays " - "to --input_array."), + "to --input_arrays."), Flag("input_data_types", parsed_flags.input_data_types.bind(), parsed_flags.input_data_types.default_value(), "Input arrays types, comma-separated, if not already provided in " -- GitLab From 2a8b52fc0c5f1fc257ad9c042126b00edfeca705 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 24 May 2018 12:22:04 -0700 Subject: [PATCH 108/902] Don't use hex floats. Hex float literals are in C11 and C++17, but not in C++11, so use plain float notation. PiperOrigin-RevId: 197933744 --- tensorflow/compiler/xla/service/hlo_evaluator_test.cc | 8 ++++---- tensorflow/compiler/xla/tests/convert_test.cc | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index ae5b5e0412..84b4ead2dd 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -262,13 +262,13 @@ TEST_P(HloEvaluatorTest, DoesCosR2) { auto operand = Literal::CreateR2({{0, M_PI}, {-M_PI, 2 * M_PI}}); auto expected = Literal::CreateR2({{1, -1}, {-1, 1}}); TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand), - use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20); + use_bfloat16_ ? 
0.031250 : 9.5367431640625E-7); } TEST_P(HloEvaluatorTest, DoesSinR2) { auto operand = Literal::CreateR2({{0, M_PI}, {-M_PI, 2 * M_PI}}); auto expected = Literal::CreateR2({{0, 0}, {0, 0}}); TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand), - use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20); + use_bfloat16_ ? 0.031250 : 9.5367431640625E-7); } TEST_P(HloEvaluatorTest, DoesNotR2) { auto operand = @@ -333,7 +333,7 @@ TEST_P(HloEvaluatorTest, DoesReshape) { result->EachCell( [&](tensorflow::gtl::ArraySlice indices, NativeT value) { std::vector rindexes = Permute(permutation, indices); - EXPECT_NEAR(value, literal_clone->Get(rindexes), 0x1.0P-5); + EXPECT_NEAR(value, literal_clone->Get(rindexes), 0.031250); }); } @@ -567,7 +567,7 @@ TEST_P(HloEvaluatorTest, NegativePadding2D) { (*expected_array)(0, 4) = 2.718f; auto expected = Literal::CreateR2FromArray2D(*expected_array); - EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0x1.0P-5))); + EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0.031250))); } TEST_P(HloEvaluatorTest, NegativeAndInteriorPadding2D) { diff --git a/tensorflow/compiler/xla/tests/convert_test.cc b/tensorflow/compiler/xla/tests/convert_test.cc index 4ef0a77884..722d882471 100644 --- a/tensorflow/compiler/xla/tests/convert_test.cc +++ b/tensorflow/compiler/xla/tests/convert_test.cc @@ -249,10 +249,10 @@ XLA_TEST_F(ConvertTest, ConvertR1F32ToR1S64) { -1.99f, -2.0f, -2.01f, - 0x1.FFFFFEp+62F, - 0x1.FFFFFCp+62F, - -0x1.FFFFFEp+62F, - -0x1.FFFFFCp+62F}; + 9223371487098961920.f, + 9223370937343148032.f, + -9223371487098961920.f, + -9223370937343148032.f}; std::unique_ptr arg_literal = Literal::CreateR1({arg}); auto arg_param = builder.Parameter(0, arg_literal->shape(), "arg_param"); std::unique_ptr arg_data = -- GitLab From 43408f89b46bbbbf76df90eb30f47ecc71af0876 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 24 May 2018 12:23:32 -0700 Subject: [PATCH 109/902] Mark queue related ops as having side effect 
PiperOrigin-RevId: 197933941 --- tensorflow/contrib/batching/BUILD | 1 + tensorflow/core/grappler/op_types.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index d65c990c87..b6dae3cc1f 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -96,6 +96,7 @@ py_test( name = "batch_ops_test", size = "small", srcs = ["python/ops/batch_ops_test.py"], + shard_count = 5, srcs_version = "PY2AND3", tags = [ "manual", diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 92581942cb..6d2adcf5b8 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -445,6 +445,10 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } } + // Queue ops modify the queue which is a side effect. + if (node.op().find("Queue") != std::string::npos) { + return false; + } return !ModifiesInputsInPlace(node); } -- GitLab From 4054ddab84775659c4e04b4b239e3ef93e36a2de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 12:34:02 -0700 Subject: [PATCH 110/902] Modify tf.image.central_crop to support batched-input. Currently central_crop works on singular images with dynamic dimensions. For large image classification models, it would be nice if central_crop can be modified to support batched input. This CL makes that change. 
PiperOrigin-RevId: 197935606 --- tensorflow/python/ops/image_ops_impl.py | 87 +++++++++++++--- tensorflow/python/ops/image_ops_test.py | 132 +++++++++++++++++------- 2 files changed, 164 insertions(+), 55 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 54e27b87df..52141ba24a 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -523,7 +523,7 @@ def transpose_image(image): @tf_export('image.central_crop') def central_crop(image, central_fraction): - """Crop the central region of the image. + """Crop the central region of the image(s). Remove the outer parts of an image but retain the central region of the image along each dimension. If we specify central_fraction = 0.5, this function @@ -536,15 +536,19 @@ def central_crop(image, central_fraction): | | where "X" is the central 50% of the image. -------- + This function works on either a single image (`image` is a 3-D Tensor), or a + batch of images (`image` is a 4-D Tensor). + Args: - image: 3-D float Tensor of shape [height, width, depth] + image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D + Tensor of shape [batch_size, height, width, depth]. central_fraction: float (0, 1], fraction of size to crop Raises: ValueError: if central_crop_fraction is not within (0, 1]. Returns: - 3-D float Tensor + 3-D / 4-D float Tensor, as per the input. """ with ops.name_scope(None, 'central_crop', [image]): image = ops.convert_to_tensor(image, name='image') @@ -553,24 +557,75 @@ def central_crop(image, central_fraction): if central_fraction == 1.0: return image - image = _Assert3DImage(image) + _AssertAtLeast3DImage(image) + rank = image.get_shape().ndims + if rank != 3 and rank != 4: + raise ValueError('`image` should either be a Tensor with rank = 3 or ' + 'rank = 4. 
Had rank = {}.'.format(rank)) + + # Helper method to return the `idx`-th dimension of `tensor`, along with + # a boolean signifying if the dimension is dynamic. + def _get_dim(tensor, idx): + static_shape = tensor.get_shape()[idx].value + if static_shape is not None: + return static_shape, False + return array_ops.shape(tensor)[idx], True + + # Get the height, width, depth (and batch size, if the image is a 4-D + # tensor). + if rank == 3: + img_h, dynamic_h = _get_dim(image, 0) + img_w, dynamic_w = _get_dim(image, 1) + img_d = image.get_shape()[2] + else: + img_bs = image.get_shape()[0] + img_h, dynamic_h = _get_dim(image, 1) + img_w, dynamic_w = _get_dim(image, 2) + img_d = image.get_shape()[3] + + # Compute the bounding boxes for the crop. The type and value of the + # bounding boxes depend on the `image` tensor's rank and whether / not the + # dimensions are statically defined. + if dynamic_h: + img_hd = math_ops.to_double(img_h) + bbox_h_start = math_ops.to_int32((img_hd - img_hd * central_fraction) / 2) + else: + img_hd = float(img_h) + bbox_h_start = int((img_hd - img_hd * central_fraction) / 2) - img_shape = array_ops.shape(image) - depth = image.get_shape()[2] - img_h = math_ops.to_double(img_shape[0]) - img_w = math_ops.to_double(img_shape[1]) - bbox_h_start = math_ops.to_int32((img_h - img_h * central_fraction) / 2) - bbox_w_start = math_ops.to_int32((img_w - img_w * central_fraction) / 2) + if dynamic_w: + img_wd = math_ops.to_double(img_w) + bbox_w_start = math_ops.to_int32((img_wd - img_wd * central_fraction) / 2) + else: + img_wd = float(img_w) + bbox_w_start = int((img_wd - img_wd * central_fraction) / 2) + + bbox_h_size = img_h - bbox_h_start * 2 + bbox_w_size = img_w - bbox_w_start * 2 - bbox_h_size = img_shape[0] - bbox_h_start * 2 - bbox_w_size = img_shape[1] - bbox_w_start * 2 + if rank == 3: + bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0]) + bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1]) + else: + bbox_begin = 
array_ops.stack([0, bbox_h_start, bbox_w_start, 0]) + bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1]) - bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0]) - bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1]) image = array_ops.slice(image, bbox_begin, bbox_size) - # The first two dimensions are dynamic and unknown. - image.set_shape([None, None, depth]) + # Reshape the `image` tensor to the desired size. + if rank == 3: + image.set_shape([ + None if dynamic_h else bbox_h_size, + None if dynamic_w else bbox_w_size, + img_d + ]) + else: + image.set_shape([ + img_bs, + None if dynamic_h else bbox_h_size, + None if dynamic_w else bbox_w_size, + img_d + ]) return image diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index c437c12c27..72c889a2e6 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1585,14 +1585,16 @@ class CentralCropTest(test_util.TensorFlowTestCase): self.assertEqual(y.get_shape().as_list(), post_shape) def testNoOp(self): - x_shape = [13, 9, 3] - x_np = np.ones(x_shape, dtype=np.float32) - with self.test_session(use_gpu=True): - x = constant_op.constant(x_np, shape=x_shape) - y = image_ops.central_crop(x, 1.0) - y_tf = y.eval() - self.assertAllEqual(y_tf, x_np) - self.assertEqual(y.op.name, x.op.name) + x_shapes = [[13, 9, 3], [5, 13, 9, 3]] + for x_shape in x_shapes: + x_np = np.ones(x_shape, dtype=np.float32) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x = constant_op.constant(x_np, shape=x_shape) + y = image_ops.central_crop(x, 1.0) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + self.assertEqual(y.op.name, x.op.name) def testCropping(self): x_shape = [4, 8, 1] @@ -1601,6 +1603,23 @@ class CentralCropTest(test_util.TensorFlowTestCase): [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8]], dtype=np.int32).reshape(x_shape) y_np = np.array([[3, 4, 5, 6], [3, 4, 5, 6]]).reshape([2, 4, 
1]) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x = constant_op.constant(x_np, shape=x_shape) + y = image_ops.central_crop(x, 0.5) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + self.assertAllEqual(y_tf.shape, y_np.shape) + + x_shape = [2, 4, 8, 1] + x_np = np.array( + [[1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8], + [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8], + [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1], + [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1]], + dtype=np.int32).reshape(x_shape) + y_np = np.array([[[3, 4, 5, 6], [3, 4, 5, 6]], + [[6, 5, 4, 3], [6, 5, 4, 3]]]).reshape([2, 2, 4, 1]) with self.test_session(use_gpu=True): x = constant_op.constant(x_np, shape=x_shape) y = image_ops.central_crop(x, 0.5) @@ -1610,52 +1629,87 @@ class CentralCropTest(test_util.TensorFlowTestCase): def testCropping2(self): # Test case for 10315 - x_shape = [240, 320, 3] - x_np = np.zeros(x_shape, dtype=np.int32) - y_np = np.zeros([80, 106, 3], dtype=np.int32) - with self.test_session(use_gpu=True): - x = array_ops.placeholder(shape=x_shape, dtype=dtypes.int32) - y = image_ops.central_crop(x, 0.33) - y_tf = y.eval(feed_dict={x: x_np}) - self.assertAllEqual(y_tf, y_np) - self.assertAllEqual(y_tf.shape, y_np.shape) + x_shapes = [[240, 320, 3], [5, 240, 320, 3]] + expected_y_shapes = [[80, 106, 3], [5, 80, 106, 3]] + + for x_shape, y_shape in zip(x_shapes, expected_y_shapes): + x_np = np.zeros(x_shape, dtype=np.int32) + y_np = np.zeros(y_shape, dtype=np.int32) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x = array_ops.placeholder(shape=x_shape, dtype=dtypes.int32) + y = image_ops.central_crop(x, 0.33) + y_tf = y.eval(feed_dict={x: x_np}) + self.assertAllEqual(y_tf, y_np) + self.assertAllEqual(y_tf.shape, y_np.shape) def testShapeInference(self): - # Test no-op fraction=1.0 + # Test no-op fraction=1.0, with 3-D tensors. 
self._assertShapeInference([50, 60, 3], 1.0, [50, 60, 3]) self._assertShapeInference([None, 60, 3], 1.0, [None, 60, 3]) self._assertShapeInference([50, None, 3], 1.0, [50, None, 3]) self._assertShapeInference([None, None, 3], 1.0, [None, None, 3]) self._assertShapeInference([50, 60, None], 1.0, [50, 60, None]) self._assertShapeInference([None, None, None], 1.0, [None, None, None]) - self._assertShapeInference(None, 1.0, None) - # TODO(toddw): Currently central_crop() doesn't infer the result shape even - # when it's possible. If we change it to do so, we can test as follows: - # - # self._assertShapeInference([50, 60, 3], 0.5, [25, 30, 3]) - # self._assertShapeInference([None, 60, 3], 0.5, [None, 30, 3]) - # self._assertShapeInference([50, None, 3], 0.5, [25, None, 3]) - # self._assertShapeInference([None, None, 3], 0.5, [None, None, 3]) - # self._assertShapeInference([50, 60, None], 0.5, [25, 30, None]) - # self._assertShapeInference([None, None, None], 0.5, [None, None, None]) - # self._assertShapeInference(None, 0.5, None) - def testError(self): + # Test fraction=0.5, with 3-D tensors. + self._assertShapeInference([50, 60, 3], 0.5, [26, 30, 3]) + self._assertShapeInference([None, 60, 3], 0.5, [None, 30, 3]) + self._assertShapeInference([50, None, 3], 0.5, [26, None, 3]) + self._assertShapeInference([None, None, 3], 0.5, [None, None, 3]) + self._assertShapeInference([50, 60, None], 0.5, [26, 30, None]) + self._assertShapeInference([None, None, None], 0.5, [None, None, None]) + + # Test no-op fraction=1.0, with 4-D tensors.
+ self._assertShapeInference([5, 50, 60, 3], 1.0, [5, 50, 60, 3]) + self._assertShapeInference([5, None, 60, 3], 1.0, [5, None, 60, 3]) + self._assertShapeInference([5, 50, None, 3], 1.0, [5, 50, None, 3]) + self._assertShapeInference([5, None, None, 3], 1.0, [5, None, None, 3]) + self._assertShapeInference([5, 50, 60, None], 1.0, [5, 50, 60, None]) + self._assertShapeInference([5, None, None, None], 1.0, + [5, None, None, None]) + self._assertShapeInference([None, None, None, None], 1.0, + [None, None, None, None]) + + # Test fraction=0.5, with 4-D tensors. + self._assertShapeInference([5, 50, 60, 3], 0.5, [5, 26, 30, 3]) + self._assertShapeInference([5, None, 60, 3], 0.5, [5, None, 30, 3]) + self._assertShapeInference([5, 50, None, 3], 0.5, [5, 26, None, 3]) + self._assertShapeInference([5, None, None, 3], 0.5, [5, None, None, 3]) + self._assertShapeInference([5, 50, 60, None], 0.5, [5, 26, 30, None]) + self._assertShapeInference([5, None, None, None], 0.5, + [5, None, None, None]) + self._assertShapeInference([None, None, None, None], 0.5, + [None, None, None, None]) + + def testErrorOnInvalidCentralCropFractionValues(self): x_shape = [13, 9, 3] x_np = np.ones(x_shape, dtype=np.float32) - with self.test_session(use_gpu=True): - x = constant_op.constant(x_np, shape=x_shape) - with self.assertRaises(ValueError): - _ = image_ops.central_crop(x, 0.0) - with self.assertRaises(ValueError): - _ = image_ops.central_crop(x, 1.01) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x = constant_op.constant(x_np, shape=x_shape) + with self.assertRaises(ValueError): + _ = image_ops.central_crop(x, 0.0) + with self.assertRaises(ValueError): + _ = image_ops.central_crop(x, 1.01) + + def testErrorOnInvalidShapes(self): + x_shapes = [None, [], [3], [3, 9], [3, 9, 3, 9, 3]] + for x_shape in x_shapes: + x_np = np.ones(x_shape, dtype=np.float32) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + x =
constant_op.constant(x_np, shape=x_shape) + with self.assertRaises(ValueError): + _ = image_ops.central_crop(x, 0.5) def testNameScope(self): x_shape = [13, 9, 3] x_np = np.ones(x_shape, dtype=np.float32) - with self.test_session(use_gpu=True): - y = image_ops.central_crop(x_np, 1.0) - self.assertTrue(y.op.name.startswith("central_crop")) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + y = image_ops.central_crop(x_np, 1.0) + self.assertTrue(y.op.name.startswith("central_crop")) class PadToBoundingBoxTest(test_util.TensorFlowTestCase): -- GitLab From 520cf4d0f62944eb9817446e7d2740fb0e8cad96 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 13:00:07 -0700 Subject: [PATCH 111/902] Only wait for one of the input tensors to be ready. The waiting was implemented to avoid reading stale models as much as possible. However with this dependency, each input column creates a Send/Recv to PS0 which slows down training significantly. Colocate Quantile and Stats accumulators for the same handler. 
PiperOrigin-RevId: 197939327 --- .../lib/learner/batch/ordinal_split_handler.py | 13 ++++++++----- .../python/training/functions/gbdt_batch.py | 5 ++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index ecb2f60187..8225318b70 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -134,11 +134,14 @@ class InequalitySplitHandler(base_split_handler.BaseSplitHandler): gradient_shape, hessian_shape, name="StatsAccumulator/{}".format(self._name)) - self._quantile_accumulator = quantile_ops.QuantileAccumulator( - init_stamp_token, - epsilon=epsilon, - num_quantiles=num_quantiles, - name="QuantileAccumulator/{}".format(self._name)) + # Allocate both stats accumulator and quantile accumulator on the same + # device so that we can build splits with fewer RPCs. + with ops.colocate_with(self._stats_accumulator.resource()): + self._quantile_accumulator = quantile_ops.QuantileAccumulator( + init_stamp_token, + epsilon=epsilon, + num_quantiles=num_quantiles, + name="QuantileAccumulator/{}".format(self._name)) class DenseSplitHandler(InequalitySplitHandler): diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index c725f32b7c..c492ef19f1 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -460,8 +460,8 @@ class GradientBoostedDecisionTreeModel(object): # Determine whether the local ensemble is stale and update it if needed. def _refresh_local_ensemble_fn(): - # Serialize the model from parameter server after reading all inputs. 
- with ops.control_dependencies(input_deps): + # Serialize the model from parameter server after reading the inputs. + with ops.control_dependencies([input_deps[0]]): (ensemble_stamp, serialized_model) = ( model_ops.tree_ensemble_serialize(self._ensemble_handle)) @@ -915,7 +915,6 @@ class GradientBoostedDecisionTreeModel(object): "DecisionTreeEnsembleResourceHandleOp", "StatsAccumulatorScalarResourceHandleOp", "StatsAccumulatorTensorResourceHandleOp", - "QuantileStreamResourceHandleOp", ] ps_strategy = _OpRoundRobinStrategy(ps_ops, ps_tasks) return device_setter.replica_device_setter( -- GitLab From 748ea4b831fd91660fc83288fe798ef3abca7d2b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 13:03:10 -0700 Subject: [PATCH 112/902] Updated documentation for tf.reduce_join. PiperOrigin-RevId: 197939808 --- .../core/api_def/base_api/api_def_ReduceJoin.pbtxt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt index ca7e0d3bee..d13866ddaa 100644 --- a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt @@ -38,7 +38,9 @@ END Computes the string join across dimensions in the given string Tensor of shape `[d_0, d_1, ..., d_n-1]`. Returns a new Tensor created by joining the input strings with the given separator (default: empty string). Negative indices are -counted backwards from the end, with `-1` being equivalent to `n - 1`. +counted backwards from the end, with `-1` being equivalent to `n - 1`. If +indices are not specified, joins across all dimensions beginning from `n - 1` +through `0`. 
For example: @@ -51,9 +53,10 @@ tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] -tf.reduce_join(a, [0, 1]) ==> ["acbd"] -tf.reduce_join(a, [1, 0]) ==> ["abcd"] -tf.reduce_join(a, []) ==> ["abcd"] +tf.reduce_join(a, [0, 1]) ==> "acbd" +tf.reduce_join(a, [1, 0]) ==> "abcd" +tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]] +tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd" ``` END } -- GitLab From 44311bc337f88547722f2c8bc8158cc1b8ae2923 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Thu, 24 May 2018 13:07:50 -0700 Subject: [PATCH 113/902] Ensure ResourceMgr::LookupOrCreate calls create fn just once This addresses a race condition where LookupOrCreate is called at the same time from two threads, and both Lookup()s fail, so the creator() function is run twice, even though only a single Create() will then succeed. The motivation is that some creator() functions have side-effects, e.g. tf.contrib.summary.create_file_writer()'s init op opens an events file. This change ensures that if two init ops for file writers with the same resource name are run in the same session.run() call, only one events file will be created. (Current behavior will often open two files; typically the second one overwrites the first but this won't happen if the filename_suffix values are different or the timestamps happen to straddle a second boundary.) 
PiperOrigin-RevId: 197940607 --- tensorflow/core/framework/resource_mgr.cc | 18 +++---- tensorflow/core/framework/resource_mgr.h | 49 +++++++++++++------ .../core/framework/resource_mgr_test.cc | 27 +++++++++- 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/framework/resource_mgr.cc b/tensorflow/core/framework/resource_mgr.cc index 78574bc0b1..21fc6c1bd5 100644 --- a/tensorflow/core/framework/resource_mgr.cc +++ b/tensorflow/core/framework/resource_mgr.cc @@ -138,16 +138,13 @@ string ResourceMgr::DebugString() const { Status ResourceMgr::DoCreate(const string& container, TypeIndex type, const string& name, ResourceBase* resource) { - { - mutex_lock l(mu_); - Container** b = &containers_[container]; - if (*b == nullptr) { - *b = new Container; - } - if ((*b)->insert({{type.hash_code(), name}, resource}).second) { - TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name())); - return Status::OK(); - } + Container** b = &containers_[container]; + if (*b == nullptr) { + *b = new Container; + } + if ((*b)->insert({{type.hash_code(), name}, resource}).second) { + TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name())); + return Status::OK(); } resource->Unref(); return errors::AlreadyExists("Resource ", container, "/", name, "/", @@ -157,7 +154,6 @@ Status ResourceMgr::DoCreate(const string& container, TypeIndex type, Status ResourceMgr::DoLookup(const string& container, TypeIndex type, const string& name, ResourceBase** resource) const { - tf_shared_lock l(mu_); const Container* b = gtl::FindPtrOrNull(containers_, container); if (b == nullptr) { return errors::NotFound("Container ", container, diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index 621da5b838..11160127e4 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -174,10 +174,19 @@ class ResourceMgr { mutable mutex mu_; std::unordered_map containers_ 
GUARDED_BY(mu_); + template + Status LookupInternal(const string& container, const string& name, + T** resource) const + SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; + Status DoCreate(const string& container, TypeIndex type, const string& name, - ResourceBase* resource) TF_MUST_USE_RESULT; + ResourceBase* resource) + EXCLUSIVE_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; + Status DoLookup(const string& container, TypeIndex type, const string& name, - ResourceBase** resource) const TF_MUST_USE_RESULT; + ResourceBase** resource) const + SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; + Status DoDelete(const string& container, uint64 type_hash_code, const string& resource_name, const string& type_name) TF_MUST_USE_RESULT; @@ -362,6 +371,7 @@ Status ResourceMgr::Create(const string& container, const string& name, T* resource) { CheckDeriveFromResourceBase(); CHECK(resource != nullptr); + mutex_lock l(mu_); return DoCreate(container, MakeTypeIndex(), name, resource); } @@ -369,6 +379,13 @@ template Status ResourceMgr::Lookup(const string& container, const string& name, T** resource) const { CheckDeriveFromResourceBase(); + tf_shared_lock l(mu_); + return LookupInternal(container, name, resource); +} + +template +Status ResourceMgr::LookupInternal(const string& container, const string& name, + T** resource) const { ResourceBase* found = nullptr; Status s = DoLookup(container, MakeTypeIndex(), name, &found); if (s.ok()) { @@ -383,21 +400,23 @@ template Status ResourceMgr::LookupOrCreate(const string& container, const string& name, T** resource, std::function creator) { - Status s; + CheckDeriveFromResourceBase(); *resource = nullptr; - while (*resource == nullptr) { - s = Lookup(container, name, resource); - if (s.ok()) break; - s = creator(resource); - if (!s.ok()) break; - s = Create(container, name, *resource); - if (s.ok()) { - (*resource)->Ref(); - break; - } - // Rare event. Concurrent racy creation. Redo the lookup. 
- *resource = nullptr; + Status s; + { + tf_shared_lock l(mu_); + s = LookupInternal(container, name, resource); + if (s.ok()) return s; + } + mutex_lock l(mu_); + s = LookupInternal(container, name, resource); + if (s.ok()) return s; + TF_RETURN_IF_ERROR(creator(resource)); + s = DoCreate(container, MakeTypeIndex(), name, *resource); + if (!s.ok()) { + return errors::Internal("LookupOrCreate failed unexpectedly"); } + (*resource)->Ref(); return s; } diff --git a/tensorflow/core/framework/resource_mgr_test.cc b/tensorflow/core/framework/resource_mgr_test.cc index 798220d4c3..7c7f0af0ce 100644 --- a/tensorflow/core/framework/resource_mgr_test.cc +++ b/tensorflow/core/framework/resource_mgr_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/test.h" @@ -124,7 +125,7 @@ TEST(ResourceMgrTest, Basic) { TF_CHECK_OK(rm.Cleanup("bar")); } -TEST(ResourceMgr, CreateOrLookup) { +TEST(ResourceMgrTest, CreateOrLookup) { ResourceMgr rm; EXPECT_EQ("R/cat", LookupOrCreate(&rm, "foo", "bar", "cat")); EXPECT_EQ("R/cat", LookupOrCreate(&rm, "foo", "bar", "dog")); @@ -136,6 +137,30 @@ TEST(ResourceMgr, CreateOrLookup) { HasError(FindErr(rm, "foo", "bar"), "Not found: Resource foo/bar"); } +TEST(ResourceMgrTest, CreateOrLookupRaceCondition) { + ResourceMgr rm; + std::atomic atomic_int(0); + { + thread::ThreadPool threads(Env::Default(), "racing_creates", 2); + for (int i = 0; i < 2; i++) { + threads.Schedule([&rm, &atomic_int] { + Resource* r; + TF_CHECK_OK(rm.LookupOrCreate( + "container", "resource-name", &r, [&atomic_int](Resource** ret) { + // Maximize chance of encountering race condition if one exists. 
+ Env::Default()->SleepForMicroseconds(1 * 1000 * 1000); + atomic_int += 1; + *ret = new Resource("label"); + return Status::OK(); + })); + r->Unref(); + }); + } + } + // Resource creator function should always run exactly once. + EXPECT_EQ(1, atomic_int); +} + Status ComputePolicy(const string& attr_container, const string& attr_shared_name, bool use_node_name_as_default, string* result) { -- GitLab From 2cb86382ebc8432b25469f813c9156507984043f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 13:13:42 -0700 Subject: [PATCH 114/902] Extracts the Simplify Pack optimization into its own method. PiperOrigin-RevId: 197941474 --- .../grappler/optimizers/constant_folding.cc | 65 ++++++++++--------- .../grappler/optimizers/constant_folding.h | 5 +- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 62e1ab0892..b8b8088b40 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1955,35 +1955,7 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (IsPack(*node) && NumNonControlInputs(*node) == 1 && - !OptimizedNodeExists(*node, "_const_axis")) { - // Create constant axis node. - Tensor axis_t(DT_INT32, TensorShape({})); - NodeDef* axis_node = optimized_graph->add_node(); - axis_node->set_name(OptimizedNodeName(*node, "_const_axis")); - const int axis = node->attr().at("axis").i(); - if (!SetTensorValue(DT_INT32, axis, &axis_t).ok() || - !CreateNodeDef(axis_node->name(), TensorValue(&axis_t), axis_node) - .ok()) { - return Status::OK(); - } - // Add a control dependency to make sure axis_node is in the right frame. 
- const string ctrl_dep = ConstantFolding::AddControlDependency( - node->input(0), graph_, node_map_.get()); - axis_node->add_input(ctrl_dep); - axis_node->set_device(node->device()); - node->set_op("ExpandDims"); - if (node->attr().count("axis") != 0) { - node->mutable_attr()->erase("axis"); - } - if (node->attr().count("N") != 0) { - node->mutable_attr()->erase("N"); - } - (*node->mutable_attr())["Tdim"].set_type(DT_INT32); - node->add_input(axis_node->name()); - if (node->input_size() > 2) { - node->mutable_input()->SwapElements(1, node->input_size() - 1); - } + if (SimplifyPack(optimized_graph, node)) { graph_modified_ = true; return Status::OK(); } @@ -2052,6 +2024,41 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::SimplifyPack(GraphDef* optimized_graph, NodeDef* node) { + if (IsPack(*node) && NumNonControlInputs(*node) == 1 && + !OptimizedNodeExists(*node, "_const_axis")) { + // Create constant axis node. + Tensor axis_t(DT_INT32, TensorShape({})); + NodeDef* axis_node = optimized_graph->add_node(); + axis_node->set_name(OptimizedNodeName(*node, "_const_axis")); + const int axis = node->attr().at("axis").i(); + if (!SetTensorValue(DT_INT32, axis, &axis_t).ok() || + !CreateNodeDef(axis_node->name(), TensorValue(&axis_t), axis_node) + .ok()) { + return false; + } + // Add a control dependency to make sure axis_node is in the right frame. 
+ const string ctrl_dep = ConstantFolding::AddControlDependency( + node->input(0), graph_, node_map_.get()); + axis_node->add_input(ctrl_dep); + axis_node->set_device(node->device()); + node->set_op("ExpandDims"); + if (node->attr().count("axis") != 0) { + node->mutable_attr()->erase("axis"); + } + if (node->attr().count("N") != 0) { + node->mutable_attr()->erase("N"); + } + (*node->mutable_attr())["Tdim"].set_type(DT_INT32); + node->add_input(axis_node->name()); + if (node->input_size() > 2) { + node->mutable_input()->SwapElements(1, node->input_size() - 1); + return true; + } + } + return false; +} + bool ConstantFolding::MoveConstantsPastEnter(GraphDef* optimized_graph, NodeDef* node) { if (IsEnter(*node) && node->input_size() > 0) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 9fd4c9c789..be78004f6d 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -164,9 +164,12 @@ class ConstantFolding : public GraphOptimizer { // +------+ bool SimplifySwitch(GraphDef* optimized_graph, NodeDef* node); - // Move constants past Enter node if applicable. + // Moves constants past Enter node if applicable. bool MoveConstantsPastEnter(GraphDef* optimized_graph, NodeDef* node); + // Simplifies Pack operation if applicable. + bool SimplifyPack(GraphDef* optimized_graph, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 2eb7289b97d4daac044b6f0a005cc8f9fc48ac0e Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 24 May 2018 13:15:37 -0700 Subject: [PATCH 115/902] Removing outdated links. 
PiperOrigin-RevId: 197941740 --- tensorflow/tools/dist_test/build_server.sh | 2 +- tensorflow/tools/dist_test/local_test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/dist_test/build_server.sh b/tensorflow/tools/dist_test/build_server.sh index 225c034741..345217d733 100755 --- a/tensorflow/tools/dist_test/build_server.sh +++ b/tensorflow/tools/dist_test/build_server.sh @@ -23,7 +23,7 @@ # E.g.: tensorflow/tf_grpc_test_server:0.11.0rc1 # # whl_file_location: URL from which the TensorFlow whl file will be downloaded. -# E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl +# E.g.: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl # E.g.: /path/to/folder/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl # # The optional flag --test lets the script to use the Dockerfile for the diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index caae7fd530..06c2b997cb 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -35,7 +35,7 @@ # # Arguments: # whl_file_location: URL from which the TensorFlow whl file will be acquired. -# E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl +# E.g.: https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl # E.g.: /path/to/folder/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl # # --leave_container_running: Do not stop the docker-in-docker container after -- GitLab From bb1924cf6b1f3eca2e859157483b42698f41f975 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 13:18:32 -0700 Subject: [PATCH 116/902] add maxpoolgrad transposer for layout optimizer. PiperOrigin-RevId: 197942180 --- tensorflow/core/grappler/op_types.cc | 2 ++ tensorflow/core/grappler/op_types.h | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 6d2adcf5b8..fe0fad9148 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -203,6 +203,8 @@ bool IsMax(const NodeDef& node) { return node.op() == "Max"; } bool IsMaximum(const NodeDef& node) { return node.op() == "Maximum"; } +bool IsMaxPoolGrad(const NodeDef& node) { return node.op() == "MaxPoolGrad"; } + bool IsMean(const NodeDef& node) { return node.op() == "Mean"; } bool IsMerge(const NodeDef& node) { diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 9d91ba1ba5..915da21fad 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -78,6 +78,7 @@ bool IsLogicalNot(const NodeDef& node); bool IsLogicalOr(const NodeDef& node); bool IsMax(const NodeDef& node); bool IsMaximum(const NodeDef& node); +bool IsMaxPoolGrad(const NodeDef& node); bool IsMean(const NodeDef& node); bool IsMerge(const NodeDef& node); bool IsMin(const NodeDef& node); -- GitLab From 5a0c3d796f0d9e6c459c5b04779cc1280dc30710 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 13:19:47 -0700 Subject: [PATCH 117/902] Automated g4 rollback of changelist 197868028 PiperOrigin-RevId: 197942379 --- .../optimizers/arithmetic_optimizer.cc | 43 ++++++++ .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 103 ++++++++++++++++++ .../core/kernels/cwise_op_not_equal_to_1.cc | 23 ++++ 4 files changed, 170 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index e7f70c6657..060e4200af 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1380,6 +1380,47 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { } }; +class RemoveLogicalNotStage : public ArithmeticOptimizerStage { + public: + explicit RemoveLogicalNotStage(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveLogicalNot", ctx, ctx_ext) {} + ~RemoveLogicalNotStage() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsLogicalNot(*node) && !IsInPreserveSet(*node); + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { + const string node_name = node->name(); + NodeDef* input; + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &input)); + if (IsInPreserveSet(*input) || + NumNonControlOutputs(*input, *ctx().node_map) > 1) { + return Status::OK(); + } + string new_op; + if (IsEqual(*input)) { + new_op = "NotEqual"; + } else if (IsNotEqual(*input)) { + new_op = "Equal"; + } else if (IsLess(*input)) { + new_op = "GreaterEqual"; + } else if (IsLessEqual(*input)) { + new_op = "Greater"; + } else if (IsGreater(*input)) { + new_op = "LessEqual"; + } else if (IsGreaterEqual(*input)) { + new_op = "Less"; + } + if (!new_op.empty()) { + input->set_op(new_op); + *simplified_node_name = input->name(); + } + 
return Status::OK(); + } +}; + // This optimization hoists the common prefix of unary ops of the inputs to // concat out of the concat, for example: // Concat([Exp(Sin(x)), Exp(Sin(y)), Exp(Sin(z))]) @@ -2429,6 +2470,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.remove_negation) pipeline.AddStage(ctx, ctx_ext); + if (options_.remove_logical_not) + pipeline.AddStage(ctx, ctx_ext); if (options_.hoist_cwise_unary_chains) pipeline.AddStage(ctx, ctx_ext); if (options_.convert_sqrt_div_to_rsqrt_mul) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 1f6f563687..8e1b3eda3b 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -68,6 +68,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool hoist_cwise_unary_chains = false; bool convert_sqrt_div_to_rsqrt_mul = false; bool remove_idempotent = true; + bool remove_logical_not = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 99f93e6eec..64fdc8a83b 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -177,6 +177,11 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.remove_idempotent = true; } + + void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_logical_not = true; + } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -2737,5 +2742,103 @@ TEST_F(ArithmeticOptimizerTest, RemoveIdempotent) { } } +TEST_F(ArithmeticOptimizerTest, RemoveLogicalNot) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = ops::Const(s.WithOpName("a"), 3.14f, {32}); + Output b = ops::Const(s.WithOpName("b"), -3.14f, {32}); + Output eq = ops::Equal(s.WithOpName("eq"), a, b); + Output neq = ops::NotEqual(s.WithOpName("neq"), a, b); + Output lt = ops::Less(s.WithOpName("lt"), a, b); + Output le = ops::LessEqual(s.WithOpName("le"), a, b); + Output gt = ops::Greater(s.WithOpName("gt"), a, b); + Output ge = ops::GreaterEqual(s.WithOpName("ge"), a, b); + // not_eq is reserved + Output not_eq1 = ops::LogicalNot(s.WithOpName("not_eq1"), eq); + Output not_neq = ops::LogicalNot(s.WithOpName("not_neq"), neq); + Output not_lt = ops::LogicalNot(s.WithOpName("not_lt"), lt); + Output not_le = ops::LogicalNot(s.WithOpName("not_le"), le); + Output not_gt = ops::LogicalNot(s.WithOpName("not_gt"), gt); + Output not_ge = ops::LogicalNot(s.WithOpName("not_ge"), ge); + Output id_not_eq = ops::Identity(s.WithOpName("id_not_eq"), not_eq1); + Output id_not_neq = ops::Identity(s.WithOpName("id_not_neq"), not_neq); + Output id_not_lt = ops::Identity(s.WithOpName("id_not_lt"), not_lt); + Output id_not_le = ops::Identity(s.WithOpName("id_not_le"), 
not_le); + Output id_not_gt = ops::Identity(s.WithOpName("id_not_gt"), not_gt); + Output id_not_ge = ops::Identity(s.WithOpName("id_not_ge"), not_ge); + + GrapplerItem item; + item.fetch = {"id_not_eq", "id_not_neq", "id_not_lt", + "id_not_le", "id_not_gt", "id_not_ge"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyRemoveLogicalNot(&optimizer); + OptimizeTwice(&optimizer, &item, &output); + LOG(INFO) << output.DebugString(); + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "id_not_eq") { + EXPECT_EQ("eq", node.input(0)); + ++found; + } + if (node.name() == "id_not_neq") { + EXPECT_EQ("neq", node.input(0)); + ++found; + } + if (node.name() == "id_not_lt") { + EXPECT_EQ("lt", node.input(0)); + ++found; + } + if (node.name() == "id_not_le") { + EXPECT_EQ("le", node.input(0)); + ++found; + } + if (node.name() == "id_not_gt") { + EXPECT_EQ("gt", node.input(0)); + ++found; + } + if (node.name() == "id_not_ge") { + EXPECT_EQ("ge", node.input(0)); + ++found; + } + + if (node.name() == "eq") { + EXPECT_EQ("NotEqual", node.op()); + ++found; + } + if (node.name() == "neq") { + EXPECT_EQ("Equal", node.op()); + ++found; + } + if (node.name() == "lt") { + EXPECT_EQ("GreaterEqual", node.op()); + ++found; + } + if (node.name() == "le") { + EXPECT_EQ("Greater", node.op()); + ++found; + } + if (node.name() == "gt") { + EXPECT_EQ("LessEqual", node.op()); + ++found; + } + if (node.name() == "ge") { + EXPECT_EQ("Less", node.op()); + ++found; + } + } + EXPECT_EQ(12, found); + + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(tensors.size(), tensors_expected.size()); + EXPECT_EQ(tensors.size(), item.fetch.size()); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } +} + } // namespace grappler } // namespace tensorflow diff --git 
a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc index 02cd298745..935619711c 100644 --- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc @@ -21,5 +21,28 @@ REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half, #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half, double, uint8); +// A special GPU kernel for int32. +// TODO(b/25387198): Also enable int32 in device memory. This kernel +// registration requires all int32 inputs and outputs to be in host memory. +REGISTER_KERNEL_BUILDER(Name("NotEqual") + .Device(DEVICE_GPU) + .HostMemory("x") + .HostMemory("y") + .HostMemory("z") + .TypeConstraint("T"), + BinaryOp>); #endif + +#ifdef TENSORFLOW_USE_SYCL +REGISTER2(BinaryOp, SYCL, "NotEqual", functor::not_equal_to, float, double); + +REGISTER_KERNEL_BUILDER(Name("NotEqual") + .Device(DEVICE_SYCL) + .HostMemory("x") + .HostMemory("y") + .HostMemory("z") + .TypeConstraint("T"), + BinaryOp>); +#endif // TENSORFLOW_USE_SYCL + } // namespace tensorflow -- GitLab From 51645f15b3854447c887abf0e92d0465d79ea92c Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 24 May 2018 13:30:15 -0700 Subject: [PATCH 118/902] Fix bugs with the code blocks in defun's docstring. 
PiperOrigin-RevId: 197943921 --- tensorflow/python/eager/function.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 120b298171..b46e0612c3 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -777,7 +777,7 @@ def defun(func=None, compiled=False): def h(): return f(x, y) - assert h().numpy() == f(x, y) + assert (h().numpy() == f(x, y).numpy()).all() # `defun` automatically lifts variables out of the graphs it creates, # allowing you to compile the `call` methods of `tf.keras.layers.Layer` and @@ -785,6 +785,7 @@ def defun(func=None, compiled=False): class MyModel(tf.keras.Model): def __init__(self, keep_probability=0.2): + super(MyModel, self).__init__() self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu) self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax) self.keep_probability = keep_probability @@ -804,7 +805,7 @@ def defun(func=None, compiled=False): # `defun`-compiled functions are differentiable. 
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) with tf.GradientTape() as tape: - outputs = model(inputs) + outputs = model(x) gradient = tape.gradient(outputs, model.trainable_variables) optimizer.apply_gradients((grad, var) for grad, var in zip(gradient, model.trainable_variables)) @@ -840,6 +841,8 @@ def defun(func=None, compiled=False): import tensorflow as tf import numpy as np + tf.enable_eager_execution() + matrix = tf.eye(5) # `matrix` is assumed to be a Tensor def add_noise(): @@ -862,6 +865,8 @@ def defun(func=None, compiled=False): ```python import tensorflow as tf + tf.enable_eager_execution() + @tf.contrib.eager.defun def lossy_matmul(W, x, training=True): outputs = tf.matmul(W, x) @@ -869,6 +874,9 @@ def defun(func=None, compiled=False): outputs = tf.nn.dropout(outputs, keep_probability=0.2) return outputs + W = tf.random_normal((3, 5)) + x = tf.random_normal((5, 1)) + # Executes a graph that applies dropout. lossy_outputs = lossy_matmul(W, x, training=True) @@ -919,14 +927,14 @@ def defun(func=None, compiled=False): # `fn` is a Python function, so x is created, initialized, and destroyed upon # every invocation - assert(fn().numpy() == fn().numpy() == 1.0) + assert fn().numpy() == fn().numpy() == 1.0 compiled = tf.contrib.eager.defun(fn) # Compiling `fn` with `defun` hoists all variables outside of the generated # graph, so initialization happens exactly once. 
- assert(compiled().numpy() == 1.0) - assert(compiled().numpy() == 2.0) + assert compiled().numpy() == 1.0 + assert compiled().numpy() == 2.0 ``` Finally, because each input signature is bound to a unique graph, if your -- GitLab From d2090672fe8305289156460c43f7fcc1a5dd5422 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 24 May 2018 14:02:30 -0700 Subject: [PATCH 119/902] tfdbg: fix issue where total source file size exceeds gRPC message size limit * Source file content is now sent one by one, making it less likely that individual messages will have sizes above the 4-MB gRPC message size limit. * In case the message for a single source file exceeds the limit, the client handles it gracefully by skipping the sending and print a warning message. Fixes: https://github.com/tensorflow/tensorboard/issues/1118 PiperOrigin-RevId: 197949416 --- tensorflow/python/debug/BUILD | 1 + .../debug/lib/grpc_debug_test_server.py | 13 +++--- tensorflow/python/debug/lib/source_remote.py | 23 ++++++++-- .../python/debug/lib/source_remote_test.py | 46 +++++++++++++++++++ 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 16ae74a19f..09062abd74 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -572,6 +572,7 @@ py_test( ":source_utils", "//tensorflow/core:protos_all_py", "//tensorflow/python:client", + "//tensorflow/python:client_testlib", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:math_ops", diff --git a/tensorflow/python/debug/lib/grpc_debug_test_server.py b/tensorflow/python/debug/lib/grpc_debug_test_server.py index 9170046948..a7be20948d 100644 --- a/tensorflow/python/debug/lib/grpc_debug_test_server.py +++ b/tensorflow/python/debug/lib/grpc_debug_test_server.py @@ -245,7 +245,7 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer): self._origin_id_to_strings = [] self._graph_tracebacks 
= [] self._graph_versions = [] - self._source_files = None + self._source_files = [] def _initialize_toggle_watch_state(self, toggle_watches): self._toggle_watches = toggle_watches @@ -274,7 +274,7 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer): self._origin_id_to_strings = [] self._graph_tracebacks = [] self._graph_versions = [] - self._source_files = None + self._source_files = [] def SendTracebacks(self, request, context): self._call_types.append(request.call_type) @@ -286,7 +286,7 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer): return debug_service_pb2.EventReply() def SendSourceFiles(self, request, context): - self._source_files = request + self._source_files.append(request) return debug_service_pb2.EventReply() def query_op_traceback(self, op_name): @@ -351,9 +351,10 @@ class EventListenerTestServicer(grpc_debug_server.EventListenerBaseServicer): if not self._source_files: raise ValueError( "This debug server has not received any source file contents yet.") - for source_file_proto in self._source_files.source_files: - if source_file_proto.file_path == file_path: - return source_file_proto.lines[lineno - 1] + for source_files in self._source_files: + for source_file_proto in source_files.source_files: + if source_file_proto.file_path == file_path: + return source_file_proto.lines[lineno - 1] raise ValueError( "Source file at path %s has not been received by the debug server", file_path) diff --git a/tensorflow/python/debug/lib/source_remote.py b/tensorflow/python/debug/lib/source_remote.py index 4b6b2b995e..4afae41bc9 100644 --- a/tensorflow/python/debug/lib/source_remote.py +++ b/tensorflow/python/debug/lib/source_remote.py @@ -28,6 +28,7 @@ from tensorflow.python.debug.lib import common from tensorflow.python.debug.lib import debug_service_pb2_grpc from tensorflow.python.debug.lib import source_utils from tensorflow.python.platform import gfile +from tensorflow.python.platform import 
tf_logging from tensorflow.python.profiler import tfprof_logger @@ -95,6 +96,11 @@ def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string): return non_tf_files +def grpc_message_length_bytes(): + """Maximum gRPC message length in bytes.""" + return 4 * 1024 * 1024 + + def _send_call_tracebacks(destinations, origin_stack, is_eager_execution=False, @@ -155,17 +161,28 @@ def _send_call_tracebacks(destinations, source_file_paths.update(_source_file_paths_outside_tensorflow_py_library( [call_traceback.origin_stack], call_traceback.origin_id_to_string)) - debugged_source_files = debug_pb2.DebuggedSourceFiles() + debugged_source_files = [] for file_path in source_file_paths: + source_files = debug_pb2.DebuggedSourceFiles() _load_debugged_source_file( - file_path, debugged_source_files.source_files.add()) + file_path, source_files.source_files.add()) + debugged_source_files.append(source_files) for destination in destinations: channel = grpc.insecure_channel(destination) stub = debug_service_pb2_grpc.EventListenerStub(channel) stub.SendTracebacks(call_traceback) if send_source: - stub.SendSourceFiles(debugged_source_files) + for path, source_files in zip( + source_file_paths, debugged_source_files): + if source_files.ByteSize() < grpc_message_length_bytes(): + stub.SendSourceFiles(source_files) + else: + tf_logging.warn( + "The content of the source file at %s is not sent to " + "gRPC debug server %s, because the message size exceeds " + "gRPC message length limit (%d bytes)." 
% ( + path, destination, grpc_message_length_bytes())) def send_graph_tracebacks(destinations, diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py index 27bafa45e1..29add425e9 100644 --- a/tensorflow/python/debug/lib/source_remote_test.py +++ b/tensorflow/python/debug/lib/source_remote_test.py @@ -33,6 +33,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops # pylint: disable=unused-import from tensorflow.python.ops import variables from tensorflow.python.platform import googletest +from tensorflow.python.platform import test from tensorflow.python.util import tf_inspect @@ -155,6 +156,51 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): self.assertEqual(["dummy_run_key"], server.query_call_keys()) self.assertEqual([sess.graph.version], server.query_graph_versions()) + def testSourceFileSizeExceedsGrpcMessageLengthLimit(self): + """In case source file size exceeds the grpc message length limit. + + it ought not to have been sent to the server. + """ + this_func_name = "testSourceFileSizeExceedsGrpcMessageLengthLimit" + + # Patch the method to simulate a very small message length limit. + with test.mock.patch.object( + source_remote, "grpc_message_length_bytes", return_value=2): + with session.Session() as sess: + a = variables.Variable(21.0, name="two/a") + a_lineno = line_number_above() + b = variables.Variable(2.0, name="two/b") + b_lineno = line_number_above() + x = math_ops.add(a, b, name="two/x") + x_lineno = line_number_above() + + send_traceback = traceback.extract_stack() + send_lineno = line_number_above() + source_remote.send_graph_tracebacks( + [self._server_address, self._server_address_2], + "dummy_run_key", send_traceback, sess.graph) + + servers = [self._server, self._server_2] + for server in servers: + # Even though the source file content is not sent, the traceback + # should have been sent. 
+ tb = server.query_op_traceback("two/a") + self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb) + tb = server.query_op_traceback("two/b") + self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb) + tb = server.query_op_traceback("two/x") + self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb) + + self.assertIn( + (self._curr_file_path, send_lineno, this_func_name), + server.query_origin_stack()[-1]) + + tf_trace_file_path = ( + self._findFirstTraceInsideTensorFlowPyLibrary(x.op)) + # Verify that the source content is not sent to the server. + with self.assertRaises(ValueError): + self._server.query_source_file_line(tf_trace_file_path, 0) + def testSendEagerTracebacksToSingleDebugServer(self): this_func_name = "testSendEagerTracebacksToSingleDebugServer" send_traceback = traceback.extract_stack() -- GitLab From d4115dc837e1fb0098616415e50b1b1691fe89eb Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Thu, 24 May 2018 14:03:41 -0700 Subject: [PATCH 120/902] [XLA] Convert infeed call to take a LiteralSlice. 
PiperOrigin-RevId: 197949637 --- tensorflow/compiler/xla/client/client.cc | 4 ++-- tensorflow/compiler/xla/client/client.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index c9d275a77b..3d596a6e65 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -64,7 +64,7 @@ StatusOr> Client::Transfer( } StatusOr> Client::TransferToServer( - const Literal& literal, const DeviceHandle* device_handle) { + const LiteralSlice& literal, const DeviceHandle* device_handle) { TransferToServerRequest request; *request.mutable_literal() = literal.ToProto(); if (device_handle) { @@ -91,7 +91,7 @@ StatusOr> Client::TransferToServer( return MakeUnique(stub_, response.data()); } -Status Client::TransferToInfeed(const Literal& literal, int64 replica_id, +Status Client::TransferToInfeed(const LiteralSlice& literal, int64 replica_id, const DeviceHandle* device_handle) { TransferToInfeedRequest request; *request.mutable_literal() = literal.ToProto(); diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index d57e2536d0..cda8a71f71 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -107,14 +107,14 @@ class Client { // device (and its replicas if replication is enabled). Otherwise, data is // transferred to the default device (and its replicas). StatusOr> TransferToServer( - const Literal& literal, const DeviceHandle* device_handle = nullptr); + const LiteralSlice& literal, const DeviceHandle* device_handle = nullptr); // Transfer the given literal to the Infeed interface of the device. // // device_handle and replica_id together specify a particular device; a device // assigned for the given replica_id among the replicas that the given device // handle belongs to. 
- Status TransferToInfeed(const Literal& literal, int64 replica_id = 0, + Status TransferToInfeed(const LiteralSlice& literal, int64 replica_id = 0, const DeviceHandle* device_handle = nullptr); // Transfers from the Outfeed of the device. -- GitLab From 30206c24067da6daa2bae031b23abe0449bd7c65 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 24 May 2018 14:20:39 -0700 Subject: [PATCH 121/902] [TF:XLA] Avoid buffer copy when copying a Tensor onto an XLA device. PiperOrigin-RevId: 197952565 --- tensorflow/compiler/jit/xla_device_context.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index c7648347a8..c718125a38 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -58,8 +58,11 @@ XlaTransferManager::XlaTransferManager( Status XlaTransferManager::TransferLiteralToDevice( const Tensor& host_tensor, Tensor* device_tensor) const { - xla::Literal literal; - TF_RETURN_IF_ERROR(HostTensorToLiteral(host_tensor, &literal)); + xla::Shape xla_shape; + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(), + host_tensor.shape(), &xla_shape)); + xla::BorrowingLiteral literal( + static_cast(DMAHelper::base(&host_tensor)), xla_shape); const xla::ShapedBuffer& shaped_buffer = XlaTensor::FromTensor(device_tensor)->shaped_buffer(); -- GitLab From b42bc6ee8448496f11e03e5c9a881d3655caf945 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 24 May 2018 14:58:15 -0700 Subject: [PATCH 122/902] Raise ValueError when calling model.summary() before it is built PiperOrigin-RevId: 197959372 --- tensorflow/python/keras/engine/network.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 6e818ec62a..6db41472b6 100644 --- a/tensorflow/python/keras/engine/network.py +++ 
b/tensorflow/python/keras/engine/network.py @@ -1428,7 +1428,15 @@ class Network(base_layer.Layer): It will be called on each line of the summary. You can set it to a custom function in order to capture the string summary. + + Raises: + ValueError: if `summary()` is called before the model is built. """ + if not self.built: + raise ValueError('This model has never been called, thus its weights ' + 'have not yet been created, so no summary can be ' + 'displayed. Build the model first ' + '(e.g. by calling it on some data).') print_layer_summary(self, line_length=line_length, positions=positions, -- GitLab From e918dabba67c3eb53e6f852d299dffc12a2da6e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 14:59:05 -0700 Subject: [PATCH 123/902] Small fix so that GDN can run on TPU PiperOrigin-RevId: 197959536 --- tensorflow/contrib/layers/python/layers/layers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 25c3b1e7ea..f708da6693 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -1890,6 +1890,7 @@ class GDN(base.Layer): def beta_initializer(shape, dtype=None, partition_info=None): del partition_info # unused + pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) return math_ops.sqrt(array_ops.ones(shape, dtype=dtype) + pedestal) def gamma_initializer(shape, dtype=None, partition_info=None): @@ -1897,6 +1898,7 @@ class GDN(base.Layer): assert len(shape) == 2 assert shape[0] == shape[1] eye = linalg_ops.eye(shape[0], dtype=dtype) + pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) return math_ops.sqrt(self._gamma_init * eye + pedestal) beta = self.add_variable( -- GitLab From 04e94b2fb0ffebcd4cb48727170ce6448680fcad Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 14:59:29 -0700 Subject: [PATCH 124/902] Windows build script change for release job PiperOrigin-RevId: 197959602 --- .../ci_build/windows/cpu/pip/build_tf_windows.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index a2300811bb..73520bb2ac 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -54,18 +54,24 @@ function cleanup { trap cleanup EXIT skip_test=0 +release_build=0 for ARG in "$@"; do if [[ "$ARG" == --skip_test ]]; then skip_test=1 elif [[ "$ARG" == --enable_gcs_remote_cache ]]; then set_gcs_remote_cache_options + elif [[ "$ARG" == --release_build ]]; then + release_build=1 fi done -# --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc -# by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 -echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" +if [[ "$release_build" != 1 ]]; then + # --define=override_eigen_strong_inline=true speeds up the compiling of conv_grad_ops_3d.cc and conv_ops_3d.cc + # by 20 minutes. See https://github.com/tensorflow/tensorflow/issues/10521 + # Because this hurts the performance of TF, we don't enable it in release build. 
+ echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" +fi echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc -- GitLab From d7fa8384dc302fb76f7c7bd556098735117220e4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 24 May 2018 15:19:40 -0700 Subject: [PATCH 125/902] Rename getInt64 to GetInt64 to follow Google style PiperOrigin-RevId: 197963232 --- tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 3aa436b39a..97fa379ee1 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -707,7 +707,7 @@ class MatrixMatrixBlockPanelEmitter { llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end, VectorSupportLibrary* vsl); - llvm::Value* getInt64(int64 value) { return ir_builder_->getInt64(value); } + llvm::Value* GetInt64(int64 value) { return ir_builder_->getInt64(value); } Config config() const { return config_; } Dimensions dims() const { return config().dims(); } @@ -741,7 +741,7 @@ void MatrixMatrixBlockPanelEmitter::EmitChunkedLoopOverN() { if (n_start != n_end) { VectorSupportLibrary vsl(scalar_type(), current_vectorization_width, ir_builder_, "gebp"); - EmitLoopOverK(&vsl, getInt64(n_start), getInt64(n_end)); + EmitLoopOverK(&vsl, GetInt64(n_start), GetInt64(n_end)); n_start = n_end; } current_vectorization_width /= 2; @@ -763,13 +763,13 @@ void MatrixMatrixBlockPanelEmitter::EmitLoopOverK(VectorSupportLibrary* vsl, int64 k_start = 0; int64 k_end = dims().k() - (dims().k() % k_tiling_factor()); if (k_end != k_start) { - EmitInnerLoop(k_tiling_factor(), getInt64(k_start), getInt64(k_end), + EmitInnerLoop(k_tiling_factor(), GetInt64(k_start), GetInt64(k_end), n_start, n_end, vsl); k_start = k_end; } if (k_start != dims().k()) { - EmitInnerLoop(dims().k() - k_start, 
getInt64(k_start), getInt64(dims().k()), + EmitInnerLoop(dims().k() - k_start, GetInt64(k_start), GetInt64(dims().k()), n_start, n_end, vsl); } } @@ -841,7 +841,7 @@ void MatrixMatrixBlockPanelEmitter::EmitInnerLoop( broadcasted_a.reserve(k_tiling_factor); for (int i = 0; i < k_tiling_factor; i++) { broadcasted_a.push_back(vsl->LoadBroadcast( - lhs_row_begin, ir_builder_->CreateAdd(getInt64(i), k_i))); + lhs_row_begin, ir_builder_->CreateAdd(GetInt64(i), k_i))); } // rhs_loader will be used to load the tile off of the RHS, denoted as -- GitLab From e6eb02f9babe07ab47852f9defe7f6d512164473 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 15:27:00 -0700 Subject: [PATCH 126/902] Add local_init_run_options to SessionManager and Supervisor so that collective_graph_key can be passed in when collective ops are used in variable initialization. PiperOrigin-RevId: 197964316 --- tensorflow/python/training/session_manager.py | 8 ++++++-- tensorflow/python/training/supervisor.py | 9 +++++++-- .../api/golden/tensorflow.train.-session-manager.pbtxt | 2 +- .../tools/api/golden/tensorflow.train.-supervisor.pbtxt | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py index 3cb3877cc2..974f75777f 100644 --- a/tensorflow/python/training/session_manager.py +++ b/tensorflow/python/training/session_manager.py @@ -95,7 +95,8 @@ class SessionManager(object): ready_op=None, ready_for_local_init_op=None, graph=None, - recovery_wait_secs=30): + recovery_wait_secs=30, + local_init_run_options=None): """Creates a SessionManager. The `local_init_op` is an `Operation` that is run always after a new session @@ -127,6 +128,8 @@ class SessionManager(object): to run local_init_op. graph: The `Graph` that the model will use. recovery_wait_secs: Seconds between checks for the model to be ready. 
+ local_init_run_options: RunOptions to be passed to session.run when + executing the local_init_op. Raises: ValueError: If ready_for_local_init_op is not None but local_init_op is @@ -141,6 +144,7 @@ class SessionManager(object): self._graph = graph self._recovery_wait_secs = recovery_wait_secs self._target = None + self._local_init_run_options = local_init_run_options if ready_for_local_init_op is not None and local_init_op is None: raise ValueError("If you pass a ready_for_local_init_op " "you must also pass a local_init_op " @@ -485,7 +489,7 @@ class SessionManager(object): is_ready_for_local_init, msg = self._model_ready_for_local_init(sess) if is_ready_for_local_init: logging.info("Running local_init_op.") - sess.run(self._local_init_op) + sess.run(self._local_init_op, options=self._local_init_run_options) logging.info("Done running local_init_op.") return True, None else: diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py index 7389e344c7..372ea415df 100644 --- a/tensorflow/python/training/supervisor.py +++ b/tensorflow/python/training/supervisor.py @@ -225,7 +225,8 @@ class Supervisor(object): checkpoint_basename="model.ckpt", session_manager=None, summary_writer=USE_DEFAULT, - init_fn=None): + init_fn=None, + local_init_run_options=None): """Create a `Supervisor`. Args: @@ -294,6 +295,8 @@ class Supervisor(object): init_fn: Optional callable used to initialize the model. Called after the optional `init_op` is called. The callable must accept one argument, the session being initialized. + local_init_run_options: RunOptions to be passed as the SessionManager + local_init_run_options parameter. Returns: A `Supervisor`. @@ -327,6 +330,7 @@ class Supervisor(object): self._recovery_wait_secs = recovery_wait_secs self._stop_grace_secs = stop_grace_secs self._init_fn = init_fn + self._local_init_run_options = local_init_run_options # Set all attributes related to checkpointing and writing events to None. 
# Afterwards, set them appropriately for chief supervisors, as these are @@ -362,7 +366,8 @@ class Supervisor(object): ready_op=self._ready_op, ready_for_local_init_op=self._ready_for_local_init_op, graph=self._graph, - recovery_wait_secs=self._recovery_wait_secs) + recovery_wait_secs=self._recovery_wait_secs, + local_init_run_options=self._local_init_run_options) else: self._session_manager = session_manager diff --git a/tensorflow/tools/api/golden/tensorflow.train.-session-manager.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-session-manager.pbtxt index cc31bb4e4b..448764fe08 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-session-manager.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-session-manager.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'local_init_op\', \'ready_op\', \'ready_for_local_init_op\', \'graph\', \'recovery_wait_secs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'30\'], " + argspec: "args=[\'self\', \'local_init_op\', \'ready_op\', \'ready_for_local_init_op\', \'graph\', \'recovery_wait_secs\', \'local_init_run_options\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'30\', \'None\'], " } member_method { name: "prepare_session" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-supervisor.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-supervisor.pbtxt index 1f0e59a1ac..9677e5a98e 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-supervisor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-supervisor.pbtxt @@ -104,7 +104,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'graph\', \'ready_op\', \'ready_for_local_init_op\', \'is_chief\', \'init_op\', \'init_feed_dict\', \'local_init_op\', \'logdir\', \'summary_op\', \'saver\', \'global_step\', \'save_summaries_secs\', \'save_model_secs\', \'recovery_wait_secs\', 
\'stop_grace_secs\', \'checkpoint_basename\', \'session_manager\', \'summary_writer\', \'init_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'0\', \'True\', \'0\', \'None\', \'0\', \'None\', \'0\', \'0\', \'0\', \'120\', \'600\', \'30\', \'120\', \'model.ckpt\', \'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'graph\', \'ready_op\', \'ready_for_local_init_op\', \'is_chief\', \'init_op\', \'init_feed_dict\', \'local_init_op\', \'logdir\', \'summary_op\', \'saver\', \'global_step\', \'save_summaries_secs\', \'save_model_secs\', \'recovery_wait_secs\', \'stop_grace_secs\', \'checkpoint_basename\', \'session_manager\', \'summary_writer\', \'init_fn\', \'local_init_run_options\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'0\', \'True\', \'0\', \'None\', \'0\', \'None\', \'0\', \'0\', \'0\', \'120\', \'600\', \'30\', \'120\', \'model.ckpt\', \'None\', \'0\', \'None\', \'None\'], " } member_method { name: "loop" -- GitLab From eeb2946c4785e76863e9f4c72b067c95e1d86e01 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 24 May 2018 15:28:03 -0700 Subject: [PATCH 127/902] Allow combinations to be used on the class level. Make "mode" optional. Applying a generator to a class is the same as applying that generator to every member of that class. It is meant to allow avoiding repetition in some cases. The implementation relies on some internals of parameterized tests and how it works with a class level declaration: https://github.com/abseil/abseil-py/blob/master/absl/testing/parameterized.py#L319. The "mode" argument is required before this change. To accommodate cases where execution mode isn't the point of the test, "mode" became optional with "graph" mode being default. Another idea I had was to pick a random mode by default. 
PiperOrigin-RevId: 197964501 --- .../contrib/distribute/python/combinations.py | 157 ++++++++++-------- .../distribute/python/combinations_test.py | 24 +++ 2 files changed, 115 insertions(+), 66 deletions(-) diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 15935817b0..e400fa5be2 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -41,7 +41,10 @@ from __future__ import print_function from collections import OrderedDict import sys +import types +import unittest from absl.testing import parameterized +import six from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.contrib.distribute.python import one_device_strategy @@ -67,8 +70,8 @@ def generate(combinations): combinations: a list of dictionaries created using combine() and times(). Restrictions: - -- there should always be a "mode" argument. Accepted values are "eager" - and "graph". + -- the "mode" argument can be either "eager" or "graph". It's "graph" by + default. -- arguments of the test method must match by name to get the corresponding value of the combination. Tests must accept all arguments except the "mode", "required_tpu" and "required_gpus". @@ -83,14 +86,15 @@ def generate(combinations): test will be skipped if the specified number of GPUs aren't available. Returns: - a decorator that will cause the test method to be run under the specified - conditions. + a decorator that will cause the test method or the test class to be run + under the specified conditions. Raises: - ValueError - if "mode" argument wasn't either "eager" or "graph". + ValueError - if "mode" argument wasn't either "eager" or "graph" or if other + arguments were not accepted by the test method. 
""" - def decorator(test_function): + def decorator(test_method_or_class): """The decorator to be returned.""" # Generate good test names that can be used with --test_filter. @@ -110,70 +114,91 @@ def generate(combinations): list(combination.items()) + [("testcase_name", "_test{}".format(name))])) - @parameterized.named_parameters(*named_combinations) - def decorated(self, **kwargs): - """A wrapped test method that sets up `test_function`.""" - assert "mode" in kwargs - mode = kwargs["mode"] - - distribution = kwargs.pop("distribution", None) - required_tpu = kwargs.pop("required_tpu", False) - required_gpus = kwargs.pop("required_gpus", None) - - if distribution: - assert required_gpus is None, ( - "Do not use `required_gpus` and `distribution` together.") - assert required_tpu is False, ( - "Do not use `required_tpu` and `distribution` together.") - kwargs["distribution"] = distribution.strategy - required_gpus = distribution.required_gpus - required_tpu = distribution.required_tpu - - if required_tpu and not TPU_TEST: - self.skipTest("Test requires a TPU, but it's not available.") - if not required_tpu and TPU_TEST: - self.skipTest("Test that doesn't require a TPU.") - - if not required_gpus: - if GPU_TEST: - self.skipTest("Test that doesn't require GPUs.") - elif context.num_gpus() < required_gpus: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(required_gpus, context.num_gpus())) - - # At this point, `kwargs` doesn't have `required_gpus` or `required_tpu` - # that the user might have specified. `kwargs` still has `mode`, which - # the test is allowed to accept or ignore. 
- requested_arguments = tf_inspect.getfullargspec(test_function).args - missing_arguments = set(list(kwargs.keys()) + ["self"]).difference( - set(requested_arguments + ["mode"])) - if missing_arguments: - raise ValueError("The test is missing arguments {} .".format( - missing_arguments)) - - kwargs_to_pass = {} - for arg in requested_arguments: - if arg == "self": - kwargs_to_pass[arg] = self - else: - kwargs_to_pass[arg] = kwargs[arg] - - if mode == "eager": - with context.eager_mode(), ops.Graph().as_default(): - test_function(**kwargs_to_pass) - elif mode == "graph": - with context.graph_mode(), ops.Graph().as_default(): - test_function(**kwargs_to_pass) - else: - raise ValueError( - "'mode' has to be either 'eager' or 'graph' and not {}".format( - mode)) + if isinstance(test_method_or_class, type): + class_object = test_method_or_class + class_object._test_method_ids = test_method_ids = {} + for name, test_method in six.iteritems(class_object.__dict__.copy()): + if (name.startswith(unittest.TestLoader.testMethodPrefix) and + isinstance(test_method, types.FunctionType)): + delattr(class_object, name) + methods = {} + parameterized._update_class_dict_for_param_test_case( + class_object.__name__, methods, test_method_ids, name, + parameterized._ParameterizedTestIter( + _augment_with_special_arguments(test_method), + named_combinations, parameterized._NAMED, name)) + for method_name, method in six.iteritems(methods): + setattr(class_object, method_name, method) + + return class_object + else: + test_method = _augment_with_special_arguments(test_method_or_class) + return parameterized.named_parameters(*named_combinations)(test_method) - return decorated return decorator +def _augment_with_special_arguments(test_method): + def decorated(self, **kwargs): + """A wrapped test method that treats some arguments in a special way.""" + mode = kwargs.pop("mode", "graph") + + distribution = kwargs.pop("distribution", None) + required_tpu = kwargs.pop("required_tpu", False) + 
required_gpus = kwargs.pop("required_gpus", None) + + if distribution: + assert required_gpus is None, ( + "Do not use `required_gpus` and `distribution` together.") + assert required_tpu is False, ( + "Do not use `required_tpu` and `distribution` together.") + kwargs["distribution"] = distribution.strategy + required_gpus = distribution.required_gpus + required_tpu = distribution.required_tpu + + if required_tpu and not TPU_TEST: + self.skipTest("Test requires a TPU, but it's not available.") + if not required_tpu and TPU_TEST: + self.skipTest("Test that doesn't require a TPU.") + + if not required_gpus: + if GPU_TEST: + self.skipTest("Test that doesn't require GPUs.") + elif context.num_gpus() < required_gpus: + self.skipTest( + "{} GPUs are not available for this test. {} GPUs are available". + format(required_gpus, context.num_gpus())) + + # At this point, `kwargs` doesn't have `required_gpus` or `required_tpu` + # that the user might have specified. `kwargs` still has `mode`, which + # the test is allowed to accept or ignore. + requested_arguments = tf_inspect.getfullargspec(test_method).args + missing_arguments = set(list(kwargs.keys()) + ["self"]).difference( + set(requested_arguments + ["mode"])) + if missing_arguments: + raise ValueError("The test is missing arguments {} .".format( + missing_arguments)) + + kwargs_to_pass = {} + for arg in requested_arguments: + if arg == "self": + kwargs_to_pass[arg] = self + else: + kwargs_to_pass[arg] = kwargs[arg] + + if mode == "eager": + with ops.Graph().as_default(), context.eager_mode(): + test_method(**kwargs_to_pass) + elif mode == "graph": + with ops.Graph().as_default(), context.graph_mode(): + test_method(**kwargs_to_pass) + else: + raise ValueError( + "'mode' has to be either 'eager' or 'graph' and not {}".format( + mode)) + return decorated + + def combine(**kwargs): """Generate combinations based on its keyword arguments. 
diff --git a/tensorflow/contrib/distribute/python/combinations_test.py b/tensorflow/contrib/distribute/python/combinations_test.py index 184bcf27e5..86aa48cea8 100644 --- a/tensorflow/contrib/distribute/python/combinations_test.py +++ b/tensorflow/contrib/distribute/python/combinations_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from collections import OrderedDict +from absl.testing import parameterized from tensorflow.contrib.distribute.python import combinations from tensorflow.python.eager import test @@ -120,5 +121,28 @@ class TestingCombinationsTest(test.TestCase): _ = combinations.times(c1, c2) +@combinations.generate(combinations.combine(a=[1, 0], b=[2, 3], c=[1])) +class CombineTheTestSuite(parameterized.TestCase): + + def test_add_things(self, a, b, c): + self.assertLessEqual(3, a + b + c) + self.assertLessEqual(a + b + c, 5) + + def test_add_things_one_more(self, a, b, c): + self.assertLessEqual(3, a + b + c) + self.assertLessEqual(a + b + c, 5) + + def not_a_test(self, a=0, b=0, c=0): + del a, b, c + self.fail() + + def _test_but_private(self, a=0, b=0, c=0): + del a, b, c + self.fail() + + # Check that nothing funny happens to a non-callable that starts with "_test". + test_member = 0 + + if __name__ == "__main__": test.main() -- GitLab From 9e2c9f35271a2416ec59cd6fa5ef9b5ee690c95d Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Thu, 24 May 2018 15:35:13 -0700 Subject: [PATCH 128/902] Avoid infinite recursion when checking for indexed slices. 
PiperOrigin-RevId: 197965508 --- tensorflow/contrib/distribute/python/cross_tower_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils.py b/tensorflow/contrib/distribute/python/cross_tower_utils.py index 4bff134cad..137fabf4c7 100644 --- a/tensorflow/contrib/distribute/python/cross_tower_utils.py +++ b/tensorflow/contrib/distribute/python/cross_tower_utils.py @@ -374,7 +374,7 @@ def contains_indexed_slices(value): """Check whether the value is `IndexedSlices` or contains `IndexedSlices`.""" if isinstance(value, ops.IndexedSlices): return True - elif isinstance(value, (list, tuple, pycoll.Sequence)) and value: + elif isinstance(value, (list, tuple)) and value: return any(contains_indexed_slices(v) for v in value) elif isinstance(value, value_lib.DistributedValues): return contains_indexed_slices(list(value._index.values())) # pylint: disable=protected-access -- GitLab From 7ed3c62877a0a29f636ead710b81dfc3ecdc68e2 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 24 May 2018 15:45:25 -0700 Subject: [PATCH 129/902] [XLA] Remove maps with a single instruction These maps aren't really pulling their weight, fold them to the instruction that they compute. 
PiperOrigin-RevId: 197967117 --- .../xla/service/algebraic_simplifier.cc | 35 +++++++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 33 +++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index f732ed8f39..c65c91e8e0 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -157,6 +157,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { Status HandleSubtract(HloInstruction* sub) override; + Status HandleMap(HloInstruction* map) override; + Status HandleMaximum(HloInstruction* maximum) override; Status HandleMinimum(HloInstruction* minimum) override; @@ -2188,6 +2190,39 @@ bool AlgebraicSimplifierVisitor::TransformToClampIfSameShape( return true; } +Status AlgebraicSimplifierVisitor::HandleMap(HloInstruction* map) { + auto* map_computation = map->to_apply(); + auto* map_root = map_computation->root_instruction(); + if (map_root->opcode() == HloOpcode::kParameter) { + ReplaceInstructionIfSameShape( + map, map->mutable_operand(map_root->parameter_number())); + return Status::OK(); + } + if (map_root->opcode() == HloOpcode::kConstant) { + if (!ShapeUtil::IsScalar(map_root->shape())) { + return Status::OK(); + } + auto clone = map_root->CloneWithNewOperands(map_root->shape(), {}); + if (ShapeUtil::IsScalar(map->shape())) { + return ReplaceWithNewInstruction(map, std::move(clone)); + } + return ReplaceWithNewInstruction( + map, + HloInstruction::CreateBroadcast( + map->shape(), computation_->AddInstruction(std::move(clone)), {})); + } + std::vector new_operands; + for (auto* root_operand : map_root->operands()) { + if (root_operand->opcode() != HloOpcode::kParameter) { + return Status::OK(); + } + new_operands.push_back( + map->mutable_operand(root_operand->parameter_number())); + } + auto clone = 
map_root->CloneWithNewOperands(map->shape(), new_operands); + return ReplaceWithNewInstruction(map, std::move(clone)); +} + Status AlgebraicSimplifierVisitor::HandleMaximum(HloInstruction* maximum) { // Match the following tree: // min_operand operand diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 4e082877c7..d5f0afe960 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -143,6 +143,39 @@ TEST_F(AlgebraicSimplifierTest, AddBroadcastZeroR0Operand) { EXPECT_EQ(root, param0); } +TEST_F(AlgebraicSimplifierTest, InlineTrivialMap) { + HloComputation::Builder builder(TestName()); + // Create add computation. + HloComputation* add_computation = nullptr; + { + HloComputation::Builder builder(TestName() + ".add"); + const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + HloInstruction* p0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "p0")); + HloInstruction* p1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "p1")); + builder.AddInstruction( + HloInstruction::CreateBinary(scalar_shape, HloOpcode::kAdd, p0, p1)); + add_computation = module().AddEmbeddedComputation(builder.Build()); + } + Shape r2f32 = ShapeUtil::MakeShape(F32, {32, 1}); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r2f32, "param0")); + HloInstruction* zero = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.0f))); + builder.AddInstruction( + HloInstruction::CreateMap(r2f32, {param0, zero}, add_computation)); + + auto computation = module().AddEntryComputation(builder.Build()); + HloInstruction* root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kMap); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + 
ASSERT_TRUE(simplifier.Run(&module()).ValueOrDie()); + root = computation->root_instruction(); + EXPECT_THAT(root, op::Add(param0, zero)); +} + TEST_F(AlgebraicSimplifierTest, AddBroadcastZeroR1Operand) { Shape r2f32 = ShapeUtil::MakeShape(F32, {3, 2}); HloComputation::Builder builder(TestName()); -- GitLab From 589deaa9fb5cb1d1b5bddf07538729abbbbee996 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 15:53:44 -0700 Subject: [PATCH 130/902] Extracts the 'simplify squeeze node' optimization into its own method. PiperOrigin-RevId: 197968452 --- .../grappler/optimizers/constant_folding.cc | 42 ++++++++++++------- .../grappler/optimizers/constant_folding.h | 4 ++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index b8b8088b40..3b56f10309 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1937,22 +1937,8 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsSqueeze(*node) && - !properties->GetInputProperties(node->name()).empty()) { - // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's - // error to squeeze a dimension that is not 1, so we only need to check - // whether the input has > 1 size for each dimension. 
- const auto& shape = properties->GetInputProperties(node->name())[0].shape(); - // The node is replaceable iff - // unknown_rank == false && (dim_size == 0 || all dims have size > 1) - bool replaceable = !shape.unknown_rank(); - for (int j = 0; replaceable && j < shape.dim_size(); ++j) { - replaceable &= shape.dim(j).size() > 1; - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } + if (SimplifySqueeze(*properties, use_shape_info, optimized_graph, node)) { + return Status::OK(); } if (SimplifyPack(optimized_graph, node)) { @@ -2024,6 +2010,30 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::SimplifySqueeze(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, + NodeDef* node) { + if (use_shape_info && IsSqueeze(*node) && + !properties.GetInputProperties(node->name()).empty()) { + // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's + // error to squeeze a dimension that is not 1, so we only need to check + // whether the input has > 1 size for each dimension. 
+ const auto& shape = properties.GetInputProperties(node->name())[0].shape(); + // The node is replaceable iff + // unknown_rank == false && (dim_size == 0 || all dims have size > 1) + bool replaceable = !shape.unknown_rank(); + for (int j = 0; replaceable && j < shape.dim_size(); ++j) { + replaceable &= shape.dim(j).size() > 1; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + return true; + } + } + return false; +} + bool ConstantFolding::SimplifyPack(GraphDef* optimized_graph, NodeDef* node) { if (IsPack(*node) && NumNonControlInputs(*node) == 1 && !OptimizedNodeExists(*node, "_const_axis")) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index be78004f6d..55ad686bc5 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -170,6 +170,10 @@ class ConstantFolding : public GraphOptimizer { // Simplifies Pack operation if applicable. bool SimplifyPack(GraphDef* optimized_graph, NodeDef* node); + // Simplifies a Squeeze operation to an Identity operation if applicable. + bool SimplifySqueeze(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 50d66adc550cc9bcd2337cd28d1561273db43de9 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 24 May 2018 16:02:11 -0700 Subject: [PATCH 131/902] Fix the generated builtin_ops enum header. 
PiperOrigin-RevId: 197969642 --- tensorflow/contrib/lite/builtin_ops.h | 1 - tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc | 1 - 2 files changed, 2 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7e285186f4..24a9b0f6b8 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -99,4 +99,3 @@ typedef enum { } // extern "C" #endif // __cplusplus #endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ -} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index ac408d2f94..64ab0a9fe2 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -57,7 +57,6 @@ const char* kFileFooter = } // extern "C" #endif // __cplusplus #endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ -} )"; } // anonymous namespace -- GitLab From c0dd400f43cf6335165ce772e290a13b50960b23 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Thu, 24 May 2018 16:20:31 -0700 Subject: [PATCH 132/902] Remove _get_backward_fn and depend on _gradient_function directly. 
(_magic_gradient_function was renamed to _gradient_function) Before: entry { name: "MicroBenchmarks.benchmark_tf_gradient_forward_identity" iters: 30000 wall_time: 5.88456789653 extras { key: "examples_per_sec" value { double_value: 169936.011885 } } } After: entry { name: "MicroBenchmarks.benchmark_tf_gradient_forward_identity" iters: 30000 wall_time: 5.04853725433 extras { key: "examples_per_sec" value { double_value: 198077.175551 } } } PiperOrigin-RevId: 197972668 --- tensorflow/c/eager/tape.h | 37 +++---- tensorflow/python/eager/BUILD | 1 + tensorflow/python/eager/backprop.py | 24 +---- tensorflow/python/eager/pywrap_tfe.h | 13 +-- tensorflow/python/eager/pywrap_tfe_src.cc | 125 ++++++++++++++-------- tensorflow/python/pywrap_tfe.i | 2 +- 6 files changed, 110 insertions(+), 92 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 1833b25fea..734e712daa 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -48,7 +48,7 @@ struct OpTapeEntry { // Should be called before deleting the backward function. TODO(apassos) use // unique_ptrs to ensure this happens. - std::function backward_function_deleter; + std::function backward_function_deleter; }; // Map from tensor_id to internally-defined operation-id of the operation which @@ -110,12 +110,6 @@ class VSpace { // Deletes the input tensor. virtual void DeleteGradient(Gradient* gradient) const = 0; - - // Lets this VSpace know that it can release resources held by the - // `backward_function`, It will not be called again. - // `backward_function` must not be null. 
- virtual void ReleaseBackwardFunction( - BackwardFunction* backward_function) const = 0; }; // Traces the execution of operations, doing eager garbage collection, and @@ -130,7 +124,7 @@ class GradientTape { GradientTape(bool persistent) : persistent_(persistent) {} ~GradientTape() { for (const auto& pair : op_tape_) { - pair.second.backward_function_deleter(); + pair.second.backward_function_deleter(pair.second.backward_function); } } @@ -139,12 +133,12 @@ class GradientTape { void Watch(int64 tensor_id); - void RecordOperation(const string& op_type, - gtl::ArraySlice output_tensors, - gtl::ArraySlice input_tensor_id, - gtl::ArraySlice input_dtypes, - BackwardFunction* backward_function, - const std::function& backward_function_deleter); + void RecordOperation( + const string& op_type, gtl::ArraySlice output_tensors, + gtl::ArraySlice input_tensor_id, + gtl::ArraySlice input_dtypes, + BackwardFunction* backward_function, + const std::function& backward_function_deleter); void DeleteTrace(int64 tensor_id); @@ -218,9 +212,9 @@ void GradientTape::RecordOperation( gtl::ArraySlice input_tensor_id, gtl::ArraySlice input_dtypes, BackwardFunction* backward_function, - const std::function& backward_function_deleter) { + const std::function& backward_function_deleter) { if (!ShouldRecord(input_tensor_id, input_dtypes)) { - backward_function_deleter(); + backward_function_deleter(backward_function); return; } std::vector ids; @@ -275,7 +269,7 @@ void GradientTape::DeleteTrace(int64 tensor_id) { for (int64 id : op_it->second.input_tensor_id) { DeleteTrace(id); } - op_it->second.backward_function_deleter(); + op_it->second.backward_function_deleter(op_it->second.backward_function); op_tape_.erase(op_it); } @@ -381,7 +375,8 @@ BackpropInitialState PrepareBackprop( // backward functions that will be used for gradient computation // has been transferred to `result`. 
for (const auto& op_pair : *op_tape) { - op_pair.second.backward_function_deleter(); + op_pair.second.backward_function_deleter( + op_pair.second.backward_function); } op_tape->clear(); } @@ -473,7 +468,7 @@ Status GradientTape::ComputeGradient( if (!persistent_) { // Release all backprop functions for (const auto& pair : state.op_tape) { - pair.second.backward_function_deleter(); + pair.second.backward_function_deleter(pair.second.backward_function); } } }; @@ -541,7 +536,7 @@ Status GradientTape::ComputeGradient( Status s = vspace.CallBackwardFunction(trace.backward_function, out_gradients, &in_gradients); if (!persistent_) { - vspace.ReleaseBackwardFunction(trace.backward_function); + trace.backward_function_deleter(trace.backward_function); } if (!s.ok()) { cleanup(); @@ -550,7 +545,7 @@ Status GradientTape::ComputeGradient( } else { in_gradients.resize(trace.input_tensor_id.size()); if (!persistent_) { - vspace.ReleaseBackwardFunction(trace.backward_function); + trace.backward_function_deleter(trace.backward_function); } for (Gradient* grad : out_gradients) { if (grad != nullptr) { diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 8dbb53211f..dee86966f1 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -26,6 +26,7 @@ cc_library( "//tensorflow/c/eager:tape", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/python:cpp_python_util", "//tensorflow/python:ndarray_tensor", "//tensorflow/python:ndarray_tensor_bridge", "//tensorflow/python:numpy_lib", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index dcfd03b458..2d859dca16 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -20,7 +20,6 @@ from __future__ import print_function import functools import operator -import threading import six @@ -94,8 +93,8 @@ class _MockOp(object): ) -def _magic_gradient_function(op_name, attr_tuple, num_inputs, - 
inputs, outputs, out_grads): +def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, + out_grads): """Calls the gradient function of the op. Args: @@ -117,8 +116,7 @@ def _magic_gradient_function(op_name, attr_tuple, num_inputs, return grad_fn(mock_op, *out_grads) -_gradient_functions = {} -_gradient_functions_lock = threading.Lock() +pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function) _tracing = False @@ -142,22 +140,6 @@ _grad_fn_accepts_none_for_indices = { } -def _get_backward_fn(op_name, attrs, num_inputs, op_inputs, op_outputs): - - def grad_fn(*orig_outputs): - result = _magic_gradient_function(op_name, attrs, num_inputs, - op_inputs, op_outputs, orig_outputs) - if _tracing: - print("Gradient for", op_name, "inputs", op_inputs, "output_grads", - orig_outputs, "gradients", result) - return nest.flatten(result) - - return grad_fn - - -pywrap_tensorflow.TFE_Py_RegisterBackwardFunctionGetter(_get_backward_fn) - - def _record_gradient(op_name, inputs, attrs, results, name): return pywrap_tensorflow.TFE_Py_RecordGradient(op_name, inputs, attrs, results, name) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 626c33be15..73fe80e8ca 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -67,14 +67,15 @@ PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e); // This function is not thread-safe. PyObject* TFE_Py_RegisterFallbackExceptionClass(PyObject* e); -// Registers e as the backward_function_getter. -// The registered function creates a backward function (a function that can -// return the gradient of the inputs an op given the gradient of it's outputs). -// The registered function will be passed the following arguments: -// op_name, attrs, num_inputs, op_inputs, op_outputs +// Registers e as the gradient_function. 
+// The registered function takes +// (op_name, attrs, num_inputs, inputs, outputs, output_gradients) and returns +// the input gradients. This function will not correctly be able to generate +// gradients for functional ops - the gradients for those ops are calculated +// through a different codepath (see function.py for additional information). // // This function is not thread-safe. -PyObject* TFE_Py_RegisterBackwardFunctionGetter(PyObject* e); +PyObject* TFE_Py_RegisterGradientFunction(PyObject* e); // Returns 0 if 'status' is TF_OK. Otherwise, raises an exception (using // `exception` if not nullptr, else using the class registered via diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9885b3d3d7..9bbb6f5941 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/python/eager/pywrap_tensor.h" #include "tensorflow/python/lib/core/safe_ptr.h" +#include "tensorflow/python/util/util.h" using tensorflow::string; using tensorflow::strings::Printf; @@ -45,6 +46,9 @@ struct InputInfo { bool is_list = false; }; +// Takes in output gradients, returns input gradients. +typedef std::function PyBackwardFunction; + using AttrToInputsMap = tensorflow::gtl::FlatMap>; @@ -640,8 +644,8 @@ PyObject* exception_class GUARDED_BY(exception_class_mutex) = nullptr; // Python subclass of Exception that is created to signal fallback. PyObject* fallback_exception_class = nullptr; -// Python function that returns a backward_function. -PyObject* backward_function_getter = nullptr; +// Python function that returns input gradients given output gradients. 
+PyObject* gradient_function = nullptr; PyTypeObject* resource_variable_type = nullptr; @@ -734,19 +738,19 @@ PyObject* TFE_Py_RegisterFallbackExceptionClass(PyObject* e) { } } -PyObject* TFE_Py_RegisterBackwardFunctionGetter(PyObject* e) { - if (backward_function_getter != nullptr) { - Py_DECREF(backward_function_getter); +PyObject* TFE_Py_RegisterGradientFunction(PyObject* e) { + if (gradient_function != nullptr) { + Py_DECREF(gradient_function); } if (!PyCallable_Check(e)) { - backward_function_getter = nullptr; + gradient_function = nullptr; PyErr_SetString(PyExc_TypeError, "TFE_Py_RegisterBackwardFunctionGetter: " "Registered object should be function."); return nullptr; } else { Py_INCREF(e); - backward_function_getter = e; + gradient_function = e; Py_RETURN_NONE; } } @@ -870,10 +874,11 @@ static tensorflow::DataType FastTensorDtype(PyObject* tensor) { } class GradientTape - : public tensorflow::eager::GradientTape { + : public tensorflow::eager::GradientTape { public: explicit GradientTape(bool persistent) - : tensorflow::eager::GradientTape(persistent) {} + : tensorflow::eager::GradientTape( + persistent) {} virtual ~GradientTape() { for (PyObject* v : watched_variables_) { @@ -1226,11 +1231,13 @@ void TapeSetRecordOperation( PyObject* op_type, PyObject* output_tensors, const std::vector& input_ids, const std::vector& input_dtypes, - PyObject* backward_function) { + const std::function& backward_function_getter, + const std::function& backward_function_killer) { std::vector output_info; PyObject* seq = PySequence_Fast(output_tensors, "expected a sequence of integer tensor ids"); int len = PySequence_Size(output_tensors); + if (PyErr_Occurred()) return; output_info.reserve(len); for (int i = 0; i < len; ++i) { output_info.push_back( @@ -1259,10 +1266,10 @@ void TapeSetRecordOperation( } for (TFE_Py_Tape* tape : SafeTapeSet()) { - Py_INCREF(backward_function); - tape->tape->RecordOperation( - op_type_str, output_info, input_ids, input_dtypes, 
backward_function, - [backward_function]() { Py_DECREF(backward_function); }); + auto* function = backward_function_getter(); + tape->tape->RecordOperation(op_type_str, output_info, input_ids, + input_dtypes, function, + backward_function_killer); } } } // namespace @@ -1279,8 +1286,21 @@ void TFE_Py_TapeSetRecordOperation(PyObject* op_type, PyObject* output_tensors, std::vector input_dtypes = MakeTensorDtypeList(input_tensors); if (PyErr_Occurred()) return; - TapeSetRecordOperation(op_type, output_tensors, input_ids, input_dtypes, - backward_function); + + TapeSetRecordOperation( + op_type, output_tensors, input_ids, input_dtypes, + [backward_function]() { + Py_INCREF(backward_function); + PyBackwardFunction* function = + new PyBackwardFunction([backward_function](PyObject* out_grads) { + return PyObject_CallObject(backward_function, out_grads); + }); + return function; + }, + [backward_function](PyBackwardFunction* py_backward_function) { + Py_DECREF(backward_function); + delete py_backward_function; + }); } void TFE_Py_TapeSetDeleteTrace(tensorflow::int64 tensor_id) { @@ -1289,7 +1309,8 @@ void TFE_Py_TapeSetDeleteTrace(tensorflow::int64 tensor_id) { } } -class PyVSpace : public tensorflow::eager::VSpace { +class PyVSpace + : public tensorflow::eager::VSpace { public: explicit PyVSpace(PyObject* py_vspace) : py_vspace_(py_vspace) {} @@ -1382,7 +1403,7 @@ class PyVSpace : public tensorflow::eager::VSpace { } tensorflow::Status CallBackwardFunction( - PyObject* backward_function, + PyBackwardFunction* backward_function, tensorflow::gtl::ArraySlice output_gradients, std::vector* result) const final { PyObject* grads = PyTuple_New(output_gradients.size()); @@ -1395,8 +1416,7 @@ class PyVSpace : public tensorflow::eager::VSpace { reinterpret_cast(output_gradients[i])); } } - PyObject* py_result = PyEval_CallObject( - reinterpret_cast(backward_function), grads); + PyObject* py_result = (*backward_function)(grads); Py_DECREF(grads); if (py_result == nullptr) { return 
tensorflow::errors::Internal("gradient function threw exceptions"); @@ -1425,10 +1445,6 @@ class PyVSpace : public tensorflow::eager::VSpace { return tensorflow::Status::OK(); } - void ReleaseBackwardFunction(PyObject* backward_function) const final { - Py_DECREF(backward_function); - } - void DeleteGradient(PyObject* tensor) const final { Py_XDECREF(tensor); } private: @@ -1587,12 +1603,12 @@ bool CheckInputsOk(PyObject* seq, int start_index, for (Py_ssize_t j = 0; j < PySequence_Fast_GET_SIZE(item); j++) { PyObject* inner_item = PySequence_Fast_GET_ITEM(item, j); if (!CheckOneInput(inner_item)) { - VLOG(1) - << "Falling back to slow path for Op \"" << op_def.name() - << "\", Input \"" << op_def.input_arg(i).name() << "\", Index " - << j - << " since we expected an EagerTensor/ResourceVariable, but got " - << inner_item->ob_type->tp_name; + VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() + << "\", Input \"" << op_def.input_arg(i).name() + << "\", Index " << j + << " since we expected an EagerTensor/ResourceVariable, " + "but got " + << inner_item->ob_type->tp_name; return false; } } @@ -1799,18 +1815,41 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, } PyObject* num_inputs = PyLong_FromLong(PySequence_Size(inputs)); - PyObject* callback_args = - Py_BuildValue("OOOOO", op_name, attrs, num_inputs, op_inputs, op_outputs); - - PyObject* backward_function = - PyObject_CallObject(backward_function_getter, callback_args); - Py_DECREF(callback_args); - if (backward_function == nullptr) return nullptr; - TapeSetRecordOperation(op_name, results, input_ids, input_dtypes, - backward_function); - - Py_DECREF(backward_function); + TapeSetRecordOperation( + op_name, results, input_ids, input_dtypes, + [op_name, attrs, num_inputs, op_inputs, op_outputs]() { + Py_INCREF(op_name); + Py_INCREF(attrs); + Py_INCREF(num_inputs); + Py_INCREF(op_inputs); + Py_INCREF(op_outputs); + PyBackwardFunction* function = + new 
PyBackwardFunction([op_name, attrs, num_inputs, op_inputs, + op_outputs](PyObject* output_grads) { + tensorflow::Safe_PyObjectPtr callback_args( + Py_BuildValue("OOOOOO", op_name, attrs, num_inputs, op_inputs, + op_outputs, output_grads)); + + tensorflow::Safe_PyObjectPtr result( + PyObject_CallObject(gradient_function, callback_args.get())); + + if (PyErr_Occurred()) return static_cast(nullptr); + + return tensorflow::swig::Flatten(result.get()); + }); + return function; + }, + [op_name, attrs, num_inputs, op_inputs, + op_outputs](PyBackwardFunction* backward_function) { + Py_DECREF(op_name); + Py_DECREF(attrs); + Py_DECREF(num_inputs); + Py_DECREF(op_inputs); + Py_DECREF(op_outputs); + + delete backward_function; + }); Py_RETURN_NONE; } @@ -1881,8 +1920,8 @@ bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, // Supports only 2 cases at the moment: // i) input is an EagerTensor -// ii) input is a ResourceVariable - in this case, the is_variable param is set -// to true. +// ii) input is a ResourceVariable - in this case, the is_variable param is +// set to true. // // NOTE: dtype_hint_getter must *always* return a PyObject that can be // decref'd. So if no hint is found, Py_RETURN_NONE (which correctly diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 5f1fafb9dc..42c708b024 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -34,7 +34,7 @@ limitations under the License. %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; %rename("%s") TFE_Py_RegisterExceptionClass; -%rename("%s") TFE_Py_RegisterBackwardFunctionGetter; +%rename("%s") TFE_Py_RegisterGradientFunction; %rename("%s") TFE_Py_RegisterFallbackExceptionClass; %rename("%s") TFE_Py_RegisterResourceVariableType; %rename("%s") TFE_Py_Execute; -- GitLab From 0d35a11ccf25d31f45777dd58c1f3ec1f33806b2 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 16:31:48 -0700 Subject: [PATCH 133/902] move wide string manipulations out of windows_file_system PiperOrigin-RevId: 197974385 --- tensorflow/core/BUILD | 1 + tensorflow/core/platform/env.cc | 4 +- tensorflow/core/platform/windows/env.cc | 7 +-- tensorflow/core/platform/windows/wide_char.h | 46 +++++++++++++++++++ .../platform/windows/windows_file_system.cc | 1 + .../platform/windows/windows_file_system.h | 19 -------- 6 files changed, 54 insertions(+), 24 deletions(-) create mode 100644 tensorflow/core/platform/windows/wide_char.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ce68ee174d..2dd8e6fb31 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -441,6 +441,7 @@ cc_library( "env.cc", "load_library.cc", ]) + tf_platform_hdrs([ + "wide_char.h", ]) + [ "platform/env.cc", "platform/file_system.cc", diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index fe7d0aa7d1..47c59d435b 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -26,7 +26,7 @@ limitations under the License. 
#endif #if defined(PLATFORM_WINDOWS) #include <windows.h> -#include "tensorflow/core/platform/windows/windows_file_system.h" +#include "tensorflow/core/platform/windows/wide_char.h" #define PATH_MAX MAX_PATH #else #include <unistd.h> @@ -311,7 +311,7 @@ string Env::GetExecutablePath() { HMODULE hModule = GetModuleHandleW(NULL); WCHAR wc_file_path[MAX_PATH] = {0}; GetModuleFileNameW(hModule, wc_file_path, MAX_PATH); - string file_path = WindowsFileSystem::WideCharToUtf8(wc_file_path); + string file_path = WideCharToUtf8(wc_file_path); std::copy(file_path.begin(), file_path.end(), exe_path); #else CHECK_NE(-1, readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1)); diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc index 2f54f423b2..68ee3595a2 100644 --- a/tensorflow/core/platform/windows/env.cc +++ b/tensorflow/core/platform/windows/env.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/platform/load_library.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/windows/wide_char.h" #include "tensorflow/core/platform/windows/windows_file_system.h" #pragma comment(lib, "Shlwapi.lib") @@ -71,8 +72,8 @@ class WindowsEnv : public Env { } bool MatchPath(const string& path, const string& pattern) override { - std::wstring ws_path(WindowsFileSystem::Utf8ToWideChar(path)); - std::wstring ws_pattern(WindowsFileSystem::Utf8ToWideChar(pattern)); + std::wstring ws_path(Utf8ToWideChar(path)); + std::wstring ws_pattern(Utf8ToWideChar(pattern)); return PathMatchSpecW(ws_path.c_str(), ws_pattern.c_str()) == TRUE; } @@ -125,7 +126,7 @@ class WindowsEnv : public Env { std::string file_name = library_filename; std::replace(file_name.begin(), file_name.end(), '/', '\\'); - std::wstring ws_file_name(WindowsFileSystem::Utf8ToWideChar(file_name)); + std::wstring ws_file_name(Utf8ToWideChar(file_name)); HMODULE hModule = 
LoadLibraryExW(ws_file_name.c_str(), NULL, LOAD_WITH_ALTERED_SEARCH_PATH); diff --git a/tensorflow/core/platform/windows/wide_char.h b/tensorflow/core/platform/windows/wide_char.h new file mode 100644 index 0000000000..1b86abc3fa --- /dev/null +++ b/tensorflow/core/platform/windows/wide_char.h @@ -0,0 +1,46 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_WIDE_CHAR_H_ +#define TENSORFLOW_CORE_PLATFORM_WINDOWS_WIDE_CHAR_H_ + +#include <Windows.h> +#include <string> +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +inline std::wstring Utf8ToWideChar(const string& utf8str) { + int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), + (int)utf8str.size(), NULL, 0); + std::wstring ws_translated_str(size_required, 0); + MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), + &ws_translated_str[0], size_required); + return ws_translated_str; +} + +inline string WideCharToUtf8(const std::wstring& wstr) { + if (wstr.empty()) return std::string(); + int size_required = WideCharToMultiByte( + CP_UTF8, 0, wstr.c_str(), (int)wstr.size(), NULL, 0, NULL, NULL); + string utf8_translated_str(size_required, 0); + WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), (int)wstr.size(), + &utf8_translated_str[0], size_required, NULL, NULL); + return utf8_translated_str; +} + +} // namespace tensorflow + 
+#endif // TENSORFLOW_CORE_PLATFORM_WINDOWS_WIDE_CHAR_H_ diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index dc2efbeaf5..9079a5ccaa 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/posix/error.h" #include "tensorflow/core/platform/windows/error.h" +#include "tensorflow/core/platform/windows/wide_char.h" #include "tensorflow/core/platform/windows/windows_file_system.h" // TODO(mrry): Prevent this Windows.h #define from leaking out of our headers. diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h index ba0302f0fd..6b04720c68 100644 --- a/tensorflow/core/platform/windows/windows_file_system.h +++ b/tensorflow/core/platform/windows/windows_file_system.h @@ -64,25 +64,6 @@ class WindowsFileSystem : public FileSystem { Status RenameFile(const string& src, const string& target) override; string TranslateName(const string& name) const override { return name; } - - static std::wstring Utf8ToWideChar(const string& utf8str) { - int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), - (int)utf8str.size(), NULL, 0); - std::wstring ws_translated_str(size_required, 0); - MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), - &ws_translated_str[0], size_required); - return ws_translated_str; - } - - static string WideCharToUtf8(const std::wstring& wstr) { - if (wstr.empty()) return std::string(); - int size_required = WideCharToMultiByte( - CP_UTF8, 0, wstr.c_str(), (int)wstr.size(), NULL, 0, NULL, NULL); - string utf8_translated_str(size_required, 0); - WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), (int)wstr.size(), - &utf8_translated_str[0], size_required, NULL, NULL); - return 
utf8_translated_str; - } }; class LocalWinFileSystem : public WindowsFileSystem { -- GitLab From b9b93e90eb523712747b5cb2f70c738115d5f626 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Thu, 24 May 2018 16:44:17 -0700 Subject: [PATCH 134/902] Don't XLA-compile naked variable reads Before this change, when we executed a naked variable read (i.e. outside of a defun, directly running ->Compute()), tf2xla kernel would copy the variable's tensor leading to many unnecessary copies. This change uses the regular non-tf2xla kernel for naked variable reads and marks the tf2xla one for CompilationOnly(). PiperOrigin-RevId: 197976146 --- tensorflow/compiler/jit/BUILD | 1 + tensorflow/compiler/jit/xla_device_ops.h | 4 ++ tensorflow/compiler/tests/eager_test.py | 15 +++-- .../compiler/tf2xla/kernels/variable_ops.cc | 2 +- tensorflow/core/kernels/BUILD | 1 + .../core/kernels/resource_variable_ops.cc | 62 +++++++++---------- .../core/kernels/resource_variable_ops.h | 33 ++++++++++ 7 files changed, 79 insertions(+), 39 deletions(-) create mode 100644 tensorflow/core/kernels/resource_variable_ops.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 980e0eec9e..6d6c030a26 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -178,6 +178,7 @@ cc_library( "//tensorflow/core/kernels:identity_n_op", "//tensorflow/core/kernels:identity_op", "//tensorflow/core/kernels:no_op", + "//tensorflow/core/kernels:resource_variable_ops", "//tensorflow/core/kernels:sendrecv_ops", "//tensorflow/core/kernels:variable_ops", ], diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 59822a18c0..33029b7cbe 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/identity_n_op.h" #include "tensorflow/core/kernels/identity_op.h" #include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/kernels/resource_variable_ops.h" #include "tensorflow/core/kernels/sendrecv_ops.h" #include "tensorflow/core/kernels/variable_ops.h" @@ -74,6 +75,9 @@ class XlaDeviceDummyOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("VarHandleOp").Device(DEVICE).HostMemory("resource"), \ ResourceHandleOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReadVariableOp").Device(DEVICE).HostMemory("resource"), \ + ReadVariableOp); \ REGISTER_KERNEL_BUILDER(Name("ControlTrigger").Device(DEVICE), \ ControlTriggerOp); diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py index 583a2c26d4..4dff5f0f40 100644 --- a/tensorflow/compiler/tests/eager_test.py +++ b/tensorflow/compiler/tests/eager_test.py @@ -117,6 +117,15 @@ class EagerTest(XLATestCase): v.assign_add(2.0) self.assertEqual(3.0, v.numpy()) + def testReadAssignRead(self): + with self.test_scope(): + v = resource_variable_ops.ResourceVariable(1.0) + val1 = v.read_value() + v.assign_add(2.0) + val2 = v.read_value() + self.assertEqual(1.0, val1.numpy()) + self.assertEqual(3.0, val2.numpy()) + def testGradient(self): def f(x): return x @@ -137,10 +146,8 @@ class EagerTest(XLATestCase): self.assertEqual(2., grads[0][0].numpy()) def testMultipleVariableReads(self): - # TODO(b/79715516): Currently, whenever we read a variable by going - # through XLA, we create a copy. This leads large memory usage. - self.skipTest('When variable is read through XLA, a copy is created.') - + # This test makes sure consecutive variable reads don't copy + # the underlying memory. 
with self.test_scope(): # Create 128MiB variables var = resource_variable_ops.ResourceVariable( diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc index 6109db8e89..631cd4471b 100644 --- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc @@ -57,7 +57,7 @@ class ReadVariableOp : public XlaOpKernel { private: DataType dtype_; }; -REGISTER_XLA_OP(Name("ReadVariableOp"), ReadVariableOp); +REGISTER_XLA_OP(Name("ReadVariableOp").CompilationOnly(), ReadVariableOp); class AssignVariableOp : public XlaOpKernel { public: diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 1f0157acf4..b2b631a222 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1968,6 +1968,7 @@ tf_kernel_library( tf_kernel_library( name = "resource_variable_ops", srcs = ["resource_variable_ops.cc"], + hdrs = ["resource_variable_ops.h"], deps = [ ":bounds_check", ":dense_update_functor", diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 03cc414905..af921e4815 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -51,6 +51,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #endif +#include "tensorflow/core/kernels/resource_variable_ops.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -72,40 +73,33 @@ namespace tensorflow { REGISTER_RESOURCE_HANDLE_KERNEL(Var); -class ReadVariableOp : public OpKernel { - public: - explicit ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_)); - } - - void Compute(OpKernelContext* ctx) override { - Var* variable = nullptr; - ResourceHandle handle = HandleFromInput(ctx, 0); - const auto status = LookupResource(ctx, handle, &variable); - OP_REQUIRES(ctx, status.ok(), - errors::FailedPrecondition( - "Error while reading resource variable ", handle.name(), - " from Container: ", handle.container(), - ". This could mean that the variable was uninitialized. ", - status.ToString())); - - core::ScopedUnref s(variable); - // We're acquiring a reference to the underlying buffer while - // holding a shared lock to guarantee ordering of reads and - // writes. - tf_shared_lock ml(*variable->mu()); - const Tensor& t = *variable->tensor(); - OP_REQUIRES( - ctx, dtype_ == t.dtype(), - errors::InvalidArgument( - "Trying to read variable with wrong dtype. Expected ", - DataTypeString(dtype_), " got ", DataTypeString(t.dtype()))); - ctx->set_output(0, t); - } - - private: - DataType dtype_; -}; +ReadVariableOp::ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_)); +} + +void ReadVariableOp::Compute(OpKernelContext* ctx) { + Var* variable = nullptr; + ResourceHandle handle = HandleFromInput(ctx, 0); + const auto status = LookupResource(ctx, handle, &variable); + OP_REQUIRES(ctx, status.ok(), + errors::FailedPrecondition( + "Error while reading resource variable ", handle.name(), + " from Container: ", handle.container(), + ". This could mean that the variable was uninitialized. 
", + status.ToString())); + + core::ScopedUnref s(variable); + // We're acquiring a reference to the underlying buffer while + // holding a shared lock to guarantee ordering of reads and + // writes. + tf_shared_lock ml(*variable->mu()); + const Tensor& t = *variable->tensor(); + OP_REQUIRES(ctx, dtype_ == t.dtype(), + errors::InvalidArgument( + "Trying to read variable with wrong dtype. Expected ", + DataTypeString(dtype_), " got ", DataTypeString(t.dtype()))); + ctx->set_output(0, t); +} REGISTER_KERNEL_BUILDER(Name("ReadVariableOp").Device(DEVICE_CPU), ReadVariableOp); diff --git a/tensorflow/core/kernels/resource_variable_ops.h b/tensorflow/core/kernels/resource_variable_ops.h new file mode 100644 index 0000000000..8cae5d21f0 --- /dev/null +++ b/tensorflow/core/kernels/resource_variable_ops.h @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_RESOURCE_VARIABLE_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_RESOURCE_VARIABLE_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { + +class ReadVariableOp : public OpKernel { + public: + explicit ReadVariableOp(OpKernelConstruction* c); + void Compute(OpKernelContext* ctx) override; + + private: + DataType dtype_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_RESOURCE_VARIABLE_OPS_H_ -- GitLab From 9532bbbf994df5de1fa6550f3cf9f4dc08fcd640 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Thu, 24 May 2018 16:53:33 -0700 Subject: [PATCH 135/902] When converting a numpy float64 to an EagerTensor, always ensure that it becomes a float64 tensor. Earlier py_seq_tensor would fall back to a float32 if not explicitly requesting a float64 (which would not happen if we had no other information). PiperOrigin-RevId: 197977260 --- tensorflow/python/eager/tensor_test.py | 5 +++++ tensorflow/python/lib/core/py_seq_tensor.cc | 13 ++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index b044b30231..626a4eb1ee 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -292,6 +292,11 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase): def testUnicode(self): self.assertEqual(constant_op.constant(u"asdf").numpy(), b"asdf") + def testFloatTensor(self): + self.assertEqual(dtypes.float64, _create_tensor(np.float64()).dtype) + self.assertEqual(dtypes.float32, _create_tensor(np.float32()).dtype) + self.assertEqual(dtypes.float32, _create_tensor(0.0).dtype) + def testSliceDimOutOfRange(self): t1 = _create_tensor([[1, 2], [3, 4], [5, 6]], dtype=dtypes.int32) t2 = _create_tensor([1, 2], dtype=dtypes.int32) diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc 
b/tensorflow/python/lib/core/py_seq_tensor.cc index 32ea737a99..386be35ba2 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -51,6 +51,10 @@ bool IsPyInt(PyObject* obj) { #endif } +bool IsPyDouble(PyObject* obj) { + return PyIsInstance(obj, &PyDoubleArrType_Type); // NumPy double type. +} + bool IsPyFloat(PyObject* obj) { return PyFloat_Check(obj) || PyIsInstance(obj, &PyFloatingArrType_Type); // NumPy float types @@ -113,8 +117,10 @@ Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) { "Attempted to convert an invalid sequence to a Tensor."); } } - } else if (IsPyFloat(obj)) { + } else if (IsPyDouble(obj)) { *dtype = DT_DOUBLE; + } else if (IsPyFloat(obj)) { + *dtype = DT_FLOAT; } else if (PyBool_Check(obj) || PyIsInstance(obj, &PyBoolArrType_Type)) { // Have to test for bool before int, since IsInt(True/False) == true. *dtype = DT_BOOL; @@ -433,7 +439,7 @@ Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret) { break; } switch (infer_dtype) { - case DT_DOUBLE: + case DT_FLOAT: // TODO(josh11b): Handle mixed floats and complex numbers? if (requested_dtype == DT_INVALID) { // TensorFlow uses float32s to represent floating point numbers @@ -446,7 +452,8 @@ Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret) { // final type. RETURN_STRING_AS_STATUS(ConvertDouble(obj, shape, ret)); } - + case DT_DOUBLE: + RETURN_STRING_AS_STATUS(ConvertDouble(obj, shape, ret)); case DT_INT64: if (requested_dtype == DT_INVALID) { const char* error = ConvertInt32(obj, shape, ret); -- GitLab From 0c940ff33add2e8481cc1a5a166d8af72a5a21f9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 17:06:34 -0700 Subject: [PATCH 136/902] Enabling some potential optimization using the restrict qualifier. 
PiperOrigin-RevId: 197979118 --- .../internal/reference/portable_tensor_utils.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc index cc86729fdd..f8c6f341f7 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -37,8 +37,10 @@ bool PortableIsZeroVector(const float* vector, int v_size) { } void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min, - float* max, float* scaling_factor) { + int8_t* quantized_values, + float* __restrict__ min, + float* __restrict__ max, + float* __restrict__ scaling_factor) { auto minmax = std::minmax_element(values, values + size); *min = *minmax.first; *max = *minmax.second; @@ -78,13 +80,14 @@ void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, void PortableMatrixBatchVectorMultiplyAccumulate( const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, int result_stride) { + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, + float* __restrict__ result, int result_stride) { int batch, row, col; for (batch = 0; batch < n_batch; ++batch, vectors += m_cols) { const float batch_scaling_factor_inv = 1.0 / scaling_factors[batch]; // Get the address of the first row. - int8_t* row_ptr = (int8_t*)matrix; // NOLINT + const int8_t* row_ptr = matrix; for (row = 0; row < m_rows; ++row, result += result_stride) { // Initialize the dot product sum for the row to 0. int32_t dotprod = 0; -- GitLab From f6066436884476d7bc32cf2ad6cfc8d9c52b5482 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 17:48:21 -0700 Subject: [PATCH 137/902] Add heuristic on picking NHWC layout for (V100, fp16) convolutions. Also move AlgorithmPicker after layout assignment, as now cudnn_convolution_runner will return failures on invalid input layouts. Also add a backend debug option to switch the layout heuristic. By default it has the old behavior (all NCHW). PiperOrigin-RevId: 197983747 --- tensorflow/compiler/xla/layout_util.cc | 10 ++ tensorflow/compiler/xla/layout_util.h | 4 + tensorflow/compiler/xla/service/gpu/BUILD | 26 +++ .../service/gpu/cudnn_convolution_runner.cc | 17 +- .../compiler/xla/service/gpu/gpu_compiler.cc | 56 +++---- .../xla/service/gpu/gpu_layout_assignment.cc | 121 ++++++++------ .../xla/service/gpu/gpu_layout_assignment.h | 13 +- .../service/gpu/gpu_layout_assignment_test.cc | 12 +- .../compiler/xla/service/gpu/gpu_options.cc | 28 ++++ .../compiler/xla/service/gpu/gpu_options.h | 33 ++++ .../xla/service/gpu/stream_executor_util.cc | 151 ++++++++++++++++++ .../xla/service/gpu/stream_executor_util.h | 46 ++++++ tensorflow/compiler/xla/tests/BUILD | 50 +++--- 13 files changed, 459 insertions(+), 108 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.cc create mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.h create mode 100644 tensorflow/compiler/xla/service/gpu/stream_executor_util.cc create mode 100644 tensorflow/compiler/xla/service/gpu/stream_executor_util.h diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index a76fdcda25..89cafa1a7d 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -65,6 +65,16 @@ void SetDefaultLayoutToContainer( return layout; } +/* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor( + tensorflow::gtl::ArraySlice major_to_minor) { + Layout layout; + layout.set_format(DENSE); + for (int i = major_to_minor.size() - 1; i >= 0; i--) { + 
layout.add_minor_to_major(major_to_minor[i]); + } + return layout; +} + /* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) { Layout layout; layout.set_format(SPARSE); diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index d3d6a2cc94..739bbe7367 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -36,6 +36,10 @@ class LayoutUtil { // convenience function for protobuf construction.) static Layout MakeLayout(tensorflow::gtl::ArraySlice minor_to_major); + // Similar to MakeLayout, but take indices in reverse order. + static Layout MakeLayoutFromMajorToMinor( + tensorflow::gtl::ArraySlice major_to_minor); + // Creates a sparse layout with the given maximum number of elements. (This is // a convenience function for protobuf construction.) static Layout MakeSparseLayout(int64 max_sparse_elements); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index aafb61b583..ffb1af2d87 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -338,6 +338,7 @@ cc_library( srcs = ["cudnn_convolution_runner.cc"], hdrs = ["cudnn_convolution_runner.h"], deps = [ + ":stream_executor_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:status_macros", @@ -590,14 +591,18 @@ cc_library( srcs = ["gpu_layout_assignment.cc"], hdrs = ["gpu_layout_assignment.h"], deps = [ + ":gpu_options", ":ir_emission_utils", + ":stream_executor_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:computation_layout", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:layout_assignment", "//tensorflow/core:lib", + "//tensorflow/core:stream_executor_no_cuda", ], ) @@ 
-694,6 +699,27 @@ cc_library( ], ) +cc_library( + name = "gpu_options", + srcs = ["gpu_options.cc"], + hdrs = ["gpu_options.h"], + deps = [ + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/core:lib_internal", + ], +) + +cc_library( + name = "stream_executor_util", + srcs = ["stream_executor_util.cc"], + hdrs = ["stream_executor_util.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:stream_executor_no_cuda", + ], +) + tf_cc_test( name = "gpu_hlo_support_checker_test", srcs = ["gpu_hlo_support_checker_test.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index 10b4c3de89..0645fbb3ad 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" @@ -113,8 +115,17 @@ Status RunCudnnConvolution( // cuDNN's convolution APIs support the BDYX layout for activations/output and // the OIYX layout for weights. 
+ DataLayout input_dl; + FilterLayout filter_dl; + DataLayout output_dl; + + TF_ASSIGN_OR_RETURN(std::tie(input_dl, filter_dl, output_dl), + XlaConvLayoutsToStreamExecutorLayouts( + dnums, input_shape.layout(), filter_shape.layout(), + output_shape.layout())); + BatchDescriptor input_descriptor(effective_num_dimensions); - input_descriptor.set_layout(DataLayout::kBatchDepthYX) + input_descriptor.set_layout(input_dl) .set_feature_map_count( input_shape.dimensions(dnums.input_feature_dimension())) .set_count(input_shape.dimensions(dnums.input_batch_dimension())); @@ -126,7 +137,7 @@ Status RunCudnnConvolution( } FilterDescriptor filter_descriptor(effective_num_dimensions); - filter_descriptor.set_layout(FilterLayout::kOutputInputYX) + filter_descriptor.set_layout(filter_dl) .set_input_feature_map_count( filter_shape.dimensions(dnums.kernel_input_feature_dimension())) .set_output_feature_map_count( @@ -149,7 +160,7 @@ Status RunCudnnConvolution( } BatchDescriptor output_descriptor(effective_num_dimensions); - output_descriptor.set_layout(DataLayout::kBatchDepthYX) + output_descriptor.set_layout(output_dl) .set_feature_map_count( output_shape.dimensions(dnums.output_feature_dimension())) .set_count(output_shape.dimensions(dnums.output_batch_dimension())); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 1445684e5d..5ef422c90b 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -202,18 +202,28 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, pipeline.AddInvariantChecker(); pipeline.AddPass(); pipeline.AddPass(); + TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); + } + + { + HloPassPipeline pipeline("layout_assignment"); + pipeline.AddPass( + hlo_module->mutable_device_entry_computation_layout(), stream_exec); + + // The LayoutAssignment pass may leave behind kCopy instructions which 
are + // duplicate or NOPs, so remove them with algebraic simplification and CSE. + pipeline.AddPass>( + /*is_layout_sensitive=*/true, + /*valid_bitcast_callback=*/[](const Shape&, const Shape&) { + return true; + }); // Choose the fastest algorithm for each conv. // - // In theory doing this here is way too early: It needs to happen after - // layout assignment, because the layout of the inputs/outputs affects the - // speed of the conv. But currently we only allow only one input/output - // layout when calling cudnn, so there's no ambiguity. - // - // We pick the algorithm at this early stage so we can generate better HLO. - // After CudnnConvolutionRewriter, our convolutions are CustomCalls which - // return a tuple (conv_result, scratch_memory), and the each conv uses 0 - // bytes of scratch: + // We pick the algorithm before fusion so we can generate better HLO. After + // CudnnConvolutionRewriter, our convolutions are CustomCalls which return a + // tuple (conv_result, scratch_memory), and each conv uses 0 bytes of + // scratch: // // customcall = (f32[...], f32[0]) // return gte(customcall, 0) @@ -229,35 +239,15 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // The new tuple and gte instructions then be simplified away, because // nobody is expected to use the scratch value. // - // However, if we were to run CudnnConvolutionAlgorithmPicker after layout - // assignment, fusion would already have run, and the gte(customcall, 0) - // would probably already be into a fusion node. We can't simplify across - // HloComputation boundaries, so in this case we wouldn't be able to - // simplify away the new_tuple bits. - // - // We'll need to revisit this if we ever allow multiple layouts for the - // inputs/outputs of a cudnn convolution. + // However, if we were to run CudnnConvolutionAlgorithmPicker after fusion, + // the gte(customcall, 0) would probably already be into a fusion node.
We + // can't simplify across HloComputation boundaries, so in this case we + // wouldn't be able to simplify away the new_tuple bits. pipeline.AddPass(stream_exec, device_allocator); // Clean up new_tuple described above. pipeline.AddPass(); - pipeline.AddPass(); - - TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); - } - - { - HloPassPipeline pipeline("layout_assignment"); - pipeline.AddPass( - hlo_module->mutable_device_entry_computation_layout()); - // The LayoutAssignment pass may leave behind kCopy instructions which are - // duplicate or NOPs, so remove them with algebraic simplification and CSE. - pipeline.AddPass>( - /*is_layout_sensitive=*/true, - /*valid_bitcast_callback=*/[](const Shape&, const Shape&) { - return true; - }); pipeline.AddPass(/*is_layout_sensitive=*/true); TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 89f1e62588..178457721a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -18,31 +18,72 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" namespace xla { namespace gpu { -// cuDNN convolutions are called with specific layouts on the input, output, -// and filter: -// -// input: DataLayout::kBatchDepthYX -// output: DataLayout::kBatchDepthYX -// filter: FilterLayout::kOutputInputYX -// -// The order dimensions in the constant name is major-to-minor (eg, the -// most-major dimension of the input is batch, most-minor is X). The -// specific dimension numbers these named dimensions correspond to is -// determined by the ConvolutionDimensionNumbers argument. Y is spatial -// dimension 0, and X is spatial dimension 1. -// -// TODO(b/29399649): Be more flexible about handling layouts of cuDNN calls. -static Status AddBackendConstraintsToDnnConvCustomCall( +using stream_executor::dnn::DataLayout; +using stream_executor::dnn::FilterLayout; + +static bool IsVoltaOrLater(const se::StreamExecutor& stream_executor) { + int major, minor; + CHECK(stream_executor.GetDeviceDescription().cuda_compute_capability(&major, + &minor)); + return major >= 7; +} + +// Returns (input, filter, output) layouts. +static std::tuple +HeuristicLayoutAssignment(const HloInstruction* instr, + stream_executor::StreamExecutor* stream_executor) { + // DataLayout and FilterLayout use weird enum names. Translations: + // N <=> Batch or Output + // C <=> Depth or Input + // H <=> Y + // W <=> X + // + // Therefore kOutputInputYX and kBatchDepthYX mean NCHW.
+ + // As of today, our empirical evidence is that cudnn 7.0 is faster on V100 x + // fp16 with the mostly-NHWC layout. The heuristic may change as cudnn version + // changes, as well as the hardware updates. + if (!(instr->operand(0)->shape().element_type() == xla::PrimitiveType::F16 && + IsVoltaOrLater(*stream_executor))) { + return std::make_tuple(DataLayout::kBatchDepthYX, + FilterLayout::kOutputInputYX, + DataLayout::kBatchDepthYX); + } + VLOG(2) << "Using heuristic to figure out layouts for " << instr->ToString(); + // For BackwardInput that has stride, full NHWC layouts run significantly + // slower than (NHWC, NCHW, NCHW) or (NHWC, NCHW, NHWC). + // + // TODO(timshen): more closely compare (NHWC, NCHW, NCHW) and (NHWC, NCHW, + // NHWC). + if (instr->custom_call_target() == kCudnnConvBackwardInputCallTarget && + window_util::HasStride(instr->window())) { + return std::make_tuple(DataLayout::kBatchYXDepth, + FilterLayout::kOutputInputYX, + DataLayout::kBatchDepthYX); + } + return std::make_tuple(DataLayout::kBatchYXDepth, + FilterLayout::kOutputYXInput, + DataLayout::kBatchYXDepth); +} + +// Adds layout constraints on the cudnn custom-call instruction. The layout +// constraints are represented in terms of minor_to_major fields of both +// operands and the output shape. Depending on the underlying algorithm, one of +// { NCHW, NHWC } ^ 3 = 8 different layout combinations may be chosen. +Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall( HloInstruction* instr, LayoutConstraints* constraints) { CHECK(IsCustomCallToDnnConvolution(*instr)) << instr->ToString(); Shape input_shape; @@ -66,39 +107,25 @@ static Status AddBackendConstraintsToDnnConvCustomCall( << instr->custom_call_target(); } - // Construct minor-to-major dimension orders for operands and result. - // cuDNN's convolution APIs support the BDYX layout for activations/output - // and the OIYX layout for weights. 
- // TODO(b/29399649): Be more flexible about handling layouts of cuDNN - // calls after we switch to cuDNN v5. - const ConvolutionDimensionNumbers& dimension_numbers = - instr->convolution_dimension_numbers(); - std::vector input_layout; - for (int i = dimension_numbers.input_spatial_dimensions_size() - 1; i >= 0; - --i) { - input_layout.push_back(dimension_numbers.input_spatial_dimensions(i)); - } - input_layout.push_back(dimension_numbers.input_feature_dimension()); - input_layout.push_back(dimension_numbers.input_batch_dimension()); - *input_shape.mutable_layout() = LayoutUtil::MakeLayout(input_layout); - - std::vector filter_layout; - for (int i = dimension_numbers.kernel_spatial_dimensions_size() - 1; i >= 0; - --i) { - filter_layout.push_back(dimension_numbers.kernel_spatial_dimensions(i)); - } - filter_layout.push_back(dimension_numbers.kernel_input_feature_dimension()); - filter_layout.push_back(dimension_numbers.kernel_output_feature_dimension()); - *filter_shape.mutable_layout() = LayoutUtil::MakeLayout(filter_layout); - - std::vector output_layout; - for (int i = dimension_numbers.output_spatial_dimensions_size() - 1; i >= 0; - --i) { - output_layout.push_back(dimension_numbers.output_spatial_dimensions(i)); + { + DataLayout input; + FilterLayout filter; + DataLayout output; + if (ConvUseLayoutHeuristic(instr->GetModule()->config())) { + std::tie(input, filter, output) = + HeuristicLayoutAssignment(instr, stream_executor_); + } else { + input = DataLayout::kBatchDepthYX; + filter = FilterLayout::kOutputInputYX; + output = DataLayout::kBatchDepthYX; + } + + TF_ASSIGN_OR_RETURN( + std::tie(*input_shape.mutable_layout(), *filter_shape.mutable_layout(), + *output_shape.mutable_layout()), + StreamExecutorConvLayoutsToXlaLayouts( + instr->convolution_dimension_numbers(), input, filter, output)); } - output_layout.push_back(dimension_numbers.output_feature_dimension()); - output_layout.push_back(dimension_numbers.output_batch_dimension()); - 
*output_shape.mutable_layout() = LayoutUtil::MakeLayout(output_layout); // The custom call returns a tuple of (actual_result, scratch_buffer); // call_result_buf is the logical buffer for actual_result, the thing that diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h index 86a3a7111f..ce24af1cf8 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/layout_assignment.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" namespace xla { namespace gpu { @@ -27,8 +28,10 @@ namespace gpu { // layout constraints for operands and results of library calls. class GpuLayoutAssignment : public LayoutAssignment { public: - explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout) - : LayoutAssignment(entry_computation_layout) {} + explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout, + se::StreamExecutor* stream_executor) + : LayoutAssignment(entry_computation_layout), + stream_executor_(stream_executor) {} ~GpuLayoutAssignment() override {} protected: @@ -41,6 +44,12 @@ class GpuLayoutAssignment : public LayoutAssignment { LayoutConstraints* constraints) override; bool CustomCallRequiresMajorFirstLayout( const HloInstruction* instruction) override; + + private: + Status AddBackendConstraintsToDnnConvCustomCall( + HloInstruction* instr, LayoutConstraints* constraints); + + se::StreamExecutor* stream_executor_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc index 4c45d2e94a..e48165c142 100644 --- 
a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc @@ -69,7 +69,8 @@ TEST_F(LayoutAssignmentTest, Elementwise) { *computation_layout.mutable_result_layout() = ShapeLayout(result_shape_with_layout); - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment( + &computation_layout, backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); for (const HloInstruction* operand : add->operands()) { @@ -156,7 +157,8 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) { *computation_layout.mutable_result_layout() = ShapeLayout(result_shape); } - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment( + &computation_layout, backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -225,7 +227,8 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) { {result_shape, offset_scale_shape, offset_scale_shape})); } - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment( + &computation_layout, backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -305,7 +308,8 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) { {result_shape, scale_shape, scale_shape})); } - GpuLayoutAssignment layout_assignment(&computation_layout); + GpuLayoutAssignment layout_assignment( + &computation_layout, backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first and fourth operands to the batchnorm call should have the diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/compiler/xla/service/gpu/gpu_options.cc new file mode 100644 index 
0000000000..174aaf122c --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_options.cc @@ -0,0 +1,28 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" +#include "tensorflow/core/lib/gtl/map_util.h" + +namespace xla { +namespace gpu { + +bool ConvUseLayoutHeuristic(const HloModuleConfig& config) { + return config.debug_options().xla_backend_extra_options().count( + "xla_gpu_experimental_conv_use_layout_heuristic"); +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/compiler/xla/service/gpu/gpu_options.h new file mode 100644 index 0000000000..498d4a9495 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_options.h @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ + +#include "tensorflow/compiler/xla/service/hlo_module_config.h" + +// Helper functions for querying options that are specific to the GPU backend. + +namespace xla { +namespace gpu { + +// Returns true if we should use heuristics to assign convolution layouts, as +// opposed to always assigning NCHW. +bool ConvUseLayoutHeuristic(const HloModuleConfig& config); + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc b/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc new file mode 100644 index 0000000000..a50ddf6ac6 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc @@ -0,0 +1,151 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" + +#include "tensorflow/compiler/xla/layout_util.h" + +namespace xla { +namespace gpu { + +using stream_executor::dnn::DataLayout; +using stream_executor::dnn::DataLayoutString; +using stream_executor::dnn::FilterLayout; +using stream_executor::dnn::FilterLayoutString; + +StatusOr> +StreamExecutorConvLayoutsToXlaLayouts(const ConvolutionDimensionNumbers& dnums, + DataLayout input, FilterLayout filter, + DataLayout output) { + std::vector input_layout; + switch (input) { + case DataLayout::kBatchDepthYX: + input_layout.push_back(dnums.input_batch_dimension()); + input_layout.push_back(dnums.input_feature_dimension()); + input_layout.insert(input_layout.end(), + dnums.input_spatial_dimensions().begin(), + dnums.input_spatial_dimensions().end()); + break; + case DataLayout::kBatchYXDepth: + input_layout.push_back(dnums.input_batch_dimension()); + input_layout.insert(input_layout.end(), + dnums.input_spatial_dimensions().begin(), + dnums.input_spatial_dimensions().end()); + input_layout.push_back(dnums.input_feature_dimension()); + break; + default: + return tensorflow::errors::Internal("Invalid input layout: ", + DataLayoutString(input)); + } + + std::vector filter_layout; + switch (filter) { + case FilterLayout::kOutputInputYX: + filter_layout.push_back(dnums.kernel_output_feature_dimension()); + filter_layout.push_back(dnums.kernel_input_feature_dimension()); + filter_layout.insert(filter_layout.end(), + dnums.kernel_spatial_dimensions().begin(), + dnums.kernel_spatial_dimensions().end()); + break; + case FilterLayout::kOutputYXInput: + filter_layout.push_back(dnums.kernel_output_feature_dimension()); + filter_layout.insert(filter_layout.end(), + dnums.kernel_spatial_dimensions().begin(), + dnums.kernel_spatial_dimensions().end()); + filter_layout.push_back(dnums.kernel_input_feature_dimension()); + break; + 
default: + return tensorflow::errors::Internal("Invalid filter layout: ", + FilterLayoutString(filter)); + } + + std::vector output_layout; + switch (output) { + case DataLayout::kBatchDepthYX: + output_layout.push_back(dnums.output_batch_dimension()); + output_layout.push_back(dnums.output_feature_dimension()); + output_layout.insert(output_layout.end(), + dnums.output_spatial_dimensions().begin(), + dnums.output_spatial_dimensions().end()); + break; + case DataLayout::kBatchYXDepth: + output_layout.push_back(dnums.output_batch_dimension()); + output_layout.insert(output_layout.end(), + dnums.output_spatial_dimensions().begin(), + dnums.output_spatial_dimensions().end()); + output_layout.push_back(dnums.output_feature_dimension()); + break; + default: + return tensorflow::errors::Internal("Invalid output layout: ", + DataLayoutString(output)); + } + + return std::make_tuple(LayoutUtil::MakeLayoutFromMajorToMinor(input_layout), + LayoutUtil::MakeLayoutFromMajorToMinor(filter_layout), + LayoutUtil::MakeLayoutFromMajorToMinor(output_layout)); +} + +StatusOr> +XlaConvLayoutsToStreamExecutorLayouts(const ConvolutionDimensionNumbers& dnums, + const Layout& input, const Layout& filter, + const Layout& output) { + Layout nchw_input, nchw_filter, nchw_output; + std::tie(nchw_input, nchw_filter, nchw_output) = + StreamExecutorConvLayoutsToXlaLayouts(dnums, DataLayout::kBatchDepthYX, + FilterLayout::kOutputInputYX, + DataLayout::kBatchDepthYX) + .ConsumeValueOrDie(); + + Layout nhwc_input, nhwc_filter, nhwc_output; + std::tie(nhwc_input, nhwc_filter, nhwc_output) = + StreamExecutorConvLayoutsToXlaLayouts(dnums, DataLayout::kBatchYXDepth, + FilterLayout::kOutputYXInput, + DataLayout::kBatchYXDepth) + .ConsumeValueOrDie(); + + DataLayout input_layout; + if (LayoutUtil::Equal(input, nchw_input)) { + input_layout = DataLayout::kBatchDepthYX; + } else if (LayoutUtil::Equal(input, nhwc_input)) { + input_layout = DataLayout::kBatchYXDepth; + } else { + return 
tensorflow::errors::Internal("Invalid input layout: ", + input.ShortDebugString()); + } + + FilterLayout filter_layout; + if (LayoutUtil::Equal(filter, nchw_filter)) { + filter_layout = FilterLayout::kOutputInputYX; + } else if (LayoutUtil::Equal(filter, nhwc_filter)) { + filter_layout = FilterLayout::kOutputYXInput; + } else { + return tensorflow::errors::Internal("Invalid filter layout: ", + filter.ShortDebugString()); + } + + DataLayout output_layout; + if (LayoutUtil::Equal(output, nchw_output)) { + output_layout = DataLayout::kBatchDepthYX; + } else if (LayoutUtil::Equal(output, nhwc_output)) { + output_layout = DataLayout::kBatchYXDepth; + } else { + return tensorflow::errors::Internal("Invalid output layout: ", + output.ShortDebugString()); + } + + return std::make_tuple(input_layout, filter_layout, output_layout); +} +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h new file mode 100644 index 0000000000..8218f4fd11 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_EXECUTOR_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_EXECUTOR_UTIL_H_ + +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" + +// Helper functions for interacting with StreamExecutor. + +namespace xla { +namespace gpu { + +// Returns (input, filter, output) XLA Layout protos given the StreamExecutor +// layouts. +StatusOr> +StreamExecutorConvLayoutsToXlaLayouts(const ConvolutionDimensionNumbers& dnums, + stream_executor::dnn::DataLayout input, + stream_executor::dnn::FilterLayout filter, + stream_executor::dnn::DataLayout output); + +// Returns (input, filter, output) StreamExecutor layouts given the XLA layouts. +StatusOr> +XlaConvLayoutsToStreamExecutorLayouts(const ConvolutionDimensionNumbers& dnums, + const Layout& input, const Layout& filter, + const Layout& output); + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_EXECUTOR_UTIL_H_ diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index fd54ac761c..1a12fd0113 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -776,30 +776,42 @@ xla_test( ], ) +CONVOLUTION_TEST_DEPS = [ + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:padding", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/tests:client_library_test_base", + 
"//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", +] + xla_test( name = "convolution_test", timeout = "long", srcs = ["convolution_test.cc"], shard_count = 25, - deps = [ - "//tensorflow/compiler/xla:array2d", - "//tensorflow/compiler/xla:array4d", - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:reference_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/client:global_data", - "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/compiler/xla/client:padding", - "//tensorflow/compiler/xla/client/xla_client:xla_builder", - "//tensorflow/compiler/xla/tests:client_library_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/core:lib", - "//tensorflow/core:test", - ], + deps = CONVOLUTION_TEST_DEPS, +) + +xla_test( + name = "convolution_test_gpu_alternative_layout", + timeout = "long", + srcs = ["convolution_test.cc"], + backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_use_layout_heuristic"]}, + backends = ["gpu"], + shard_count = 25, + deps = CONVOLUTION_TEST_DEPS, ) xla_test( -- GitLab From 6b4eeb64006fe83f7d1d87f4f748227329cb9b2c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 24 May 2018 18:23:48 -0700 Subject: [PATCH 138/902] Rename TileLoader to MemoryTile; NFC In a later change I will expand MemoryTile to store tiles and load "3d" tiles (where we broadcast along one dimension as we load). 
PiperOrigin-RevId: 197987185 --- .../xla/service/cpu/dot_op_emitter.cc | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 97fa379ee1..48bea7c27e 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -42,14 +42,14 @@ using llvm_ir::SetToFirstInsertPoint; namespace cpu { namespace { -// Loads a tile of values from a 2D tensor. -class TileLoader { +// Provides tiled access to an in-memory rank 2 array. +class MemoryTile { public: - // Constructs a TileLoader that will load a tile consisting of + // Constructs a MemoryTile that can operate on tiles consisting of // `tile_size_along_major_dim` vectors from the matrix `matrix`, starting at // `major_dim_offset` in the major dimension. The tile size along the minor // dimension is the vector size, and that is implicitly determined by `vsl`. 
- TileLoader(VectorSupportLibrary* vsl, llvm::IRBuilder<>* ir_builder, + MemoryTile(VectorSupportLibrary* vsl, llvm::IRBuilder<>* ir_builder, llvm::Value* matrix, int64 matrix_size_along_minor_dim, llvm::Value* major_dim_offset, int64 tile_size_along_major_dim) : vsl_(vsl) { @@ -236,8 +236,8 @@ class ColumnMajorMatrixVectorProductEmitter void EmitOuterLoopBody(llvm::Value* column, int64 column_count, bool is_first_column); - TileLoader GetLhsTileLoader(llvm::Value* column_start, int64 column_count) { - return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + MemoryTile GetLhsMemoryTile(llvm::Value* column_start, int64 column_count) { + return MemoryTile(&vsl_, ir_builder_, /*matrix=*/lhs_, /*matrix_size_along_minor_dim=*/m(), /*major_dim_offset=*/column_start, /*tile_size_along_major_dim=*/column_count); @@ -255,7 +255,7 @@ class ColumnMajorMatrixVectorProductEmitter return result; } - void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, + void EmitInnerLoopTiled(MemoryTile* lhs_memory_tile, const std::vector& rhs_tile, int64 columns, bool is_first_column); @@ -274,12 +274,12 @@ class ColumnMajorMatrixVectorProductEmitter void ColumnMajorMatrixVectorProductEmitter::EmitOuterLoopBody( llvm::Value* column, int64 column_count, bool is_first_column) { - TileLoader lhs_tile_loader = GetLhsTileLoader(/*column_start=*/column, + MemoryTile lhs_memory_tile = GetLhsMemoryTile(/*column_start=*/column, /*column_count=*/column_count); std::vector rhs_tile = LoadRhsTile(column, /*count=*/column_count); - EmitInnerLoopTiled(&lhs_tile_loader, rhs_tile, + EmitInnerLoopTiled(&lhs_memory_tile, rhs_tile, /*columns=*/column_count, is_first_column); EmitInnerLoopEpilogue(column, /*columns=*/column_count, is_first_column); } @@ -302,14 +302,14 @@ void ColumnMajorMatrixVectorProductEmitter::Emit() { } void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( - TileLoader* lhs_tile_loader, const std::vector& rhs_tile, + MemoryTile* lhs_memory_tile, const std::vector& rhs_tile, int64 
columns, bool is_first_column) { int64 row_limit = m() - (m() % tile_rows()); ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(), [&](llvm::Value* row) { std::vector lhs_tile = - lhs_tile_loader->LoadTile(/*minor_dim_offset=*/row); + lhs_memory_tile->LoadTile(/*minor_dim_offset=*/row); llvm::Value* accumulator = is_first_column ? (addend_ ? vsl_.LoadVector(addend_, row) : vsl_.GetZeroVector()) @@ -461,8 +461,8 @@ class RowMajorMatrixVectorProductEmitter const Config& config() const { return config_; } private: - TileLoader GetLhsTileLoader(llvm::Value* row_start, int64 row_count) { - return TileLoader(&vsl_, ir_builder_, /*matrix=*/lhs_, + MemoryTile GetLhsMemoryTile(llvm::Value* row_start, int64 row_count) { + return MemoryTile(&vsl_, ir_builder_, /*matrix=*/lhs_, /*matrix_size_along_minor_dim=*/k(), /*major_dim_offset=*/row_start, /*tile_size_along_major_dim=*/row_count); @@ -470,7 +470,7 @@ class RowMajorMatrixVectorProductEmitter void EmitOuterLoopBody(llvm::Value* row, int64 row_count); - void EmitInnerLoopTiled(TileLoader* lhs_tile_loader, int64 rows, + void EmitInnerLoopTiled(MemoryTile* lhs_memory_tile, int64 rows, std::vector* vector_accumulators); void EmitInnerLoopEpilogue(llvm::Value* current_tile_row, int64 rows, @@ -488,7 +488,7 @@ class RowMajorMatrixVectorProductEmitter void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, int64 row_count) { - TileLoader lhs_tile_loader = GetLhsTileLoader(/*row_start=*/row, + MemoryTile lhs_memory_tile = GetLhsMemoryTile(/*row_start=*/row, /*row_count=*/row_count); std::vector vector_accumulators; std::vector scalar_accumulators; @@ -496,7 +496,7 @@ void RowMajorMatrixVectorProductEmitter::EmitOuterLoopBody(llvm::Value* row, vector_accumulators.emplace_back(&vsl_, vsl_.GetZeroVector()); scalar_accumulators.emplace_back(&vsl_, vsl_.GetZeroScalar()); } - EmitInnerLoopTiled(&lhs_tile_loader, /*rows=*/row_count, + EmitInnerLoopTiled(&lhs_memory_tile, 
/*rows=*/row_count, &vector_accumulators); EmitInnerLoopEpilogue(/*current_tile_row=*/row, /*rows=*/row_count, &scalar_accumulators); @@ -546,14 +546,14 @@ void RowMajorMatrixVectorProductEmitter::Emit() { } void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled( - TileLoader* lhs_tile_loader, int64 rows, + MemoryTile* lhs_memory_tile, int64 rows, std::vector* vector_accumulators) { int64 column_limit = k() - (k() % tile_cols()); ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(), [&](llvm::Value* col) { std::vector lhs_tile = - lhs_tile_loader->LoadTile(/*minor_dim_offset=*/col); + lhs_memory_tile->LoadTile(/*minor_dim_offset=*/col); llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col); for (int i = 0; i < rows; i++) { llvm::Value* old_sum = (*vector_accumulators)[i].Get(); @@ -846,7 +846,7 @@ void MatrixMatrixBlockPanelEmitter::EmitInnerLoop( // rhs_loader will be used to load the tile off of the RHS, denoted as // <, ...> in the diagram. - TileLoader rhs_loader(vsl, ir_builder_, rhs_, dims().n(), k_i, + MemoryTile rhs_loader(vsl, ir_builder_, rhs_, dims().n(), k_i, k_tiling_factor); ksl_.For( "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) { -- GitLab From 2b99d9cbc7166efedaff9eee11744348da30fc8a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 18:55:30 -0700 Subject: [PATCH 139/902] Extracts the 'simplify pad node' optimization into its own method. 
PiperOrigin-RevId: 197989813 --- .../grappler/optimizers/constant_folding.cc | 62 ++++++++++++------- .../grappler/optimizers/constant_folding.h | 4 ++ 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 3b56f10309..8cd1968df7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1913,28 +1913,14 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsPad(*node) && - properties->GetInputProperties(node->name()).size() >= 2) { - const auto& p = properties->GetInputProperties(node->name())[1]; - if (TensorShape::IsValid(p.shape()) && p.has_value()) { - Tensor paddings(p.dtype(), p.shape()); - if (!paddings.FromProto(p.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - p.value().DebugString()); - } - // The node is replaceable iff all values in paddings are 0. - bool replaceable = true; - // The operation requires it to be int32 value so we don't check for - // 1nt64. 
- const auto flatten = paddings.flat(); - for (int j = 0; replaceable && j < flatten.size(); ++j) { - replaceable &= flatten(j) == 0; - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } - } + bool simplify_pad_successful = false; + Status simplify_pad_status = + SimplifyPad(*properties, use_shape_info, optimized_graph, node, + &simplify_pad_successful); + if (!simplify_pad_status.ok()) { + return simplify_pad_status; + } else if (simplify_pad_successful) { + return Status::OK(); } if (SimplifySqueeze(*properties, use_shape_info, optimized_graph, node)) { @@ -2010,6 +1996,38 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +Status ConstantFolding::SimplifyPad(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, + bool* success) { + if (use_shape_info && IsPad(*node) && + properties.GetInputProperties(node->name()).size() >= 2) { + const auto& p = properties.GetInputProperties(node->name())[1]; + if (TensorShape::IsValid(p.shape()) && p.has_value()) { + Tensor paddings(p.dtype(), p.shape()); + if (!paddings.FromProto(p.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + p.value().DebugString()); + } + // The node is replaceable iff all values in paddings are 0. + bool replaceable = true; + // The operation requires it to be int32 value so we don't check for + // 1nt64. 
+ const auto flatten = paddings.flat(); + for (int j = 0; replaceable && j < flatten.size(); ++j) { + replaceable &= flatten(j) == 0; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + *success = true; + return Status::OK(); + } + } + } + *success = false; + return Status::OK(); +} + bool ConstantFolding::SimplifySqueeze(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 55ad686bc5..fa9249f50c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -174,6 +174,10 @@ class ConstantFolding : public GraphOptimizer { bool SimplifySqueeze(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node); + // Simplifies a Pad operation to an Identity operation if applicable. + Status SimplifyPad(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From b59833c3fd91511b33255369016868e4ae6cda2e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 24 May 2018 19:12:26 -0700 Subject: [PATCH 140/902] Merge changes from github. Revert #18413. Too many internal test failures due to the name scope change caused by this change. Revert #18192. Cannot use re2::StringPiece internally. Need alternative for set call. Will pull and clean this up in a separate change. 
PiperOrigin-RevId: 197991247 --- CONTRIBUTING.md | 11 + README.md | 39 +- RELEASE.md | 7 +- SECURITY.md | 2 +- configure.py | 3 + .../jit/encapsulate_subgraphs_pass.cc | 2 +- tensorflow/compiler/xla/README.md | 8 +- .../xla/service/conditional_simplifier.cc | 2 +- .../compiler/xla/service/copy_insertion.cc | 2 +- .../compiler/xla/service/cpu/ir_function.h | 4 +- .../xla/service/cpu/shape_partition.h | 2 +- .../compiler/xla/service/despecializer.h | 2 +- .../xla/service/gpu/ir_emitter_unnested.h | 2 +- .../compiler/xla/service/hlo_evaluator.cc | 1 + .../xla/service/interpreter/README.md | 2 +- .../compiler/xla/service/layout_assignment.h | 4 +- .../xla/service/reduce_precision_insertion.cc | 2 +- .../compiler/xla/service/source_map_util.h | 2 +- tensorflow/compiler/xla/shape_util.h | 2 +- .../compiler/xla/tests/dot_operation_test.cc | 18 +- tensorflow/compiler/xla/tests/tuple_test.cc | 2 +- tensorflow/compiler/xla/xlalogo.png | Bin 0 -> 46785 bytes tensorflow/contrib/autograph/impl/config.py | 2 +- .../autograph/operators/control_flow.py | 2 +- .../python/training/functions/gbdt_batch.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 29 +- tensorflow/contrib/cmake/external/zlib.cmake | 3 +- tensorflow/contrib/cmake/tf_tests.cmake | 2 + .../contrib/data/python/kernel_tests/BUILD | 4 + .../data/python/kernel_tests/resample_test.py | 109 +++- tensorflow/contrib/data/python/ops/BUILD | 2 + .../contrib/data/python/ops/resampling.py | 265 ++++++--- .../ops/bijectors/cholesky_outer_product.py | 2 +- tensorflow/contrib/eager/README.md | 2 +- tensorflow/contrib/ffmpeg/ffmpeg_lib.h | 2 +- .../python/ops/critical_section_ops.py | 2 +- .../estimator/python/gan_estimator_impl.py | 7 +- .../estimator/python/gan_estimator_test.py | 11 + .../gan/python/estimator/python/head_impl.py | 45 +- .../gan/python/estimator/python/head_test.py | 7 +- .../features/python/conditioning_utils.py | 2 +- tensorflow/contrib/graph_editor/transform.py | 2 +- .../hvx_ops_support_checker_main.cc | 2 
+- tensorflow/contrib/image/__init__.py | 2 +- tensorflow/contrib/kfac/examples/convnet.py | 2 +- .../contrib/kfac/python/ops/optimizer.py | 6 +- .../contrib/kfac/python/ops/placement.py | 2 +- .../contrib/layers/python/layers/layers.py | 142 ++++- .../layers/python/layers/layers_test.py | 15 +- .../learn/utils/saved_model_export_utils.py | 3 +- tensorflow/contrib/lite/BUILD | 2 - tensorflow/contrib/lite/Makefile | 19 +- .../contrib/lite/examples/minimal/minimal.cc | 71 +++ tensorflow/contrib/lite/g3doc/rpi.md | 2 +- .../internal/optimized/optimized_ops.h | 2 +- .../internal/reference/reference_ops.h | 4 +- tensorflow/contrib/lite/schema/schema.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v0.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v1.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v2.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v3.fbs | 4 +- .../contrib/lite/testing/generate_examples.py | 4 +- .../contrib/lite/testing/tflite_driver.cc | 4 +- .../lite/toco/g3doc/cmdline_examples.md | 4 +- .../contrib/lite/toco/tflite/operator.h | 4 +- tensorflow/contrib/lite/toco/toco_flags.proto | 2 +- .../elastic_average_optimizer_test.py | 2 +- .../training/model_average_optimizer_test.py | 4 +- .../contrib/signal/python/ops/window_ops.py | 4 +- .../python/slim/data/tfexample_decoder.py | 2 +- .../contrib/slim/python/slim/learning.py | 2 +- .../tensorboard/db/summary_db_writer.cc | 22 +- .../tensorboard/db/summary_db_writer_test.cc | 50 ++ tensorflow/contrib/tensorrt/BUILD | 55 +- .../contrib/tensorrt/convert/convert_graph.cc | 123 +++-- .../contrib/tensorrt/convert/convert_graph.h | 10 + .../contrib/tensorrt/convert/convert_nodes.cc | 501 ++++++++---------- .../contrib/tensorrt/convert/convert_nodes.h | 14 +- .../tensorrt/convert/trt_optimization_pass.cc | 246 +++++++++ .../tensorrt/convert/trt_optimization_pass.h | 73 +++ .../tensorrt/custom_plugin_examples/BUILD | 118 +++++ .../custom_plugin_examples/__init__.py | 24 + 
.../tensorrt/custom_plugin_examples/inc_op.py | 32 ++ .../inc_op_kernel.cu.cc | 84 +++ .../custom_plugin_examples/inc_op_kernel.h | 35 ++ .../custom_plugin_examples/inc_op_plugin.cc | 86 +++ .../custom_plugin_examples/inc_op_plugin.h | 102 ++++ .../custom_plugin_examples/ops/inc_op.cc | 36 ++ .../custom_plugin_examples/plugin_test.py | 95 ++++ .../contrib/tensorrt/kernels/trt_engine_op.cc | 54 +- .../contrib/tensorrt/kernels/trt_engine_op.h | 11 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 2 +- .../contrib/tensorrt/plugin/trt_plugin.cc | 106 ++++ .../contrib/tensorrt/plugin/trt_plugin.h | 74 +++ .../tensorrt/plugin/trt_plugin_factory.cc | 78 +++ .../tensorrt/plugin/trt_plugin_factory.h | 102 ++++ .../plugin/trt_plugin_factory_test.cc | 125 +++++ .../tensorrt/plugin/trt_plugin_utils.cc | 42 ++ .../tensorrt/plugin/trt_plugin_utils.h | 46 ++ .../tensorrt/resources/trt_allocator.cc | 62 +++ .../tensorrt/resources/trt_allocator.h | 68 +++ .../tensorrt/resources/trt_resources.h | 44 +- .../contrib/tensorrt/segment/segment.cc | 379 +++++++++++-- tensorflow/contrib/tensorrt/segment/segment.h | 18 +- .../contrib/tensorrt/segment/segment_test.cc | 16 +- .../contrib/tensorrt/shape_fn/trt_shfn.cc | 4 +- .../contrib/tensorrt/test/test_tftrt.py | 64 ++- .../tensorrt/test/tf_trt_integration_test.py | 19 +- .../contrib/tpu/python/tpu/tpu_context.py | 2 +- tensorflow/contrib/verbs/README.md | 2 +- tensorflow/core/BUILD | 7 +- .../base_api/api_def_RegexFullMatch.pbtxt | 30 ++ .../python_api/api_def_RegexFullMatch.pbtxt | 4 + tensorflow/core/common_runtime/broadcaster.cc | 4 +- .../core/common_runtime/buf_rendezvous.h | 2 +- .../core/common_runtime/ring_reducer.cc | 2 +- .../common_runtime/scoped_allocator_mgr.cc | 2 +- tensorflow/core/debug/debug_io_utils.cc | 2 +- .../rpc/grpc_worker_cache.cc | 2 +- tensorflow/core/example/example.proto | 2 +- .../example_parser_configuration.proto | 1 + tensorflow/core/example/feature.proto | 2 +- .../framework/allocation_description.proto | 
1 + tensorflow/core/framework/api_def.proto | 1 + tensorflow/core/framework/attr_value.proto | 2 +- tensorflow/core/framework/cost_graph.proto | 2 +- .../core/framework/device_attributes.proto | 1 + tensorflow/core/framework/function.proto | 2 +- tensorflow/core/framework/graph.proto | 2 +- .../core/framework/graph_transfer_info.proto | 2 +- tensorflow/core/framework/iterator.proto | 1 + tensorflow/core/framework/kernel_def.proto | 2 +- tensorflow/core/framework/log_memory.proto | 2 +- tensorflow/core/framework/node_def.proto | 2 +- tensorflow/core/framework/op_def.proto | 2 +- tensorflow/core/framework/op_gen_lib.h | 4 +- tensorflow/core/framework/op_kernel.h | 2 +- tensorflow/core/framework/reader_base.proto | 1 + .../remote_fused_graph_execute_info.proto | 2 +- .../core/framework/resource_handle.proto | 1 + tensorflow/core/framework/step_stats.proto | 2 +- tensorflow/core/framework/summary.proto | 2 +- tensorflow/core/framework/tensor.proto | 2 +- .../core/framework/tensor_description.proto | 2 +- tensorflow/core/framework/tensor_shape.proto | 1 + tensorflow/core/framework/tensor_slice.proto | 1 + tensorflow/core/framework/types.proto | 1 + tensorflow/core/framework/variable.proto | 1 + tensorflow/core/framework/versions.proto | 1 + tensorflow/core/graph/mkl_layout_pass_test.cc | 27 + tensorflow/core/graph/while_context.h | 2 +- .../core/grappler/costs/graph_properties.cc | 2 +- .../core/grappler/costs/virtual_scheduler.h | 2 +- .../grappler/optimizers/layout_optimizer.cc | 2 +- tensorflow/core/kernels/BUILD | 8 + .../core/kernels/batch_matmul_op_impl.h | 106 +++- .../core/kernels/batch_matmul_op_real.cc | 4 + .../adaptive_shared_batch_scheduler.h | 2 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 4 +- tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 2 +- tensorflow/core/kernels/nth_element_op.cc | 2 +- .../core/kernels/regex_full_match_op.cc | 59 +++ tensorflow/core/kernels/roll_op.cc | 2 +- .../core/kernels/segment_reduction_ops.cc | 4 +- 
.../core/kernels/segment_reduction_ops.h | 2 +- tensorflow/core/lib/core/error_codes.proto | 1 + tensorflow/core/ops/image_ops.cc | 19 + tensorflow/core/ops/image_ops_test.cc | 19 + tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/nn_ops.cc | 3 +- tensorflow/core/ops/random_ops.cc | 10 +- tensorflow/core/ops/string_ops.cc | 11 + .../core/platform/cloud/gcs_file_system.cc | 2 +- tensorflow/core/platform/cloud/gcs_throttle.h | 2 +- .../core/profiler/g3doc/command_line.md | 2 +- tensorflow/core/protobuf/cluster.proto | 1 + tensorflow/core/protobuf/config.proto | 2 +- tensorflow/core/protobuf/control_flow.proto | 1 + .../core/protobuf/critical_section.proto | 1 + tensorflow/core/protobuf/debug.proto | 1 + .../core/protobuf/device_properties.proto | 1 + tensorflow/core/protobuf/master.proto | 2 +- tensorflow/core/protobuf/master_service.proto | 2 +- tensorflow/core/protobuf/meta_graph.proto | 2 +- tensorflow/core/protobuf/named_tensor.proto | 2 +- tensorflow/core/protobuf/queue_runner.proto | 2 +- .../core/protobuf/rewriter_config.proto | 3 +- tensorflow/core/protobuf/saved_model.proto | 2 +- tensorflow/core/protobuf/saver.proto | 1 + tensorflow/core/protobuf/tensor_bundle.proto | 2 +- .../core/protobuf/tensorflow_server.proto | 2 +- tensorflow/core/protobuf/worker.proto | 2 +- tensorflow/core/protobuf/worker_service.proto | 2 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/cuda_device_functions.h | 2 +- tensorflow/core/util/mkl_util.h | 2 +- tensorflow/core/util/tensor_format.h | 2 +- .../api_guides/python/reading_data.md | 2 +- tensorflow/docs_src/community/benchmarks.md | 18 +- tensorflow/docs_src/community/swift.md | 2 +- tensorflow/docs_src/deploy/s3.md | 2 +- tensorflow/docs_src/extend/adding_an_op.md | 63 ++- tensorflow/docs_src/extend/architecture.md | 14 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- 
tensorflow/docs_src/install/install_linux.md | 18 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 4 +- tensorflow/docs_src/mobile/mobile_intro.md | 2 +- tensorflow/docs_src/mobile/tflite/index.md | 2 +- tensorflow/docs_src/programmers_guide/faq.md | 17 +- .../docs_src/programmers_guide/tensors.md | 6 +- .../docs_src/programmers_guide/variables.md | 2 +- tensorflow/docs_src/tutorials/layers.md | 1 - .../examples/learn/text_classification_cnn.py | 2 +- tensorflow/go/op/wrappers.go | 2 +- tensorflow/python/data/util/nest.py | 2 +- tensorflow/python/debug/cli/curses_ui.py | 36 +- tensorflow/python/estimator/estimator.py | 2 +- .../inputs/queues/feeding_functions.py | 2 +- tensorflow/python/estimator/keras.py | 2 +- tensorflow/python/estimator/training.py | 2 +- .../python/feature_column/feature_column.py | 6 +- .../python/framework/fast_tensor_util.pyx | 12 + tensorflow/python/framework/ops.py | 26 +- tensorflow/python/framework/tensor_util.py | 12 +- tensorflow/python/framework/test_util.py | 2 +- tensorflow/python/keras/utils/__init__.py | 1 + tensorflow/python/kernel_tests/BUILD | 12 + tensorflow/python/kernel_tests/conv1d_test.py | 4 +- .../kernel_tests/conv3d_transpose_test.py | 17 + .../kernel_tests/distributions/util_test.py | 2 +- .../python/kernel_tests/manip_ops_test.py | 2 +- .../kernel_tests/regex_full_match_op_test.py | 54 ++ .../segment_reduction_ops_test.py | 10 +- tensorflow/python/layers/base.py | 14 +- tensorflow/python/layers/base_test.py | 16 + tensorflow/python/ops/math_ops.py | 2 +- tensorflow/python/ops/string_ops.py | 2 + .../python/profiler/model_analyzer_test.py | 7 +- tensorflow/python/saved_model/builder_impl.py | 5 +- tensorflow/python/training/distribute.py | 2 +- tensorflow/python/training/saver.py | 2 +- tensorflow/python/util/tf_inspect.py | 2 +- tensorflow/python/util/util.cc | 2 +- tensorflow/python/util/util.h | 2 +- tensorflow/stream_executor/blas.h | 14 + 
tensorflow/stream_executor/cuda/cuda_blas.cc | 106 +++- tensorflow/stream_executor/cuda/cuda_blas.h | 6 +- tensorflow/stream_executor/stream.cc | 34 ++ tensorflow/stream_executor/stream.h | 14 + tensorflow/tensorflow.bzl | 4 +- tensorflow/tools/api/generator/BUILD | 1 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + .../tools/api/golden/tensorflow.strings.pbtxt | 7 + .../ci_build/install/install_pip_packages.sh | 11 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 4 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/graph_transforms/README.md | 2 +- .../tools/pip_package/build_pip_package.sh | 2 +- tensorflow/tools/pip_package/setup.py | 4 +- third_party/examples/eager/spinn/README.md | 2 +- third_party/gpus/cuda_configure.bzl | 2 +- third_party/mkl/BUILD | 2 + 266 files changed, 4146 insertions(+), 940 deletions(-) create mode 100644 tensorflow/compiler/xla/xlalogo.png create mode 100644 tensorflow/contrib/lite/examples/minimal/minimal.cc create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py create mode 100644 
tensorflow/contrib/tensorrt/plugin/trt_plugin.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin.h create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.h create mode 100644 tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt create mode 100644 tensorflow/core/kernels/regex_full_match_op.cc create mode 100644 tensorflow/python/kernel_tests/regex_full_match_op_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.strings.pbtxt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dad41a88c..8669c25c45 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,16 @@ # Contributing guidelines +## Pull Request Checklist + +Before sending your pull requests, make sure you followed this list. + +- Read [contributing guidelines](CONTRIBUTING.md). +- Read [Code of Conduct](CODE_OF_CONDUCT.md). +- Ensure you have signed the [Contributor License Agreement (CLA)](https://cla.developers.google.com/). +- Check if my changes are consistent with the [guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#general-guidelines-and-philosophy-for-contribution). +- Changes are consistent with the [Coding Style](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#c-coding-style). +- Run [Unit Tests](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#running-unit-tests). 
+ ## How to become a contributor and submit your own code ### Contributor License Agreements diff --git a/README.md b/README.md index e1a50c87e2..6fb4486d0d 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ ----------------- -| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | -|-----------------|---------------------|------------------|-------------------|---------------|---------------| -| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) +| **`Documentation`** | +|-----------------| +| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while @@ -40,15 +40,6 @@ environment to install the nightly TensorFlow build. We support CPU and GPU packages on Linux, Mac, and Windows. 
-**Individual whl files** -* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/)) -* Linux GPU: [Python 
2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/)) -* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build 
history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/)) -* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/)) -* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/)) -* Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native 
libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) -([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)) - #### *Try your first TensorFlow program* ```shell $ python @@ -82,6 +73,30 @@ The TensorFlow project strives to abide by generally accepted best practices in [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) + +## Continuous build status + +### Official Builds + +| Build Type | Status | Artifacts | +| --- | --- | --- | +| **Linux CPU** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Linux GPU** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | [pypi](https://pypi.org/project/tf-nightly-gpu/) | +| **Linux XLA** | TBA | TBA | +| **MacOS** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Windows CPU** | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Windows GPU** | [![Status](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/badge/icon)](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/) | [pypi](https://pypi.org/project/tf-nightly-gpu/) | +| **Android** | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) | [![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg)](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native 
libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) [build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) | + + +### Community Supported Builds + +| Build Type | Status | Artifacts | +| --- | --- | --- | +| **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | +| **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | + + ## For more information * [TensorFlow Website](https://www.tensorflow.org) diff --git a/RELEASE.md b/RELEASE.md index 2717c75740..84d9d52868 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,7 +6,7 @@ * Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor. * Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability. * `tf.contrib.bayesflow` is moving out to it's own repo. -* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication. +* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication[1](#rpc-issue). ## Bug Fixes and Other Changes * `tf.data`: @@ -49,13 +49,14 @@ * Fix non-uniformity of orthogonal matrices. * Fix bug where multi-image Estimator eval summaries were not displayed correctly. +1 The cancellation logic of the RPC op contains a concurrency error. A fix has been submitted to master and will be part of the next release. 
+ ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: 4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu - # Release 1.7.0 ## Major Features And Improvements @@ -235,7 +236,7 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田 * Add `complex64` support to XLA compiler. * `bfloat` support is now added to XLA infrastructure. * Make `ClusterSpec` propagation work with XLA devices. - * Use a determinisitic executor to generate XLA graph. + * Use a deterministic executor to generate XLA graph. * `tf.contrib`: * `tf.contrib.distributions`: * Add `tf.contrib.distributions.Autoregressive`. 
diff --git a/SECURITY.md b/SECURITY.md index a5ce3a62ee..01886b613e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -173,7 +173,7 @@ the progress being made towards a fix and announcement. In addition, please include the following information along with your report: * Your name and affiliation (if any). -* A description the technical details of the vulnerabilities. It is very +* A description of the technical details of the vulnerabilities. It is very important to let us know how we can reproduce your findings. * An explanation who can exploit this vulnerability, and what they gain when doing so -- write an attack scenario. This will help us evaluate your report diff --git a/configure.py b/configure.py index 3a7f7b3de2..b6c32543cf 100644 --- a/configure.py +++ b/configure.py @@ -1222,6 +1222,9 @@ def set_tf_cuda_compute_capabilities(environ_cp): ask_cuda_compute_capabilities, default_cuda_compute_capabilities) # Check whether all capabilities from the input is valid all_valid = True + # Remove all whitespace characters before splitting the string + # that users may insert by accident, as this will result in error + tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split()) for compute_capability in tf_cuda_compute_capabilities.split(','): m = re.match('[0-9]+.[0-9]+', compute_capability) if not m: diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index f06debaf31..6d1e3325eb 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -240,7 +240,7 @@ class Encapsulator { // Once edges between compiled and outside_compilation clusters have been // replaced by send/recv ops, some dependencies may no longer be apparent. // A clustering pass finds all the dependencies between HC nodes that are only - // present as a result of edges between nodes in outside_compilaton clusters. 
+ // present as a result of edges between nodes in outside_compilation clusters. // Suppose there is a path from outside_compilation cluster C in subgraph S // to outside_compilation cluster D in subgraph T. If S != T then a control // edge is added from the call node for S to the call node for T, which diff --git a/tensorflow/compiler/xla/README.md b/tensorflow/compiler/xla/README.md index c93c39e180..39f8caaa96 100644 --- a/tensorflow/compiler/xla/README.md +++ b/tensorflow/compiler/xla/README.md @@ -1 +1,7 @@ -This is the home of XLA. +

+ +

+ +XLA (Accelerated Linear Algebra) is a domain-specific compiler for linear +algebra that optimizes TensorFlow computations. See the +[documentation](https://www.tensorflow.org/performance/xla/) for more details. diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index e560abc87f..e9ec796121 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -35,7 +35,7 @@ namespace xla { // Tries to replace a conditional with a call operation of the corresponding // computation. If the given conditional has a constant predicate, tries to -// replace it with a call to its true/false computation as appropirate and then +// replace it with a call to its true/false computation as appropriate and then // inline that computation. // // Returns true if it made a change to the graph. diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index dce2014564..33d8338809 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -64,7 +64,7 @@ struct SpecialCaseCopyPolicy { // output tuple. bool copy_root_replicated_buffers = false; // If true, insert a copy if a buffer coming from a constant or a parameter - // is found wihtin the output tuple. + // is found within the output tuple. bool copy_parameters_and_constants = false; }; diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h index 557aa4a6bf..2e55181eed 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.h +++ b/tensorflow/compiler/xla/service/cpu/ir_function.h @@ -33,8 +33,8 @@ namespace cpu { // emitters for function and function argument access. 
// The llvm::Function is created with the standard function signature // used in the XLA CPU backend (see ir_function.cc for argument details). -// In addtion IrFunction saves the callers IR insert point during contruction, -// and restores it after desctruction. +// In addition IrFunction saves the callers IR insert point during construction, +// and restores it after destruction. // // Example usage: // diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.h b/tensorflow/compiler/xla/service/cpu/shape_partition.h index 33d02b70e6..db2cda2936 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.h +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.h @@ -38,7 +38,7 @@ namespace cpu { // // [0, 1), [1, 2), [2, 3), [3, 4), [4, 5) [5, 8) // -// Note that the last partition has residule because the dimension size is +// Note that the last partition has residual because the dimension size is // not a multiple of the partition count. // // diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h index af48f4ab6e..cc1695b7f8 100644 --- a/tensorflow/compiler/xla/service/despecializer.h +++ b/tensorflow/compiler/xla/service/despecializer.h @@ -25,7 +25,7 @@ namespace xla { // Creates an HloPassPipeline containing multiple HloPasses that can // despecialize an optimized HloModule. This is useful to run an HloModule -// optimized for one specfic platform on a different platform (undoing platform +// optimized for one specific platform on a different platform (undoing platform // specific passes) with matching numerics for comparison. 
// // Current despecialization passes are Defuser, ImplicitBroadcastRemover, diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index a1d4dca5e0..b41eaa303b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -38,7 +38,7 @@ namespace gpu { // // Examples of things that are not unnested computations: // -// - The reducer of a kReduce HLO. This is emited using IrEmitterNested. +// - The reducer of a kReduce HLO. This is emitted using IrEmitterNested. // - The body of a fusion node. IrEmitterUnenested emits the relevant code // within a kernel function using FusedIrEmitter. (FusedIrEmitter is not // really an IrEmitter, but is more an "IR generator generator".) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 2beac3227e..fa59a5fb20 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -135,6 +135,7 @@ StatusOr> Compare( } // namespace + HloEvaluator::HloEvaluator(int64 max_loop_iterations) : max_loop_iterations_(max_loop_iterations) { typed_visitors_[PRED] = MakeUnique>(this); diff --git a/tensorflow/compiler/xla/service/interpreter/README.md b/tensorflow/compiler/xla/service/interpreter/README.md index 4c19a1b916..0b21b251c3 100644 --- a/tensorflow/compiler/xla/service/interpreter/README.md +++ b/tensorflow/compiler/xla/service/interpreter/README.md @@ -5,7 +5,7 @@ evaluating the result of the HLO graph directly with HloEvaluator, without lowering it further (to LLVM IR for example) before execution as other backends (CPU and GPU for example) do. 
-Its key componenets are: +Its key components are: * [`InterpreterCompiler`] despite the inherited naming of "compiler", all `InterpreterCompiler` really does is the following: diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 8b4e07995a..c287cca0c5 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -282,8 +282,8 @@ class LayoutAssignment : public HloPassInterface { // the case that no particular layout is requested. // // channel_constraints is both an input and output. Any sends or recvs that - // are present in channel_constraints will be layed out as constrained. Any - // unconstrained sends or recvs will be layed out as locally optimal and their + // are present in channel_constraints will be laid out as constrained. Any + // unconstrained sends or recvs will be laid out as locally optimal and their // layout will be added as a constraint to channel_constraints. // // If channel_constraints is nullptr, no kSend or kRecvs must be contained diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc index e2c07e3827..688cceff0c 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc @@ -75,7 +75,7 @@ StatusOr ReducePrecisionInsertion::insert_after( return false; } - // Check that we haven't already inserted an equivalant reduce-precision + // Check that we haven't already inserted an equivalent reduce-precision // operation after this instruction. (The zero-user case occurs when this is // the root instruction.) 
if (instruction->user_count() > 0) { diff --git a/tensorflow/compiler/xla/service/source_map_util.h b/tensorflow/compiler/xla/service/source_map_util.h index a776d745f4..18e2651abb 100644 --- a/tensorflow/compiler/xla/service/source_map_util.h +++ b/tensorflow/compiler/xla/service/source_map_util.h @@ -23,7 +23,7 @@ limitations under the License. namespace xla { namespace source_map_util { -// Creates an INVALID_ARUGMENT status with the given format string. +// Creates an INVALID_ARGUMENT status with the given format string. // // Also, attempts to extract the OpMetadata for parameter_number on executable // and append it to the status message for source mapping to user code. diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 73e014805f..6f5765849a 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -234,7 +234,7 @@ class ShapeUtil { } // Returns the higher-precision element type if a and b are both floating - // point types; otherwise, checks that that they have the same element type + // point types; otherwise, checks that they have the same element type // and returns it. static PrimitiveType HigherPrecisionElementType(const Shape& a, const Shape& b) { diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index efa5aed2d1..0fd846cef8 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -61,7 +61,7 @@ using TypesF16F32F64CF64 = ::testing::Types; #endif // Check that we can safely pass an input tuple's elements to a dot operation. 
-TEST_F(DotOperationTest, DotOfInputTupleElem) { +XLA_TEST_F(DotOperationTest, DotOfInputTupleElem) { XlaBuilder builder(TestName()); XlaOp param; @@ -798,7 +798,7 @@ XLA_TYPED_TEST(DotOperationTest_F16F32F64, this->error_spec_); } -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { +XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { std::unique_ptr> constant_lhs_array(new Array2D( {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); std::unique_ptr> constant_rhs_array( @@ -826,7 +826,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { ComputeAndCompareR2(&builder, expected, {}, error_spec_); } -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { +XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { std::unique_ptr> constant_lhs_array(new Array2D( {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); std::unique_ptr> constant_rhs_array( @@ -855,7 +855,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( DotOfGatherOptimizationWithConstRHSReverseMM)))) { std::unique_ptr> constant_lhs_array( @@ -886,7 +886,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( DotOfGatherOptimizationWithConstLHSReverseMM)))) { std::unique_ptr> constant_lhs_array( @@ -917,7 +917,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { std::unique_ptr> constant_lhs_array( @@ -953,7 +953,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { std::unique_ptr> constant_lhs_array( @@ -989,7 +989,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { std::unique_ptr> constant_lhs_array(new Array2D( @@ -1017,7 +1017,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { std::unique_ptr> constant_lhs_array(new Array2D( diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index 098443824e..41189231b9 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -514,7 +514,7 @@ XLA_TEST_F(TupleTest, ComplexTuples) { class TupleHloTest : public HloTestBase {}; // Disabled on the interpreter because bitcast doesn't exist on the interpreter. 
-TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) { +XLA_TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) { const char* testcase = R"( HloModule m diff --git a/tensorflow/compiler/xla/xlalogo.png b/tensorflow/compiler/xla/xlalogo.png new file mode 100644 index 0000000000000000000000000000000000000000..7a0a295953d0c47b23718197dcbab1677b337455 GIT binary patch literal 46785 zcmeAS@N?(olHy`uVBq!ia0y~yU_8#iz!=8C#=yWJb1W=`fq{V~-O<;Pfnj4m_n$;o z1_lO&WRD&6 zh2cL4F4((#G6MqxXMsm#F#|)48wfLsOmTK(U|?V`@$_|Nf5arp!KsrTmh_E*p@G5E z#WAGf)|G}7T2De zbMy6@z1M&1ISCpVi?y#@HN``<`Je3GnH`BJ@NEWqsH(S!G@|-`m)^t#X0) zW(v;dY@b_FgzV>(JvBt#S>#3({lR>jXaeMsKNldVPQSi%*NM`_4YT=tH6PevjFL z9skVV{l9sA!=vnz+w5c8zwt_Q*{<4SsK8RS?@#HUFSq=+$fZhL|E_t;k!6F%mjjC~^K zzvbAY9|1<4^6p9p*|+Ea&f5F9W}kcLM6o8Oc_-(`1i5g`XWTMJ>tXob;x{}A2EH%1 z-oD~5^LOeGeW};%Zvq-@ckVTCEnFb-so#Costp^9W&cbw{akSQN7d9Rx|gf2KR)`y zanIH|HNi@adoF277U{*Su%ap$RB4YO^3HCZj{4S#%je_YGf^NhF6 z#j8RlyG85tIVxR9bk_)1ZSz|_>64HA?yV74#;au7J7((U-&Jxz90X^x2yW;$1SxNykD~giwLvs{B$d+uw@C0n9iksvmQ~wXUtb0?p1Z# zI&l%V+^*hL)z>y}KNZHPA@zX&yz`$2s|#;r3jWz&zhLdgbr%-T5)pD*uz#`a{Wle> zmmb}=p3lg=C~*RV%ZxxxRiBF0IiUgzR3)tCR8`vdlqN6WPoT z#lR6dx4pdnW9*Mto-Iaq%5%Dp7B*xu6+i!ZZ|cdIh=VmP`@BE)PCF|XnpUA#t z>ca1?hNnxdnHlVQ14W-ajki!Pw2>{`@j`aiDY@k*>?`-bx|LLTBv*Qd_4)c4iF3r$ z;x;VLojM`;@g4W~3#Rd|-}EKos8S2l52wH@TWW-+|J}gK?3}?pWxB?m=)i*i)#sMR z=iaYNXnq_cHKnNk_kG1Q7g=_(JyUIk80MdRQQu>5McbzIO;f{!{?$>h=d8Bh{%37w zbFJ)*dnTJf`7!y4t;MpA4?kk-51hSw`}sAmpZgl7RQqvyB%J$JT5?W#e~PMc$cxWY z>O;V*S6`qmI)mDo+&t0 z$jaEKKeb=V@JW^1d1IE#AFpkEt+m)lq2K!Zgfo^d-CG(C9d@k=_L<24NlNB+QlU)N ztOM=(JUWJFxeo`Ot6W|bIk(Yx-cMoOnBOf^bhv9)ed>vxvsfXfTJqmxeeaJK?mGDA zgIw@Mi|fS>;Uo1td@|v0EuR1SzR^fvicn}DvvkApbxc2Gj$N4C(&+nS<(r8NET!Lk zyRD}G|I(Ga`TngBkvvP5O$%RsLcVhEEg_W;)7RItZ~y*HZ_UI{e`(#q@mai+wZjN|F?wM#di-L{_7F#P>% zr4R-eS$0NE+v&EeSOUx!KQC z(As!~uj!T($MwqN5)2F?j6zHwogZDBEWbbXX4ULniD|#MY}aVoINW`_xmTPsd zqW^oDZ(=CzxcFN`apsq68?~7@epVc7dihQCxy;v? 
zs|oS!Nx_%;G(S1SUD~iMLgqRT@50x;*AL12^VDuTR_G_^{Z>_QVOfH7E`RvvSKF4l zbhYO=GiQFy-nXTdo|D`WTm5}b1BZ*jIx-+KNpkAI5K=dVpH_;9(julU@H z{0T84DYh1|(U$qC{VYppna`xG$ ziZ?7RCF?q_T7=hrZ94aod9SQTWzPT7u0O^vXC_DM^xgkblKl0Vuc*AIihaED+{ee3 z-cWWjx#=z`R8e;CB1^7`^|aKM-Pfg>7F~UK?DqfO@3pp(f6vdK?(*%%oUi_|<`x0l zZRYM;7j^3J@#rv>2gh0!6%V+ymYzFr+{BP9P@vRu!pD5}7p|_~$Nbm#e|RQ&{9Mht ztrtrUO~lYGD3Usk2y#L8B=i*}jaM<=Q=$yUv%$}}o;3psW9`*SG;hQ$6`rvzg% zj~@_jX*|pFEuesxs2y)MQ)+#>p8+#1lH|ji~skU@yGl6MQbl4-(q)mTCjYv z?ETp5g8AFtu68=J@bBEqo3|BRu*uL~F6eMu(zaiir(aih6%)tr%72M#ueGOdy(7Hu z>0#!K+)hyOKSgu0luUIi!`!cOzrI{}uxZ7V<*$VW&WR|=ye@m&@O)Pp!@A-=xjD_h zw)H=(mgjx+>c93ahMQvDk7xL>%`m=RAEfsAAIImQ+@`$Drl6`zVSEgK&Uo1PzsUX` z_f6u0gsEcXL;m~b6-Pe3*I)W>KQcpsiAC|vbxVtk zueM*l%=)_Wc3x+4!iN19oM$gOGh@o_b=xBDoR#vob&i?mBNMIpbmi~O=eKC*UGEd$ zdwZ+%MTYnlLZV;iAKO}f_Q3Q1Q6)E%ZN!czxLf=EIv%ghZ?g8Za?rUsFRVAtSv!ZB z|JEtN!xg+QV&Cqa*KapfkoUt&c}ufWsf&tUO1jJD6fc`mpr&eJ)UdBqcE^jQ;#rTj z{e7rV{M6-nnd0$dUGDEcEZf3da69VdL1n|o4c;GTHjC_e{T*x!sQ|;GAf*?s$qWotZ5MxYC|-1*H@S?ZRwirRwD8*3&C*HMRktS^ z*px6WJHah1Cv!Wg@CyfP$sf;0i&7pP@zV64)3Nv1U5R(M3axE!Zgu83ATGiY64RHz z#)fD5KZ)FntDO3|8g5snu2?Ml|NU3f%jvhinI=Aak@A-3=ZwI&76}K6C8zlD?B;$^ z=XT)Rjf1bb79Y~*ufMMJ*ePLMe%)f*skVNG3w?gfQDx*=lYQJhK05aP8w>INH}C8& zrtB_Dcz8?wMscilLr$Sz0^6d?cD?)mFJTJ56YxTCp{|(w+9?ZIyT6n#4}KozRe5H~ z9hp=?2BjdSHx~0w%W*cGPy1r;C~@PJZZZP{$E!6aAt(6n|LeTRELZo@=asLI=yM;# zLZ25`uZV$q78e4KYrOYe>bGvfryZTE{O%|pjgFUw+_Nfk zWk{4iA1AcgzUalC{qEQPg`21Od0H=F@B zwU6i6%B?=I(TO31K}~RylSA|NNg9j_JDnZ+nHNT#FMZ1(_~`TOInVij9hy|e-kc+~ z^ntXg>qg^5aFNQW=-|yCet)vxe6fi|HDD{`*>g;~X>Z1xGd=QwrTd{1Bf zar>*3-v9YcJd51}3myd??|S|F*R-o1l@BZa1Rw97XUY~}o58byDS%DlZa+s#&^@hl zf-EhyG1jXO?tAshIBvcCoA(ELUK(@TuF*Wz!0EkNYf)g%%NX`;XC&RzjpF-_wm;Qw zm%g}_dET2_j!*0&&beLw^St0qSKq7U|9keyMt?SIk^DIGMcnnqv(CFJl^S5NwM zq2~1a5C*Y^7V*vQMb&m%y)7P-**Ci{NUfTh(=TiO{i(-^313YwYTm!9`K_DtBq(vu za9PmIAL#G9{Ct;z{Ji;VKIU$UwSEwp+5U>Tq+a*X)%iyk*RMG0djI_G`FY((AEw;@ z&%bn1y8o7SrB{|K?Eb#OxO!c7J5%_ri=R0ZEg!4}H=Ou<))%e&^H6nmw0Z7cd1p!f 
zxec0Xt(>0kltM#q+}O#tEB9!>LC^YdqvN0UeSZFRovh_^natx4xN{Fb=da&X)!Vqw zwBPsF{IJ!lH|9RQ{Nv&GeU{lP6KkxaI=2LgkvO&MSw&`-2ulhZ#|njzBYUrY zHr)OFMuh02f6{XQ{!V@$mvkfZice~PaL??&=Rn@{U?@4zc&l^fnQEmo?N6+;&nz@9 z*fo83nTFH?cGDk+<&Tz!$u>-0H>>3NoNW;@+CiEf!hIYv(@)lHXsYyFc(4Az-rL<) ze1C$f^r}S)mdaYosq<6?+!DMHamD?KYkB>i_YZc5$$8&iUK76j2LHF;uM9gk6vndG zZK%9_hVA&zgod33(shR~@%5LzWw`T5=BzpQ#9jUmkM93j_||svujkt|x$-Q2hU{9s zJ>7He6mj+?IzM_uPDn9Zd{}14ke{3r?Ybc+-}U&KEy{HXn@fM5Y_|!sthmp1WcK4$ zxoaY-r|Q<$Z9V$vgW=-u&t`wkfA?bVl6!AI#9cUPsQ$KM^+HkExG7~k;?FHM3#_ac zJwE40)YokP1^L0hTmG+%(CK?S_w|REeE+NLlTPYi_o#fcq+80pnCn5@jZDRal0zY-+>Q`_+|9idVf9JLc8EX;N zEpIZO20J$|5Apo@t|mjuy}Q0;?VQ*b2cJmt@+_Ud@vmTG&vC9`7Dh$+@U7P@!|U^Y zMqg(8ZDp`G6KuC+c|lJ1(Th(k4($24PnqZQ9)r(KNrC}AFK#jVm%Tk<`FzDbWr5sX zJol|#x73y-t$jMl1=JD`zrN2<`I$v{yV}8+y8V8PG0{6aKus_~hCfF<*0EK^eT_Ts z9-A7%_N4H++$78PjEQ0a6{pHey9|xrJ(!_%$oRW`+w5)mv!k`=eiuC)ciCf8to0u4 zw*e*&{2UpL+8>Ux^B+E#zWD8m&W+QuyN? zLBfG?!BR7@1ctnl*2k2IH~Pqg#$XvbbT?Xzb4= zlQc`bW$Bz~me(t-KbKwS`<8N@Pa%5FVt@Aqk2b%kvTM3*XRD}xTbpH`?fcr2*VlBp zrDdOQS}?_R=UR6Lr86t02&p~T$y#P_*u$6byO`-xGW&Whha>0Nb!Ml=g@hK@et4{| zcF_t{5wRT;z5HdB)}9}G{~y$Ux3~Hu)9qhTty4meR>Ute2)MsnPX9RnqqSexy2zcD zYwIpva+xbWL$)vKSJGA?*ZZIUOMUk~*K_pak8imZTQ)0yy6E!gR@8CJy)#6Ya@|p| zRAEuMcw`o*xFrjNkjFP+r((wXBhi9Ef(nWP@j{#D?fSKB@ipr!yveI9(z^nUp3S%> z*8SM_rOBfBUojsPPS?~&oVvDXZb8|kvWgAhV)^6Gh3s~%q35?HJ=FaDwdihF)3(L^ zqW9mtw_kRvP)}o-(wx15lY|0~H?qD@?YB!hp?lz4=#!oE`nwpM?B;Du*j0P<)8cun zrCz*t$N{A)4awq|;MJ=?zG&B994%k@V#3aCf3J2-nSAX;nbHX_>knaveZH;P;;hk- zZ0?{TlyCFN;j5(l)LQNL5}&<&`_H?a-^KSutxd}Be9FE!-KUPOJLm1Gr zF3~#gZf(@Hi`Z>2v&w(z6m{`r!E^UGQZ~Cw=Q-!;FKl%D+w)eNmFIADG`EX`$Mdd; z-r}p_dN1bKZCK2#c{%!dLf1#bWe+SASk_s;|65X9fB#@;&^nD)Rnvr)Esr~^Z%)3% zcR=@C;8&>^2eUUOJDbiD>v?&dy~W`=@BRb*=4<9(eR3n5ZRX#lCP$y-JY@))^0VKZ^h zHaDx|;?2^hGA=G{o+T^&@yeal7yW*p^9uWtk1oi#{b63Kf2jNPh51j{fB(69?M4f` zIX;h$*8exIxa;u$`0FJ#`3>IrU7-)&S@LOdiTETKUlwv;D9}-uE3zWyz=c1pa)EV$ zEF!K8QUmWqR35h1zw35uwa4;hUB%08STCL@|EuM|%=G!r@sctTQL1yx#TThwteZHw 
zY|WPqTMp}<3tad@{`masI!?~FSM;3RuFduD^$+2f+wRR>a_U>H|mPH-`~ZjQ(u!B zx;i&rCS{%G)7`nP9LM+mGLZfG$Uf34Y+-w_#1@7HS%Mn3w%*v7cBE@tKEZi?D9Cwz^G*w)%rvo4(unzKDMh>rZ4(mz!)g_l1D!%!Ml-&C&dJ z!ur@wP}k9^!OuE4W}=6T?9@+Jg3imdEtTcJ?>6u4Eyb2>AH6Bc54d9+f7iL>-jCdR zIrvsm;hC+u3HNI5ubg?NRaB>6=cZ_LtO7$K|Ls#R6BhJud@IO!I(ZXY(eARAS*xE; zz5nY$TSUIU^(48;+kFhJ?ipGbHT)};{qdmteE9h-Z@rt&TrAqr)4?)rO?9JR<)y=y z__kZLD_qeoxRuW5e6?gt?Mwf|_V2{F!sZq0-QAaan~}G#I5XhXrc39ZZ*uRwS$ul+ zx-HBSRh%zs&qgnM!mvQIhikvhpV;mm?#9Y}mc8FXzF8d2dt+*`D{?WD)k~4WLZeEj z)+y>!y`M4ch_yaf`-Ly-=7&}0yUU*3isD?#*;S@{$vM&F93um0lrHDRA;J1zu}>0C z@6pkDy5^C2wS@IT!D9?@yO~T3ridip_GHq^EReEcV)=m%$Z+GLxc3RR$po4xR@fp)bqpU&wXkcM|0-edUmY& zynkN(h3~z6U(DY$b+*!Jxn|G+iJjtyqpaOi z-sj%>@J_Pt^G1Qqae4A`t&MH-zE)%|b6+aUpb&DvFw|iCL63jjUAMM>?OUI$F?Cj% zm)hsETAKVit~bwR1kO}^D8;9|~TvWe8uXXi>KOpGr38TG6W^|V~C^L|yQfZ3!aX+nRF9D1DiyUF^! z-Nn54De{)jWo~CNOl*0!`~T$i{{#Ql#s6ntZh3C1W7^i9emR!!&J%=~+5|M7OE=mo za(M6yF6kGTd-lVnZ`y(jI9N(~MT2!ub5!VBG2MaB-=UDsy zd^hdv`?>Ed!dYi-E&6-r&6TZ->(n^4c_Snb7~T1QTlru9?05fVjel=_T2*8zrjUE! 
zq}`Io|Dvtdy^8W`aJQ3i`1XG%J45;N>?7~4&r|+-WU!zS}-n-Ke7bw2Y zUK|{2z3hgqMVQOt8<%$S^<|pKXG_F)Ps=oP<70EQ5_4tW{&_NYzq^%yWZ~yI+;KY> zJWFAYe>ai0r|g}fQo}R3oeI`6ImS2Z-@i>vnQv@lkuHCDeSRAAmZ$&jZ`|Z)`Mv)4 zTf=8fn@h_0&gpzCYiQGczhUiyf;W=lwx5?B!h)~)Qw5>>R~*Nt1(&v$rKwuC*KI!8?KlGDLM=fschp7*bL&+i_8 z!I%F8%g-zK)h{oexcl(Eug`ba)Te}<*z@`Gy^qgtzCS4#|2~t)x_0OK?_J;K^$Ix( zEnHgucw^wPKt|1z-q|^ziIUDOi!5eF&DSm2c${nP-g${i6O49-&hOc~$L{do+lzN~ z-~G%`%Wy{J!fsRM__d7sTi+!xtZGs{e%9EY;qRjNd$JeKtWy$juMSUN*ly;^cvI8j zmdlM>JNf=kmB^a+%tD-fhWAak;`s&*CW0Iet;vt?9DlbYlzrZz*XeRjNfrh@JPJRh zM865masM~Fyq@p8eQ zB8-93)5QNiORoLD=Rx~&yU4(=DV6E0!EYu%d~idoTbZ|k&&H#A(c`t{?KAmxzpuHd z(&WivZ(JgfmLKnaI{cnqy-_Y-;etgDi*J>xo%1i4cIlvFzKx!L^|O8zM-NUXm#XEu z1#B$ZuO4@Oeyn7$`_TU5+eCkN*e^bDoBQ0nV+xWI9##!68Oj8@#N1lV7##0?>{nvQ zY<AFHTOO~b%vp-`S#C4?0HU~6ZkU=zTe;TNGAF4Ua>90 z%L^YgXerK6|r>^vK7rQcgX*D>NlVN{OEGS zRZTVL#Ls!i_;>dU*g7YjvOIp6Ju8$^Q!>-pDUqEqg5wv1 zE4QNR;tv-NF*1B-(|GcD)itZ}s=w0TGHn{yw0K4gWf*KNto&ffdD z#y}x8GT;2^CO5_d`%m*c*t$;kOwX^}w>1Zyw+CJEVOGdE!;xa^Zk#&Lc9zSD6-@g! 
z&%Pbal_E2AjO23uiuc={DJaDEQ`L z_sZX9Kh@2hn6q0xm0vXKNDw@z|5T38?Dp2j3!?+yHm;LRSypf@x%@+;^tCrn1VknX zaTy1>Xk2GaW1EoFAQn>i;>j|j{k%71j~8xso0|LM#7Ew`dsI@>Bt=O`BUcqIJ|wOqUn~&CCb~~r=6U) zy2xnm`NW0^Tv;-EYWUcnxx3E1&;Hi!)C2?FP6O!+AIe0Qypda-_&ULwbAHkyi&LGS zngtjHUuiSV+3|a)czErfB}=v{2AZexJTPQo*iv4aGfY0v-2X@g$GX%#nCAVP|L-l^_TcXh zkRVB`D>mzAJ5FsyeQqjc+L`89=Fi>5 ze(8H|c)Wx=HW=$+S zRh<2B?uXSq857R`y5YGpxPveJmr(E}KK+fCqZ*kM?%Zmt?u*#=*wlBoqT-QfMK}A) zAF#QbTHoyFQZnKFdf_Mk-KeljeA{^D{5rC?CgXRpQ_mvbaD!^&iAqkEG5negHpYh* z8mCVPIHmlcUo5SB{lq1!v+rEL$X?R4sCDABK#Qjzp7k>-DJ&^=U2|;bD{b)-KlvLM z8@Ie%`}*kVHzvm0%1akrEc|fRUH+N&>g;*%1RwR81V$cVQJ!vU)W7m|lT%A_eC>>B zdw=jxY2M;&q`d8)Xx;n8`1wcfwdUxt>dt4cO%*Mt+E%sPP22Tto!=k- zB}}&0>omW%oG<@9L16V;p?JUIhMeuJZTV+@$}ha-@$gT3p0lri_(^Z;D_<1CZ2oZ1 z+gp2g#w_dKGZz{v>)x5&6V&oBt+ynAm%Tq*MQirfhKq_f?`DTAX=dpE=XQR%powjR z$@K$z^Y69>3SWtw9riM&?6m3K&+(P7Yrd#@|7I)fy14Q9+U=QBr>>~_Blu_6wr%g5 z4l)WR+Igny-DF*{MZlm#An91eyV~MQeQzITP1&^N@I=08dy4k&`L*7DiI%bPvUSpy z)h5c(GZq_9k14EYYbj|xw@~|e(E{B^mM`6IO*tVUh)av%36b7lxE{rc_g|Bv2oO1cXz%#;j{-|6)cWN8@CY#j@P+#KPjJ1uyPJi+M{6^}Q8)*ikq|aK|~rDO1Z@_HGO1c67LLytjDy&G6Wo zORsJ{B4+)zIv@4a@){c}~#uO8pZz9S2o z=>Kahwl?V){C_S()X8RqjJWu=3 zM%nNW>o@ZLmt*PJwZZ#Br z_@>uk+hk9#j=wXqK0B}DXn!77^y13PrQ4+J6nVJ+RlYgBa`&U_oU6p#POZMgC*MA6 z^<3@!e-H9*l->K*LrQMX0-Hm2>8GrYAN>99w|aa;^mnK20{eZq1BK){wRf8@OV_=i z)-`9#;}hclBZV&UofP_fYyPzNb&uktw(cvI=h$(2wa~jwrH08(3BN-5bow{OJ^wyM zn2%wGnR2q%i%TtbZAx?YF62;nAY5=IYDw;UPivFbz=zKd`$wk4aGd#ic6OS7YJch} zyYy|}*7?~y_|Clg`m*p}f6I9UZR5G@X1~k{ecF=Rdw2(QFxU0o)@H^Z?NvYju;;mX z|G#D!y8In?eA&ufGiEF8{Raaz7AG%ZyZT7xiAqwG&@l;?|BG8WIDT=g2~$pG-&=N`5j+iLWsz3@(@`b4++h;J7FZwHsiTivZg(=2Nz>IN8&Tj$mw$3V$Z z;;ZUfL**;C+UD!qW&Dm__3JZBpub_B-Lp+@@z*Y9FfkpGJ`1 zk!;ojmYMEv*RcIwm3^*eZ{3Fd%x5pBi&aGg`zS1)XL@V5mEqaK3m;#2{k(eW#bRrb zr}vVypQi7q?mdw5#`N8}j&-uGs&6Og2*&>xn*aNStVogC_IV+VyZ5|e@DTMZ>|=Fu z3w}^t&i#AV-skgQd6eZolHsnKwP%rGC?k`d=(M2j9`>cOyH0hmJvsT_Ua8jXVp8a> z8CvHwm5-Us-dbMzXu>fyoBDgdh4eFi>2fOG(^(c&XB;i_X#SHmg)@}i-debn?Z;mJ 
zyQenqx-R@FLFC=(cW=d<3(sw+f3--9Z=S{46M6SPIUh^8u}%B#jGXhw8{7iEc9`!j zW8iotGb8S4%*XrFbMo$Pc^k%)x~JmkRQ2R}maCUGC%@Y{SwAl$Vy|yto-cQx$VbEJ zrb6HDo~TgL+_p}(ucY9^toFiN#r2D8-ZF{>2d6NzC@(2|a7W^G=&t8o1v$shN||Q% zp0qFB(~)zX&wZ`qsmYi4792Y7-8VbEj^WnR#>vlS2H#AcY4Gg9jeA!+%|k^i{+d3v z+yCFZS!JG9$u@>Pb4~re#XWu};(wiwE#Ttznff&k)J#-T;3iCg=UKUor za7fwZ{Dmp+vsN0vlk{N}$jUuDw_Wz;?^(<4P7p})R{Zu@)h(rWQ-dg@iox-zWg<(m z+mlp2%$vIXbJN*ub=S)lQGb-Xia)(vGKH=2ww6W3F`iE`_F4?fjvDxK&No>ozG803 z0r!9}8bOz4$67ntt;QuyTdA9#76)vs+?zdJ(jR+{rDr4oJE@$J@ono20Kury! z-=(bIdA~eZRRyZ+~2^THdu#E)M+Qn5(#6C8~brhIt%n7@z7(cy2O z@0T!VZL3-0lLRJLYb{BB_+xq9aq;{0lQ}%4XOz5dzPQlf@QH0(KKU+EbY^QQS=!$m ze$F)^&85P3m!hh{lp=?1tsI}i7w$6exWlxyXD!n=ftpp@PldN;Ylp`RJ~)y;uRisv zy1!S{>-JX{%rC^hSm^&BHvR6f!h2ubq z#)G1ND<55Xxx!-i(eCJ84{0IE!;U9T9#}4Nd~Tx>Lwl&iG+tg$jibep&tCuV}EA zx%zN$t8Bj7}G^K_RMULPjzXo zxpv&oZ=O@hj!V|6Z#68{-jr3xhO28u-~P~_?EfNu?Q3(jmI(XE@0(_>IbjnJa(bn; z@uF<@DQ7OUoWB#bYX1G-7uMdnwOMh!pGC6kc{5itwF~7_8J94A*rUfB7uU&(GFD!R z?C$;C@$B{1M9ZnR(*%U2WNz=a`ncVJ;m_{<%jLiRtT%P}a>#}I;APbbrhLvV%MNzF zy>*a5t32n?ciRHvSHaA#b4{NG%oLm;6c%=Gqx}w0i)_z4XZI_DqCa^rr6ff0EI!cm z|Kg+T9JAIlWkz!DuGr@K*)1S_?Z!(#Pw0Y{**!h`J=uNJi}bacn|yv)hbpSRnm)hH zf9sYl`9W8tgI#8J{`zq%@}-+*O0Q*6taV?_M|m5;SnC@`1)qZVKR$fFYL~*lm5)^? 
zZ2ji_K0Wve4@X$ztY*rY49 zJn&RZ5L7(VbLr;{L8fhd>fk}b}n42;J7Iosw({`>2E z-SP^1vDdA@FFvPIR~akcxz#pbCu+~$*wFfUDzEO|->YVQo6A#w;Tpr^E~Re+jGQC( zeVT5Q|2o=L^X?7teQ&DG?=3r#DUhOb=-}pAo-3A?|M}g0=GGC;+inW{d!^L5m;^p> zPIz~AoorLXhJNX1ew)|t`LX2c*5+E-jG2PqI+K0!ChvRZ7y6%ddF51o72UUES=sB| zJ!if!d|vo_@>|u-Q9BJjFS#|*S8P{Fz~*G-d9!x&{Jof&eD2qhl7a`b+Mk-%|FpX* z@p96Q>^q`O)dy2IwOS?@p4;f)k-_za|Lmg=m!cjX-Fs|{=ERd09H&hFs7^9{7Qmvg zP$}6^M6T)C#$Pf=&&nmAH9dFh$4BnCt0vBVtJlvdI;WYW_V(NE#jAyTFVFt?W%qRc zy$fD$S81up-t&FePA5Lw+aDLIoGg1AvF}rv-0CgOj4bTiKezJV>3P{~t12l`oS{5R zsehYA`$MsZ7anCvTJ1CRFMD?M=!OUP%>+**uRqqAvSiC)&9lxD<}MB=+5?KW?)h|E z{?3g{mv8qi3EVY5S>Iq~cChKCgP!%jY~w@X|9>iT-Knd3EdBAs*WUzZxb&Z^c~JKD zhj8!KY<6q0zRG{=Yu?P7>ZEz_>6KVc56)Ki`yw|P4)(lw6ED~-cXh9X^h866zNZQd z!j*YbEfY1EIDVO0c&%06U6uh}pJ$&su~5V!QDVn zgP}?1Vb3DXO)a~xYxHG0e3&5h&{bXLcH$CW_F2h?!k=8~t$lc%W0!stPxt|?xw)4) zl~JbImR??wzQW~wiOqo}`;8UTTyCd5{J(WuX>Lh(@ubCnD~#`*lWJC-XZkFlqu@@& z5?ME{bh|2b{d*s>JjEWq>5U0~^2KhJQo6$iDP|cPb+)U|_vSDcbQjzx5@-o6F)K_4 zb(2CjgmrIOTA3&4An0)8_-4MoNO{YYfBTiP_usWueX9|qR=e$`^b2=qSJhwF_nf*O zdOasDYgRGy8E&`WkYqtcRhF7F9Mh_z-aWHDrPDIs?&$nK;9<~+ zOVz!*=d*r4cc!BKk&Lv1Q{aMq=I15~OR-Kpt!lugGkc z&aOgn+BRV<4lW;AeZO(D*C%@J_65zkY1n-#id@Cuv^IA~?aPO@`5Cu(ITg3P%eJpe zxhA18`@jn)m4brj-X*W>mdI9Z>OWuqL9FlnjgYX$!XH029sB53_Od4YlER6bqB2&w zR#U5vnaH)V{-4&$a5J@(lckjLQuc&Rf9FMOp7*lON;kI%k1c-o`m1xTY{oph%@+>O zN)rDh1S+8o&lY}I?Job!-*xr8H=>W;eX+B9nHI+VT)%zqF-(~|G<6YXYaY|M%@37r$1^5m`Zb@L1TQ!mfj@U*LcmiN4> zB=60SWuCLY`X(}0a-A@juwoFG+^WY79Rj}J0^2&~&n-7`{M>6%=e8x?w*J$-^%t9R z-==Q8zC*h@tLqGdkirMSuQ4D0nFVbz+Gl-(ds3szDMo2ErA1%9OMP$A;#hViai->v zDUY&uo6ZzqaCHb4@#!{SBLC~|zU%}W zN7~eWQ1#Ke_xXG-2GDc}A5+e>xAW!*I9ynMFmSKdTL!H^d!Fp5e8#>YUecm;Vdu4# zZ#HI}crAL<`~v@8NCPf!#hN=d_qR>|F*ChS#ziliMetD2wA5=BJfK!@i1I;5tJmjD z&$M;Ze+2LU&E_lrN>dXwtiZM>cFyAiu|cBgCdEf*8n;Z})F~5WB$UZ%0wey%TYBYj*oAxg%Ze^#_Z$l}`Cu^lIG`n_^$KH_rsv&t<7i z^p)B>&0?8vTkcJc13JGG99dXuwH91eO3j@2IX3jpI@yoRv)7*XkNQ-R`hRh3hxu{P zs?rJ1&j=Q5KK@d4>Wk{9ssBoH-%eZ1v)J@N%>uUY4|4^WTv|MKruQD+b9d|Gi>=3# 
zX`-vD?YZ^ZRyCZPeX=H?*MNuf@Du|b&ci7ZV!QY9=(c6^Z(3pmSjsOxd3_kpjmS0@;5zQotRcEhfHt3~GD&Dd}A`cU)rD5aCSyutfcw=PpS zd2`n~zmP545?^dk{%!xGfB!v+#K#tQ3um3pxh8Qc%%G}u&X&h_+ViFsoA2w&bhc)( znPU3PK*fQ3QILI|6r+vInmZ0p+g3dMpQd~*ud)04+{{aP@o)asN(ugTv(pt{)AoOU zEz{e#&u{8yEWa=LahJ&-Erp`9N0+85hOjmL-ze~bQzDJ`(oDw}S;vBpuiJ5{4>b3+ z>b!1#{jXp5^Y5A@d)*2w;40(XcJz^j?c`&+3kyDc%#Ge}bGUF{2D1vIi>;UpWBBEp zkGVI!iwMb{InTOIxTSHzv47{~4~QN&)$U|&*iwD=dWv$kN>u?GJR=qd6-A# z2CgYF`;YxS%4nd_l6`3I-TYpU_gi!m7?>4qsQXV3ogeEmD_$hDRg3?B`t0k=8LP>AsaeHr`}4I&96FB@z;9mC)MV*3>+y7tfk>Gh}OT~?dPW<;{H%t)_Z=w;gO+OkC8!VJN4 zQgVAH-_O|jkU>I->6E2m-<&%ktM1PD#+{}xvCLq~wD13Z&-^D}|5sf9>-^0*vjllH z-+#(nGQFDVXu)Q;+l)$%j1%}ypE|HUWr6m(yR*XOx)?Yce<&KSRbRg^^JZ21uH{Q) zSrZ_90MwI@#*{q*=>nx|D)v3Mwt6FB83dMYWJ8?w;^AgvV zqP>9$44MKz9!N=AZJXG&ib=gc#iz4JGyeXS+S}}E1-CUaEVWEs=V!-ScQi~mC%RGU`K$GD7cBSIyR% zb2)@tLR!x?YCl)HAto5t)7zJ#5_IXM|BdVVzfb(FeQ=IFQi`__kXu#LtgFj zojHlI<-t>{7R8H)K3(>=SjAwxH{#Zr(yWXMqLRT)DU06hn)HU-l|jL0egB7P zlAyg;OAnT4X{}n;!}!VL;+dUqW9L?cU2fc(pCb3=_j?(os$*K0*PLXVAjG8gIj%aC zYsc>ie&2RJZE2Zp+Vxn5oIeO`6t?**S<_e9p+nzu#sp@+hP%W}D&{#ZGF4p)B5k~sZ$&?PyJS8GhetmJn- zo%QbL`fdB=_jZ1o#oje{k=5do2h1-0SQ&&+Ox$JJcYMszT3 z951qh=FNSIW&V8KIQ3I;Xz1#y)*rsRc$%0PQrt`m<+^|S?)0$M5ioL`T6;SA&Fb&f zn;cBjg{S*i@8AFU{%yH`7wh-&B^0bbe_g~Wb&KTn|IfbazTS4zd?}Bd&Qk_ONe@XL z#XEbCU0k5qU@y?-8JJ?D@fI}k<}k19<;InlJNW`9ztqfhx6|15fMds{zV587jo+N7 ztIWIj^|p}Hp0HQ*J7W@i5A__7$ksmPc>y)3HF%*f;3xZvNp(<^riU5L$e zzxC~)=2Pn`c%jhSb| zTz8tPwuHVge9QlG&Blrel`+zL8PisrIlMkNOF}{7Y8{_W|3tYU+nJ`jA}1dSxb^ev z&H2?&Tm9qy|5{P!FCegXA76b%O78t^>9eyZOYU4M!d;WwKG8_kr1gP(>a`ipf(`}( z4Z2wqw(Sv@0L{=%Z=bvA!VZBMucHml&O0TiK64Q-sK9<9$`rBr)}MC!g7*sqR-X|% zJF7@CKJR~l&6G`Bl+!B~%)4{MkzcC&xq(mT7RB)L9IyM|uH{d^X}|CH)4;!R)#2({ z+9Ebz?b}bDzjy6-$omO<%#)|xv-FwGqZz9{=i?jm-zH6BE8LE89@08$edMnSqW8&P z@u4*swC7@{$k&^^28$QvitRJ2es@gI^=IUyj151^@;`!AT1`@XI7oAWh3`r^iY#y{UgAJZ3>+_`Pa zG;!bA6HoqH{_EJy%#?^*3$H5WR-QL^f_6l2%3G$~+pjEp|8C#R;7fg;i&vzt0>!m8 z-;d7YbLP7#{eLI^X#LV{&!??6`uhISd@H%4&~*oHGj6}Jk#UJB>#v)-Z{`br$+=foZ|I9zc5 
zeRWx2IAi<*AvX7fX;q4cEIwrSyqZw+-aL#)F1Ge&(8k(?RTk-+G+sU6V3->_7c@xw zzTa+Le($ooGenY-Jx4&+K+RR1#eGCE5Eq*+5=Kqj-)%5%qjzae~ z7tb1%OD-Ea1ttgyue_rtn%U0jq_${dy(>@E)Htb??^Gt3sRT*>T>pFi%iqV}?<-d} z%QZgsH;Qp?K=kb<{X2WsWe2aXEk5t^#d8b*4~8rbYvuu0t9xjrM+A z2wt)E@~``n^J-CRZiy}1blh{n3YnsBZucGR`mZ9(T+C zdG&C;uFbFW3__3V_U>z}yAtYdXs{!et!ZjBU*1<)i7T1gf|g`f-kM;bqwq%jx5%G< zR#yQX_vTc-gUc-*YkklR09#~OD=%2NHVhvoji|K>`| z|NeEgpGml2_7bg@f>h&w@B6d2mFH~D^Ge&XULtjI*){vnU{1T-q{Y*f8XOcD+*f{E zv4wfXhtHE9KbtAvP*?h3%FkyZ-z*MJ@hQl8|B8k2L3rGzI&q$Vg#jJy-J!Q8-sb<} z>mYdUo<>?`JF|i=$E%f>Ee)0*+8k@{6RkA2@LFk}VcxIL`TZwizJCtiXdemAXII=s z4dP$^dEn2?|6O(Nev#<&?rqU6hZ&lF{a7OX`_Ov95`J%MCaLaU9eZB9>;EQOa{RD+ z@cD$ov&>8VJm>n#8y{Mm`{QAgYqWI;sPig&blY>YO`U}alX?$<+?3X2yLyuI^79Jo zC0(Be4{Z0y_>tA1L)tNwNNU-AySA46JKcK;m!kac4s>YaIs3{fG7yN8PwDq0#3k+R&pp z@zjaw*VcId?V4|QdFStaXBoEnzqr4mQ{%_Gr|h4m+P|+{m+oA#7SCYmkTnS5d2$3y=s zwj@7n^=Gs(cv!!D?`mQDAHnuj)z_BKt5N>Edf}FsV4hSFHHL?tJI}=aBoi`>xZ^)Tj=n8A9Lg z+n4SU>E3df32z0uKg(qq|Nclr9t58qe6x0`#tY^nO&d{-u>?zFQ{ zwClIL>|Oi!oPJQ7&I%Jd^}7M0vkV|JyMhr)T|MlZkFVMCct{fs&iz{c>r;$X1W&AWQT5tKKQd*aVtk)$ zjsBt2ci1C!&cfWh3-}KoT7DQ%7oAUtdP)%x;2sU;4HaE2X@c)JR5v0pl6A1=fW+2Zrk#I zxl?|AyFbfhyO;HsmkX|1HS6Qm_XU7}w|k`jJwM0Kx9`99`lO74 z%S-;>1=VhCq6W*~#M@2zkzOD4@AjGELO$aOM(W4xOZWV^vSy1jn~mn%57|Cj1%Ecq z)_)scbvK`H_N1~m>Z-NhOwwASZWnxhq-vYL>el(mF1J7LU1W60-ZbJt)GsGb>q|Zp zJ})+^@S2?dH&}9=^cjasKhHk@SAKW$^|}{)KKXMjK(it&0)5*aT-(3@+TUwpua_-3 zxcBECf8(6+T@SBkIL$3M=Hz#aaqYFKWfgDad<7j&9KS4Xax;95#jDm;jr;h(TU5hN z23>W`vMw-OmW+D)!aKmK6-$I5LS z$2Uow?D_oc*4LNO@il+XoTC?CGx0jbLGF;VgcJmYScWigvLJgt>^8#{N!%x^~XDt6X>cyNofOmi{PonQU#>yx>AUOH7@J?&h1R?0m{RBXqao}V=r|L4@N ztiG^QIoU6-qTp{Zs9AZvJ6R}>X#m*}Xn?df(5A^wWyk7en6b->A5?+v?-==aV*ElD^!(ciPTmjl=72OxAeoF_ULn zo}i!MTl3$gjA!G!x4dnBYnaK-z*2fmN_%eqog&}wf5PwC$XicJHN5(K>L$x%NARr1 zu5CA^uW6q;KiS23vZ-p%7k}L;CCTwW)FW?AOl%VEFZj?SxkT)6R8-@M8A6>~K7X*E z|K|7SXZz=$ZI(255S_FkKEW-x$Nt}B-u&E~iT7uj-JH|2KRK&yOMT=g55m3L}rg4AsO;>j!s;<_2?9XC!+2~Z9#tZ 
z&&3{}7XSY|`TTXc51C6QMRVEeeBu3~93^=4z=g2o`<7*$Zc6(bu;1@v=bN7=cvOsd z(ylLEF7ew_i|b&bY}>>JH?ciwP3yTD8fLPe+b}=!BHJ;U$*ONXj2-pXO*{Bl+E)86L(I!#373AJ z=B>Z;>w)Zj8?no;xVF#Pr9I1QMUUCz=<G)LRSx@ofRG;8b;vWOZj)Q7lvQdSd&JSFdKJ zSGDX=_BB~3-+6`O|CJ>B%E^jv4H#3jO1|m#9~8f88{Lv@-4o)Lmvw*2&X4En?^u5`1o@tNvZ94Ag9S+^~fK5kN!;o19PRlU{&w)TCBzt{EuxgLE!ZsVgO6Mt^8Dv#ZY z94l7*VtH5fpjS3O$1L9~<;l0siqn^twdgi3@Sk!dDD}W{r;q8o%PQ7bv~zI$6aXJ5 z^4wWEx~l5-M3XM#%lc+s4=%}B2WnpYGS^G|uVXT7xSYin-XHFq=KzjCQv+O)5_ zZ(97XuYKxeDo^siUza#gBr$VM{VMzOpuiDyPzYe0y+`xy1rfK_c)7AqPT)gE;+k!z z+V(YV;sJFfCooieJ$+5S>EL~Sj*Uh2OJ&&v&)Aq6&dX3+l-MZ_Grg!T;C;sg9puGpzOkZ#Ob+)wgxphyx%B4-UdyX;i zT<2^LwD^DJ~5$a`Mm?m9|Ug zF8A*p3T`EC{;}wNZtc?Q13yD=$=r9%iQ>7O^rI_a*GB!K zOMGFgCw@vWP5`g}j1t;xx2fdd;fd$o=N~_#nR(ngXxrtO>TvZ}GVFg&9RBpn`PIYIdSFMz0Z2*|9d!DI&1a6 zo?R=#UL?Mq{B51zo-Oy7HmH97u=?fh_Dg>bMf5Tj{afjhxBgg=%XN-@38p;9-=BOW zv;Wu%zK_-W%g$Xeyy!i>E0_7yl~1p%1GXI$WVlvZs&wmC+i!o%+y|nu`!y9^o=0q6 zb|+{_oD0Y`doJ~f2v=n4wokh{`9oF;OTr8xXU*r0)$(utTHBYe`@()@(Rrz2mfDuK zUt%@Sp3jw?U3O#p5+1W-pMHZ{m~YHP^@3g=+o!<5xL3^M+{>wGew zJ*enu#!}h7677)CRsoHPpq0v+dDr=b6H{y=<}60(O3q^QP3^B<_@+6zI$T}u!|%rZ zwlm^he!p+%@#Rm(W;NsJmf#*xnCM*MW1FDHSePWtE*@GLwQHiGf412`EiH`{Uu>>}~6A=iQ%TQjuUXz4X+Cf}(#bQw@??YHn@VqAVZl z-NnfyaK+MK@u{7k>h3YmN~+D!P2`(%)o0eTPVdxy&?H#dwf*;|?t6OQe~(?s)AIXTUN4XB z1Lfz~rA*KCE$sijKG6J^^LhR;yG`zw(^7QW{8fK=t>H+|*I?pc(|*vwwZt}J&SFi_ zi4F4B*Z(loTxao%&RA5cReguo|4hZ(*ts8!o8v3a?K776<-K3v`+M(O6K_xF_Tb!l zrs;9Ut}+XTpji!NVaDu9il6SVo#nXH_T--9?>_E*e~xj>Tl{TUG%fE9x6*=Dzxe(< z{kXL@+bqgl-KgmFJuw!G=oaQF^FVd5DWh#bchA~`pBBK^nC@Hle!*>dtF9_`TS2a4 zE$Qbvj`aj&OV{oD@!;L|kgw^Si{9-`^)>T4ZeP0R&FXCTS)4_IQ+eiD^m=Xim*lh| z=HUNV>DN^1QYE8}{#PtopVC$E%>^`6r6uy`7dQK7_57EW%gR0escjbT=23n=f0f`C zqm!T>huCckd* zx$Z2_j||?`0xJHkJ*{;ZW^Oq4>yyQwt9I|t%0%8d;~90kEMj}yzxdVna{g)xz`lZ55X`D^;Ced3i^zhVuOzrSE;cwudiPikZEm$R(U}Ujnbs%l4O!neG0m zA*?^=WS{q#C%xclFpv#SNG3*^%G2MLNq>{>9@{?lRK>}_AI z_xsE)TF%*jMa) zx_RRS&k`fQq{C&8kFEdrXQuDPQ|pyp=+E!>2DKW$TQY2u{=R?8zH|fC`5R@v&)P2Q 
zb}J!+>tOt%rvDcgX*no_uyQNvdt3KK7iOecUR3fGw3%v~ra9mLw+3j0YxyFE_3I7) zJ^lVaZNibsX=Vy(n=bLzbAk^?*>YILX%>g3qQX7Xxy{Aed&;$KnGR2q@UZy*>*@X7 z@00fhg@tCNPBwk^;dRTcSz>?oN8j3Q_RY$CIgg)r+4SIg#2Q$!veM-uc$< zOMLOcON)P=Uw6Z7dG_P<3-dq4MBGrGXT$B77P9r+q422mwejn?i?s z)ckHuWI33(`oV*j#w#MEZ#-kxY)pwN+rQPY_;PN2-A;3ff818)BL3@-b%GMeLWV}m z#QQ&AT)58`^y#3B+o2ObmrvXApneO7l;T^1y@h9#17kCfGoDVHDIR+LkN=0o$JH-d zh0b|=7Q70a>t)0Ix=TAXWqyY57pRw4D*79|6Xb|)t^@k2OZ-!Rp9#whV>OtO#$ zsm=VBF>8+RBJzx(W3X1u=mjK=>NQw}7|diUqjdo8<Mx;n%OOjZcZZ9r{d7pUthCLsBuo(8A%eT&3&J zyOr-n-}EO`s3o#Vtg_y@BU0{wLV22EQwT#_0w<$qp4f}Dj;5so1q&RURHd1OxMVmy zI6PJ!YGaGeNSVwf&84Ef;cf!AwB06=-kgPpo1FSW9$HvM20WY)KckCncFCrNliq4f zxa2MtQsnaO`+`Z#qJ4`er1Z*UJkj8?oWgr!&4iRz2`^TGuZtnHn0p=dZ2Ogms(zl$Yr^3UoEi zy%g-aK~3X(b$c_z3a&p-)_pTz7C6$S^q~276b}QJqk2=D;Ke|}H-|eym6SBNc>l+} ztCkYf?z$PDxb)Dq1TLxc`|sx_zL80k*d3bt_H+5SPu116_uieq`M28Ixw_)qyZO0t zmG@aDaJlEVy#292fw@#D_<7CL-@QvqoCJHi7|#7Va`NPU0ok(r)Gfy|yN{mETVK1S z!cdHX;Z{=N6_3v4&WiUhOPkj$I#{?*mBBQstlbWEGEc@+4p?`jA=RKG% zHeZWH^G5MHgKgLEU$$@Ce`a3xy_f9D^H1dePxz7k?z%O5rul6ZbBAUH&ponBmP+rC zUGZcBQ~M^yS*%MIA86#7U9w5Ue)*g3p!>D8ou7A{RDQ$1i7|b-Xu0DB!%xOfuC0Hg z(xTGjv^yt$g7lY+=lL6%s;p&|zUROFRoztf_sqLEV~}U_IxqM+CLZ~&!|oBi?r4O= z?ZYB|)$>fgP0hR*w{Pcq&W>-tPA{(b8)%bxogu4RibtYu!dB6X4Rf|MElHZ2s55&* zt;d=@e?J~L`oHq~wskLkjW<@F-sQxCynt?)Vi+y6dV*2>#g9O>a@s>%}N zvf@KP!5PU3>@yZJXGpzT$}P92_4>J(Lo-jc{f&!VwD-^Bxzo*$e+}m>P@fwsTVIhf z;ijd7FvHik3_cV37GKkRu4ffe%Xif-*+O0VZRdi(`eLTOiY7&dU0gRV>4sbrY18T| zSDN2ryr9UQsY+NVj6*V;)&4KE7Aq-CpS$x$h5p4q|6^}VGh&uIE4MeS znu#H!_vpt%*K{iuWXC?4Zy_tdM3(=n(kzhNG`Q&ZTvhqL%vcw}A^dwzn^SC{WUR`j%pPImZz( zkB`d9s~b!ne7?c!^et_Jo>0Q$edgC}zb*dGJL`OImcxCA`i1S&!fnK54u^)mh`in? 
z+v)N+bLN@rtC!knZ1-HkFd_1{1?%n{xv7$74K0ftQr#-1OEW4tojp+YiZA@*t8Irn zMDow_ISFiA+?chNcMpgDfL9|tv(h?T!+s%1k3c_RbW`f zz+&+EK-%w_qSy6f(--kid${(^#o+wPXdOOj23-Om`Xa5Z)AK~1 zIz@39e9K!W2if+Mb@Kz4H~-s%&o*r3`s28_2qw@LX>6G7IrPJMGtN|$Uk>z7^!XxQl~H-n zc>3Zf={Pu`*7rt zaKi7yakHcySX5k9XNrA(_=kfvB){?GF!-5O z>xgHzGn;42fELL|X0mTJKjX5~!}?fD!BW}WXWkj2uWe@9HpOJNE9iS!GqmPJbR5{? z=)}R=wzzre%g{{s(i7)Dz2u!OU-@pA+~a%VvrpMo>->JOB0O~A z^2M_Mzb^eRZhy*Pzi6su_Ll408g{!+7i3~us{3|B#{>Z;En}x-o-Zyfw;FS|*v6b) zeS2C}(5;QUy;l;-5*YWs^37ytYd7H2!^0Bq2 z`8U@S8w=khmj~q>&hq!N)>$jGKis~@Z}rL9m-wbGT+MTM^(8(JmZ>5hycJ=$1iuv` zdiz)J$9=o*7@6L;X&Kx4xpiiLU#(QF=kWtZMw}^&vP>NKuug z=Do_@8_zoxTqf71F#LJ*C%MB`(Pee=hZ9DDl?b}k_$=>1WayNGE{In_M5w~ey@u`aC zpJNL90-G-Jbzbx~|5hm{vEqGf|MbV*mEG)1WrJ4SP?9d3@Yljz>+U@5bAc_lKcC;g zxvJrLNj<}Y3CzK43Ae>c`a|ERC?HN z{r`S)Wbc-?vc4y}{jGlMOyPV;=!>i4p4 za}l+icDDWFVvZG;yxaK?@J?9fTX3WM7(;^srxL4VG~>aKF<;m1{@!scW5VzJh|2%L zr>9GlHD#YNs@6IEKw{M@r}w+~c$QAOd~fQ>n#EqHn7?hGq4s=tlvk{^%#MF6y{_4p zDt+3MnCV_S=k{#z%CK98*J~IHFFxJ*CNN-|hpzV954SEUF|0^AoFuOk?G(I3yt~*# z;8FYeZDF-v*F84#+Q{m*?9UR})bHE29epIgka4l`*3Okzy1X>~XJ~NmSTH5vLD^;v zR?CWY9U434pI&M0yuv7G$AgErS1jAzebf2wzdMWB*}kala^J~vH?3tS%l8G}r#C$N zR_PPXvRuTDLzqWUdBv=R7ap^pZ1iuKZQE8WW1Y}edQ-FACh+F}EcP_e@~btS_UD}V z6BaFem6h}N%GMt@|SJ>hBorO?Ct+tH47vIR?J zm+BNrT004BOOo<6zkej=<1F>1vQtdi6xK^IbFlM;NSH0);@tY?Qcta{y~BbZYutW6 zjeKvJvbI%Z`;?hm7O!ba&erME=i_^HRD2$HZ6Wl0_qz9+-!?Isf0*Iq;9j`wp5^}# z_2?F3$>QmHvRB^S=ve+t&$)if$>Kv5(JU9oqs z-R{71IPjsxrQ+Y~^P-JeN?1epIXpVp9(Ti_W;4zF2{6^9e8O8^yoZM`j-HY4h5-@aK+5{gAMTP3v~mERjw9Tz2e{ z@y6O%b}KgaSJRdUUk~-J{E?#jO;h9EI$6t?Y{wqVw|e*a!N+y7K^Zrcq&FU^WBKZ} z{(Dz3%YzNC=PcIz5c7?Lp=-hh8P;is=NYG+_p%OgO*l43^4Q^W+dn_^7TWigtg`7U z?#hglH_zQ@!Pas>JpF7~{hrj%JB?+YPhJ@KYk6P$oGphJJo+4a*3;Z6DCtu1&XwzA zB}775v=6s`di*53J6|+0s9!!Z+VZyUnU9MM9rPs9au}Ja^ZuJw%g4n_K6qk#e*d0V zkJZ)I@^C4+sUsd;nXFrW6cHgX9FklDA|tZZ7| zljM1`Ufo)lWNey}Jj@oa68?M6Jj1SZ#(S&HTV}Q;Er)tUMBCrL@_6a??_Pw{ z)`^FH^51QnC8zIYEm8Z62b3B1rk&V)iO-cm^YYedpQT(J0*VhPTrb~xf%%j%e3k8^ 
zJB#P9mwNNsVUK0pi^RA83q`9Kes>24H;emQ%zRh#@22zfo7c>>maaJ$o*-~ytBI0; zpqiJzrJRqF%I1G7g>~=V5Z}2@_O9I9TaHc%+U9}XU#-pyW_x~|vE}jGrAOCnS^Ok@ z>c+=WuLX^^?f(DDG_zfcLEvcmHOuTM|A;@coF()03Nzh3^WJXieXZSjGdf5YELz7}NUI=EbO#p+cab`|%FU$ZkEluy>U82@5n@9ar? z_h)t=-549_a9l=xovf72Mz2%FT#Bm|cATEUbKP>ez|O@JdCxJXJ<0a0{4T6$P+hIA z8~@XL#Z1NZYjqqs5_;cz{9?a%R&w=)#hM2`PTsxemB9ovTM<#W1zZXXjCGEidVG-D zbqYL%sb_YNTjxTyvrz8FiiPc_d0uA4JYk^=(_`6tN~(T!x;>kHHAqkAN>bdq?Dj~8 z`=Oi(9vlI3OQe6>@ZVe~tAA8{v-wh4t8FLPWRljq%)Rvdo zzu>8EoYRiu@#35t3h%#uE%*E4Z}zj#&CI7fxtHw8=Dup(v18r#O|#{GTwlbj`#O5+ zncmiYo}wqpv;~-$eoidA5&1vFwEt3X?L7AITaDk^!a%3d`W;t#?RiFEw(U$?t4n9+ zh470l>#<>PbG~4k(YJuhfx(zRXXE_H!oQbF*?t9f?OUyCUcB|#qaCU4`tzzDZ~c(o z)@B#|+9KV5OZ}^gz=vVIQ8!mzJo(Q@rKQ2Q#h^Iwg-$F--R&h^jkyt9KU6F^9O~jK zxb1W47l)vbfMa47XDTHeuYQcm<(rb2%b-C-$Zp3$bNa{^{-M6XVrz|(sh`Ytj z%e={Tk}2Q(_U*gNHi+*o+i|JyrOD-}v$>CS-+NlK9EeP2aIM(BG;M&a8e$}tjS~t5|%x-IYb?)4f{b8z47alcx+xI2*s-dO2h5FqG zIoBe3@)v2py)otOEyJ_Hm)MvRIuyz*1nmyh35aIh3O>%iWAfsc0y)QbU*ao0V)-HS z-l+wh46YN_X=Tg_n_cxl-#_=oXV$tN7fw%WuQby=x?=8a93>wVU%9S)ax+lVU-wv_ zgk>hXLFJ`BAG067xHUoTCV?4yYh~wd{xp3}N*kB$tUZf44r%XOd~|<-nzY=MG7Tn< zES~79;A<1C*&E+jdQ^Xqfow;#u3T8CXmH(p|3OX7O!t#4#~l?f)P7$7vH9!VwU={$ zpHe7Z5TS1lInK&J?MK-~#tEmy9O+Km=|XHz~r+W$}NM_B!T_t#7JC>)4cvr&*?fx%%BCNs6LTN8zt)C8T* zoY*dXVaCbQw+2FgTHKe))-IY>^Nd0MjQ^zTr+#g2l{B!?%9g0)v(bCIg70Uf-R8v8 z%Gvv_=HB^v=gXuU)jFLIGOk~FuyS$#;`@8PJW4Fi{VgpTxN$t7)`D1QoyV{;|J`qNNz?!ZH`x0*+Z2HfzFj;y$5*u;uMD{*sCqg{=9x`X!|^4uH`{icxWV#m!J5SD#ScO= z+m!^qWUeTfa*#cI?h^u!X|Q>J$U5yN zX@TP3eZPYpQiK#uopuF!@sz*i2>tjf^4{b!2acR$C1*Cp9r4+CuJ{t)`qNTH&l_&W z#(jNcdBV$j*6dX&?r(PN)c;+xEN6}OyEhwG2>&q=Y`b;;Q270tWVbVg>-Y@Ar`_7J z$k;huZk>TFzfj)O*C)Jg1Svnc5%KZH+$)>vOiv5hwtaL;Q2A1|D}3*i?K7w9*xvYJ zuwZvl-C%%5A&voWkd z{l*@pjt#xb0-J9h{Je3N$<`g}ZJqZ2*6dy6D)7WNnZYq~vcPRy_Ntd}dVM(F-BXrY z>?pV6;Y9U$HqR3d`z>6SvHAF;y)LU)Z+tiB`=z!0_C*gEr+r=PrTMzs>TE5$2*V@} zNhJ-5+3!~|D=gFqm?5YMnrocbo^!nq(v{dY1-&l=gfTY*2D&60onCw)NHOa{1kXU26@Dj^5n5Sfd%#-W1=Scd%A^my*0UPi*vS 
zi{;a+<{1XvTG)TE+;ryDJdx@3uPkn^%s$RJZ%Wx4fv+Ma)jt*LZcV<#my*i+i%`-@BF)M@vGaQT4&dT8K3?CetV?)XxcY(|5=&0 z{|hGaP5q^!H6b?WUj=Qrv-Q3fHRn;%n_Gb;#=+<8w_4B4uuz`-vuf)U_USdp zrnt6Bf6!6NsCYg(e%Gbm+KF#(8NRr?PL?y!_tK_<6`wU&LtgIORQIXi8B5CQ8?DkZ zOWCcb?}+t&a9uRyCsQZ8d{yD$3tz8?Cl;*Of5G{7=OV5?%l}uhlMCOr-`=)={mH_0 zoJP}qUmo(vWIr4mclY*2$;g*(3s{zdYH91zw-!AfA8+u?oLY8f>%8YPUVK`zMR~eq zyFh){?UEnoY-xVrW!=#* z<>{;cdpsPHSKOOc_5*qhU((RBgctvqTlZ z_}|KG(6EjF(tT07PXm`enr{^svSV67*fk-&?=wBP7`3BYg7sQGs#ULVRepSTo$TH# z4uaXz@+Zzud~VeKX7*g+qt&_hc~5v+bIe>*Ji*^N^1i@(uWi}xt`6z+k4Kc+=H&`+%kPTZ7H9 z=0*QjKCRoj1Ac$tL&HAGaAc9A4+xB)(L36@!Dw?8xVP zu2mgsT<ei)K@geL=Nx9+S=&O=UJ!kjr{8IhVktcH2^R8ePM#tWV9@opSu4XA_^;-6Q zPkJYG3)!mIdM{=f-nielCGto1!Udp%&5z66W_xz_``lTe%l(Wv-rZ{lm9+x>iw^NmjlyDR3Ox)ADiOew-| zp?k1I$iuMpQRi;=PTbqF&Pa=?=;qAs-tSD@UuG?EiPnj`U?^I`+nt^D;n?nT3vSP^ z`?R2S+SjvQny9HK%3 zPF?&_@LRFPg??=b2%8yS<4rwFAi`E6RWam zwd7>lyxThS=c)Kl|NUn3UTfZM)97xzKcU2b)`IJgI)7I)9SAPbu|1{!A#$d@aB8p8 zx1vpM#y>a}w;!;-wzbDFpJzujXU+|la|t}=>?}Li@88;f(7VI1f1=~f`;w>M8&>et zpKe-RSz@GClTw*8nK6Lnnf~0&*h^x+KTlt8UvKbnXZ52OFJ9@Kh~O&9za-Y3yn2y> zWOsMH&G$2r?6aP&4b;ruGg*62@6iJewE{oxFg9PiZ@zAQO~vIVg-WAJmKhuEf)2;; z`Ln#D@SC7Y%EIe6+&+Gsaln5w3JRpGLK>d@7c68Asx}{C8hV*KX7Pl4ZO1L{ih(wABoO~BM#X)A84`b zTcBky?W@Acmgc9zKREugNJTET;(yxmHsQ9n^cMcPf9E>C=MUTVdCQ9CZ%iyFno|1M z1chYZ{_s({cq8j}IQvXnNeM}ZD2@k9)Fdre>{61v*mP37YTK)3x5>v>{bFR`6Vwvg z`{iDH-edEmZcE{HH!W>?tmZBV{8KSyy{Y2s>c!lfm|D!=F8#9jOpU`;#`+5X^CCTe z)>^)o;ZUoc6qdM*!I<}lQ^Py?>QlSay>zy1NM5Xcw@dkl=bRVSB0l$`f{k;sW?asQ zFa3COb^M>B`_0RbM?Yefbieof!L|q)F$Sv#4sSn}%AT%0qaJb7No0MZQNk8Ci-KP~ zTrXH=iO#%N9ramyW%aFWtpoQq^)yy5P7vJiW~=*)71RHp{crcoIbh1hyLYEsE#W?3 z$Z(rsiF`oqZrS>W&wknL;orq~L-T+i(=ImuZEpiad9rwST;3t!{lt4km~e9ZGQGq< zTFO!rmd923yee7b{H`bL>&@Ja756qD-xFV1>y+?U-M)Q(-YtveCkxkIm{~is{DY)! zxJttm|5l6Lti&e2*~xws_wRf5;^wy6*w!hcS}~f=YPVYtabI4*oUr21@l&bAmPX5ymUrd0 zDj4XlPz(Q_8@7FEwkf}fwU4mk+}!h`C+?}fzU26}ueT!WozWNf*(ZW;u^r$Q*v9E0 zeWB**irgF4JD2r`9s9fe)lONq?G$b&C5FriF*zm@(cvn!Y8*voT@rog)*! 
zZ<(;{-MX}(TBR8)Iln~o=Qf|8xUk4h-b_%)$ssZPv1N@>+wL-sijCRF3$~@C-f>ts zZTIaDdK|11`gVU`_3GXJy=;Oinm2N;KMts4x_m{>|E+CWZTM>Ex@qTgZhSg1>;EgW zcU5&&_ck5h6H}PG(c+gVYkbb^wf`zkKNB^Uc^<5oX8Yajx!V7Uc8i~O9d+2Gd1RmV zv%rj~g#PbMObsi(vI|KEzrNbHFlw*QQdyqs8F%k>U;NdvPWD66^}dHsU2n6P^y}&i z?L3|>-7qEZ+l!57I2k-RcupC}+&*aZQNko}U-9F40W1zKAAkCOFdCA=eig}r5 z`rl{8+2wKyy~wWTz8`sAFdK5=VXS^}{k3!xXO`7I_AERKOSv9gk(8Mus#w+j`p$dd zhX-2MPE47azd80rqrXtvwP3U7hP$O*GNfKD{4BqRwQ=D*qkXBjxK-{vl9_f-$1kwd z;qz|BOJ>}*vkp11d}1xwZz1d-;mxSG<(v6_4yHqP>*n5l5cBn-@%y(On9z=Qs*q zy_}b~_x=ja+v#k?6Jn{fEJo2PbYN=qm|GQ7CrWQ6gU!NxK zy3KUQ)WY-4*R`g%`!^p5TP}8aE}u?F{O2n|){FrxCZPwnR3!gec;CLvfBmc9%t0sr zdH&r!SxP4T*dx$o3H{$n-1R-x_J?TC75*L?w!q-9z@j%xWHZ@%l%*|7619K+UA$T3 zoTv3A$I{84Q?}nf{zkw3b7E?Dfk|46+ebNJ!7R4z!R0Hu!p~dvoany9HmxDGBK%fj z^t*qnUwsZqkZ5Dcd2u5zG*L24X_d-D%S`r#8?{a_o7+^~pR7GiqHI#OTIu`3OQ8Fx z``&gn{f{VF@Z;yNb(fR2{cSZUPKeKcUQpO4AfUmY*?!hx(XI2ZL_Z76loT>)3l(%! zm?d^Wcs+041vB;6-m$kPUN*71sWj)J-)H4rWe48g3cR#D@eRjS&=qOn@A&4kHOeiQ z65P$+u5rH7@76-lp(_0IQ`i1_6?;5)N)mU^nJ1psYPuF|t5#juypDM$bjU9>NKYnr zLGXh#@nr%Rc5+x5y;t{i+~Al0@Wji@t;>4;HieZ48oXw@(eE#W?aD z&P=$?dm#K!X_p%F$!Q$F_2+vpw7S3J-B%Wdz_tBu%)u(%(j>mRvCPYvFmK! zmiRpD=MT1;1>LdnywvCUv*ash<5uxn7W15YHtR3)_WX)yZaT*kA9gEn)xOm{3QIK? 
z7)M)3a9@s?7F>RGeXv2nzcU$|uI*nKl5G}zbMF&@&p#Gf3(jh*+I*?+ozT8iDRq^Y z`++Z0Guz+gX0mfE`Ta4!&}`nOX%#`Y1V0sCJrni()%o==W|&>M&zC%DiLCIv#w|x5 z{qVfcXY>15_QUf$ymiaY72dO**C#ziHb(RJIw_td;@1m4EQ%E2cptx3lQBR!%i`q% zZv8J|zZwkh7l;T=<~X^A`BFw#!M8T>(v7V3^Ev?-OSg+Q&tx8dJazX@ zqyUp9U&4uR4>O`V8y>Bl*B^QR>a#Ylo%?>?h}3t{)`^Uko%`WUp_6^22D9)U1_sl5 zPZ!4!%l}ub`RrW&JUVOMH#hZme6NS3-22;RLAL_4_N^8=5D<}G_)=zFM1ZBDwF=k7 zLzkVW)Z`rx%XMG2E@X+U(18ma$shmjezECN-$BjHc6H{K>)Pd;Cj@Lby3%>`UokO_ z_0?IOw%TuDXJ6j``C<9))_*#eR(9&1n|a}Z#7VJ-IX*ns_FB*V7d3TC`@4LH;2DWR z2NzxX6zgkv(56nRJ^aYY<-LBp45dQ$3!Mvj5&b6gcIo~*bjN#G`Lt`G0fQ(>*WyK7SP0ylsE}^oHNl(p_HuSa*v@MXG5{yxsIe#!bS% zE{d`&YHP6ncr9T0F2#!hZ!Yo8=5Tfbtr}sPer)-!vI+}_2`9FFF}f3ZDcjur@N!4T z{@`A&bn9sy;tFc-fj;~D?eELi&bB=K z)t}ezN_1$}zSWwp49k6|!TfQ#(7Mr?5tdQrrQl8W1I-wbR z_f9?+S!q4xOu#~Z)BhY-w}utx95^FVW%ARVe~0AenPuM-_Hqcy#7opK`TnqUHSd#+ z#~ySs9I~6YZ^6lZpYAN4zgp_m>jP(={W!AspUWPPXq~>tJk0C={F`@R`|scDR!;wP zh~aaK>&A(f_&Oiyoimj1IJ++8|I%WOh_(J+8tM#QTlMF>*lcwq`)}z!rKx2SkFT2a zO^N}JCxNaz6bZlfFyh{nGM?`>UlzPGE|+R)VvJ_kTHbsdC%jUM|hmYQK_KF%)dt)R*)%HkE;MK37eKm^kKv?fB$mX z0jI+0Q*(s__J-};{kif+ig`lgXPZZ_*!=IRz-D#U`9NlM{v4E7p7`Sa`|J1Kuhx|7 zPL6(VWwU$n@>k6#pG?nQVw1P&-NUZfTN4u%HgEA`yMN!U@qxvff@2Jf0nSk>D?9XT zE(?EbtUX#{S9I)AqNd@@;BDG(Z}jy2nrWl@_CrL&iLW(Vj0$=gODa}sr2a7BXcTGk zxFyxsWWH2Z!DoHR>3kcuA*1 z|J=3*k6!J$x4T_yOV!s!k#jCweS36+?EKfqITQ9D`Ip@2@!-L_=|@)j-#L8PyC=>` z@}|fQjV~FWcy=DObjd69;|$sCe=1UdNiv@I-7(i2DOW4kJzuQVuwnjl`Df{IcjNuM{>Sr<-%cvLVvzjW*Zk%E*KhYOxmc*jYvf-Qv*3;L^y0}UHsO95%9_1X3 z;^(U6?`|qR-Q;$Br;bMIy>%Y(fI$x>7dFI`_B~P{MIZcftepoN7F;;zB(7a@38S_$c#iqsXAK0tx?`=qH zvMH0>v~M+UvGiMQeY>AiZ>7{*ulXDBvq$@KcK6Z3|In)@KOVWTb!%U-o89XtMf+AC zRd{P4>7l#g*@5=-;0;OpZTXMY+byb6nYiJ0W@-G|^$bV%@F-}q=qR~;_SCxKK`4x5 zoH5eftN-uKz1`~DSL|EB1ln71d*5oM_xo1!F?Zf#SUvsxVh^1V5lOGGTMK#jUb??D z*W2{j>2lMlrege_ku0o1k(uq&cb82lXkPp!<9c6WGDH7enVtrb@LPi0jb9fBrfHi`Q^qE*<`C^W~YSPU+oX|o2{8|dUxY`o}!1loqo(*D0-mWRF&h? 
z5;Z=(6*{X<>q&&Y{@R_^*%E(u68~{e?dZ9y4E;Th_jWdTJa}+!`im>-wpwokY<1o$ zFnw9@d+SUl8@0@e-zFSOzFOVs?cR8E)z>*&lwa?*di`L=*NIC%zwiI@_kP%xZAXrO zlUiJx6s^L;QIOIS%K2$6W0{ukFA@x=EaPe zDajxIHgQ}`u}}5*Q2e=%DZBD^lP;gf4)N7fKc+=5e>j`@c(pmZWyb>!+2}V>GS6(f zs!qKcVofDH$B_{P5)0@;-5<1#VkB4m~NW%5>k#e?h%!0bhfnYa@fe zOr7M9e~-MqwNXz0xVXQkb;$hXtHcyHrA-by#ycf$x8(^Zr-X*Z0c%92T@t=vvyFAO z?zzmoRg+{lK7O|E&F7g#QFmW#I$k?()wT$ky4#%t)UG?! z)r6}yEbKVip36o8L2U=2uwFou#CAL_(b9 zXhxFByNlD0PT#ZVs7(+U_na9GOtTruj?Pn0->t2=Mlj!$iak#nN?IpLu zlqE7OAMZ=w6aA-W;n`Y_{YCPp^i6u$V_iBAU)>`6dmW#Pp;w1Q&*|a^bL=)0h5Tg= zpJm#AKU9iqg;#L>w^JR*ek|RV7}9cCR^*t$<{aDIImd6S@Z_%WPyev+me278t!Fuc z)C!L3oD;NKqI0IJv9QlAS~2c`ZN^2JOQO#sL=-OiiN{p>ebKwI@BIu@)vB=CaR1jb z-rU^1jo?4wOO167*ZsG$e#;J|! zv9m&@tn}~7s$J9n7s$YIdNMaW)B&QsTC+)gqM zUUhMTq@x#C-o~WGRkGJ!hCA?-WgkyuXm71ZTeKtn==a(B6?tbx*DQQ{=HJ6Di}!f= zyC3B>->5o2$0~o-vcGDc9cLcSoADxIPO{}?)^7}qtE1x1F zws6@mjeBeDW_`Ka@c;PBw!*s}JJzx-dY?6=;ee%P%Kmz$;$=~8A;;!123*SF+}Gy8 zYJXu?@@wX;&gndM=N1YmPCU|4wZ(5m*_nntt7IpIm#%IM8+M>5JXR@0#pY@;Z1q$a)u#iWhfxz-o-HgGf<{t2&J_|%`bwEUUWvF~$S z4o`et7j;Dbx5d`Q<|WL{4E3w#PN}zSSDmKP^J1Y(V!(vwkN)NQhF*L8@XowHj;svv zYwB|s2{_-1*yI0W&aID@Tb-|6DN?syvBIp#!1sPrjjLBy`=Wv;z9+iZ#BO`m&f8UL z-E=Vc_qusT=RR&Rx$^H%_>bSGM1_~TFW&E~CS@JO?#!C@{=CNX4U3+iI_G#yVD40Z z&dr~9lpgWr_iN1x7II#>O{>;P*Vw(e^QZH3dhXi)^D-s2-)MLIf8}1&!IF-NabIGua&U;KO!|8J z`G%G?53ky9-Tr{5C*69^zvg|-S)mC%Q|DIhTVTj$|9pdxVq(j(VC%)rS;wcG54C<0 z{5BxDmr=z>w|rA`la#Ww>My&VaK2{GusMqh49*3XBu@PH^85UKdyG!K3Y>6vr^WKS zmea(<7Pzz(v1DT1>o3dRYcX;}h)8Uo8t`c8%otf$^S)I^ z?Pk|XZT74)%emfjP%4*491JoIS)!Ww^(0Xqng0B)^(B8=600_JP&w0`Lu6lG4*Db$*th|``>=~f~C&) zua-TZF=bX)n{s>MhPA4XvkyK<&UA_~lFFNHv30R}ZTMmqmgP$)Kb}{3X6A~%Rh6d9 z3zkJpu33AFG49%=+~m8z_Z0Z_oswMrPuBOvnp+cpTW)n;_WF5N`;|>+uJH9$opNdm zQ&rvS{E$IHTkD+3DX)Ds@9GXZPr0sG6~bVgl@liNC*=;y)Ah$!<)1U|`ClUVwR-cb zg--sz)^Rb^O>h0MmUD`UYHR?^?kZ#m`&JX$iN_Rr_IWiS-iXSL_qY?St^EX8a!s* zO=q?|IpF?2Z*^9w#ec5(`&k{ zrjwP0Y|^Zz7O_o?9w)YmKmImp8pC7V*rP26XUv`BqP*SeE6)a@g1_6$Z(L!P_T<>v 
zCFH|wab`-ENs8M9Bgd_?`~v%D2rgOjfa`eTZIeI2ce*CnMa}>6E-{qTldtgQn_}g7 zcC~2Z*hPEaYw7Fi9BPdcnPG6TuGFH&UFf2Cd-4A(_a5c^S|`QQvT4qedl3g@I2T_v zowdPWg=hRpU0y#AO98u;4>OiHCl}t<*du=Hpv18svu>E%MRByRPc(_oG{1g6xpyb; zWk)a8G(Wxr4J%q~)2$CZI&%A!`3CRVJd-xOUv=4Q+MJZfjjjK08Dw5xxPFmk!Y}rt z9{$JoMHb)Kw06GV{56^T{lt`NuPnWg6p*mZp}XvIx#~L|)VmDqX%UQ;4`M2uyEZx+pj!*X=TKFgX(8Ks;T!x|3ZaclbP=3{o zrMKZhYviFycg|uf)f3Ch7}yu6MNazTxH#ti?Kzys)^NH^cpBI7xFzfO(xO#m8ba;? zF^035Jnl&}6!%%3a@+RosK`F?u9S<5lgiF%>UbDyb*-DT!+)#rN*Rf-g30R23<=G1&o;`W) ze*LPPkIggt`VjGlCGBNb`;I=!fB948mvvQ2;lBe0EZwz+JPs~rzi3I{*?Qd0*X`Hg zHBK|m8Q!pZqxAgXp3IVi8-?y45m%O8dBCDgaK--1)ic`4dOZ)9|NGOXH&y!i7=mF<^nuCYe^o_?c_&vfh8MQ6li*8e`vq2Rs9eUU7e;G|XjiZT0~ zGratx6XqQJ)Kh5oM!e|6o{wy|+rOvYyRiQAjvoix&d={Oc6iuq9;wQdrQ>MgXW^e> zdC6V*@K2uW{&$pCyy$-JemUgUKHvTN>fhhG--^1wRrs!wyI|W)zY-3{Tpy;yFu86H zg-4ftndd$=yrFr&bF-yRRU%Mx-PU+&)u)`D7vMz}5*=hI7sCLfTyt%+&qxr5| z@233q`?YNGWb3cCb2dFrI1?+$a8|)((W4s=UwsJHU!!erWv!Z;7^G6P^{j!{(@LW= zVU_Fq_BWM1^?Nwydr^Um9i#hs#S#NWhpy0lb&FLFoM6xtn~>9c!`eY$LVo^|e6eta zn~L`@Wt(U%cwKTv{KB*I`)&w6+}YN+_4V}*_n;s<%X2@y6Fu{KG#DQi2c7sfHD=m_ z2L(4R=WLvQxa?8t_F4MTe^L`S9E%g%`q?|wZ_kt;-Lv)Y{maU)x;@YL)t5sLdN;Bt zNQh;8dwKfZ-_WDatk+wxm#3$`Ul~5-sN=b%iq?XH-^*sXV%t%}E6a^V#279s zGcbFY9p{*F-S5yG5f7t(rL0Fg9!%T#J2R_FIypw~e1=3zfQ!PS&ZqSeaeHeIzcVY? 
ze!V%`$SAPvckI9OYp%K4ozwoZ(C#uL8zYC6&XMP06O3;wzI`lkmE+%pjn(e134yhj zw=3s|w=^(6eaN5tO}wTj_o~W9pVe6hi-XrKUUh}(=c*7!17~*Q5`pbAe0Qcc8JsE6 z3U}NTSYb8GQ9#Wq`^7XT^Ub%P6!kC&baA9xAS6~tPX(%wThG9OiwZDuKCW$Q2fwVz--aiv$vY6>o-1pFy)e~$JXMM zDN`Ozd?b`Vb=5g;12Ktnt5g0mHgmi_?zgo3cb=bvbyy)5C z1zy+wJW#pe!f{{chWpWziZ9hVb~$pmv$iTFUF`XN|Nj41x6~MTmlc;(+ghyAaHx!Y zQoFm>RXTaLd-fFt4vuBFJ!eKAnBzI^V&%m2#)6Oo;wIPj{Pq0owQASl6`9u_eye(* z!jyY^k$#!#o~ws`?w%M}B|k$!rdZ|Dq|D+KXQ!I2TEE45M&RPKb*87TSn*j1nrexy z>@H|gEC}2b9y34wL$Lrh(n{Dt=*~C1YnL*3eeL_r7 z*U`)RS*tcgJY%(3FTHkchjKzi@|~qya^IMoTe?U3M2__F882m2n|EI0aMrm(C4r_H$UFFmcWG=Z3pu_eloHEbMSteLv#oa`9Eyb^d=9_3(UYW3Wt`?cVJT zet$GnzfM*=7thnX={JKxcfqOPqL;0jn`fkz@_v2E!#QJNBWLWT6@l!+QXUhl*G}Y- z4Nu*XnU*Ij61uLsp&>$$uQn)ByfXD~)82?LZjryA&$VgcI*{td+bg;%wXWgguZtq4 zEV^6<>hkB9rsss7dV8V7NkN5gdZiP0*xLHj<@X)E$2Pk-=W`@YBb zv$pDeD&AMWMKtrLi0FC$ebf4uh#T5*&HXRwn6j-qIpz}A)fG2CI~*##X3^20vvuY> zoqPQv?4I8RGNcq`L>D@osK~lD=gqJGwJ$T4pSS+WvHf~;%;5vezGffs4-PVu4gYa^ zaqBHXMFk^W-y8d;Ft3hz8+_Z5g+=*LM$T2S`oG6_m)$ieHv3e(Ynk2>uEx*v=l0j1 zl-#^OEKrQi@_O5^Kfk$~IA#~AUwZY?SXK8BAeW>Hnu{+Er zwx2DYcf~wp3be@*k)K-KoWFPLD!%n~5%)4@2kSigR#o7fCiboFG2j2P$+rp%mP}tK z#JG?}`{Qh`2WK|)3YFh4b}?nFzi?w)X8-%jMeK{S%eNeS`?X}}+S{)kWuGtj`+L!9 z*}mzwE$&OmicV9$ax->I$}O2JzY`O@=dHTSQW&?mCvfqdIp-`|6sFvG{c_tPu4kK0 ze&=y1Z7c}cvh7!I{hg}Zu-Co()8}vUoqoABM6uv!S87Ro*tdICu8}5}I3Ff5+)BOq zWX8O6scZd3Sy&D!MO>DseZ{{2|B=bFw~8?S-+1}mV-=?$Pfa=bdcUpVfzMbavZY_T zMoBeXJsQ8n|FixC{~H!;X)cV3GtN0~Vr>(?y0gf=cNNED>B>|SMNfvfo_t$(baC0AEi0C6@(O*$HWy57Fsr6k8e${`@C1_t$Y2=J8!ow|0+?K zb$k)GUY5;@C@~fZX~QdgOafb0w@7oJn5murKg3u4hqK$wb6p2EKW(4S{WmevRBQEK zTdQM<>)AgG30?fvzMswfX3qJa61QzHU3$}N#;!ZlY_^o}<|jIb9QTAq`BXY@TM_7S z=4DSKhsV?0lJY`HqZp@M?G7B9cUCCqOgsDM-Lbp>e_y(Ixohs+Z=9XM*4x&`E7wh% zwCUrHMSF~buFYA*;<7w_!rx0eYR09?0%lu&rTu-mTmJTj`d>?`Cr-Y(d+W3m#eh#2 z_&+r7`O|FLwMpSbM9PwuekqRkVsS3gT>43Sr!AIxsgPLodxui|6PJ{@-BYgals%L1 zVu#-9CcTq+54uvW@G@|?b3R<5u{y6q=kE4LULV_+TKn(*E2;bBTU9}Bm|5BVs@?Uw zkN#Ry&k|tG!gx?*_Dy@!qgu<;TXueU!!0GhfBVj#sf+$!(0;x6ZjkvomhIP@IT&v3 
z^^Sg7vhUilpR*=9zEzO%Jh9;Uva@N5)sH+Z9x^t1R*46_5}oyW&7>l$WC52Go6RoC z9)0RDC1C1VgT-!##ZAQGKL73Kv-@;qqVc=pu*>ZsiU!3;i+1T-SLfg5xbTbHnC-9k z&LR)d{1*pVy>jDrztP%(6r9=-K3Nm#=&=JEyuhk0EVq zorc!7ovROPo9XZ3%U`E!|JW`)RdxIIX0EGC%4#w;ua`I%RnNw_&_+D+*%RY-=W~%t z3@wXV!uJ3BbwB*tp8cgKw@#UK#lAUdS7O7p^@96-z2a`4nHt_+JW)(Au{zCguhmB_ zH?d0Z?~el)_ayBzDzuV=mb@3~t+ zg;~e_eb(+z^nS~+O6Jk8nj$e(k6#)y%$*fdLZ*ecJ=_21-@a?w`?gEF`-8fv6?^1V z#6^?;&e@r}d(E`;nmO`v>oQv&i%C!O+dD67@^O#o7Uiuo@~xK#A1b}Zx1mcQPydFy zs^+ImQ>M39dcWS*W?H}h^YyDYHhq6%l#%|{-YUp?+OC*)%(6Yb3bLjyZBqkleAmEzy6rn);RlKP4BxiXKr0RFje(%piDIT!{#RL z4BwBcJtw1Dn0Fqn@h`Aa?UUkd?4G23^o@<>R;lDkA^#rR)&Bdtf7gna_u}8$ysoWI zS!=QNapdG%hV};{4i`igD={c8Z#CH7Fema`;_@%7vZoZE8`nBi8s$N}- zpqDSN-+%b6>O<#Qwh$SP`jqXu`N=uAKX`2x)|$I=OUbPq7MYWWU!7qSHR}F(>decY z*$m6V9>===j__NgHaAQ;!L9e;hKlmD>ALT2|NkhvVlHd{y0-d>%;PJ4cR@{b->l;s z0^fEB2<%nqmJv@kiv0P{@Ri$+Wtvw152~BV>dE|ePFcox=D-9EmQ7w>9}5CIpT98h z-rO#E_!OscrvE#Y%I*S&%>u0+UWV~AE$0?a$a`?=%i%9e??kPblxfQ35Tf3)Xwk}j zk+Z*l5PtsaUEQ0_a&PZH-hJ%t*9WK8#y{P}S}?IZigB;a zPpPyBl{Jz5i<@;O&k0<-WX`!AoJmo=<)YISuWHT@OWw1yLLp_(w==U%rR)Ar-mAd% z?S;hNSwTy=8nxr=8sD8gbuZYSXN9cdxr7+oX>%q|nz^K=UZP2Wqx+ch@k9TAT;3$h ztM@#5#cKQA|5omGUHY73lDF5#o|$R$&1H^kjMfR$nQPVEx9rKu=$7WmM{AyLYCbdP zT$zMa-U&+w@wAi6uW}q&bgaXoYK`HqFN^Hs1^#`E-~RQ7#NJJ_ZohWixr&MXo4ehX zoAEMUS?zufJ1jcaT<=XS<2*g(F^4P5tlM9v*Y8{K>al8nte<3NvvIPYIZw{dinTuq zy-podYh5pK%4D}=OKI}*&hOs(sXmuV)fkE(slk!MX`^r4S~ue?lL!1#`uG07{`A(q zri7m%yubf)cK#zJrzlTNIZdr^pjNc=)@LGHv=&(Yd~&APYu6@$_w%*1%4E50_Ew&E z{Qv!b#g5~DgR8rhCQ7_w@aV3<|9*z_~)rJ zFH5#-DeW-b-*e9Mv-k3=95dz~bP_DQ5Gxz^rt)>s+kAaFh6~poKmKy7C~rM`=b@P5 zsH}CCd+l$$J+g?!<(#+pwwab^v|a`&Gjh1K+`F*Gw)*+`?eF(~`SbC_If*-thQ1sz zTMF#noj-Xue7Od<^Bn$N^*s#_w-!#n+_y1!=N(rIs~HPt2TsljTzm$U5?T&yY028< z8MOA;43()FU!}dD*=;HN&-;Iy@UlC#=P#&9Q;ocZZ%_L2W8>iH5rSNjYY-hPrPnlt~L>XU6u2I-!S6F$hU z?YH@Q;{LXfrAzI)p0q6N6r1DXp(WWK-zQyvWwpj_+a&Cj9 z!SW4eyH;1o8wv5KE#^HG=%k=ycQfW$b?N`pn^*qvcn@;T=`TvY95t0sxptXrX9uj1 zRct-HLy!O5C6|>erTi*sPHZWfOd&Pe1_77FI9$K15PkFDm*mplE 
z^1<^xRSD`|Em74>phT*>u4Ly*>9!+D``%vN`DvZ$q=k=ny=gX4-rcC(l~P~7#XE>q zI(iE$|G&K8vuEDdu{rYATBYr`aNmBt`R^(&8IAs6Yqm9WXQ(xNXi3>rm&)@-NYCQ9 zAQyL5*462?zsuXV{dw_@d%|2hn`<+Sc$xjz{GPCH_OyLVCw_i@xQ92Q%#`cF*-PFT z-v26lPCpZ{aw;;6U*_&>?6Kn<*Np6U$-2HJ?tzw*RNht#_Hq4iKGy!e=GLEY?DhMl z{(0O#-dy)tPd(6@ZOPmjH|;}J&*-Y8_wD%`Y&!P=w`8^1^qyB?pXNXF=%Q;Zk@>6)48lT?(xOo^@QzKH zz3=IasX?m)%{dNk$XJ=#rB{*jPrpC@|NPy8Tr#ge7ydpwNp_{|dCS;*UY4!OUJg$d z`D}`xbaKt4XThBf2@x(?v$wF`|9SRU*T+`Z6F!o0E0>5U8ocX_e;9xKS?CpwQpzo;v%I<&4slVwtU9wjOd0$}(k2yYhltYPHzf zRnfn{EZ*;$7WKd7{NJpz7F!?B{Jh*;Cg^J2(lQRFrp&TO#oRNDZz{blkhsVa#r*5m z^=tC=Q(x%svUhS0Yq>0YCyw8#r}5jHYyV!B{M$VB^VYwXfkG}nOsbjoT0Zn*^JG72 zks|ilHgK+X{uPPqSvpfgRvU_$UJJYCIxq6evD$y^^DP=??fSQJ?~CipHT~XNY^v-1 zG5^Vw-s7UFsr7-@hk6|Uf08Kj^Guxd>8~P-07rM4zwZ-yf1Q78%np97zVhvI?}hs- z`o6~gJ-kHI)IFp$u3_V|e={`%rE>gqW}3Cn656~(?~tY9v(u_;0(y(BBt0e;uML!7 z5?2eq_@>5ks}$qr+_dZi6;F@*SDD$DT>mUF`SNG8!n+Ia>%RI{Wnil=etZ9~())hh zrHSqQKeT4QN5lMgTDQ7mJhw`j z&K2P3R%T!A)LlO1zFgg>smUu|9p5&S`IF01&c^kBKCA9h*Urx0yGZAy_EzPThXR{F zpA3C4DTl*?@vg!T=VK3T-#uvR-zJms`w;W)hqhK0e}k*J`>!`YnG{r0@t1e)?zPvJ z|D99)%Pdf7hp=(x%`LAZZ%nheGTnU3gvc|~pFAsJGF*F%$2?YZWmxi>T(zw~zC6$W z_da&_^SYPwPrQ2G^_TIZY2JGF-yz%u3vB)b%CD52#c(M2a?9IOGHIJnm|J%#iwL^a zZr=9e+x)k;OaG<{Pn>IK#MbI__`uuM_ZL_j8I@lt-luY;a{cDjj>k48Jqit(XBw9! 
znCy^e7_X-CtmM!f=@hRuS2-AFxpB1g)b8eEou|IbJNk2^cAwnq`>M+rt5?Oic<5^$ z?eFfg-5dH=fvKre?9d&jXJ;l%-z9jJGnX;>^{uySPj^Zw-#+#C@H&p=);spf)ue1^ zj{e?u{pQ=#wKY{`vW`sLP<)ic)AY%4pV^Gw@vx$ptbwa3n+X>OJ3Y)CkA zVRqD4*Y7sl9zK}zarK0`7yl<8YKXk{_N#*8GLijfUPj$L^^@IpZA1@4$GH<~$6H@c znEKwIg~i!z!7fFs_Y3#ud`n4xrkvUFYxy^^PA7%*Un({s5hXjKLZ7osa(^vZRB|V4 zxoU8FaBNWD9aoD9(Pys5q)(l4nRD{4K#|K?J{$PI-sIt3G^@d0Wn12z%iAWss9XGP zhn>Q~Lk+uRrut+Zzx+I2V0oFzyj(wD0fAFRlZuiVWjHroTR!Vbuh{;N?;dS`bzkY% z%EYZ71cgnHJ^WUcpvaW_->3fG%c!fDocY%99ja!%oEiJ^Xpd*qru%#|kJfY^;EA-H zs}>@7aBXF(NnjADQ6uv3*W+7<>i<7p9(q&qy^?8R!m$>{t$A~973SQP`q=$sirmVw z7R%DZI3DLgbn7G(Yu(KK5YMYgsI35)O)+Tw8zZ%AT&Wx>$`@2Re0m zcC_A&<6riq@8GuCc^}`Jgj(N8UlnnMRY1#E?ex~G z){i-v0JmT=r8H6Tj_CNi$F-p=`+a=G^UfU-Gv{iT|=#f5CsV ztzcq!mEvBbmsx5$lk_%}G@MD=r*1KKl3Qs4e zx}5KgN@l>CH{Yr@+)fIzzINk{{tv(4jS`{`Pnvi(#m+jKGwE3{r-OmcmRrkS7=O3l zSNng<$DG+)YH+l2XuBE~07dJlHcuD-Bhwd|N zPa=NIJevP5Q;uie_XWO=kFNB!b9dXg?_L?9!qlW` zmQ>7rVrJG6I)NAH~OF`#;`#+})Q~ z_35Fc#AMSCCF>k_U%nH^ALFv%?d>m5{jR@SrzNJ`<2ZY!b6@f09ZP013otfLxLCDB z?a!6R=W|y3`J8D!ZMduFp^{UPr)FK8)xJ$Zq0d+)KbOzb5bU#>oOyDK`TPxs!SChUetfz9f77C8&WqSxs=BPQ79F#=6UQIa z;V|3Y@R_>GzRPERzUr$DKb%le`bTt2@tG;-y9)%?2lRITJY^a>VdiDdBU_}7Pr1Bv zQkE%`gN9bY9^d+p_m1hXtBYpF2L0n>Xi;)9+490Wzj{m3 z@BKAjW2DwdUJ6jWZ1Ca9E&iA6D>I{KS;WgF@#L;zJ-sZ(Jv5WGs9S=0LhKpyeb+RU zmka-hSuJRA{Kzq}w>$Iy7KQ(8oqU{E;^BU6o=$ZyVW!P-@t)=BY40-=YC7!?W?i`B z^zOtKW4E+-%aQHJx*>*_U5!X)>X!ctWRiXPUt z1ZrR1|G(wt-1)%_t9utOEL`EA{a~UJOVIxx(KY%f<329Sn!ms1lIj1uX&be@j$AJ^ z<||t?F-`p4L`I*tRhE56_iL!{yb#r@+lg`}hJbu7RUQWIK-p?NlGi2YCT=hM}c5KQzc?ULww4*15d(J&i zU|mr5wnLI3f?-)%-RjHR#2B;;#a=I5#i(GQ%km=efXr$?V=E)kk&aoLA8%V|vsAHh)w2Uxi3PVrP0Zw(FJzxQEZBMJ zkW$1BuKi*`kt!a$7DvCwFY|jo!wv=;4h7?H_x3VyD|{1R+Q8_+RB+|b&c8Di z80r|>BzJG^_qRIWm-q1qLxy_mnIEPcThj zy0)}AEn&&3xmO=azJ7AoW8(}#ujzdA3={pI1*}=qENPYN9;U8-^wb&7Q!AYOexd|$mQB~a`oh-P>-!;Z|86{Bt2Xbv8ynS#UQkC zQQ2;L%~h6+ui1`>uy$p=t@2Q9DD?YN!hO7_Sv#zuk>O7GCh^V3WE$0W_-J#guQ_-? 
z>$Ot5e8ry!+oq&+Pn(w{S@Qfx{fm22Pd+y`-(+B5P%UwdC`m~yNwrEYN=+Qu(UEU(FHLKI8@TFplHa=PsvQH#I2#S@_q&b1A`>UhT#0PlJdl& mR0hYC{G?O`&)mfH)S%SFl*+=BsWuD@3=E#GelF{r5}E+d{k#JJ literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/autograph/impl/config.py b/tensorflow/contrib/autograph/impl/config.py index 2600088595..878bb7e12f 100644 --- a/tensorflow/contrib/autograph/impl/config.py +++ b/tensorflow/contrib/autograph/impl/config.py @@ -33,7 +33,7 @@ DEFAULT_UNCOMPILED_MODULES = set(( (utils.__name__,), # All of tensorflow's subpackages. Unlike the root tf module, they don't - # have well-known names. Not refering to the module directly to avoid + # have well-known names. Not referring to the module directly to avoid # circular imports. ( utils.__name__[:-len('.contrib.autograph.utils')],), diff --git a/tensorflow/contrib/autograph/operators/control_flow.py b/tensorflow/contrib/autograph/operators/control_flow.py index 9f7202821f..671c9ccc13 100644 --- a/tensorflow/contrib/autograph/operators/control_flow.py +++ b/tensorflow/contrib/autograph/operators/control_flow.py @@ -174,7 +174,7 @@ def while_stmt(test, body, init_state, extra_deps, opts=None): Tuple containing the final state. """ # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch. - # That could be somethins as simple as a collection of dispatch rules, with + # That could be something as simple as a collection of dispatch rules, with # some prioritization. 
if any(tensor_util.is_tensor(v) for v in init_state + extra_deps): return _tf_while_stmt(test, body, init_state, opts) diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index c492ef19f1..5dd2e0c7f2 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -371,7 +371,7 @@ class GradientBoostedDecisionTreeModel(object): Returns: a dictionary of prediction results - ENSEMBLE_STAMP, PREDICTION, PARTITION_IDS, - NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPED. + NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPTED. """ ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle, ensemble_stamp) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 44e39f7f7b..0708d6b7b9 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -172,19 +172,20 @@ if (tensorflow_OPTIMIZE_FOR_NATIVE_ARCH) endif() endif() +include(CheckCXXCompilerFlag) + +# OpenMP Support +CHECK_CXX_COMPILER_FLAG("-fopenmp" GCC_OPENMP_SUPPORT) +if (GCC_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") +endif() +CHECK_CXX_COMPILER_FLAG("/openmp" MSVC_OPENMP_SUPPORT) +if (MSVC_OPENMP_SUPPORT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") +endif() + # MSVC SIMD instructions if (tensorflow_WIN_CPU_SIMD_OPTIONS) - include(CheckCXXCompilerFlag) - if (tensorflow_ENABLE_MKL_SUPPORT) - add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) - if (NOT tensorflow_ENABLE_MKLDNN_SUPPORT) - add_definitions(-DINTEL_MKL_ML) - endif() - endif() - CHECK_CXX_COMPILER_FLAG("-fopenmp" COMPILER_OPT_OPENMP_SUPPORT) - if (COMPILER_OPT_OPENMP_SUPPORT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") - endif() if (WIN32) CHECK_CXX_COMPILER_FLAG(${tensorflow_WIN_CPU_SIMD_OPTIONS} COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) 
if(COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED) @@ -323,10 +324,13 @@ if(HAIKU) list(APPEND tensorflow_EXTERNAL_LIBRARIES network) endif() +# MKL Support if (tensorflow_ENABLE_MKL_SUPPORT) + add_definitions(-DINTEL_MKL -DEIGEN_USE_VML) if (WIN32) find_path(MKL_HOME_PLATFORM mkl PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../ PATH_SUFFIXES windows) set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) set(MKL_LINK_DIRS @@ -345,6 +349,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT) # Fix me: complete the path on linux find_path(MKL_HOME_PLATFORM mkl HINTS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../ + $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../ PATH_SUFFIXES linux) set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include) set(MKL_LINK_DIRS) # incompleted @@ -357,6 +362,8 @@ if (tensorflow_ENABLE_MKL_SUPPORT) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn) include_directories(${mkldnn_INCLUDE_DIRS}) + else (tensorflow_ENABLE_MKLDNN_SUPPORT) + add_definitions(-DINTEL_MKL_ML) endif() endif (tensorflow_ENABLE_MKL_SUPPORT) diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake index 116d423093..8942f3eecf 100644 --- a/tensorflow/contrib/cmake/external/zlib.cmake +++ b/tensorflow/contrib/cmake/external/zlib.cmake @@ -31,7 +31,8 @@ else (systemlib_ZLIB) set(ZLIB_URL https://github.com/madler/zlib) set(ZLIB_BUILD ${CMAKE_CURRENT_BINARY_DIR}/zlib/src/zlib) set(ZLIB_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/zlib/install) - set(ZLIB_TAG 50893291621658f355bc5b4d450a8d06a563053d) + # Match zlib version in tensorflow/workspace.bzl + set(ZLIB_TAG v1.2.11) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 92f2ab6dea..5942ff3363 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ 
b/tensorflow/contrib/cmake/tf_tests.cmake @@ -267,6 +267,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/py_func_test.py" + # Flaky on Windows cpu with py36 (b/73556968) + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/sparse_reshape_op_test.py" # Windows file management related issues. "${tensorflow_source_dir}/tensorflow/python/training/evaluation_test.py" # training tests diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index d269b5b69a..c483a43769 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -355,11 +355,15 @@ py_test( deps = [ "//tensorflow/contrib/data/python/ops:resampling", "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 5f47dcb339..bdc003a8a5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -18,6 +18,9 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import time +from absl.testing import parameterized from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops @@ -30,52 +33,98 @@ from tensorflow.python.platform 
import test from tensorflow.python.util import compat -class ResampleTest(test.TestCase): +def _time_resampling( + test_obj, data_np, target_dist, init_dist, num_to_sample): + dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat() - def testInitialKnownDistribution(self): - self._testDistribution(initial_known=True) + # Reshape distribution via rejection sampling. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist, + seed=142)) - def testInitialNotKnownDistribution(self): - self._testDistribution(initial_known=False) + get_next = dataset.make_one_shot_iterator().get_next() - def _testDistribution(self, initial_known): + with test_obj.test_session() as sess: + start_time = time.time() + for _ in xrange(num_to_sample): + sess.run(get_next) + end_time = time.time() + + return end_time - start_time + + +class ResampleTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ("InitialDistributionKnown", True), + ("InitialDistributionUnknown", False)) + def testDistribution(self, initial_known): classes = np.random.randint(5, size=(20000,)) # Uniformly sampled target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] initial_dist = [0.2] * 5 if initial_known else None - iterator = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle( - 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply( - resampling.rejection_resample( - target_dist=target_dist, - initial_dist=initial_dist, - class_func=lambda c, _: c, - seed=27)).make_one_shot_iterator()) - get_next = iterator.get_next() + classes = math_ops.to_int64(classes) # needed for Windows build. 
+ dataset = dataset_ops.Dataset.from_tensor_slices(classes).shuffle( + 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).repeat() + + get_next = dataset.apply( + resampling.rejection_resample( + target_dist=target_dist, + initial_dist=initial_dist, + class_func=lambda c, _: c, + seed=27)).make_one_shot_iterator().get_next() with self.test_session() as sess: returned = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - returned.append(sess.run(get_next)) + while len(returned) < 4000: + returned.append(sess.run(get_next)) returned_classes, returned_classes_and_data = zip(*returned) _, returned_data = zip(*returned_classes_and_data) self.assertAllEqual([compat.as_bytes(str(c)) for c in returned_classes], returned_data) total_returned = len(returned_classes) - # Subsampling rejects a large percentage of the initial data in - # this case. - self.assertGreater(total_returned, 20000 * 0.2) class_counts = np.array([ len([True for v in returned_classes if v == c]) for c in range(5)]) returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + @parameterized.named_parameters( + ("OnlyInitial", True), + ("NotInitial", False)) + def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist): + init_dist = [0.5, 0.5] + target_dist = [0.5, 0.5] if only_initial_dist else [0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test that this works. + num_samples = 100 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Reshape distribution. 
+ dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + def testRandomClasses(self): init_dist = [0.25, 0.25, 0.25, 0.25] target_dist = [0.0, 0.0, 0.0, 1.0] num_classes = len(init_dist) - # We don't need many samples to test a dirac-delta target distribution + # We don't need many samples to test a dirac-delta target distribution. num_samples = 100 data_np = np.random.choice(num_classes, num_samples, p=init_dist) @@ -109,5 +158,23 @@ class ResampleTest(test.TestCase): self.assertAllClose(target_dist, bincount, atol=1e-2) + +class ResampleDatasetBenchmark(test.Benchmark): + + def benchmarkResamplePerformance(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test a dirac-delta target distribution + num_samples = 1000 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + resample_time = _time_resampling( + self, data_np, target_dist, init_dist, num_to_sample=1000) + + self.report_benchmark( + iters=1000, wall_time=resample_time, name="benchmark_resample") + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 144460fde0..eceecfd174 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -214,6 +214,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", + ":interleave_ops", ":scan_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", @@ -223,6 +224,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python/data/ops:dataset_ops", + 
"//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index a182dddd38..bad6edd514 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -20,10 +20,12 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops @@ -50,79 +52,182 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): A `Dataset` transformation function, which can be passed to @{tf.data.Dataset.apply}. """ - def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - dist_estimation_batch_size = 32 target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") class_values_ds = dataset.map(class_func) + + # Get initial distribution. 
if initial_dist is not None: initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") - acceptance_dist = _calculate_acceptance_probs(initial_dist_t, - target_dist_t) + acceptance_dist, prob_of_original = ( + _calculate_acceptance_probs_with_mixing(initial_dist_t, + target_dist_t)) initial_dist_ds = dataset_ops.Dataset.from_tensors( initial_dist_t).repeat() acceptance_dist_ds = dataset_ops.Dataset.from_tensors( acceptance_dist).repeat() + prob_of_original_ds = dataset_ops.Dataset.from_tensors( + prob_of_original).repeat() + else: + initial_dist_ds = _estimate_initial_dist_ds( + target_dist_t, class_values_ds) + acceptance_and_original_prob_ds = initial_dist_ds.map( + lambda initial: _calculate_acceptance_probs_with_mixing( + initial, target_dist_t)) + acceptance_dist_ds = acceptance_and_original_prob_ds.map( + lambda accept_prob, _: accept_prob) + prob_of_original_ds = acceptance_and_original_prob_ds.map( + lambda _, prob_original: prob_original) + filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, + class_values_ds, seed) + # Prefetch filtered dataset for speed. 
+ filtered_ds = filtered_ds.prefetch(3) + + prob_original_static = _get_prob_original_static( + initial_dist_t, target_dist_t) if initial_dist is not None else None + if prob_original_static == 1: + return dataset_ops.Dataset.zip((class_values_ds, dataset)) + elif prob_original_static == 0: + return filtered_ds else: - num_classes = (target_dist_t.shape[0].value or - array_ops.shape(target_dist_t)[0]) - smoothing_constant = 10 - initial_examples_per_class_seen = array_ops.fill( - [num_classes], np.int64(smoothing_constant)) - - def update_estimate_and_tile(num_examples_per_class_seen, c): - updated_examples_per_class_seen, dist = _estimate_data_distribution( - c, num_examples_per_class_seen) - tiled_dist = array_ops.tile( - array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) - return updated_examples_per_class_seen, tiled_dist - - initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) - .apply(scan_ops.scan(initial_examples_per_class_seen, - update_estimate_and_tile)) - .apply(batching.unbatch())) - acceptance_dist_ds = initial_dist_ds.map( - lambda initial: _calculate_acceptance_probs(initial, target_dist_t)) - - def maybe_warn_on_large_rejection(accept_dist, initial_dist): - proportion_rejected = math_ops.reduce_sum( - (1 - accept_dist) * initial_dist) - return control_flow_ops.cond( - math_ops.less(proportion_rejected, .5), - lambda: accept_dist, - lambda: logging_ops.Print( # pylint: disable=g-long-lambda - accept_dist, [proportion_rejected, initial_dist, accept_dist], - message="Proportion of examples rejected by sampler is high: ", - summarize=100, - first_n=10)) - - acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, - initial_dist_ds)) - .map(maybe_warn_on_large_rejection)) - - def _gather_and_copy(class_val, acceptance_prob, data): - return (class_val, array_ops.gather(acceptance_prob, class_val), data) - current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, 
dataset)).map(_gather_and_copy) - filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) - return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + return interleave_ops.sample_from_datasets( + [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds], + weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]), + seed=seed) return _apply_fn -def _calculate_acceptance_probs(initial_probs, target_probs): - """Calculate the per-class acceptance rates. +def _get_prob_original_static(initial_dist_t, target_dist_t): + """Returns the static probability of sampling from the original. + + `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters + an Op that it isn't defined for. We have some custom logic to avoid this. + + Args: + initial_dist_t: A tensor of the initial distribution. + target_dist_t: A tensor of the target distribution. + + Returns: + The probability of sampling from the original distribution as a constant, + if it is a constant, or `None`. + """ + init_static = tensor_util.constant_value(initial_dist_t) + target_static = tensor_util.constant_value(target_dist_t) + + if init_static is None or target_static is None: + return None + else: + return np.min(target_static / init_static) + + +def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds, + seed): + """Filters a dataset based on per-class acceptance probabilities. Args: - initial_probs: The class probabilities of the data. - target_probs: The desired class proportion in minibatches. + dataset: The dataset to be filtered. + acceptance_dist_ds: A dataset of acceptance probabilities. + initial_dist_ds: A dataset of the initial probability distribution, given or + estimated. + class_values_ds: A dataset of the corresponding classes. + seed: (Optional.) Python integer seed for the resampler. 
+ Returns: - A list of the per-class acceptance probabilities. + A dataset of (class value, data) after filtering. + """ + def maybe_warn_on_large_rejection(accept_dist, initial_dist): + proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist) + return control_flow_ops.cond( + math_ops.less(proportion_rejected, .5), + lambda: accept_dist, + lambda: logging_ops.Print( # pylint: disable=g-long-lambda + accept_dist, [proportion_rejected, initial_dist, accept_dist], + message="Proportion of examples rejected by sampler is high: ", + summarize=100, + first_n=10)) + + acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, + initial_dist_ds)) + .map(maybe_warn_on_large_rejection)) + + def _gather_and_copy(class_val, acceptance_prob, data): + return class_val, array_ops.gather(acceptance_prob, class_val), data + + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + filtered_ds = ( + current_probabilities_and_class_and_data_ds + .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) + return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + + +def _estimate_initial_dist_ds( + target_dist_t, class_values_ds, dist_estimation_batch_size=32, + smoothing_constant=10): + num_classes = (target_dist_t.shape[0].value or + array_ops.shape(target_dist_t)[0]) + initial_examples_per_class_seen = array_ops.fill( + [num_classes], np.int64(smoothing_constant)) + + def update_estimate_and_tile(num_examples_per_class_seen, c): + updated_examples_per_class_seen, dist = _estimate_data_distribution( + c, num_examples_per_class_seen) + tiled_dist = array_ops.tile( + array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) + return updated_examples_per_class_seen, tiled_dist - This method is based on solving the following analysis: + initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) + 
.apply(scan_ops.scan(initial_examples_per_class_seen, + update_estimate_and_tile)) + .apply(batching.unbatch())) + + return initial_dist_ds + + +def _get_target_to_initial_ratio(initial_probs, target_probs): + # Add tiny to initial_probs to avoid divide by zero. + denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) + return target_probs / denom + + +def _estimate_data_distribution(c, num_examples_per_class_seen): + """Estimate data distribution as labels are seen. + + Args: + c: The class labels. Type `int32`, shape `[batch_size]`. + num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, + containing counts. + + Returns: + num_examples_per_lass_seen: Updated counts. Type `int64`, shape + `[num_classes]`. + dist: The updated distribution. Type `float32`, shape `[num_classes]`. + """ + num_classes = num_examples_per_class_seen.get_shape()[0].value + # Update the class-count based on what labels are seen in batch. + num_examples_per_class_seen = math_ops.add( + num_examples_per_class_seen, math_ops.reduce_sum( + array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) + init_prob_estimate = math_ops.truediv( + num_examples_per_class_seen, + math_ops.reduce_sum(num_examples_per_class_seen)) + dist = math_ops.cast(init_prob_estimate, dtypes.float32) + return num_examples_per_class_seen, dist + + +def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs): + """Calculates the acceptance probabilities and mixing ratio. + + In this case, we assume that we can *either* sample from the original data + distribution with probability `m`, or sample from a reshaped distribution + that comes from rejection sampling on the original distribution. This + rejection sampling is done on a per-class basis, with `a_i` representing the + probability of accepting data from class `i`. + + This method is based on solving the following analysis for the reshaped + distribution: Let F be the probability of a rejection (on any example). 
Let p_i be the proportion of examples in the data in class i (init_probs) @@ -151,39 +256,39 @@ def _calculate_acceptance_probs(initial_probs, target_probs): 0 <= t_i <= 1, sum_i(t_i) = 1 ``` - A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` - """ - # Add tiny to initial_probs to avoid divide by zero. - denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) - ratio_l = target_probs / denom - # Calculate list of acceptance probabilities. - max_ratio = math_ops.reduce_max(ratio_l) - return ratio_l / max_ratio + If we try to minimize the amount of data rejected, we get the following: + M_max = max_i [ t_i / p_i ] + M_min = min_i [ t_i / p_i ] -def _estimate_data_distribution(c, num_examples_per_class_seen): - """Estimate data distribution as labels are seen. + The desired probability of accepting data if it comes from class `i`: + + a_i = (t_i/p_i - m) / (M_max - m) + + The desired probability of pulling a data element from the original dataset, + rather than the filtered one: + + m = M_min Args: - c: The class labels. Type `int32`, shape `[batch_size]`. - num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, - containing counts. + initial_probs: A Tensor of the initial probability distribution, given or + estimated. + target_probs: A Tensor of the corresponding classes. Returns: - num_examples_per_lass_seen: Updated counts. Type `int64`, shape - `[num_classes]`. - dist: The updated distribution. Type `float32`, shape `[num_classes]`. + (A 1D Tensor with the per-class acceptance probabilities, the desired + probability of pull from the original distribution.) """ - num_classes = num_examples_per_class_seen.get_shape()[0].value - # Update the class-count based on what labels are seen in batch. 
- num_examples_per_class_seen = math_ops.add( - num_examples_per_class_seen, math_ops.reduce_sum( - array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) - init_prob_estimate = math_ops.truediv( - num_examples_per_class_seen, - math_ops.reduce_sum(num_examples_per_class_seen)) - dist = math_ops.cast(init_prob_estimate, dtypes.float32) - return num_examples_per_class_seen, dist + ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs) + max_ratio = math_ops.reduce_max(ratio_l) + min_ratio = math_ops.reduce_min(ratio_l) + + # Target prob to sample from original distribution. + m = min_ratio + + # TODO(joelshor): Simplify fraction, if possible. + a_i = (ratio_l - m) / (max_ratio - m) + return a_i, m \ No newline at end of file diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index ecdb8967f4..268c8d0342 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -53,7 +53,7 @@ class CholeskyOuterProduct(bijector.Bijector): its spectrum), and that the product of two positive-diagonal lower-triangular matrices is another positive-diagonal lower-triangular matrix. - A simple inductive argument (proceding one column of L_3 at a time) shows + A simple inductive argument (proceeding one column of L_3 at a time) shows that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive- diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g. 
diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index 762685db14..4384431e7b 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,6 +1,6 @@ # Eager Execution -Eager execution provides an imperative interface to TensorFlow (similiar to +Eager execution provides an imperative interface to TensorFlow (similar to [NumPy](http://www.numpy.org)). When you enable eager execution, TensorFlow operations execute immediately; you do not execute a pre-constructed graph with [`Session.run()`](https://www.tensorflow.org/api_docs/python/tf/Session). diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h index a8d5a0dd83..bf2aa75545 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h +++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h @@ -53,7 +53,7 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, int32 samples_per_second, int32 channel_count, const std::vector& samples, string* output_data); -// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8 +// Reads an video file using ffmpeg and converts it into a RGB24 in uint8 // [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg. Status ReadVideoFile(const string& filename, std::vector* output_data, uint32* width, uint32* height, uint32* frames); diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index bd764ed57a..72835c3ad8 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -202,7 +202,7 @@ class CriticalSection(object): or lazy way that may cause a deadlock. 
ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same - resources as in `*args`, `**kwargs`, and any additionaly captured + resources as in `*args`, `**kwargs`, and any additionally captured inputs in `fn`. Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index e3fc6bf0f0..4092b32004 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -112,6 +112,7 @@ class GANEstimator(estimator.Estimator): generator_optimizer=None, discriminator_optimizer=None, get_hooks_fn=None, + get_eval_metric_ops_fn=None, add_summaries=None, use_loss_summaries=True, config=None): @@ -146,6 +147,9 @@ class GANEstimator(estimator.Estimator): list of hooks. These hooks are run on the generator and discriminator train ops, and can be used to implement the GAN training scheme. Defaults to `train.get_sequential_train_hooks()`. + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. 
@@ -160,7 +164,8 @@ class GANEstimator(estimator.Estimator): else discriminator_optimizer) gan_head = head_lib.gan_head( generator_loss_fn, discriminator_loss_fn, gopt, dopt, - use_loss_summaries, get_hooks_fn=get_hooks_fn) + use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn) return _gan_model_fn( features, labels, mode, generator_fn, discriminator_fn, gan_head, add_summaries) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py index 387a62bd74..955482599b 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py @@ -38,6 +38,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -194,6 +195,12 @@ class GANEstimatorIntegrationTest(test.TestCase): lr = learning_rate_decay.exponential_decay(1.0, gstep, 10, 0.9) return training.GradientDescentOptimizer(lr) + def get_metrics(gan_model): + return { + 'mse_custom_metric': metrics_lib.mean_squared_error( + gan_model.real_data, gan_model.generated_data) + } + gopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) dopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) est = estimator.GANEstimator( @@ -203,6 +210,7 @@ class GANEstimatorIntegrationTest(test.TestCase): discriminator_loss_fn=losses.wasserstein_discriminator_loss, generator_optimizer=gopt, discriminator_optimizer=dopt, + get_eval_metric_ops_fn=get_metrics, model_dir=self._model_dir) # TRAIN @@ -213,6 +221,9 @@ class 
GANEstimatorIntegrationTest(test.TestCase): scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) + self.assertEqual(scores['discriminator_loss'] + scores['generator_loss'], + scores['loss']) + self.assertIn('mse_custom_metric', six.iterkeys(scores)) # PREDICT predictions = np.array([x for x in est.predict(predict_input_fn)]) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index a21358c50b..ff903a78cc 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -25,17 +25,21 @@ from tensorflow.contrib.gan.python import train as tfgan_train from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.canned import head from tensorflow.python.framework import ops +from tensorflow.python.ops import metrics as metrics_lib __all__ = [ 'GANHead', 'gan_head', ] +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=tfgan_train.get_sequential_train_hooks(), - name=None): + get_eval_metric_ops_fn=None, name=None): """Creates a `GANHead`. Args: @@ -47,9 +51,12 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer: Same as `generator_optimizer`, but for the discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. - If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. + If `None`, uses defaults. + get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. 
+ get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. @@ -62,6 +69,7 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer=discriminator_optimizer, use_loss_summaries=use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn, name=name) @@ -72,6 +80,7 @@ class GANHead(head._Head): # pylint: disable=protected-access generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=None, + get_eval_metric_ops_fn=None, name=None): """`Head` for GAN training. @@ -85,8 +94,11 @@ class GANHead(head._Head): # pylint: disable=protected-access discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. Defaults to `train.get_sequential_train_hooks()` + get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. Defaults to `train.get_sequential_train_hooks()` + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. 
""" @@ -104,6 +116,8 @@ class GANHead(head._Head): # pylint: disable=protected-access self._generator_optimizer = generator_optimizer self._discriminator_optimizer = discriminator_optimizer self._get_hooks_fn = get_hooks_fn + self._get_eval_metric_ops_fn = get_eval_metric_ops_fn + self._name = name @property def name(self): @@ -173,13 +187,26 @@ class GANHead(head._Head): # pylint: disable=protected-access gan_loss = self.create_loss( features=None, mode=mode, logits=gan_model, labels=None) scalar_loss = gan_loss.generator_loss + gan_loss.discriminator_loss + with ops.name_scope(None, 'metrics', + [gan_loss.generator_loss, + gan_loss.discriminator_loss]): + eval_metric_ops = { + _summary_key(self._name, 'generator_loss'): + metrics_lib.mean(gan_loss.generator_loss), + _summary_key(self._name, 'discriminator_loss'): + metrics_lib.mean(gan_loss.discriminator_loss) + } + if self._get_eval_metric_ops_fn is not None: + custom_eval_metric_ops = self._get_eval_metric_ops_fn(gan_model) + if not isinstance(custom_eval_metric_ops, dict): + raise TypeError('get_eval_metric_ops_fn must return a dict, ' + 'received: {}'.format(custom_eval_metric_ops)) + eval_metric_ops.update(custom_eval_metric_ops) return model_fn_lib.EstimatorSpec( mode=model_fn_lib.ModeKeys.EVAL, predictions=gan_model.generated_data, loss=scalar_loss, - # TODO(joelshor): Add metrics. If head name provided, append it to - # metric keys. 
- eval_metric_ops={}) + eval_metric_ops=eval_metric_ops) elif mode == model_fn_lib.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None.') diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 8168f005cd..6587f1fc60 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -62,9 +62,14 @@ class GANHeadTest(test.TestCase): generator_loss_fn=dummy_loss, discriminator_loss_fn=dummy_loss, generator_optimizer=training.GradientDescentOptimizer(1.0), - discriminator_optimizer=training.GradientDescentOptimizer(1.0)) + discriminator_optimizer=training.GradientDescentOptimizer(1.0), + get_eval_metric_ops_fn=self.get_metrics) self.assertTrue(isinstance(self.gan_head, head.GANHead)) + def get_metrics(self, gan_model): + self.assertTrue(isinstance(gan_model, tfgan_tuples.GANModel)) + return {} + def _test_modes_helper(self, mode): self.gan_head.create_estimator_spec( features=None, diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py index df71187fbd..a9b8faa712 100644 --- a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py +++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Miscellanous utilities for TFGAN code and examples.""" +"""Miscellaneous utilities for TFGAN code and examples.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index a320a3f232..592d37b432 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -677,7 +677,7 @@ def copy_with_input_replacements(sgv, replacement_ts, def _add_control_flow_ops(ops, control_ios): - """Complete `ops` so that the tranformed graph is valid. + """Complete `ops` so that the transformed graph is valid. Partially copying a graph can lead to a malformed graph. For instance, copying half of a while construct is likely to result in an invalid graph. diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc index 60281951dd..66939fbb0f 100644 --- a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc +++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc @@ -115,7 +115,7 @@ static void CheckOpsSupport(const GraphDef& graph_def, HexagonOpsDefinitions::getInstance(); LOG(INFO) << "Checking " << graph_def.node_size() << " nodes"; LOG(INFO) << "dump_all_nodes = " << dump_all_nodes - << ", dump_shape_and_tpye = " << dump_shape_and_type; + << ", dump_shape_and_type = " << dump_shape_and_type; std::unordered_set unsupported_ops; bool all_supported = true; diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index 8f406ace1d..f230d93da4 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -17,7 +17,7 @@ ### API This module provides functions for image manipulation; currently, chrominance 
-transformas (including changing saturation and hue) in YIQ space and +transforms (including changing saturation and hue) in YIQ space and projective transforms (including rotation) are supported. ## Image Transformation `Ops` diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py index b261f41bf9..d6b1a61b71 100644 --- a/tensorflow/contrib/kfac/examples/convnet.py +++ b/tensorflow/contrib/kfac/examples/convnet.py @@ -325,7 +325,7 @@ def distributed_grads_only_and_ops_chief_worker( All workers perform gradient computation. Chief worker applies gradient after averaging the gradients obtained from all the workers. All workers block - execution untill the update is applied. Chief worker runs covariance and + execution until the update is applied. Chief worker runs covariance and inverse update ops. Covariance and inverse matrices are placed on parameter servers in a round robin manner. For further details on synchronous distributed optimization check `tf.train.SyncReplicasOptimizer`. diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 45a760c9f1..b7f63d8d94 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -66,7 +66,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): the local approximation with the Fisher information matrix, and to regularize the update direction by making it closer to the gradient. If damping is adapted during training then this value is used for - initializing damping varaible. + initializing damping variable. (Higher damping means the update looks more like a standard gradient update - see Tikhonov regularization.) 
layer_collection: The layer collection object, which holds the fisher @@ -114,7 +114,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): self._estimation_mode = estimation_mode self._colocate_gradients_with_ops = colocate_gradients_with_ops - # The below paramaters are required only if damping needs to be adapated. + # The below parameters are required only if damping needs to be adapted. # These parameters can be set by calling # set_damping_adaptation_params() explicitly. self._damping_adaptation_decay = 0.95 @@ -195,7 +195,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): min_damping: `float`(Optional), Minimum value the damping parameter can take. Default value 1e-5. damping_adaptation_decay: `float`(Optional), The `damping` parameter is - multipled by the `damping_adaptation_decay` every + multiplied by the `damping_adaptation_decay` every `damping_adaptation_interval` number of iterations. Default value 0.99. damping_adaptation_interval: `int`(Optional), Number of steps in between updating the `damping` parameter. Default value 5. diff --git a/tensorflow/contrib/kfac/python/ops/placement.py b/tensorflow/contrib/kfac/python/ops/placement.py index 8a20ebe198..c4454325ae 100644 --- a/tensorflow/contrib/kfac/python/ops/placement.py +++ b/tensorflow/contrib/kfac/python/ops/placement.py @@ -51,7 +51,7 @@ class RoundRobinPlacementMixin(object): self._inv_devices = inv_devices def make_vars_and_create_op_thunks(self, scope=None): - """Make vars and create op thunks w/ a round-robin device placement strat. + """Make vars and create op thunks w/ a round-robin device placement strategy. For each factor, all of that factor's cov variables and their associated update ops will be placed on a particular device.
A new device is chosen diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index f708da6693..b7194ae333 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -932,7 +932,8 @@ def convolution(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None): + scope=None, + conv_dims=None): """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. @@ -993,6 +994,10 @@ def convolution(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. + conv_dims: Optional convolution dimensionality, when set it would use the + corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When + left to None it would select the convolution dimensionality based on + the input rank (i.e. Conv ND, with N = input_rank - 2). Returns: A tensor representing the output of the operation.
@@ -1015,6 +1020,9 @@ def convolution(inputs, inputs = ops.convert_to_tensor(inputs) input_rank = inputs.get_shape().ndims + if conv_dims is not None and conv_dims + 2 != input_rank: + raise ValueError('Convolution expects input with rank %d, got %d' % + (conv_dims + 2, input_rank)) if input_rank == 3: layer_class = convolutional_layers.Convolution1D elif input_rank == 4: @@ -1061,10 +1069,134 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) +@add_arg_scope +def convolution1d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=1) + +convolution1d.__doc__ = convolution.__doc__ -convolution2d = convolution -convolution3d = convolution +@add_arg_scope +def convolution2d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + 
padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=2) + +convolution2d.__doc__ = convolution.__doc__ +@add_arg_scope +def convolution3d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=3) + +convolution3d.__doc__ = convolution.__doc__ @add_arg_scope def convolution2d_in_plane( @@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): Args: tensor: An `int` `Tensor` to be converted to a `Sparse`. eos_token: An integer. - It is part of the target label that signfies the end of a sentence. + It is part of the target label that signifies the end of a sentence. outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. """ @@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): output_collections: Collection to which the outputs will be added. scope: Optional scope for `name_scope`. 
Returns: - A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but + A `Tensor` or `SparseTensor` containing the same values as `inputs`, but with innermost dimensions flattened to obtain rank `new_rank`. Raises: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 997f910a2a..b01fd5d5c9 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase): class ConvolutionTest(test.TestCase): + def testInvalidShape(self): + with self.test_session(): + images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 5, got 4'): + layers_lib.convolution3d(images_2d, 32, 3) + images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 4, got 5'): + layers_lib.convolution2d(images_3d, 32, 3) + def testInvalidDataFormat(self): height, width = 7, 9 with self.test_session(): @@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) - self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu') + self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32]) def testRepeatWithScope(self): @@ -3749,7 +3760,7 @@ class StackTests(test.TestCase): layers_lib.convolution2d, [10, 20, 30], kernel_size=[3, 3], padding='SAME') - self.assertEqual(output.op.name, 'Stack/convolution_3/Relu') + self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30]) def testStackWithScope(self): diff --git 
a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index c7cdb41312..f8106d1e4a 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -343,7 +343,8 @@ def get_temp_export_dir(timestamped_export_dir): """ (dirname, basename) = os.path.split(timestamped_export_dir) temp_export_dir = os.path.join( - compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename))) + compat.as_bytes(dirname), + compat.as_bytes('temp-{}'.format(compat.as_text(basename)))) return temp_export_dir diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 01c76b7a66..55b984f260 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -6,8 +6,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops") -exports_files(["LICENSE"]) - exports_files(glob([ "testdata/*.bin", "testdata/*.pb", diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 1053cce385..cc8a8035d1 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -1,4 +1,3 @@ - # Find where we're running from, so we can store generated files here. ifeq ($(origin MAKEFILE_DIR), undefined) MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) @@ -69,12 +68,12 @@ LIB_NAME := libtensorflow-lite.a LIB_PATH := $(LIBDIR)$(LIB_NAME) # A small example program that shows how to link against the library. 
-BENCHMARK_PATH := $(BINDIR)benchmark_model +MINIMAL_PATH := $(BINDIR)minimal -BENCHMARK_SRCS := \ -tensorflow/contrib/lite/tools/benchmark_model.cc -BENCHMARK_OBJS := $(addprefix $(OBJDIR), \ -$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS)))) +MINIMAL_SRCS := \ +tensorflow/contrib/lite/examples/minimal/minimal.cc +MINIMAL_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MINIMAL_SRCS)))) # What sources we want to compile, must be kept in sync with the main Bazel # build files. @@ -100,7 +99,7 @@ $(wildcard tensorflow/contrib/lite/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \ -$(BENCHMARK_SRCS) +$(MINIMAL_SRCS) # Filter out all the excluded files. TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # File names of the intermediate files target compilation generates. @@ -119,17 +118,17 @@ $(OBJDIR)%.o: %.c $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ # The target that's compiled if there's no command-line arguments. -all: $(LIB_PATH) $(BENCHMARK_PATH) +all: $(LIB_PATH) $(MINIMAL_PATH) # Gathers together all the objects we've compiled into a single '.a' archive. $(LIB_PATH): $(LIB_OBJS) @mkdir -p $(dir $@) $(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS) -$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH) +$(MINIMAL_PATH): $(MINIMAL_OBJS) $(LIB_PATH) @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) \ - -o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \ + -o $(MINIMAL_PATH) $(MINIMAL_OBJS) \ $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) # Gets rid of all generated files. diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc new file mode 100644 index 0000000000..106e3b0270 --- /dev/null +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -0,0 +1,71 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include + +// This is an example that is minimal to read a model +// from disk and perform inference. There is no data being loaded +// that is up to you to add as a user. +// +// NOTE: Do not add any dependencies to this that cannot be built with +// the minimal makefile. This example must remain trivial to build with +// the minimal build tool. +// +// Usage: minimal + +using namespace tflite; + +#define TFLITE_MINIMAL_CHECK(x) \ + if(!(x)) { \ + fprintf(stderr, "Error at %s:%d\n", __FILE__, __LINE__); \ + exit(1); \ + } + + +int main(int argc, char *argv[]) { + if(argc != 2) { + fprintf(stderr, "Usage: %s \n"); + return 1; + } + const char* filename = argv[1]; + + // Load model + std::unique_ptr model + = tflite::FlatBufferModel::BuildFromFile(filename); + TFLITE_MINIMAL_CHECK(model != nullptr); + + // Build the interpreter + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model.get(), resolver); + std::unique_ptr interpreter; + builder(&interpreter); + TFLITE_MINIMAL_CHECK(interpreter != nullptr); + + // Allocate tensor buffers. 
+ TFLITE_MINIMAL_CHECK(interpreter->AllocateTensors() == kTfLiteOk); + + // Fill input buffers + // TODO(user): Insert code to fill input tensors + + // Run inference + TFLITE_MINIMAL_CHECK(interpreter->Invoke() == kTfLiteOk); + + // Read output buffers + // TODO(user): Insert getting data out code. + + return 0; +} diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md index 7a3a231626..ab50789307 100644 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -32,7 +32,7 @@ This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc v Log in to you RPI, install the toolchain. ```bash -sudo apt-get instal build-essential +sudo apt-get install build-essential ``` First, clone this TensorFlow repository. Run this at the root of the repository: diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f23b90d9dc..d48178d608 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -3387,7 +3387,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data, const int32 output_zeropoint, const float output_scale) { // The arguments input_zeropoint and input_scale are expected to be an array - // that have the quantization paramaters for all the inputs to the concat + // that have the quantization parameters for all the inputs to the concat // operator. 
gemmlowp::ScopedProfilingLabel label("Concatenation"); TFLITE_DCHECK_GT(inputs_count, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index f6d8d3257b..62d6fe0bb3 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1789,7 +1789,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data, const int32 output_zeropoint, const float output_scale) { // The arguments input_zeropoint and input_scale are expected to be an array - // that have the quantization paramaters for all the inputs to the concat + // that have the quantization parameters for all the inputs to the concat // operator. TFLITE_DCHECK_GT(inputs_count, 1); int64_t concat_size = 0; @@ -1975,7 +1975,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // requiring a power-of-two representation interval. Thus, we should right // away quantize this array to a power-of-two interval; otherwise, // implementation will need to rescale that, losing any benefit that a tighter -// representation interval might otherwise yield, while introducting some +// representation interval might otherwise yield, while introducing some // numerical error and computational overhead. // // Now, Logistic and Tanh diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index e5b640fcee..8bdeb035f5 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -65,7 +65,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. 
Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v0.fbs b/tensorflow/contrib/lite/schema/schema_v0.fbs index 852ea988f3..891d8366cc 100644 --- a/tensorflow/contrib/lite/schema/schema_v0.fbs +++ b/tensorflow/contrib/lite/schema/schema_v0.fbs @@ -48,7 +48,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v1.fbs b/tensorflow/contrib/lite/schema/schema_v1.fbs index 06cd9408ed..b438b569e6 100644 --- a/tensorflow/contrib/lite/schema/schema_v1.fbs +++ b/tensorflow/contrib/lite/schema/schema_v1.fbs @@ -53,7 +53,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v2.fbs b/tensorflow/contrib/lite/schema/schema_v2.fbs index 96731c8aae..b90408ff6d 100644 --- a/tensorflow/contrib/lite/schema/schema_v2.fbs +++ b/tensorflow/contrib/lite/schema/schema_v2.fbs @@ -54,7 +54,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. 
Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v3.fbs b/tensorflow/contrib/lite/schema/schema_v3.fbs index cedefe08f3..020da38493 100644 --- a/tensorflow/contrib/lite/schema/schema_v3.fbs +++ b/tensorflow/contrib/lite/schema/schema_v3.fbs @@ -53,7 +53,7 @@ table Tensor { type:TensorType; // An index that refers to the buffers table at the root of the model. Or, // if there is no data buffer associated (i.e. intermediate results), then - // this is 0 (which refers to an always existant empty buffer). + // this is 0 (which refers to an always existent empty buffer). // // The data_buffer itself is an opaque container, with the assumption that the // target device is little-endian. In addition, all builtin operators assume @@ -64,7 +64,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 07d2b28bbe..0e036bda92 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -109,7 +109,7 @@ KNOWN_BUGS = { class ExtraTocoOptions(object): - """Additonal toco options besides input, output, shape.""" + """Additional toco options besides input, output, shape.""" def __init__(self): # Whether to ignore control dependency nodes. 
@@ -2016,7 +2016,7 @@ def make_lstm_tests(zip_path): return inputs_after_split, [out] def build_inputs(parameters, sess, inputs, outputs): - """Feed inputs, assign vairables, and freeze graph.""" + """Feed inputs, assign variables, and freeze graph.""" with tf.variable_scope("", reuse=True): kernel = tf.get_variable("rnn/basic_lstm_cell/kernel") diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 1f07068aee..8cab6cd8cd 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -227,8 +227,8 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) { if (!IsValid()) return; auto* tensor = interpreter_->tensor(id); if (expected_output_.count(id) != 0) { - fprintf(stderr, "Overriden expectation for tensor %d\n", id); - Invalidate("Overriden expectation"); + fprintf(stderr, "Overridden expectation for tensor %d\n", id); + Invalidate("Overridden expectation"); } expected_output_[id].reset(new Expectation); switch (tensor->type) { diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md index 495014c6fc..7680cdd344 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -115,7 +115,7 @@ bazel run --config=opt \ In order to evaluate the possible benefit of generating a quantized graph, TOCO allows "dummy-quantization" on float graphs. The flags `--default_ranges_min` -and `--default_ranges_max` accept plausable values for the min-max ranges of the +and `--default_ranges_max` accept plausible values for the min-max ranges of the values in all arrays that do not have min-max information. "Dummy-quantization" will produce lower accuracy but will emulate the performance of a correctly quantized model. @@ -338,7 +338,7 @@ below outline the use cases for each. 
### Using `--output_format=GRAPHVIZ_DOT` The first way to get a graphviz rendering is to pass `GRAPHVIZ_DOT` into -`--output_format`. This results in a plausable visualization of the graph. This +`--output_format`. This results in a plausible visualization of the graph. This reduces the requirements that normally exist during conversion between other input and output formats. For example, this may be useful if conversion from TENSORFLOW_GRAPHDEF to TFLITE is failing. diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h index 50f0620b3c..5e9c20e40d 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.h +++ b/tensorflow/contrib/lite/toco/tflite/operator.h @@ -25,10 +25,10 @@ namespace tflite { class BaseOperator; -// Return a map contained all knwo TF Lite Operators, keyed by their names. +// Return a map contained all know TF Lite Operators, keyed by their names. std::map> BuildOperatorByNameMap(); -// Return a map contained all knwo TF Lite Operators, keyed by the type of +// Return a map contained all know TF Lite Operators, keyed by the type of // their tf.mini counterparts. std::map> BuildOperatorByTypeMap(); diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 253f022e6b..8589ca361d 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -127,7 +127,7 @@ message TocoFlags { // transformations that are necessary in order to generate inference // code for these graphs. Such graphs should be fixed, but as a // temporary work-around, setting this reorder_across_fake_quant flag - // allows toco to perform necessary graph transformaitons on them, + // allows toco to perform necessary graph transformations on them, // at the cost of no longer faithfully matching inference and training // arithmetic. 
optional bool reorder_across_fake_quant = 8; diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py index 37539b9599..5ed8057b86 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py @@ -58,7 +58,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"): # Creates the workers and return their sessions, graphs, train_ops. -# Cheif worker will update at last +# Chief worker will update at last def _get_workers(num_workers, period, workers, moving_rate): sessions = [] graphs = [] diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py index 6cca0a8a00..3acd940268 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py @@ -57,7 +57,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"): # Creates the workers and return their sessions, graphs, train_ops. 
-# Cheif worker will update at last +# Chief worker will update at last def _get_workers(num_workers, steps, workers): sessions = [] graphs = [] @@ -146,7 +146,7 @@ class ModelAverageOptimizerTest(test.TestCase): self.assertAllEqual(1.0, sessions[0].run(global_var_1)) self.assertAllEqual(0, sessions[0].run(global_step)) - # iteration 2, global varibale update + # iteration 2, global variable update thread_0 = self.checkedThread( target=self._run, args=(train_ops[0], sessions[0])) thread_1 = self.checkedThread( diff --git a/tensorflow/contrib/signal/python/ops/window_ops.py b/tensorflow/contrib/signal/python/ops/window_ops.py index 50094010dc..59e67e8ba4 100644 --- a/tensorflow/contrib/signal/python/ops/window_ops.py +++ b/tensorflow/contrib/signal/python/ops/window_ops.py @@ -47,7 +47,7 @@ def hann_window(window_length, periodic=True, dtype=dtypes.float32, name=None): Raises: ValueError: If `dtype` is not a floating point type. - [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_window + [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows """ return _raised_cosine_window(name, 'hann_window', window_length, periodic, dtype, 0.5, 0.5) @@ -72,7 +72,7 @@ def hamming_window(window_length, periodic=True, dtype=dtypes.float32, Raises: ValueError: If `dtype` is not a floating point type. 
- [hamming]: https://en.wikipedia.org/wiki/Window_function#Hamming_window + [hamming]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows """ return _raised_cosine_window(name, 'hamming_window', window_length, periodic, dtype, 0.54, 0.46) diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index f2d31dc8db..d877831fce 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -102,7 +102,7 @@ class BoundingBox(ItemHandler): """An ItemHandler that concatenates a set of parsed Tensors to Bounding Boxes. """ - def __init__(self, keys=None, prefix=None): + def __init__(self, keys=None, prefix=''): """Initialize the bounding box handler. Args: diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py index 8a2c74742a..6e55b9407b 100644 --- a/tensorflow/contrib/slim/python/slim/learning.py +++ b/tensorflow/contrib/slim/python/slim/learning.py @@ -571,7 +571,7 @@ def train(train_op, default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss - and global step and logged. + and global step are logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The address of the tensorflow master. 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index d5d8e4100f..cfdc884277 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -1080,14 +1080,20 @@ class SummaryDbWriter : public SummaryWriterInterface { // See tensorboard/plugins/histogram/summary.py and data_compat.py Tensor t{DT_DOUBLE, {k, 3}}; auto data = t.flat(); - for (int i = 0; i < k; ++i) { - double left_edge = ((i - 1 >= 0) ? histo.bucket_limit(i - 1) - : std::numeric_limits::min()); - double right_edge = ((i + 1 < k) ? histo.bucket_limit(i + 1) - : std::numeric_limits::max()); - data(i + 0) = left_edge; - data(i + 1) = right_edge; - data(i + 2) = histo.bucket(i); + for (int i = 0, j = 0; i < k; ++i) { + // TODO(nickfelt): reconcile with TensorBoard's data_compat.py + // From summary.proto + // Parallel arrays encoding the bucket boundaries and the bucket values. + // bucket(i) is the count for the bucket i. The range for + // a bucket is: + // i == 0: -DBL_MAX .. bucket_limit(0) + // i != 0: bucket_limit(i-1) .. bucket_limit(i) + double left_edge = (i == 0) ? 
std::numeric_limits::min() + : histo.bucket_limit(i - 1); + + data(j++) = left_edge; + data(j++) = histo.bucket_limit(i); + data(j++) = histo.bucket(i); } int64 tag_id; PatchPluginName(s->mutable_metadata(), kHistogramPluginName); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index c34b6763a1..2e8d4109dd 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -100,6 +100,56 @@ class SummaryDbWriterTest : public ::testing::Test { SummaryWriterInterface* writer_ = nullptr; }; +TEST_F(SummaryDbWriterTest, WriteHistogram_VerifyTensorValues) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "histtest", "test1", "user1", &env_, + &writer_)); + int step = 0; + std::unique_ptr e{new Event}; + e->set_step(step); + e->set_wall_time(123); + Summary::Value* s = e->mutable_summary()->add_value(); + s->set_tag("normal/myhisto"); + + double dummy_value = 10.123; + HistogramProto* proto = s->mutable_histo(); + proto->Clear(); + proto->set_min(dummy_value); + proto->set_max(dummy_value); + proto->set_num(dummy_value); + proto->set_sum(dummy_value); + proto->set_sum_squares(dummy_value); + + int size = 3; + double bucket_limits[] = {-30.5, -10.5, -5.5}; + double bucket[] = {-10, 10, 20}; + for (int i = 0; i < size; i++) { + proto->add_bucket_limit(bucket_limits[i]); + proto->add_bucket(bucket[i]); + } + TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); + TF_ASSERT_OK(writer_->Flush()); + writer_->Unref(); + writer_ = nullptr; + + // TODO(nickfelt): implement QueryTensor() to encapsulate this + // Verify the data + string result = QueryString("SELECT data FROM Tensors"); + const double* val = reinterpret_cast(result.data()); + double histarray[] = {std::numeric_limits::min(), + -30.5, + -10, + -30.5, + -10.5, + 10, + -10.5, + -5.5, + 20}; + int histarray_size = 9; + for (int i = 0; i < histarray_size; i++) { + 
EXPECT_EQ(histarray[i], val[i]); + } +} + TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) { TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_, &writer_)); diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 6d6feb3c39..a5d8b061b6 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -67,6 +67,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = [ ":trt_logging", + ":trt_plugins", ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), @@ -86,6 +87,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":trt_logging", + ":trt_plugins", ":trt_resources", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", @@ -197,10 +199,12 @@ tf_py_wrap_cc( tf_cuda_library( name = "trt_resources", srcs = [ + "resources/trt_allocator.cc", "resources/trt_int8_calibrator.cc", "resources/trt_resource_manager.cc", ], hdrs = [ + "resources/trt_allocator.h", "resources/trt_int8_calibrator.h", "resources/trt_resource_manager.h", "resources/trt_resources.h", @@ -221,18 +225,25 @@ tf_cuda_library( srcs = [ "convert/convert_graph.cc", "convert/convert_nodes.cc", + "convert/trt_optimization_pass.cc", ], hdrs = [ "convert/convert_graph.h", "convert/convert_nodes.h", + "convert/trt_optimization_pass.h", ], deps = [ ":segment", + ":trt_plugins", ":trt_logging", ":trt_resources", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework", + "//tensorflow/core:gpu_runtime", "//tensorflow/core:framework_lite", "//tensorflow/core:graph", "//tensorflow/core:lib", @@ -241,8 +252,7 @@ tf_cuda_library( "//tensorflow/core/grappler:devices", 
"//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler/costs:graph_properties", - "//tensorflow/core/grappler/optimizers:constant_folding", - "//tensorflow/core/grappler/optimizers:layout_optimizer", + "//tensorflow/core/grappler/optimizers:meta_optimizer", ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), @@ -256,7 +266,6 @@ cc_library( "segment/segment.h", "segment/union_find.h", ], - linkstatic = 1, deps = [ "//tensorflow/core:graph", "//tensorflow/core:lib_proto_parsing", @@ -279,6 +288,46 @@ tf_cc_test( ], ) +# Library for the plugin factory +tf_cuda_library( + name = "trt_plugins", + srcs = [ + "plugin/trt_plugin.cc", + "plugin/trt_plugin_factory.cc", + "plugin/trt_plugin_utils.cc", + ], + hdrs = [ + "plugin/trt_plugin.h", + "plugin/trt_plugin_factory.h", + "plugin/trt_plugin_utils.h", + ], + deps = [ + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + +tf_cuda_cc_test( + name = "trt_plugin_factory_test", + size = "small", + srcs = ["plugin/trt_plugin_factory_test.cc"], + tags = [ + "manual", + "notap", + ], + deps = [ + ":trt_plugins", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ] + if_tensorrt([ + "@local_config_cuda//cuda:cuda_headers", + "@local_config_tensorrt//:nv_infer", + ]), +) + py_test( name = "tf_trt_integration_test", srcs = ["test/tf_trt_integration_test.py"], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 0774027711..b7b26cfb1c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -24,6 +25,9 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/common_runtime/gpu/process_state.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -31,8 +35,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/grappler_item.h" -#include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" @@ -75,7 +78,8 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) { // TODO(ben,jie): ... 
}; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) - return candidate_ops.count(node->type_string()); + return (candidate_ops.count(node->type_string()) || + PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string())); } void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, @@ -144,7 +148,8 @@ struct ConvertGraphParams { size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, - int engine_precision_mode) + int engine_precision_mode, const string& device_name, + std::shared_ptr allocator, int cuda_gpu_id) : graph(inp_graph), output_names(output_node_names), subgraph_node_ids(subgraph_node_id_numbers), @@ -152,7 +157,10 @@ struct ConvertGraphParams { max_workspace_size_bytes(max_consumed_workspace_size_bytes), graph_properties(current_graph_properties), output_edge_map(output_edges), - precision_mode(engine_precision_mode) {} + precision_mode(engine_precision_mode), + device_name_(device_name), + allocator_(allocator), + cuda_gpu_id_(cuda_gpu_id) {} tensorflow::Graph& graph; const std::vector& output_names; const std::set& subgraph_node_ids; @@ -161,6 +169,9 @@ struct ConvertGraphParams { const tensorflow::grappler::GraphProperties& graph_properties; std::unordered_map>* output_edge_map; int precision_mode; + string device_name_; + std::shared_ptr allocator_; + int cuda_gpu_id_; std::vector> subgraph_inputs; std::vector> subgraph_outputs; tensorflow::EdgeSet subgraph_incoming_edges; @@ -194,7 +205,7 @@ static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { subgraph_outputs_set.begin(), subgraph_outputs_set.end()); return tensorflow::Status::OK(); -}; +} tensorflow::Status GetCalibNode(ConvertGraphParams* params) { TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); @@ -203,7 +214,8 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { params->subgraph_inputs, 
params->subgraph_outputs, params->max_batch_size, params->max_workspace_size_bytes, params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); + &trt_node_def, params->precision_mode, params->device_name_, + params->allocator_, params->cuda_gpu_id_); TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); @@ -233,7 +245,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { params->subgraph_inputs, params->subgraph_outputs, params->max_batch_size, params->max_workspace_size_bytes, params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); + &trt_node_def, params->precision_mode, params->device_name_, + params->allocator_, params->cuda_gpu_id_); TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); tensorflow::Status status; tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); @@ -331,19 +344,12 @@ tensorflow::Status ConvertGraphDefToTensorRT( // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; - tensorflow::GraphDef gdef; - - // Layout optimization item.graph = graph_def; - tensorflow::grappler::LayoutOptimizer optimizer; - tensorflow::grappler::Cluster* cluster; - // virtual cluster tensorflow::DeviceProperties device_properties; - device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); - cluster = + tensorflow::grappler::Cluster* cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); // single machine @@ -351,27 +357,38 @@ tensorflow::Status ConvertGraphDefToTensorRT( int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); VLOG(2) << "cpu_cores: " << num_cpu_cores; VLOG(2) << "gpus: " << num_gpus; - - TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - - // constant folding + tensorflow::RewriterConfig rw_cfg; + 
tensorflow::grappler::MetaOptimizer meta_opt(nullptr, rw_cfg); + tensorflow::GraphDef gdef; + TF_RETURN_IF_ERROR(meta_opt.Optimize(cluster, item, &gdef)); item.graph = gdef; - tensorflow::grappler::ConstantFolding fold(nullptr); - TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); - TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); + TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); // Build full graph + + return ConvertAfterShapes(gdef, output_names, max_batch_size, + max_workspace_size_bytes, new_graph_def, + precision_mode, minimum_segment_size, + static_graph_properties, nullptr); +} + +tensorflow::Status ConvertAfterShapes( + const tensorflow::GraphDef& gdef, const std::vector& output_names, + size_t max_batch_size, size_t max_workspace_size_bytes, + tensorflow::GraphDef* new_graph_def, int precision_mode, + int minimum_segment_size, + const tensorflow::grappler::GraphProperties& graph_properties, + const tensorflow::grappler::Cluster* cluster) { + // Segment the graph into subgraphs that can be converted to TensorRT + tensorflow::tensorrt::segment::SegmentOptions segment_options; tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( tensorflow::GraphConstructorOptions(), gdef, &graph)); - // Segment the graph into subgraphs that can be converted to TensorRT - tensorflow::tensorrt::segment::SegmentOptions segment_options; - // TODO(ben,jie,sami): exclude output nodes (DISCUSS IT) for (auto node : output_names) { segment_options.exclude_node_list.insert(node); @@ -381,7 +398,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; 
TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - gdef, IsTensorRTCandidate, segment_options, &segments)); + &graph, IsTensorRTCandidate, segment_options, &segments)); if (segments.size() > 1) { VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size(); } @@ -391,9 +408,21 @@ tensorflow::Status ConvertGraphDefToTensorRT( int count = 0; float total_num_nodes_in_segments = 0.; for (auto s : segments) { - total_num_nodes_in_segments += s.size(); + total_num_nodes_in_segments += s.first.size(); } - for (const std::set& subgraph_node_names : segments) { + // We create the map here since cluster may not be available in all cases. + std::map name_to_device_map; + if (cluster) { + // TODO(aaroey): consider using DeviceSet::FindDeviceByName(), as in a + // distributed environment, devices from different workers can have same + // short name. + for (const auto dm : cluster->GetDeviceSet()->devices()) { + name_to_device_map[dm->name()] = dm; + } + } + for (const auto& segment_nodes_and_device : segments) { + const std::set& subgraph_node_names = + segment_nodes_and_device.first; std::set subgraph_node_ids; size_t max_mem_per_engine = max_workspace_size_bytes * @@ -403,10 +432,40 @@ tensorflow::Status ConvertGraphDefToTensorRT( oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - VLOG(2) << "Subgraph nodes" << oss.str(); + VLOG(1) << "Subgraph nodes at device " << segment_nodes_and_device.second + << " : " << oss.str(); + auto target_device = + name_to_device_map.find(segment_nodes_and_device.second); + std::shared_ptr allocator(0); + + int cuda_device_id = 0; + if (target_device != name_to_device_map.end()) { + tensorflow::TfGpuId tf_gpu_id(target_device->second->parsed_name().id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) + << "Cuda device identification failed, using device 0. 
Error= " + << s; + } else { + cuda_device_id = cuda_gpu_id.value(); + } + tensorflow::GPUOptions gpuoptions; + // we need to us PM here since in python path there is no way to get to + // allocators + auto pm = tensorflow::ProcessState::singleton(); + // this should be instantiated by now + auto dev_allocator = pm->GetGPUAllocator(gpuoptions, tf_gpu_id, 1); + VLOG(1) << "Got an allocator for device tf_device=" << tf_gpu_id.value() + << " cuda device= " << cuda_device_id << " at " << dev_allocator; + allocator = std::make_shared(dev_allocator); + } else { // device unknown or not available + allocator = std::make_shared(); + } ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, static_graph_properties, - &output_edge_map, precision_mode); + max_mem_per_engine, graph_properties, &output_edge_map, + precision_mode, segment_nodes_and_device.second, + allocator, cuda_device_id); if (precision_mode == INT8MODE) { tensorflow::Status status = GetCalibNode(&p); if (status != tensorflow::Status::OK()) { diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e01e4a5328..65a67d7e73 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -18,6 +18,8 @@ limitations under the License. 
#include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -43,6 +45,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, int precision_mode, int minimum_segment_size); +// Method to call from optimization pass +tensorflow::Status ConvertAfterShapes( + const tensorflow::GraphDef& graph, const std::vector& output_names, + size_t max_batch_size, size_t max_workspace_size_bytes, + tensorflow::GraphDef* new_graph_def, int precision_mode, + int minimum_segment_size, + const tensorflow::grappler::GraphProperties& graph_properties, + const tensorflow::grappler::Cluster* cluster); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index b81ae9dc3e..32b211dcd1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -240,35 +241,49 @@ class TFAttrs { return attrs_.at(key); } template - T get(string key) const; + T get(const string& key) const; template - T get(string key, const T& default_value) const { + T get(const string& key, const T& default_value) const { return attrs_.count(key) ? 
this->get(key) : default_value; } + std::vector GetAllAttrKey() { + std::vector attr_list; + for (const auto& attr_item : attrs_) { + attr_list.emplace_back(attr_item.first); + } + return attr_list; + } + private: typedef std::map AttrMap; AttrMap attrs_; }; template <> -string TFAttrs::get(string key) const { +string TFAttrs::get(const string& key) const { return this->at(key)->s(); } template <> -std::vector TFAttrs::get>(string key) const { +std::vector TFAttrs::get>(const string& key) const { auto attr = this->at(key)->list().i(); return std::vector(attr.begin(), attr.end()); } template <> -std::vector TFAttrs::get>(string key) const { +std::vector TFAttrs::get>(const string& key) const { + auto attr = this->at(key)->list().f(); + return std::vector(attr.begin(), attr.end()); +} + +template <> +std::vector TFAttrs::get>(const string& key) const { auto attr = this->at(key)->list().s(); return std::vector(attr.begin(), attr.end()); } template <> -nvinfer1::Dims TFAttrs::get(string key) const { +nvinfer1::Dims TFAttrs::get(const string& key) const { auto values = this->get>(key); nvinfer1::Dims dims; dims.nbDims = values.size(); @@ -278,24 +293,25 @@ nvinfer1::Dims TFAttrs::get(string key) const { } template <> -nvinfer1::DataType TFAttrs::get(string key) const { +nvinfer1::DataType TFAttrs::get(const string& key) const { nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(this->at(key)->type(), &trt_dtype)); return trt_dtype; } template <> -tensorflow::DataType TFAttrs::get(string key) const { +tensorflow::DataType TFAttrs::get( + const string& key) const { return this->at(key)->type(); } template <> -float TFAttrs::get(string key) const { +float TFAttrs::get(const string& key) const { return this->at(key)->f(); } template <> -bool TFAttrs::get(string key) const { +bool TFAttrs::get(const string& key) const { return this->at(key)->b(); } @@ -424,6 +440,7 @@ using OpConverter = class Converter { std::unordered_map trt_tensors_; 
std::unordered_map op_registry_; + OpConverter plugin_converter_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; tensorflow::tensorrt::TRTWeightStore* weight_store_; @@ -481,7 +498,7 @@ class Converter { weights.SetValues(weight_store_->store_.back().data()); return weights; } - bool isFP16() { return fp16_; }; + bool isFP16() { return fp16_; } TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -490,13 +507,17 @@ class Converter { std::vector inputs; TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); string op = node_def.op(); - if (!op_registry_.count(op)) { - return tensorflow::errors::Unimplemented( - "No converter registered for op: " + op); - } - OpConverter op_converter = op_registry_.at(op); std::vector outputs; - TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); + if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) { + TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs)); + } else { + if (!op_registry_.count(op)) { + return tensorflow::errors::Unimplemented( + "No converter registered for op: " + op); + } + OpConverter op_converter = op_registry_.at(op); + TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); + } for (size_t i = 0; i < outputs.size(); ++i) { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix @@ -672,7 +693,7 @@ std::function LambdaFactory::unary() { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; return [](Eigen::half t) -> Eigen::half { - return Eigen::half(1.0 / sqrt(float(t))); + return Eigen::half(1.0 / sqrt(static_cast(t))); }; } case OP_CATEGORY::NEG: @@ -1158,9 +1179,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " 
not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1173,6 +1194,45 @@ tensorflow::Status BinaryTensorOpTensor( return tensorflow::Status::OK(); } +tensorflow::Status ConvertPlugin(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + // prepare input + std::vector all_inputs; + for (auto input : inputs) { + all_inputs.emplace_back(const_cast(input.tensor())); + } + + // plugin is owned by PluginFactory + // TODO(jie): destroy plugins later (resource management) + PluginTensorRT* plugin = + PluginFactoryTensorRT::GetInstance()->CreatePlugin(node_def.op()); + + // passing attributes + // TODO(jie): support more general attribute + TFAttrs attrs(node_def); + auto attr_key_vector = attrs.GetAllAttrKey(); + for (auto attr_key : attr_key_vector) { + // TODO(jie): support only list of float for toy example here. 
+ auto data = attrs.get>(attr_key); + size_t size_data = data.size() * sizeof(float); + if (!plugin->SetAttribute(attr_key, static_cast(data.data()), + size_data)) { + return tensorflow::errors::InvalidArgument("plugin SetAttribute failed"); + } + } + + nvinfer1::IPluginLayer* layer = ctx.network()->addPlugin( + &all_inputs[0], static_cast(inputs.size()), *plugin); + + for (int i = 0; i < layer->getNbOutputs(); i++) { + nvinfer1::ITensor* output_tensor = layer->getOutput(i); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + } + return tensorflow::Status::OK(); +} + tensorflow::Status ConvertPlaceholder( Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, @@ -2073,6 +2133,8 @@ void Converter::register_op_converters() { op_registry_["Reshape"] = ConvertReshape; op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; + + plugin_converter_ = ConvertPlugin; } } // namespace @@ -2144,7 +2206,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( if (!status.ok() || !calib_res->calibrator_) { return tensorflow::errors::FailedPrecondition( "You must run calibration" - " and inference conversion in the same proces"); + " and inference conversion in the same process"); } calib_res->calibrator_->setDone(); @@ -2213,60 +2275,63 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( return tensorflow::Status::OK(); } -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. 
- - // Toposort +tensorflow::Status ReverseTopologicalSort( + const tensorrt::convert::SubGraphParams& s, + std::list* order) { std::vector order_vec; tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph - std::list order; for (tensorflow::Node* node : order_vec) { if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to construct the + // We want topological order to contstruct the // network layer by layer + order->push_front(node); } } - // topological order is needed to build TRT network - static int static_id = 0; + return tensorflow::Status::OK(); +} + +tensorflow::Status SetInputList( + const tensorrt::convert::SubGraphParams& s, + tensorflow::NodeDefBuilder* op_builder, + const std::vector* input_names, + std::vector* input_dtypes) { + std::vector income_edges; + VLOG(2) << "input edge size: " << input_names->size(); + for (size_t i = 0; i < input_names->size(); ++i) { + VLOG(2) << "input edges: " << i << " " << input_names->at(i); + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( + input_names->at(i), output_idx, input_dtypes->at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder->Input(input_list); + return tensorflow::Status::OK(); +} + +string SubgraphNameScopeGenerator(const std::list* order) { string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); + if (!order->empty()) { + subgraph_name_scope = order->front()->name(); } - for (const tensorflow::Node* node : order) { + for (const tensorflow::Node* node : *order) { subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); } // TODO(sami,ben,jie): proper naming! 
- string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); - } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + return subgraph_name_scope; +} - std::vector input_names; - std::vector input_dtypes; +tensorflow::Status ConvertSubgraph( + Converter& converter, tensorrt::convert::SubGraphParams& s, + std::list* order, std::vector* input_names, + std::vector* input_dtypes, + std::vector* output_names, + std::vector* output_dtypes, + const string& engine_name) { for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; @@ -2309,22 +2374,21 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); + input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Data type conversion for input '" << node_name - << "' failed"; + LOG(WARNING) << "Type conversion failed for " << node_name; return type_status; } - VLOG(2) << "accessing output index of: " << output_idx + VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name - << "with output entry from shape_map: " << op_info_vec.size(); + << " with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + nvinfer1::DimsCHW input_dim_pseudo_chw; + for (int i = 0; i < 3; i++) input_dim_pseudo_chw.d[i] = 1; // TODO(jie): TRT 3.x only support 4 dimensional input tensor. // update the code once TRT 4.0 comes out. @@ -2338,7 +2402,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + input_dim_pseudo_chw.d[i - 1] = op_info.shape().dim(i).size(); } // TODO(ben,jie): proper way to restore input tensor name? 
@@ -2347,33 +2411,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_tensor_name = StrCat(node_name, ":", output_idx); } - input_names.push_back(input_tensor_name); + input_names->push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); if (!input_tensor) return tensorflow::errors::InvalidArgument( "Failed to create Input layer"); - VLOG(2) << "input tensor name :" << input_tensor_name; + VLOG(2) << "Input tensor name :" << input_tensor_name; if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) return tensorflow::errors::AlreadyExists( - "output tensor already exists for op: " + input_tensor_name); + "Output tensor already exists for op: " + input_tensor_name); } - VLOG(2) << "finished sorting"; - - for (const tensorflow::Node* node : order) { + for (const tensorflow::Node* node : *order) { const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); + VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op(); TF_RETURN_IF_ERROR(converter.convert_node(node_def)); } - VLOG(2) << "finished conversion"; + VLOG(2) << "Finished conversion"; // Gather output metadata - std::vector output_names; - std::vector output_dtypes; int trt_engine_op_output_idx = 0; for (const std::pair& output : s.output_inds) { int node_id = output.first; @@ -2388,14 +2448,13 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { : StrCat(engine_name, ":", trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - VLOG(1) << "output tensor name: " << tensor_name; - output_names.push_back(tensor_name); + if (output_idx != 0) + 
tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); + VLOG(2) << "Output tensor name: " << tensor_name; + output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node'" + tensor_name + + return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); @@ -2405,12 +2464,65 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { } converter.network()->markOutput(*tensor); tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); + output_dtypes->push_back(tf_dtype); nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); tensor->setType(trt_dtype); } + return tensorflow::Status::OK(); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { + // Visit nodes in reverse topological order and construct the TRT network. + // Toposort + std::list order; + TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); + + static int static_id = 0; + string subgraph_name_scope = SubgraphNameScopeGenerator(&order); + // TODO(sami,ben,jie): proper naming! 
+ string calib_op_name = + StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); + static_id++; + + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); + auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); + op_res->logger_ = new tensorflow::tensorrt::Logger(); + cudaSetDevice(s.cuda_gpu_id_); + op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); + op_res->allocator_ = s.allocator_; +#if NV_TENSORRT_MAJOR > 3 + op_res->builder_->setGpuAllocator(s.allocator_.get()); +#endif + if (!op_res->builder_) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + op_res->network_ = op_res->builder_->createNetwork(); + if (!op_res->network_) { + return tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + // Build the network + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); + Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + + std::vector input_names; + std::vector input_dtypes; + std::vector output_names; + std::vector output_dtypes; + TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, + &input_dtypes, &output_names, + &output_dtypes, engine_name)); + VLOG(2) << "Finished processing outputs"; // Build the engine @@ -2422,21 +2534,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Build the TRT op // TODO(sami,ben,jie): proper naming! 
tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - std::vector income_edges; - for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) - << ":" << output_idx - << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); + SetInputList(s, &op_builder, &input_names, &input_dtypes); + std::vector segment_names; segment_names.reserve(s.subgraph_node_ids.size()); for (int i : s.subgraph_node_ids) { @@ -2460,46 +2559,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { tensorflow::Status ConvertSubGraphToTensorRTNodeDef( tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. 
- - // Toposort - std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph std::list order; - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - // We want topological order to contstruct the - // network layer by layer - order.push_front(node); - } - } - // Topological order is needed to build TRT network + TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); - tensorflow::tensorrt::Logger trt_logger; + static int static_id = 0; + string subgraph_name_scope = SubgraphNameScopeGenerator(&order); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id++); + tensorflow::tensorrt::Logger trt_logger; + cudaSetDevice(s.cuda_gpu_id_); auto trt_builder = infer_object(nvinfer1::createInferBuilder(trt_logger)); if (!trt_builder) { return tensorflow::errors::Internal( "Failed to create TensorRT builder object"); } - +#if NV_TENSORRT_MAJOR > 3 + trt_builder->setGpuAllocator(s.allocator_.get()); +#endif auto trt_network = infer_object(trt_builder->createNetwork()); if (!trt_network) { return tensorflow::errors::Internal( "Failed to create TensorRT network object"); } - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - static int static_id = 0; - // TODO(sami,ben,jie): proper naming! - string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); - engine_name = StrCat(engine_name, static_id++); auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::tensorrt::TRTWeightStore(); @@ -2510,147 +2592,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector input_names; std::vector input_dtypes; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. 
Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - // input_names should use the node name in the graph - // here it should be the input tensor name -> matching the binding - // insert original node name without port - auto tensor_name = node_name; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - - VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; - - auto shape_inference_node_name = node_name; - auto shape_inference_output_idx = output_idx; - // rewire the shape inference to original node in the graph - if (s.output_edge_map->count(tensor_name)) { - shape_inference_node_name = s.output_edge_map->at(tensor_name).second; - shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; - } - if (shape_inference_output_idx < 0) continue; - VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; - - if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - shape_inference_node_name); - - auto op_info_vec = - s.graph_properties.GetOutputProperties(shape_inference_node_name); - if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", shape_inference_output_idx, - ", at node: ", shape_inference_node_name, - " with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(shape_inference_output_idx); - tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = ConvertDType(tf_dtype, &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Type conversion failed for " << node_name; - 
return type_status; - } - - VLOG(2) << "Accessing output index of: " << output_idx - << ", at node: " << node_name - << " with output entry from shape_map: " << op_info_vec.size(); - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - - // TODO(jie): TRT 3.x only support 4 dimensional input tensor. - // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) { - string err_str = "Require 4 dimensional input."; - StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", - shape_inference_node_name); - return tensorflow::errors::Unimplemented(err_str); - } - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? - auto input_tensor_name = node_name; - if (output_idx != 0) { - input_tensor_name = StrCat(node_name, ":", output_idx); - } - - input_names.push_back(input_tensor_name); - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "Input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "Output tensor already exists for op: " + input_tensor_name); - } - - VLOG(2) << "Finished sorting"; - - for (const tensorflow::Node* node : order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "Finished conversion"; - - // Gather output metadata std::vector output_names; std::vector 
output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); - VLOG(2) << "Output tensor name: " << tensor_name; - output_names.push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); - if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + - "' is weights not tensor"); - } - nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); - if (!tensor) { - return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); - } - converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); - } + TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, + &input_dtypes, &output_names, + &output_dtypes, engine_name)); VLOG(2) << "Finished output"; @@ -2686,20 +2632,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector income_edges; - VLOG(2) << "input edge size: " << input_names.size(); - for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << "input edges: " << i << " " << input_names.at(i); - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // 
ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); + SetInputList(s, &op_builder, &input_names, &input_dtypes); VLOG(0) << "Finished op preparation"; @@ -2707,9 +2640,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) + .Device(s.device_name_) .Finalize(s.trt_node); - VLOG(0) << status.ToString() << " finished op building"; + VLOG(0) << status.ToString() << " finished op building for " << engine_name + << " on device " << s.device_name_; return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 954a1e72f8..3f6592cd25 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -22,11 +22,11 @@ limitations under the License. 
#include #include +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" - #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -48,7 +48,9 @@ struct SubGraphParams { const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, tensorflow::NodeDef* constructed_trt_node, - int engine_precision_mode = FP32MODE) + int engine_precision_mode = FP32MODE, const string& device_name = "", + std::shared_ptr allocator = nullptr, + int cuda_gpu_id = 0) : graph(inp_graph), subgraph_node_ids(subgraph_node_id_numbers), input_inds(input_indices), @@ -58,7 +60,10 @@ struct SubGraphParams { graph_properties(current_graph_properties), output_edge_map(output_edges), trt_node(constructed_trt_node), - precision_mode(engine_precision_mode) {} + precision_mode(engine_precision_mode), + device_name_(device_name), + allocator_(allocator), + cuda_gpu_id_(cuda_gpu_id) {} tensorflow::Graph& graph; const std::set& subgraph_node_ids; @@ -70,6 +75,9 @@ struct SubGraphParams { std::unordered_map>* output_edge_map; tensorflow::NodeDef* trt_node; const int precision_mode; + const string device_name_; + std::shared_ptr allocator_; + const int cuda_gpu_id_; }; // TODO(sami): Replace references with const reference or pointers diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc new file mode 100644 index 0000000000..8f634b1f74 --- /dev/null +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc @@ -0,0 +1,246 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h" +#include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +namespace tensorflow { +namespace tensorrt { +namespace convert { +// TODO(sami): Remove VLOG messages once the code matures +using tensorflow::str_util::Uppercase; +using tensorflow::strings::StrAppend; +using tensorflow::strings::StrCat; + +tensorflow::Status TRTOptimizationPass::Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) { + VLOG(1) << "Called INIT for " << name_ << " with config = " << config; + if (config == nullptr) { + maximum_workspace_size_ = 2 << 30; + return tensorflow::Status::OK(); + } + const auto params = config->parameter_map(); + if (params.count("minimum_segment_size")) { + minimum_segment_size_ = params.at("minimum_segment_size").i(); + } + if (params.count("max_batch_size")) { + maximum_batch_size_ = params.at("max_batch_size").i(); + } + if (params.count("max_workspace_size_bytes")) + maximum_workspace_size_ = params.at("max_workspace_size_bytes").i(); + if (params.count("precision_mode")) { + string pm = 
Uppercase(params.at("precision_mode").s()); + if (pm == "FP32") { + precision_mode_ = 0; + } else if (pm == "FP16") { + precision_mode_ = 1; + } else if (pm == "INT8") { + precision_mode_ = 2; + } else { + LOG(ERROR) << "Unknown precision mode '" << pm << "'"; + return tensorflow::errors::InvalidArgument( + "Unknown precision mode argument" + pm + + " Valid values are FP32, FP16, INT8"); + } + } + return tensorflow::Status::OK(); +} + +void TRTOptimizationPass::PrintDebugInfo( + tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item) { + VLOG(1) << "Cluster = " << cluster; + string offset(" "); + string offset2 = StrCat(offset, offset); + string offset3 = StrCat(offset2, offset); + string offset4 = StrCat(offset2, offset2); + if (cluster) { + VLOG(1) << offset << "type = " << cluster->type(); + VLOG(1) << offset << "num warmup steps = " << cluster->NumWarmupSteps(); + const auto dev_names = cluster->GetDeviceNames(); + if (dev_names.size()) { + VLOG(1) << offset << " Device names:"; + for (const auto s : dev_names) { + VLOG(1) << offset2 << s; + } + } + std::unordered_map peak_mem; + auto status = cluster->GetPeakMemoryUsage(&peak_mem); + if (status == tensorflow::Status::OK()) { + VLOG(1) << offset << "Peak Memory Usage :"; + for (auto s : peak_mem) { + VLOG(1) << offset2 << s.first << " = " << s.second; + } + } + + const auto dev_props = cluster->GetDevices(); + if (dev_props.size()) { + VLOG(1) << offset << "Device properties:"; + for (auto k : dev_props) { + VLOG(1) << offset2 << k.first; + const auto& dt = k.second; + VLOG(1) << offset3 << "type = " << dt.type(); + VLOG(1) << offset3 << "vendor = " << dt.vendor(); + VLOG(1) << offset3 << "model = " << dt.model(); + VLOG(1) << offset3 << "frequency = " << dt.frequency(); + VLOG(1) << offset3 << "num cores = " << dt.num_cores(); + VLOG(1) << offset3 << "num registers = " << dt.num_registers(); + VLOG(1) << offset3 << "L1 cache size = " << dt.l1_cache_size(); + VLOG(1) << offset3 
<< "L2 cache size = " << dt.l2_cache_size(); + VLOG(1) << offset3 << "L3 cache size = " << dt.l3_cache_size(); + VLOG(1) << offset3 << "SHMem per SMP = " + << dt.shared_memory_size_per_multiprocessor(); + VLOG(1) << offset3 << "memory size = " << dt.memory_size(); + VLOG(1) << offset3 << "bandwidth = " << dt.bandwidth(); + if (dt.environment_size()) { + VLOG(1) << offset3 << "environment :"; + for (const auto e : dt.environment()) { + VLOG(1) << offset4 << e.first << " = " << e.second; + } + } + } + } + } + VLOG(1) << "item: " << item.id; + if (item.feed.size()) { + VLOG(1) << offset << "Feeds :"; + for (const auto& f : item.feed) { + const auto& shape = f.second.shape(); + VLOG(1) << offset2 << f.first << " = shaped " << shape.DebugString(); + } + } else { + VLOG(1) << offset << "No Feeds"; + } + if (item.fetch.size()) { + VLOG(1) << offset << "Fetches :"; + for (const auto& f : item.fetch) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No Fetches"; + } + + if (item.init_ops.size()) { + VLOG(1) << offset << "init ops :"; + for (const auto& f : item.init_ops) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No init ops"; + } + VLOG(1) << "Save Op = " << item.save_op; + VLOG(1) << "Restore Op = " << item.restore_op; + VLOG(1) << "save_restore_loc_tensor = " << item.save_restore_loc_tensor; + if (item.keep_ops.size()) { + VLOG(1) << offset << "keep ops :"; + for (const auto& f : item.keep_ops) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No keep ops"; + } + VLOG(3) << item.graph.DebugString(); + for (const auto dev : cluster->GetDeviceSet()->devices()) { + const auto& pname = dev->parsed_name(); + VLOG(1) << "Device name= " << dev->name() + << " parsedname job= " << pname.job << " id= " << pname.id + << " has_id: " << pname.has_id << " has_job: " << pname.has_job + << "has_type: " << pname.has_type << " type =" << pname.type; + } +} + +tensorflow::Status TRTOptimizationPass::Optimize( + 
tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, GraphDef* optimized_graph) { + VLOG(1) << "Called TRTOptimization Pass " << name_; + if (VLOG_IS_ON(1)) { + PrintDebugInfo(cluster, item); + } + int max_dim = -1; + if (item.feed.size()) { + for (const auto& f : item.feed) { + const auto& shape = f.second.shape(); + if (shape.dims() > 0) { + if (shape.dim_size(0) > max_dim) max_dim = shape.dim_size(0); + } + } + } + if (maximum_batch_size_ < 0) { // automatic batch size from input + if (max_dim > 0) { + maximum_batch_size_ = max_dim; + VLOG(1) << "Setting maximum batch size to " << max_dim; + } else { + maximum_batch_size_ = 128; + LOG(WARNING) << "Maximum batch size is not set" + " and can't be deduced from inputs setting it to" + << maximum_batch_size_ + << ". Suggest configuring it from configuration parameters"; + } + } else { + if (max_dim > maximum_batch_size_) { + LOG(WARNING) << "Configured batch size " << maximum_batch_size_ + << " is less than input batch size " << max_dim + << " adjusting maximum batch size to match input batch size"; + } + } + tensorflow::grappler::GraphProperties static_graph_properties(item); + TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); + auto status = tensorflow::tensorrt::convert::ConvertAfterShapes( + item.graph, item.fetch, maximum_batch_size_, maximum_workspace_size_, + optimized_graph, precision_mode_, minimum_segment_size_, + static_graph_properties, cluster); + VLOG(2) << optimized_graph->DebugString(); + return status; +} + +void TRTOptimizationPass::Feedback( + tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + const GraphDef& optimized_graph, double result) {} + +} // namespace convert +} // namespace tensorrt +} // namespace tensorflow + +class VerboseCustomGraphOptimizerRegistrar + : public tensorflow::grappler::CustomGraphOptimizerRegistrar { + public: + VerboseCustomGraphOptimizerRegistrar( + const 
tensorflow::grappler::CustomGraphOptimizerRegistry::Creator& cr, + const tensorflow::string& name) + : tensorflow::grappler::CustomGraphOptimizerRegistrar(cr, name) { + VLOG(1) << "Constructing a CustomOptimizationPass registration object for " + << name; + } +}; + +static VerboseCustomGraphOptimizerRegistrar TRTOptimizationPass_Registrar( + []() { + VLOG(1) + << "Instantiating CustomOptimizationPass object TensorRTOptimizer"; + return new tensorflow::tensorrt::convert::TRTOptimizationPass( + "TensorRTOptimizer"); + }, + ("TensorRTOptimizer")); + +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h new file mode 100644 index 0000000000..d8ecead23e --- /dev/null +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h @@ -0,0 +1,73 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ + +#include + +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { +namespace convert { + +class TRTOptimizationPass : public tensorflow::grappler::CustomGraphOptimizer { + public: + TRTOptimizationPass(const string& name = "TRTOptimizationPass") + : name_(name), + minimum_segment_size_(3), + precision_mode_(0), + maximum_batch_size_(-1), + maximum_workspace_size_(-1) { + VLOG(1) << "Constructing " << name_; + } + + string name() const override { return name_; }; + + tensorflow::Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer* + config = nullptr) override; + + tensorflow::Status Optimize(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; + + void PrintDebugInfo(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item); + + private: + string name_; + int minimum_segment_size_; + int precision_mode_; + int maximum_batch_size_; + int64_t maximum_workspace_size_; +}; + +} // namespace convert +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD new file mode 100644 index 0000000000..a89cf3ab8b --- /dev/null +++ 
b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD @@ -0,0 +1,118 @@ +# Description: +# Example for plugin support in TensorRT(http://developer.nvidia.com/tensorrt) +# through TensorFlow integration. Targeting TensorRT 3.0.4 +# APIs are meant to change while upgrading TRT. +# add init_py into pip package BUILD dependency to install it. + +package(default_visibility = ["//tensorflow:__subpackages__"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_custom_op_library", + "tf_custom_op_library_additional_deps", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", + "tf_kernel_library", +) +load("//tensorflow:tensorflow.bzl", "cuda_py_test") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") +load( + "@local_config_tensorrt//:build_defs.bzl", + "if_tensorrt", +) + +tf_gen_op_libs( + op_lib_names = ["inc_op"], +) + +tf_gen_op_wrapper_py( + name = "inc_op", + deps = [":inc_op_op_lib"], +) + +tf_custom_op_library( + name = "_inc_op.so", + srcs = [ + "inc_op_kernel.h", + "inc_op_plugin.cc", + "inc_op_plugin.h", + "ops/inc_op.cc", + ], + gpu_srcs = [ + "inc_op_kernel.h", + "inc_op_kernel.cu.cc", + ], + deps = [ + "//tensorflow/contrib/tensorrt:trt_plugins", + "//tensorflow/core:framework_lite", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + +tf_kernel_library( + name = "inc_op_plugin_kernel", + srcs = ["inc_op_plugin.cc"], + hdrs = [ + "inc_op_kernel.h", + "inc_op_plugin.h", + ], + gpu_srcs = [ + "inc_op_kernel.h", + "inc_op_kernel.cu.cc", + ], + deps = [ + "//tensorflow/contrib/tensorrt:trt_plugins", + "//tensorflow/core:stream_executor_headers_lib", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]) + tf_custom_op_library_additional_deps(), +) + +tf_custom_op_py_library( + name = "inc_op_loader", + srcs = ["inc_op.py"], + dso = [ + ":_inc_op.so", + ], + kernels = [ + ":inc_op_op_lib", + ":inc_op_plugin_kernel", + ], + srcs_version = "PY2AND3", + deps = [ + 
"//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:resources", + ], +) + +py_library( + name = "init_py", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":inc_op", + ":inc_op_loader", + ], +) + +cuda_py_test( + name = "plugin_test", + size = "small", + srcs = ["plugin_test.py"], + additional_deps = [ + ":init_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/contrib/tensorrt:init_py", + "//tensorflow/python:platform", + "//tensorflow/python:client_testlib", + "//tensorflow/python:tf_optimizer", + ], + tags = [ + "manual", + "noguitar", + "notap", + ], +) diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py new file mode 100644 index 0000000000..363edab2e8 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Import custom op for plugin and register it in plugin factory registry.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.tensorrt.custom_plugin_examples import inc_op as import_inc_op_so +from tensorflow.contrib.tensorrt.custom_plugin_examples.ops import gen_inc_op + +inc_op = gen_inc_op.inc_plugin_trt diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py new file mode 100644 index 0000000000..a007c3f54e --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py @@ -0,0 +1,32 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Loader for the custom inc_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import platform + +if platform.system() != "Windows": + # pylint: disable=g-import-not-at-top + from tensorflow.contrib.util import loader + from tensorflow.python.platform import resource_loader + # pylint: enable=g-import-not-at-top + + _inc_op = loader.load_op_library( + resource_loader.get_path_to_datafile("_inc_op.so")) +else: + raise RuntimeError("Windows not supported") diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc new file mode 100644 index 0000000000..988b35f74f --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc @@ -0,0 +1,84 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h" + +#include + +#include "tensorflow/core/framework/op_kernel.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace tensorflow { +namespace tensorrt { + +__global__ void VecInc(const float* vec, float inc, float* dest, int n) { + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < n) dest[i] = vec[i] + inc; +} + +void IncrementKernel(const float* d_input, float inc, float* d_output, + int count, cudaStream_t stream) { + int threads_per_block = 256; + int blocks_per_grid = (count + threads_per_block - 1) / threads_per_block; + + VecInc<<>>(d_input, inc, + d_output, count); +} + +// Note: this kernel definition is not needed in the plugin_test rule, but it is +// required for correctness of the TF program, i.e. if not using plugin or when +// run with trt optimization pass, the test should work. 
+class IncPluginTRT : public OpKernel { + public: + explicit IncPluginTRT(OpKernelConstruction* context) : OpKernel(context) { + std::vector inc_list; + OP_REQUIRES_OK(context, context->GetAttr("inc", &inc_list)); + OP_REQUIRES(context, inc_list.size() == 1, + errors::InvalidArgument( + "The increment list should contain single element.")); + inc_ = inc_list[0]; + } + + void Compute(OpKernelContext* context) override { + const Tensor& input_tensor = context->input(0); + const TensorShape& input_shape = input_tensor.shape(); + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input_shape, &output_tensor)); + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(context->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + IncrementKernel(input_tensor.flat().data(), inc_, + output_tensor->flat().data(), + input_shape.num_elements(), *stream); + } + + private: + float inc_; +}; + +REGISTER_KERNEL_BUILDER(Name("IncPluginTRT").Device(DEVICE_GPU), IncPluginTRT); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h new file mode 100644 index 0000000000..c35955e105 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h @@ -0,0 +1,35 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { + +void IncrementKernel(const float* d_input, float inc, float* d_output, + int count, cudaStream_t stream); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc new file mode 100644 index 0000000000..8d4c893af5 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc @@ -0,0 +1,86 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h" + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +const char* kPluginName = "IncPluginTRT"; + +IncOpPlugin* CreateIncPlugin() { return new IncOpPlugin(); } + +IncOpPlugin* CreateIncPluginDeserialize(const void* buffer, size_t length) { + return new IncOpPlugin(buffer, length); +} + +REGISTER_TRT_PLUGIN(kPluginName, CreateIncPluginDeserialize, CreateIncPlugin); + +IncOpPlugin::IncOpPlugin() : plugin_name_(kPluginName) {} + +IncOpPlugin::IncOpPlugin(const void* serialized_data, size_t length) + : PluginTensorRT(serialized_data, length), plugin_name_(kPluginName) { + // account for the consumed pointer. + size_t consumed_data = PluginTensorRT::getSerializationSize(); + assert(length - consumed_data >= sizeof(float)); + const char* buffer = reinterpret_cast(serialized_data); + SetAttribute("inc", buffer + consumed_data, sizeof(float)); +} + +bool IncOpPlugin::SetAttribute(const string& key, const void* ptr, + const size_t size) { + if (strcmp(key.c_str(), "inc") == 0 && size == sizeof(float)) { + StoreAttribute(key, ptr, size); // save the attribute to own the data; + inc_ = *static_cast(ptr); + return true; + } + return false; +} + +bool IncOpPlugin::GetAttribute(const string& key, const void** ptr, + size_t* size) const { + const auto& iter = attr_map_.find(key); + if (iter != attr_map_.end()) { + *ptr = iter->second.data(); + *size = iter->second.size(); + return true; + } + return false; +} + +int IncOpPlugin::enqueue(int batch_size, const void* const* inputs, + void** outputs, void*, cudaStream_t stream) { + int count = 1; + for (int i = 0; i < input_dim_list_[0].nbDims; i++) { + count *= input_dim_list_[0].d[i]; + } 
+ count *= batch_size; + const float* input = reinterpret_cast(inputs[0]); + float* output = reinterpret_cast(outputs[0]); + IncrementKernel(input, inc_, output, count, stream); + return 0; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h new file mode 100644 index 0000000000..189e9c939b --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ + +#include +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +class IncOpPlugin : public PluginTensorRT { + public: + IncOpPlugin(); + + IncOpPlugin(const void* serialized_data, size_t length); + + const string& GetPluginName() const override { return plugin_name_; }; + + bool Finalize() override { return true; }; + + bool SetAttribute(const string& key, const void* ptr, + const size_t size) override; + + bool GetAttribute(const string& key, const void** ptr, + size_t* size) const override; + + int getNbOutputs() const override { return 1; } + + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int num_input_dims) override { + assert(index == 0); + assert(num_input_dims == 1); + return inputs[0]; + } + + // use configure to setup input dimensions + void configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) override { + assert(num_inputs == 1); + PluginTensorRT::configure(inputs, num_inputs, outputs, num_outputs, + max_batch_size); + } + + int initialize() override { return 0; } + + void terminate() override {} + + size_t getWorkspaceSize(int max_batch_size) const override { return 0; } + + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override; + + size_t getSerializationSize() override { + return PluginTensorRT::getSerializationSize() + sizeof(float); + } + + void serialize(void* buffer) override { + // Serialize parent data. + PluginTensorRT::serialize(buffer); + // Incremented buffer after parent serialization. 
+ buffer = + static_cast(buffer) + PluginTensorRT::getSerializationSize(); + std::memcpy(buffer, &inc_, sizeof(float)); + buffer = static_cast(buffer) + sizeof(float); + } + + protected: + float inc_; + nvinfer1::Dims dim_; + + private: + const string plugin_name_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc new file mode 100644 index 0000000000..d0eb0d299d --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc @@ -0,0 +1,36 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { + +REGISTER_OP("IncPluginTRT") + .Attr("inc: list(float)") + .Input("input: float32") + .Output("output: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py new file mode 100644 index 0000000000..bc4d270bec --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py @@ -0,0 +1,95 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Script to show usage of TensorRT custom op & plugin.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy + +from tensorflow.contrib import tensorrt +from tensorflow.contrib.tensorrt import custom_plugin_examples +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test + + +class TrtPluginTest(test_util.TensorFlowTestCase): + + def _get_plugin_graph_def(self): + """Create a simple graph and return its graph_def.""" + g = ops.Graph() + with g.as_default(): + a = array_ops.placeholder( + dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") + relu = nn.relu(a, "relu") + v = nn_ops.max_pool( + relu, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + + # insert custom_op in the graph + v = custom_plugin_examples.inc_op(v, inc=[16.5], name="plugin_test") + + v *= 2.0 + v = nn.relu(v) + v = nn.relu(v) + array_ops.squeeze(v, name="output") + return g.as_graph_def() + + def _run_graph(self, gdef, dumm_inp): + """Run given graphdef once.""" + gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + + with session.Session( + config=config_pb2.ConfigProto(gpu_options=gpu_options), + graph=g) as sess: + val = sess.run(out, {inp: dumm_inp}) + return val + + def testIncOpPlugin(self): 
+ inp_dims = (5, 24, 24, 2) + dummy_input = numpy.ones(inp_dims).astype(numpy.float32) + orig_graph = self._get_plugin_graph_def() # graph with plugin node + + # trigger conversion. + # plugin nodes have been registered during import, converter will be able to + # create corresponding plugin layer during conversion. + trt_graph = tensorrt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP32", + minimum_segment_size=2) + o2 = self._run_graph(trt_graph, dummy_input) + self.assertEqual(35, o2.reshape([-1])[0]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b8f881ceb1..9ac8047944 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -32,38 +33,40 @@ namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine - string serialized_engine; OP_REQUIRES_OK(context, - context->GetAttr("serialized_engine", &serialized_engine)); + context->GetAttr("serialized_engine", &serialized_engine_)); // register input output node name in trt_sub_graph OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_)); OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); +} - // TODO(samikama) runtime should be taken from a resourcemanager as well. 
- // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. - int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; - +void TRTEngineOp::Compute(OpKernelContext* context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - IRuntime* infer = nvinfer1::createInferRuntime(logger); - trt_engine_ptr_.reset(infer->deserializeCudaEngine( - serialized_engine.c_str(), serialized_engine.size(), nullptr)); - trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); - // Runtime is safe to delete after engine creation - infer->destroy(); -} - -void TRTEngineOp::Compute(OpKernelContext* context) { + if (!trt_execution_context_ptr_) { + IRuntime* infer = nvinfer1::createInferRuntime(logger); +#if NV_TENSORRT_MAJOR > 3 + auto device = context->device(); + auto dev_allocator = + device->GetAllocator(tensorflow::AllocatorAttributes()); + if (!dev_allocator) { + LOG(FATAL) << "Can't find device allocator for gpu device " + << device->name(); + } + allocator_ = std::make_shared(dev_allocator); + infer->setGpuAllocator(allocator_.get()); +#endif + trt_engine_ptr_.reset(infer->deserializeCudaEngine( + serialized_engine_.c_str(), serialized_engine_.size(), + PluginFactoryTensorRT::GetInstance())); + trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); + // Runtime is safe to delete after engine creation + infer->destroy(); + serialized_engine_.clear(); + } int num_binding = context->num_inputs() + context->num_outputs(); std::vector buffers(num_binding); @@ -154,7 +157,12 @@ void TRTEngineOp::Compute(OpKernelContext* 
context) { VLOG(2) << "enqueue returns: " << ret; // sync should be done by TF. } - +TRTEngineOp::~TRTEngineOp() { + // Order matters! + trt_execution_context_ptr_.reset(); + trt_engine_ptr_.reset(); + allocator_.reset(); +} REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h index 0964b4b18a..e613a71422 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h @@ -17,25 +17,28 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_ #include -#include #include +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" + #if GOOGLE_CUDA #if GOOGLE_TENSORRT #include "cuda/include/cuda_runtime_api.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace tensorrt { class Logger; +// TODO(Sami): Remove this file? 
class TRTEngineOp : public OpKernel { public: explicit TRTEngineOp(OpKernelConstruction* context); void Compute(OpKernelContext* context) override; + ~TRTEngineOp(); private: template @@ -51,6 +54,8 @@ class TRTEngineOp : public OpKernel { std::vector input_nodes_; std::vector output_nodes_; + std::shared_ptr allocator_; + string serialized_engine_; }; } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index 7f3544f8cf..96ccacb791 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -28,7 +28,7 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { public: - Logger(string name = "DefaultLogger") : name_(name){}; + Logger(string name = "DefaultLogger") : name_(name) {} void log(nvinfer1::ILogger::Severity severity, const char* msg) override; private: diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc new file mode 100644 index 0000000000..062f86e8bb --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc @@ -0,0 +1,106 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include +#include +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +PluginTensorRT::PluginTensorRT(const void* serialized_data, size_t length) { + const char* buffer = static_cast(serialized_data); + size_t op_name_char_count = *reinterpret_cast(buffer); + buffer += sizeof(size_t); + buffer += op_name_char_count; + + size_t count = *reinterpret_cast(buffer); + buffer += sizeof(size_t); + + for (int i = 0; i < count; i++) { + nvinfer1::Dims dim; + std::memcpy(&(dim.nbDims), buffer, sizeof(dim.nbDims)); + buffer += sizeof(dim.nbDims); + std::memcpy(dim.d, buffer, sizeof(dim.d)); + buffer += sizeof(dim.d); + std::memcpy(dim.type, buffer, sizeof(dim.type)); + buffer += sizeof(dim.type); + input_dim_list_.emplace_back(dim); + } +} + +void PluginTensorRT::configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) { + for (int index = 0; index < num_inputs; index++) { + nvinfer1::Dims dim; + dim.nbDims = inputs[index].nbDims; + for (int i = 0; i < dim.nbDims; i++) { + dim.d[i] = inputs[index].d[i]; + dim.type[i] = inputs[index].type[i]; + } + input_dim_list_.emplace_back(dim); + } +} + +size_t PluginTensorRT::getSerializationSize() { + nvinfer1::Dims dim; + return sizeof(size_t) + GetPluginName().size() + + sizeof(input_dim_list_.size()) + sizeof(dim.nbDims) + sizeof(dim.d) + + sizeof(dim.type); +} + +void PluginTensorRT::serialize(void* serialized_data) { + size_t op_name_size = GetPluginName().size(); + char* buffer = static_cast(serialized_data); + std::memcpy(buffer, &op_name_size, sizeof(size_t)); + buffer += sizeof(size_t); + + std::memcpy(buffer, GetPluginName().data(), op_name_size); + buffer += op_name_size; + + auto list_size = 
input_dim_list_.size(); + std::memcpy(buffer, &list_size, sizeof(input_dim_list_.size())); + buffer += sizeof(input_dim_list_.size()); + + for (int i = 0; i < input_dim_list_.size(); i++) { + auto dim = input_dim_list_[i]; + std::memcpy(buffer, &(dim.nbDims), sizeof(dim.nbDims)); + buffer += sizeof(dim.nbDims); + std::memcpy(buffer, dim.d, sizeof(dim.d)); + buffer += sizeof(dim.d); + std::memcpy(buffer, dim.type, sizeof(dim.type)); + buffer += sizeof(dim.type); + } +} + +bool PluginTensorRT::StoreAttribute(const string& key, const void* ptr, + const size_t size) { + if (attr_map_.count(key) != 0) return false; + + attr_map_.emplace(key, std::vector(size)); + std::memcpy(attr_map_[key].data(), ptr, size); + return true; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h new file mode 100644 index 0000000000..754920b60c --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h @@ -0,0 +1,74 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ + +#include +#include +#include + +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +// A wrapper class for TensorRT plugin +// User application should inherit from this class to write custom kernels. +// Allows user to insert custom op in TensorRT engine +// To register plugin in converter, user should also register custom +// PluginDeserializeFunc & PluginConstructFunc through PluginFactoryTensorRT +class PluginTensorRT : public nvinfer1::IPlugin { + public: + PluginTensorRT() {} + PluginTensorRT(const void* serialized_data, size_t length); + + virtual const string& GetPluginName() const = 0; + + virtual bool Finalize() = 0; + + virtual bool SetAttribute(const string& key, const void* ptr, + const size_t size) = 0; + virtual bool GetAttribute(const string& key, const void** ptr, + size_t* size) const = 0; + + void configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) override; + + virtual bool StoreAttribute(const string& key, const void* ptr, + const size_t size); + + size_t getSerializationSize() override; + + void serialize(void* buffer) override; + + protected: + std::unordered_map > attr_map_; + + std::vector input_dim_list_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc new file mode 100644 index 0000000000..2bc591484d --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc @@ -0,0 +1,78 @@ +/* 
Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, + const void* serial_data, + size_t serial_length) { + size_t parsed_byte = 0; + // extract op_name from serial_data + string encoded_op_name = + ExtractOpName(serial_data, serial_length, &parsed_byte); + + if (!IsPlugin(encoded_op_name)) { + return nullptr; + } + + tensorflow::mutex_lock lock(instance_m_); + auto plugin_ptr = + plugin_registry_[encoded_op_name].first(serial_data, serial_length); + owned_plugins_.emplace_back(plugin_ptr); + + return plugin_ptr; +} + +PluginTensorRT* PluginFactoryTensorRT::CreatePlugin(const string& op_name) { + if (!IsPlugin(op_name)) return nullptr; + + tensorflow::mutex_lock lock(instance_m_); + auto plugin_ptr = plugin_registry_[op_name].second(); + owned_plugins_.emplace_back(plugin_ptr); + + return plugin_ptr; +} + +bool PluginFactoryTensorRT::RegisterPlugin( + const string& op_name, PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func) { + if (IsPlugin(op_name)) return false; + + tensorflow::mutex_lock lock(instance_m_); + auto ret = plugin_registry_.emplace( + op_name, std::make_pair(deserialize_func, 
construct_func)); + + return ret.second; +} + +void PluginFactoryTensorRT::DestroyPlugins() { + tensorflow::mutex_lock lock(instance_m_); + for (auto& owned_plugin_ptr : owned_plugins_) { + owned_plugin_ptr.release(); + } + owned_plugins_.clear(); +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h new file mode 100644 index 0000000000..bbae9fb65c --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ + +#include +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +class PluginFactoryTensorRT : public nvinfer1::IPluginFactory { + public: + // TODO(aaroey): this static method has to be inlined to make the singleton a + // unique global symbol. Find a way to fix it. + static PluginFactoryTensorRT* GetInstance() { + static PluginFactoryTensorRT* factory_instance = + new PluginFactoryTensorRT(); + return factory_instance; + } + + // Deserialization method + PluginTensorRT* createPlugin(const char* layer_name, const void* serial_data, + size_t serial_length) override; + + // Plugin construction, PluginFactoryTensorRT owns the plugin. 
+ PluginTensorRT* CreatePlugin(const string& op_name); + + bool RegisterPlugin(const string& op_name, + PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func); + + bool IsPlugin(const string& op_name) { + return plugin_registry_.find(op_name) != plugin_registry_.end(); + } + + size_t CountOwnedPlugins() { return owned_plugins_.size(); } + + void DestroyPlugins(); + + protected: + std::unordered_map> + plugin_registry_; + + // TODO(jie): Owned plugin should be associated with different sessions; + // should really hand ownership of plugins to resource management; + std::vector> owned_plugins_; + tensorflow::mutex instance_m_; +}; + +class TrtPluginRegistrar { + public: + TrtPluginRegistrar(const string& name, PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func) { + auto factory = PluginFactoryTensorRT::GetInstance(); + QCHECK(factory->RegisterPlugin(name, deserialize_func, construct_func)) + << "Failed to register plugin: " << name; + } +}; + +#define REGISTER_TRT_PLUGIN(name, deserialize_func, construct_func) \ + REGISTER_TRT_PLUGIN_UNIQ_HELPER(__COUNTER__, name, deserialize_func, \ + construct_func) +#define REGISTER_TRT_PLUGIN_UNIQ_HELPER(ctr, name, deserialize_func, \ + construct_func) \ + REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func) +#define REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func) \ + static ::tensorflow::tensorrt::TrtPluginRegistrar trt_plugin_registrar##ctr \ + TF_ATTRIBUTE_UNUSED = ::tensorflow::tensorrt::TrtPluginRegistrar( \ + name, deserialize_func, construct_func) + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc new file mode 100644 index 0000000000..129bdcdbc2 --- /dev/null +++ 
b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc @@ -0,0 +1,125 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { +namespace test { + +class StubPlugin : public PluginTensorRT { + public: + static const char* kPluginName; + + StubPlugin() : plugin_name_(kPluginName) {} + + StubPlugin(const void* serialized_data, size_t length) + : PluginTensorRT(serialized_data, length) {} + + const string& GetPluginName() const override { return plugin_name_; } + + bool Finalize() override { return true; } + + bool SetAttribute(const string& key, const void* ptr, + const size_t size) override { + return true; + } + + bool GetAttribute(const string& key, const void** ptr, + size_t* size) const override { + return true; + } + + int getNbOutputs() const override { return 1; } + + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nbInputDims) override { + return inputs[0]; + } + + int 
initialize() override { return 0; } + + void terminate() override {} + + size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } + + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override { + return 0; + } + + private: + const string plugin_name_; +}; + +const char* StubPlugin::kPluginName = "StubPlugin"; + +StubPlugin* CreateStubPlugin() { return new StubPlugin(); } + +StubPlugin* CreateStubPluginDeserialize(const void* serialized_data, + size_t length) { + return new StubPlugin(serialized_data, length); +} + +class TrtPluginFactoryTest : public ::testing::Test { + public: + bool RegisterStubPlugin() { + if (PluginFactoryTensorRT::GetInstance()->IsPlugin( + StubPlugin::kPluginName)) { + return true; + } + return PluginFactoryTensorRT::GetInstance()->RegisterPlugin( + StubPlugin::kPluginName, CreateStubPluginDeserialize, CreateStubPlugin); + } +}; + +TEST_F(TrtPluginFactoryTest, Registration) { + EXPECT_FALSE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); + EXPECT_TRUE(RegisterStubPlugin()); + + ASSERT_TRUE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); +} + +TEST_F(TrtPluginFactoryTest, CreationDeletion) { + EXPECT_TRUE(RegisterStubPlugin()); + ASSERT_TRUE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); + + PluginFactoryTensorRT::GetInstance()->DestroyPlugins(); + ASSERT_TRUE(PluginFactoryTensorRT::GetInstance()->CreatePlugin( + StubPlugin::kPluginName)); + ASSERT_EQ(1, PluginFactoryTensorRT::GetInstance()->CountOwnedPlugins()); + PluginFactoryTensorRT::GetInstance()->DestroyPlugins(); + ASSERT_EQ(0, PluginFactoryTensorRT::GetInstance()->CountOwnedPlugins()); +} + +} // namespace test +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc 
b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc new file mode 100644 index 0000000000..a8f60886c0 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc @@ -0,0 +1,42 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" +#include + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +string ExtractOpName(const void* serial_data, size_t serial_length, + size_t* incremental) { + size_t op_name_char_count = *static_cast(serial_data); + *incremental = sizeof(size_t) + op_name_char_count; + + assert(serial_length >= *incremental); + + const char* buffer = static_cast(serial_data) + sizeof(size_t); + string op_name(buffer, op_name_char_count); + + return op_name; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h new file mode 100644 index 0000000000..274ce42fec --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ + +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +typedef std::function + PluginDeserializeFunc; + +typedef std::function PluginConstructFunc; + +// TODO(jie): work on error handling here +string ExtractOpName(const void* serial_data, size_t serial_length, + size_t* incremental); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc new file mode 100644 index 0000000000..0f0508331c --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" + +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +#if NV_TENSORRT_MAJOR > 2 +#include "cuda/include/cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { +void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment, + uint32_t flags) { + assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. + void* memory; + cudaMalloc(&memory, size); + return memory; +} + +void TRTCudaAllocator::free(void* memory) { cudaFree(memory); } + +void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, + uint32_t flags) { + assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. 
+ void* mem = allocator_->AllocateRaw(alignment, size); + VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment + << " @ " << mem; + return mem; +} + +TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator) + : allocator_(allocator) { + VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow"; +} + +void TRTDeviceAllocator::free(void* memory) { + VLOG(2) << "Deallocating " << memory; + allocator_->DeallocateRaw(memory); +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h new file mode 100644 index 0000000000..a0c2540a76 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -0,0 +1,68 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ + + +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/core/framework/allocator.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +#if NV_TENSORRT_MAJOR == 3 +// Define interface here temporarily until TRT 4.0 is released +namespace nvinfer1 { +class IGpuAllocator { + public: + virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0; + virtual void free(void* memory) = 0; +}; +} // namespace nvinfer1 +#endif + +namespace tensorflow { +namespace tensorrt { + +class TRTCudaAllocator : public nvinfer1::IGpuAllocator { + // Allocator implementation that is using cuda allocator instead of device + // allocator in case we can't get device allocator from TF. + public: + TRTCudaAllocator() {} + virtual ~TRTCudaAllocator() {} + void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override; + void free(void* memory) override; +}; + +class TRTDeviceAllocator : public nvinfer1::IGpuAllocator { + // Allocator implementation wrapping TF device allocators. 
+ public: + TRTDeviceAllocator(tensorflow::Allocator* allocator); + virtual ~TRTDeviceAllocator() {} + void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override; + void free(void* memory) override; + + private: + tensorflow::Allocator* allocator_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h index 3c85968ae7..e3469124ac 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resources.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -13,20 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ #include #include #include #include #include + #include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include "tensorflow/core/framework/resource_mgr.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" + #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -40,6 +43,11 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { engine_(nullptr), logger_(nullptr), thr_(nullptr) {} + + ~TRTCalibrationResource() { + VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + } + string DebugString() override { std::stringstream oss; oss << " Calibrator = " << std::hex << calibrator_ 
<< std::dec << std::endl @@ -47,16 +55,17 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { << " Network = " << std::hex << network_ << std::dec << std::endl << " Engine = " << std::hex << engine_ << std::dec << std::endl << " Logger = " << std::hex << logger_ << std::dec << std::endl + << " Allocator = " << std::hex << allocator_.get() << std::dec + << std::endl << " Thread = " << std::hex << thr_ << std::dec << std::endl; return oss.str(); } - ~TRTCalibrationResource() { - VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); - } + TRTInt8Calibrator* calibrator_; nvinfer1::IBuilder* builder_; nvinfer1::INetworkDefinition* network_; nvinfer1::ICudaEngine* engine_; + std::shared_ptr allocator_; tensorflow::tensorrt::Logger* logger_; // TODO(sami): Use threadpool threads! std::thread* thr_; @@ -65,31 +74,28 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { class TRTWeightStore : public tensorflow::ResourceBase { public: TRTWeightStore() {} - std::list> store_; + + virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } + string DebugString() override { std::stringstream oss; - size_t lenBytes = 0; + size_t len_bytes = 0; for (const auto& v : store_) { - lenBytes += v.size() * sizeof(uint8_t); + len_bytes += v.size() * sizeof(uint8_t); } oss << " Number of entries = " << store_.size() << std::endl << " Total number of bytes = " - << store_.size() * sizeof(std::vector) + lenBytes << std::endl; + << store_.size() * sizeof(std::vector) + len_bytes + << std::endl; return oss.str(); } - virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } -}; -class TRTEngineResource : public tensorflow::ResourceBase { - public: - TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){}; - string DebugString() override { return string(""); } - nvinfer1::IRuntime* runtime_; - nvinfer1::IExecutionContext* ctx_; + std::list> store_; }; } // namespace tensorrt } // namespace tensorflow 
-#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ + #endif #endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 8fc4697c51..cc42913eca 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -25,18 +25,239 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { namespace tensorrt { namespace segment { +using ::tensorflow::strings::StrAppend; +// A simple graph representation to mirror tensorflow::Graph. This structure +// helps saving memory since segmenter modifies the graph in place, preventing +// the need to create a copy of the graph. It is composed of edges and nodes. +// Nodes keep pointers to original TF nodes. 
+class SimpleNode; +class SimpleGraph; +class SimpleEdge { + public: + SimpleEdge(int id, SimpleNode* src, int src_port, SimpleNode* dst, + int dst_port, bool is_control = false) + : id_(id), + src_(src), + src_port_(src_port), + dst_(dst), + dst_port_(dst_port), + control_(is_control) {} + ~SimpleEdge() {} + + SimpleNode* src() const { return src_; } + SimpleNode* dst() const { return dst_; } + int src_output() const { return src_port_; } + int dst_input() const { return dst_port_; } + int id() const { return id_; } + bool IsControlEdge() const { return control_; } + + private: + int id_; + SimpleNode* src_; + int src_port_; + SimpleNode* dst_; + int dst_port_; + bool control_; +}; + +class SimpleNode { + public: + SimpleNode(const tensorflow::Node* node, const int id); + + const std::vector& in_edges() const { return in_edges_; } + const std::vector& out_edges() const { return out_edges_; } + std::vector in_nodes() const { + std::vector res; + res.reserve(in_edges_.size()); + for (const auto e : in_edges_) { + if (e) res.push_back(e->src()); + } + return res; + } + const string& name() const { return node_->name(); } + const tensorflow::Node* tf_node() const { return node_; } + int id() const { return id_; } + + private: + const tensorflow::Node* node_; + std::vector in_edges_; + std::vector out_edges_; + int id_; + + friend class SimpleGraph; +}; + +class SimpleGraph { + public: + explicit SimpleGraph(const tensorflow::Graph* g); + ~SimpleGraph(); + + void AddControlEdge(SimpleNode* src, SimpleNode* dst); + void AddEdge(SimpleNode* src, int out_port, SimpleNode* dst, int in_port); + void RemoveEdge(const SimpleEdge*); + SimpleNode* FindNodeId(int node_id) { + if (node_id < 0 || node_id > static_cast(nodes_.size())) { + return nullptr; + } + return nodes_[node_id]; + } + int num_node_ids() const { return nodes_.size(); } + const SimpleNode* source_node() const { + return nodes_[tensorflow::Graph::kSourceId]; + } + const SimpleNode* sink_node() const { + return 
nodes_[tensorflow::Graph::kSinkId]; + } + + private: + const tensorflow::Graph* g_; + std::vector nodes_; + std::vector edges_; + // free_edge_ids_ and free_node_ids_ contain freed indices. + std::set free_edge_ids_; + std::set free_node_ids_; +}; + +SimpleNode::SimpleNode(const tensorflow::Node* node, const int id) + : node_(node), id_(id) { + if (node_) { + in_edges_.reserve(node_->in_edges().size()); + out_edges_.reserve(node_->out_edges().size()); + } +} + +SimpleGraph::SimpleGraph(const tensorflow::Graph* g) : g_(g) { + int n_nodes = g_->num_node_ids(); + nodes_.resize(n_nodes, nullptr); + nodes_[g->kSourceId] = new SimpleNode(g->source_node(), g->kSourceId); + nodes_[g->kSinkId] = new SimpleNode(g->sink_node(), g->kSinkId); + int n_edges = g->num_edge_ids(); + edges_.resize(n_edges, nullptr); + for (int i = 2; i < n_nodes; i++) { + const auto n = g->FindNodeId(i); + if (n) { + nodes_[i] = new SimpleNode(n, i); + } else { + free_node_ids_.insert(i); + } + } + for (int i = 0; i < n_edges; i++) { + const auto e = g->FindEdgeId(i); + if (e) { + const auto tfsrc = e->src(); + const auto tfdst = e->dst(); + bool is_control = e->IsControlEdge(); + auto src = nodes_[tfsrc->id()]; + auto dst = nodes_[tfdst->id()]; + auto edge = new SimpleEdge(i, src, e->src_output(), dst, e->dst_input(), + is_control); + edges_[i] = edge; + src->out_edges_.push_back(edge); + dst->in_edges_.push_back(edge); + } else { + free_edge_ids_.insert(i); + } + } +} + +void SimpleGraph::AddEdge(SimpleNode* src, int out_port, SimpleNode* dst, + int in_port) { + int i = edges_.size(); + if (!free_edge_ids_.empty()) { + auto it = free_edge_ids_.begin(); + i = *it; + free_edge_ids_.erase(it); + } else { + edges_.push_back(nullptr); + } + bool is_control = (out_port == tensorflow::Graph::kControlSlot); + is_control |= (in_port == tensorflow::Graph::kControlSlot); + auto edge = new SimpleEdge(i, src, out_port, dst, in_port, is_control); + edges_[i] = edge; + src->out_edges_.push_back(edge); + 
dst->in_edges_.push_back(edge); +} + +void SimpleGraph::AddControlEdge(SimpleNode* src, SimpleNode* dst) { + AddEdge(src, tensorflow::Graph::kControlSlot, dst, + tensorflow::Graph::kControlSlot); +} + +void SimpleGraph::RemoveEdge(const SimpleEdge* edge) { + auto src = edge->src(); + auto dst = edge->dst(); + for (auto it = src->out_edges_.begin(); it != src->out_edges_.end(); ++it) { + if (*it == edge) { + src->out_edges_.erase(it); + break; + } + } + for (auto it = dst->in_edges_.begin(); it != dst->in_edges_.end(); ++it) { + if (*it == edge) { + dst->in_edges_.erase(it); + break; + } + } +} + +SimpleGraph::~SimpleGraph() { + for (auto x : nodes_) delete x; + for (auto x : edges_) delete x; +} namespace { -bool CanContractEdge(const tensorflow::Edge* edge, - const tensorflow::Graph& graph) { - const tensorflow::Node* src = edge->src(); - const tensorflow::Node* dst = edge->dst(); +bool CheckCycles(const std::unique_ptr& g, const SimpleNode* src, + const std::vector& start) { + // copied from TF ReverseDFS. + struct Work { + SimpleNode* node; + bool leave; // Are we entering or leaving n? + }; + + std::vector stack(start.size()); + for (int i = 0; i < start.size(); ++i) { + stack[i] = Work{start[i], false}; + } + + std::vector visited(g->num_node_ids(), false); + while (!stack.empty()) { + Work w = stack.back(); + stack.pop_back(); + + auto n = w.node; + if (w.leave) { + if (n == src) { + return true; + } + continue; + } + + if (visited[n->id()]) continue; + visited[n->id()] = true; + // Arrange to call leave(n) when all done with descendants. + stack.push_back(Work{n, true}); + + auto nodes = n->in_nodes(); + for (const auto node : nodes) { + if (!visited[node->id()]) { + stack.push_back(Work{node, false}); + } + } + } + return false; +} + +bool CanContractEdge(const SimpleEdge* edge, + const std::unique_ptr& graph) { + const auto src = edge->src(); + const auto dst = edge->dst(); // Can't contract edge if doing so would cause a cycle in the // graph. 
So, if there is a directed path from 'src' to 'dst', other @@ -48,46 +269,38 @@ bool CanContractEdge(const tensorflow::Edge* edge, // 1. Get all nodes incoming to 'dst', excluding 'src' // 2. Reverse DFS from those nodes // 3. If reverse DFS reaches 'src' then we have a cycle - std::vector dfs_start_nodes; - for (tensorflow::Node* node : dst->in_nodes()) { + std::vector dfs_start_nodes; + for (SimpleNode* node : dst->in_nodes()) { if (node != src) { dfs_start_nodes.push_back(node); } } - bool is_cycle = false; - if (!dfs_start_nodes.empty()) { - tensorflow::ReverseDFSFrom(graph, dfs_start_nodes, {}, - [&is_cycle, src](tensorflow::Node* node) { - if (node == src) { - is_cycle = true; - } - }); - } - + bool is_cycle = CheckCycles(graph, src, dfs_start_nodes); return !is_cycle; } +} // namespace -void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, - std::vector* remove_edges) { +void ContractEdge(SimpleEdge* edge, SimpleGraph* graph, + std::vector* remove_edges) { // Transfer all inputs and outputs of 'dst' to 'src' except edges // connecting the two. - tensorflow::Node* src = edge->src(); - tensorflow::Node* dst = edge->dst(); + auto src = edge->src(); + auto dst = edge->dst(); // We can use '0' for input/output index because we don't need them // to be accurate for the way we are using the graph. 
- std::vector in_edges(dst->in_edges().begin(), - dst->in_edges().end()); - for (const tensorflow::Edge* in_edge : in_edges) { + std::vector in_edges(dst->in_edges().begin(), + dst->in_edges().end()); + for (const SimpleEdge* in_edge : in_edges) { if (in_edge->IsControlEdge()) { if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); + SimpleEdge* e = const_cast(in_edge); graph->AddControlEdge(e->src(), src); } } else { if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); + SimpleEdge* e = const_cast(in_edge); if (e->src() == graph->source_node()) { graph->AddEdge(e->src(), e->src_output(), src, tensorflow::Graph::kControlSlot); @@ -98,14 +311,14 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, } } - std::vector out_edges(dst->out_edges().begin(), - dst->out_edges().end()); - for (const tensorflow::Edge* out_edge : out_edges) { + std::vector out_edges(dst->out_edges().begin(), + dst->out_edges().end()); + for (const SimpleEdge* out_edge : out_edges) { if (out_edge->IsControlEdge()) { - tensorflow::Edge* e = const_cast(out_edge); + SimpleEdge* e = const_cast(out_edge); graph->AddControlEdge(src, e->dst()); } else { - tensorflow::Edge* e = const_cast(out_edge); + SimpleEdge* e = const_cast(out_edge); if (e->dst() == graph->sink_node()) { VLOG(1) << " edge to sink node " << src->name() << " -> " << e->dst()->name(); @@ -128,8 +341,6 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, } } -} // namespace - tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, const std::function& candidate_fn, @@ -140,17 +351,22 @@ tensorflow::Status SegmentGraph( tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( tensorflow::GraphConstructorOptions(), gdef, &graph)); + return SegmentGraph(&graph, candidate_fn, options, segments); +} - // tensorflow::DumpGraph("Pre-Segment", &graph); - +tensorflow::Status SegmentGraph( + tensorflow::Graph* tf_graph, + const 
std::function& candidate_fn, + const SegmentOptions& options, SegmentNodesVector* segments) { + auto graph = std::unique_ptr(new SimpleGraph(tf_graph)); // Use a union-find to collect the nodes that belong to the same - // segment. A node value of nullptr indicates that the node is not a - // candidate for TRT. - std::vector> node_segments; - for (int i = 0; i < graph.num_node_ids(); ++i) { - tensorflow::Node* node = graph.FindNodeId(i); + // segment. A node value of nullptr indicates that the node is not a candidate + // for TRT. + std::vector> node_segments; + for (int i = 0; i < graph->num_node_ids(); ++i) { + SimpleNode* node = graph->FindNodeId(i); if (options.exclude_node_list.count(node->name()) != 0 || - !candidate_fn(node)) { + !candidate_fn(node->tf_node())) { node = nullptr; } node_segments.emplace_back(node); @@ -164,10 +380,16 @@ tensorflow::Status SegmentGraph( // a measure of how beneficial it is to include a given node in a // TRT subgraph then we can revisit this algorithm to take advantage // of that information. - std::vector order; - tensorflow::GetPostOrder(graph, &order); - - for (const tensorflow::Node* node : order) { + std::vector tforder; + tensorflow::GetPostOrder(*tf_graph, &tforder); + // use postorder implementation from tensorflow and construct mirror in + // internal format + std::vector order; + order.reserve(tforder.size()); + for (const auto tfnode : tforder) { + order.push_back(graph->FindNodeId(tfnode->id())); + } + for (const SimpleNode* node : order) { // All output nodes of 'node' have been visited... VLOG(2) << "Trying node " << node->name() << " id=" << node->id(); @@ -181,8 +403,8 @@ tensorflow::Status SegmentGraph( // nodes. Iterate since combining two nodes may unblock other // combining. while (true) { - std::set contract_edges; - for (const tensorflow::Edge* out_edge : node->out_edges()) { + std::set contract_edges; + for (const SimpleEdge* out_edge : node->out_edges()) { VLOG(2) << "... 
out node " << out_edge->dst()->name() << " ( " << out_edge->dst()->id() << " <- " << node->id() << " )"; if (out_edge->IsControlEdge()) { @@ -210,9 +432,9 @@ tensorflow::Status SegmentGraph( // Contract edges and collect the adjacent nodes into the same // segment/subgraph. while (!contract_edges.empty()) { - const tensorflow::Edge* contract_edge = *contract_edges.begin(); - const tensorflow::Node* src = contract_edge->src(); - const tensorflow::Node* dst = contract_edge->dst(); + const SimpleEdge* contract_edge = *contract_edges.begin(); + const SimpleNode* src = contract_edge->src(); + const SimpleNode* dst = contract_edge->dst(); VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " (" << src->id() << " <- " << dst->id(); @@ -221,13 +443,13 @@ tensorflow::Status SegmentGraph( // Contracting the edge leaves disconnected graph edges. // Remove these from the graph and from 'contract_edges' so we // don't visit them again. - tensorflow::Edge* e = const_cast(contract_edge); - std::vector remove_edges; - ContractEdge(e, &graph, &remove_edges); + SimpleEdge* e = const_cast(contract_edge); + std::vector remove_edges; + ContractEdge(e, graph.get(), &remove_edges); - for (const tensorflow::Edge* r : remove_edges) { + for (const SimpleEdge* r : remove_edges) { contract_edges.erase(r); - graph.RemoveEdge(r); + graph->RemoveEdge(r); } } } @@ -236,9 +458,27 @@ tensorflow::Status SegmentGraph( // Collect the segments/subgraphs. Each subgraph is represented by a // set of the names of the nodes in that subgraph. std::unordered_map> sg_map; + std::unordered_map> device_maps; for (auto& u : node_segments) { if ((u.Value() != nullptr) && (u.ParentValue() != nullptr)) { sg_map[u.ParentValue()->name()].insert(u.Value()->name()); + auto tf_node = u.Value()->tf_node(); + // has_assigned_device_name() is expected to return true + // when called from optimization pass. 
However, since graph + // is converted back and forth between graph and graphdef, + // assigned devices demoted to requested devices. If the graph + // is passed directly to this module, assigned devices will be set. + if (tf_node->has_assigned_device_name()) { + device_maps[u.ParentValue()->name()].insert( + tf_node->assigned_device_name()); + } else if (!tf_node->requested_device().empty()) { + device_maps[u.ParentValue()->name()].insert( + tf_node->requested_device()); + } else { + VLOG(1) << "Node " << tf_node->name() + << " has no device assigned requested device is: " + << tf_node->requested_device(); + } } } @@ -260,10 +500,35 @@ tensorflow::Status SegmentGraph( << segment_node_names.size() << " nodes, dropping"; continue; } - - segments->emplace_back(segment_node_names); + // TODO(sami): Make segmenter placement aware once trtscopes are in place + const auto& dev_itr = device_maps.find(itr.first); + if (dev_itr == device_maps.end() || dev_itr->second.empty()) { + VLOG(1) << "No device assigned to segment " << segments->size(); + segments->emplace_back(std::make_pair(segment_node_names, string())); + } else if (dev_itr->second.size() > 1) { + string s("Segment "); + StrAppend(&s, segments->size(), " has multiple devices attached: "); + for (const auto& dev : dev_itr->second) { + StrAppend(&s, dev, ", "); + } + LOG(WARNING) << s << " choosing " << *(dev_itr->second.begin()); + segments->emplace_back( + std::make_pair(segment_node_names, *(dev_itr->second.begin()))); + } else { + segments->emplace_back( + std::make_pair(segment_node_names, *(dev_itr->second.begin()))); + } + } + if (VLOG_IS_ON(1)) { + for (const auto& d : device_maps) { + string s("Segment "); + StrAppend(&s, ": '", d.first, "' "); + for (const auto& dd : d.second) { + StrAppend(&s, dd, ", "); + } + VLOG(1) << "Devices " << s; + } } - return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index 
7e8685f44a..1568dd9153 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -29,7 +29,9 @@ namespace tensorflow { namespace tensorrt { namespace segment { -using SegmentNodesVector = std::vector>; +// vector of segments, each entry contains a device name and a set of nodes in +// segment +using SegmentNodesVector = std::vector, string>>; struct SegmentOptions { // Segment must contain at least this many nodes. @@ -51,6 +53,20 @@ tensorflow::Status SegmentGraph( const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments); +// Get the subgraphs of a graph that can be handled by TensorRT. +// +// @param graph tensorflow::Graph of the network +// @param candidate_fn A function that returns true for a Node* if +// that node can be handled by TensorRT. +// @param segments Returns the TensorRT segments/subgraphs. Each entry +// in the vector describes a subgraph by giving a set of the names of +// all the NodeDefs in that subgraph. +// @return the status. 
+tensorflow::Status SegmentGraph( + tensorflow::Graph* tf_graph, + const std::function& candidate_fn, + const SegmentOptions& options, SegmentNodesVector* segments); + } // namespace segment } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc index 6f7655fcab..2de3923b06 100644 --- a/tensorflow/contrib/tensorrt/segment/segment_test.cc +++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc @@ -34,7 +34,7 @@ class SegmentTest : public ::testing::Test { TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name); - std::function MakeCandidateFn( + std::function MakeCandidateFn( const std::set& node_names); protected: @@ -59,9 +59,9 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph, return ret; } -std::function SegmentTest::MakeCandidateFn( +std::function SegmentTest::MakeCandidateFn( const std::set& node_names) { - return [node_names](const Node* node) -> bool { + return [node_names](const tensorflow::Node* node) -> bool { return node_names.find(node->name()) != node_names.end(); }; } @@ -164,7 +164,7 @@ TEST_F(SegmentTest, Simple) { ASSERT_EQ(segments.size(), 1); std::vector expected{"add0", "add1", "add2", "add3", "add4"}; for (const auto& ex : expected) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); @@ -277,13 +277,13 @@ TEST_F(SegmentTest, Multiple) { std::vector expected0{"add0", "add1", "add2", "add3"}; for (const auto& ex : expected0) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } std::vector expected1{"add6", "add8"}; for (const auto& ex : expected1) { - EXPECT_TRUE(segments[1].find(ex) != segments[1].end()) + EXPECT_TRUE(segments[1].first.find(ex) 
!= segments[1].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); @@ -347,13 +347,13 @@ TEST_F(SegmentTest, BigIfElse) { std::vector expected0{"add3", "add4", "add5", "add6", "add7"}; for (const auto& ex : expected0) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } std::vector expected1{"add0", "add1"}; for (const auto& ex : expected1) { - EXPECT_TRUE(segments[1].find(ex) != segments[1].end()) + EXPECT_TRUE(segments[1].first.find(ex) != segments[1].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); diff --git a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc index 8b475177bc..f36495f6b6 100644 --- a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc +++ b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/shape_fn/trt_shfn.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -33,7 +34,8 @@ tensorflow::Status TRTEngineOpShapeInference(InferenceContext* context) { TF_RETURN_IF_ERROR(context->GetAttr("serialized_engine", &serialized_engine)); nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); nvinfer1::ICudaEngine* trt_engine = infer->deserializeCudaEngine( - serialized_engine.c_str(), serialized_engine.size(), nullptr); + serialized_engine.c_str(), serialized_engine.size(), + tensorrt::PluginFactoryTensorRT::GetInstance()); int num_batch = -1; std::vector<::tensorflow::DataType> input_type; diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index ad01bedd8f..175ccd8006 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -18,7 
+18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import argparse import numpy as np + # normally we should do import tensorflow as tf and then # tf.placeholder, tf.constant, tf.nn.conv2d etc but # it looks like internal builds don't like it so @@ -26,6 +28,7 @@ import numpy as np from tensorflow.contrib import tensorrt as trt from tensorflow.core.protobuf import config_pb2 as cpb2 +from tensorflow.core.protobuf import rewriter_config_pb2 as rwpb2 from tensorflow.python.client import session as csess from tensorflow.python.framework import constant_op as cop from tensorflow.python.framework import dtypes as dtypes @@ -59,9 +62,11 @@ def get_simple_graph_def(): return g.as_graph_def() -def run_graph(gdef, dumm_inp): +def execute_graph(gdef, dumm_inp): """Run given graphdef once.""" + print("executing") gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + sessconfig = cpb2.ConfigProto(gpu_options=gpu_options) ops.reset_default_graph() g = ops.Graph() with g.as_default(): @@ -69,15 +74,14 @@ def run_graph(gdef, dumm_inp): graph_def=gdef, return_elements=["input", "output"]) inp = inp.outputs[0] out = out.outputs[0] - with csess.Session( - config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: + with csess.Session(config=sessconfig, graph=g) as sess: val = sess.run(out, {inp: dumm_inp}) return val # Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. 
-def run_calibration(gdef, dumm_inp): +def execute_calibration(gdef, dumm_inp): """Run given calibration graph multiple times.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() @@ -96,7 +100,9 @@ def run_calibration(gdef, dumm_inp): return val -if "__main__" in __name__: +def user(run_graph=execute_graph, run_calibration=execute_calibration): + """Example function that converts a graph to TFTRT graph.""" + inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) orig_graph = get_simple_graph_def() # use a frozen graph for inference @@ -137,3 +143,51 @@ if "__main__" in __name__: assert np.allclose(o1, o4) assert np.allclose(o1, o5) print("Pass") + + +def auto(): + """Run the conversion as an optimization pass.""" + inp_dims = (100, 24, 24, 2) + dummy_input = np.random.random_sample(inp_dims) + orig_graph = get_simple_graph_def() + opt_config = rwpb2.RewriterConfig() + opt_config.optimizers.extend(["constfold", "layout"]) + custom_op = opt_config.custom_optimizers.add() + custom_op.name = "TensorRTOptimizer" + custom_op.parameter_map["minimum_segment_size"].i = 3 + custom_op.parameter_map["precision_mode"].s = "FP32" + custom_op.parameter_map["max_batch_size"].i = inp_dims[0] + custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25 + print(custom_op) + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + graph_options = cpb2.GraphOptions(rewrite_options=opt_config) + sessconfig = cpb2.ConfigProto( + gpu_options=gpu_options, graph_options=graph_options) + print(sessconfig) + g = ops.Graph() + ops.reset_default_graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=orig_graph, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with csess.Session(config=sessconfig, graph=g) as sess: + val = sess.run(out, {inp: dummy_input}) + print(val.shape) + + +if "__main__" in __name__: + P = argparse.ArgumentParser( + 
prog="tftrt_test", + description="Example utilization of TensorFlow-TensorRT integration") + P.add_argument( + "--automatic", + "-a", + action="store_true", + help="Do TRT conversion automatically", + default=False) + flags, unparsed = P.parse_known_args() + if flags.automatic: + auto() + else: + user() diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py index d426e9f12c..0403b652d7 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ -44,8 +44,7 @@ class IntegrationTest(test_util.TensorFlowTestCase): inp_dims = (100, 24, 24, 2) self._input = np.random.random_sample(inp_dims) self._original_graph = self.get_simple_graph_def() - self._gpu_options = cpb2.GPUOptions( - per_process_gpu_memory_fraction=0.50) + self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) self._config = cpb2.ConfigProto(gpu_options=self._gpu_options) self._reference = self.run_graph(self._original_graph, self._input) @@ -60,11 +59,7 @@ class IntegrationTest(test_util.TensorFlowTestCase): name="weights", dtype=dtypes.float32) conv = nn.conv2d( - input=a, - filter=e, - strides=[1, 2, 2, 1], - padding="SAME", - name="conv") + input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv") b = cop.constant( [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) t = nn.bias_add(conv, b, name="biasAdd") @@ -85,8 +80,7 @@ class IntegrationTest(test_util.TensorFlowTestCase): inp = inp.outputs[0] out = out.outputs[0] with self.test_session( - graph=g, config=self._config, use_gpu=True, - force_gpu=True) as sess: + graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: val = sess.run(out, {inp: dumm_inp}) return val @@ -104,15 +98,14 @@ class IntegrationTest(test_util.TensorFlowTestCase): # run over real calibration data here, we are mimicking a calibration # set of 30 different batches. 
Use as much calibration data as you want with self.test_session( - graph=g, config=self._config, use_gpu=True, - force_gpu=True) as sess: + graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: for _ in range(30): val = sess.run(out, {inp: dumm_inp}) return val def get_trt_graph(self, mode): """Return trt converted graph.""" - if mode in ["FP32", "FP16", "INT8"]: + if mode in ["FP32", "FP16", "INT8"]: return trt.create_inference_graph( input_graph_def=self._original_graph, outputs=["output"], @@ -120,7 +113,7 @@ class IntegrationTest(test_util.TensorFlowTestCase): max_workspace_size_bytes=1 << 25, precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" minimum_segment_size=2 # minimum number of nodes in an engine - ) + ) return None def testFP32(self): diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index 5dd7bde205..5b9aeaa879 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =================================================================== -"""TPU system metdata and associated tooling.""" +"""TPU system metadata and associated tooling.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index 4b6104a8b4..3137bfd03e 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -159,7 +159,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen * step_id - Step ID. * request_index - Request index. * remote_addr/rkey - Address/rkey of the reallocated result/proxy tensor. 
-* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occured on the sender side, so it can propagate it to the upper levels. +* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occurred on the sender side, so it can propagate it to the upper levels. * type - The message type. * name (name_size) - Name of the requested tensor. * step_id - Step ID. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 2dd8e6fb31..3286f856db 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2762,6 +2762,7 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", + "//tensorflow/core/kernels:__subpackages__", "//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], @@ -3683,7 +3684,11 @@ tf_cuda_only_cc_test( ":test", ":test_main", "//third_party/eigen3", - ], + ] + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), ) tf_cc_test_gpu( diff --git a/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt new file mode 100644 index 0000000000..8cef243aee --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt @@ -0,0 +1,30 @@ +op { + graph_op_name: "RegexFullMatch" + in_arg { + name: "input" + description: <

-template <typename T>
+```c++
+template 
 class ZeroOutOp : public OpKernel {
  public:
-  explicit ZeroOutOp(OpKernelConstruction\* context) : OpKernel(context) {}
- void Compute(OpKernelContext\* context) override { + explicit ZeroOutOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { // Grab the input tensor - const Tensor& input\_tensor = context->input(0); - auto input = input\_tensor.flat<T>();
+ const Tensor& input_tensor = context->input(0); + auto input = input_tensor.flat(); + // Create an output tensor Tensor* output = NULL; - OP\_REQUIRES\_OK(context, - context->allocate\_output(0, input_tensor.shape(), &output)); - auto output\_flat = output->template flat<T>();
+ OP_REQUIRES_OK(context, + context->allocate_output(0, input_tensor.shape(), &output)); + auto output_flat = output->template flat(); + // Set all the elements of the output tensor to 0 const int N = input.size(); - for (int i = 0; i < N; i++) { - output\_flat(i) = 0; - }
+ for (int i = 0; i < N; i++) { + output_flat(i) = 0; + } + // Preserve the first input value - if (N > 0) output\_flat(0) = input(0); + if (N > 0) output_flat(0) = input(0); } -};
-// Note that TypeConstraint<int32>("T") means that attr "T" (defined +}; + +// Note that TypeConstraint("T") means that attr "T" (defined // in the op registration above) must be "int32" to use this template -// instantiation.
-REGISTER\_KERNEL\_BUILDER( +// instantiation. +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<int32>("T"), - ZeroOutOp<int32>); -REGISTER\_KERNEL\_BUILDER( + .Device(DEVICE_CPU) + .TypeConstraint("T"), + ZeroOutOp); +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<float>("T"), - ZeroOutOp<float>); -REGISTER\_KERNEL\_BUILDER( + .Device(DEVICE_CPU) + .TypeConstraint("T"), + ZeroOutOp); +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<double>("T"), - ZeroOutOp<double>); -
+ .Device(DEVICE_CPU) + .TypeConstraint("T"), + ZeroOutOp); +``` If you have more than a couple overloads, you can put the registration in a macro. diff --git a/tensorflow/docs_src/extend/architecture.md b/tensorflow/docs_src/extend/architecture.md index c0fc714a44..c8f522a03a 100644 --- a/tensorflow/docs_src/extend/architecture.md +++ b/tensorflow/docs_src/extend/architecture.md @@ -4,8 +4,8 @@ We designed TensorFlow for large-scale distributed training and inference, but it is also flexible enough to support experimentation with new machine learning models and system-level optimizations. -This document describes the system architecture that makes possible this -combination of scale and flexibility. It assumes that you have basic familiarity +This document describes the system architecture that makes this +combination of scale and flexibility possible. It assumes that you have basic familiarity with TensorFlow programming concepts such as the computation graph, operations, and sessions. See @{$programmers_guide/low_level_intro$this document} for an introduction to these topics. Some familiarity @@ -15,8 +15,8 @@ will also be helpful. This document is for developers who want to extend TensorFlow in some way not supported by current APIs, hardware engineers who want to optimize for TensorFlow, implementers of machine learning systems working on scaling and -distribution, or anyone who wants to look under Tensorflow's hood. After -reading it you should understand TensorFlow architecture well enough to read +distribution, or anyone who wants to look under Tensorflow's hood. By the end of this document +you should understand the TensorFlow architecture well enough to read and modify the core TensorFlow code. ## Overview @@ -35,7 +35,7 @@ This document focuses on the following layers: * **Client**: * Defines the computation as a dataflow graph. 
* Initiates graph execution using a [**session**]( - https://www.tensorflow.org/code/tensorflow/python/client/session.py) + https://www.tensorflow.org/code/tensorflow/python/client/session.py). * **Distributed Master** * Prunes a specific subgraph from the graph, as defined by the arguments to Session.run(). @@ -55,7 +55,7 @@ Figure 2 illustrates the interaction of these components. "/job:worker/task:0" a server": a task responsible for storing and updating the model's parameters. Other tasks send updates to these parameters as they work on optimizing the parameters. This particular division of labor between tasks is not required, but -it is common for distributed training. + is common for distributed training. ![TensorFlow Architecture Diagram](https://www.tensorflow.org/images/diag1.svg){: width="500"} @@ -193,7 +193,7 @@ https://www.tensorflow.org/code/tensorflow/contrib/nccl/python/ops/nccl_ops.py)) ## Kernel Implementations -The runtime contains over 200 standard operations, including mathematical, array +The runtime contains over 200 standard operations including mathematical, array manipulation, control flow, and state management operations. Each of these operations can have kernel implementations optimized for a variety of devices. 
Many of the operation kernels are implemented using Eigen::Tensor, which uses diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 8c165aad52..1abd840ab3 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 26cbcc9a9b..52a2a3f8a6 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 05b2878701..1256fb99c4 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0-rc1 + 1.8.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow 
tensorflow - 1.8.0-rc1 + 1.8.0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.8.0-rc1 + 1.8.0 org.tensorflow libtensorflow_jni_gpu - 1.8.0-rc1 + 1.8.0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). 3. Extract this .zip file. @@ -227,7 +227,7 @@ must be part of your `classpath`. 
For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.8.0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 9d9322dbb5..0ed8160027 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -438,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -684,14 +684,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 0906b55008..29a867a9e3 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8bbdf013ca..5ba522b436 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0rc1 on Linux: +for TensorFlow 1.8.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md index 1b0b9b44b4..241f01d460 100644 --- a/tensorflow/docs_src/mobile/mobile_intro.md +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -212,7 +212,7 @@ handle the task then it will be difficult to train a computer to do better. After you’ve solved any fundamental issues with your use case, you need to create a labeled dataset to define what problem you’re trying to solve. This -step is extremely important, moreso than picking which model to use. You want it +step is extremely important, more than picking which model to use. You want it to be as representative as possible of your actual use case, since the model will only be effective at the task you teach it. It’s also worth investing in tools to make labeling the data as efficient and accurate as possible. For diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md index 01881ccf3b..5622034827 100644 --- a/tensorflow/docs_src/mobile/tflite/index.md +++ b/tensorflow/docs_src/mobile/tflite/index.md @@ -155,7 +155,7 @@ retraining for both floating point and quantized inference. The following diagram shows the architectural design of TensorFlow Lite: -TensorFlow Lite architecture diagram diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 51c1a1e032..b6291a9ffa 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -72,7 +72,7 @@ tensors in the execution of a step. If `t` is a @{tf.Tensor} object, @{tf.Tensor.eval} is shorthand for -@{tf.Session.run} (where `sess` is the +@{tf.Session.run}, where `sess` is the current @{tf.get_default_session}. 
The two following snippets of code are equivalent: @@ -101,9 +101,8 @@ sessions, it may be more straightforward to make explicit calls to Sessions can own resources, such as @{tf.Variable}, @{tf.QueueBase}, and -@{tf.ReaderBase}; and these resources can use -a significant amount of memory. These resources (and the associated memory) are -released when the session is closed, by calling +@{tf.ReaderBase}. These resources can sometimes use +a significant amount of memory, and can be released when the session is closed by calling @{tf.Session.close}. The intermediate tensors that are created as part of a call to @@ -137,7 +136,7 @@ TensorFlow also has a to help build support for more client languages. We invite contributions of new language bindings. -Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the opensource community build on top of the C API supported by the TensorFlow maintainers. +Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the open source community build on top of the C API supported by the TensorFlow maintainers. #### Does TensorFlow make use of all the devices (GPUs and CPUs) available on my machine? @@ -210,8 +209,8 @@ a new tensor with a different dynamic shape. #### How do I build a graph that works with variable batch sizes? 
-It is often useful to build a graph that works with variable batch sizes, for -example so that the same code can be used for (mini-)batch training, and +It is often useful to build a graph that works with variable batch sizes +so that the same code can be used for (mini-)batch training, and single-instance inference. The resulting graph can be @{tf.Graph.as_graph_def$saved as a protocol buffer} and @@ -260,7 +259,7 @@ See the how-to documentation for There are three main options for dealing with data in a custom format. The easiest option is to write parsing code in Python that transforms the data -into a numpy array. Then use @{tf.data.Dataset.from_tensor_slices} to +into a numpy array. Then, use @{tf.data.Dataset.from_tensor_slices} to create an input pipeline from the in-memory data. If your data doesn't fit in memory, try doing the parsing in the Dataset @@ -274,7 +273,7 @@ If your data is not easily parsable with the built-in TensorFlow operations, consider converting it, offline, to a format that is easily parsable, such as @{tf.python_io.TFRecordWriter$`TFRecord`} format. -The more efficient method to customize the parsing behavior is to +The most efficient method to customize the parsing behavior is to @{$adding_an_op$add a new op written in C++} that parses your data format. The @{$new_data_formats$guide to handling new data formats} has more information about the steps for doing this. 
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index 58a80d5339..1248c3cabe 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -265,7 +265,7 @@ example: ```python constant = tf.constant([1, 2, 3]) tensor = constant * constant -print tensor.eval() +print(tensor.eval()) ``` The `eval` method only works when a default `tf.Session` is active (see @@ -306,8 +306,8 @@ Note that you rarely want to use the following pattern when printing a ``` python t = <> -print t # This will print the symbolic tensor when the graph is being built. - # This tensor does not have a value in this context. +print(t) # This will print the symbolic tensor when the graph is being built. + # This tensor does not have a value in this context. ``` This code prints the `tf.Tensor` object (which represents deferred computation) diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index e8cf771155..cd8c4b5b9a 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -237,7 +237,7 @@ TensorFlow supports two ways of sharing variables: While code which explicitly passes variables around is very clear, it is sometimes convenient to write TensorFlow functions that implicitly use variables in their implementations. Most of the functional layers from -`tf.layer` use this approach, as well as all `tf.metrics`, and a few other +`tf.layers` use this approach, as well as all `tf.metrics`, and a few other library utilities. 
Variable scopes allow you to control variable reuse when calling functions which diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ead5a636b9..0f17899dae 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -209,7 +209,6 @@ for two-dimensional image data expect input tensors to have a shape of * _`channels`_. Number of color channels in the example images. For color images, the number of channels is 3 (red, green, blue). For monochrome images, there is just 1 channel (black). -* _`image_height`_. Height of the example images. * _`data_format`_. A string, one of `channels_last` (default) or `channels_first`. `channels_last` corresponds to inputs with shape `(batch, ..., channels)` while `channels_first` corresponds to diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py index 9e21aee87f..a40a9eaecb 100644 --- a/tensorflow/examples/learn/text_classification_cnn.py +++ b/tensorflow/examples/learn/text_classification_cnn.py @@ -73,7 +73,7 @@ def cnn_model(features, labels, mode): kernel_size=FILTER_SHAPE2, padding='VALID') # Max across each filter to get useful features for classification. - pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1]) + pool2 = tf.squeeze(tf.reduce_max(conv2, 1), axis=[1]) # Apply regular WX + B and classification. logits = tf.layers.dense(pool2, MAX_LABEL, activation=None) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a503b3b00a..36db3dda6b 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -21321,7 +21321,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. 
+// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 9af2e9b8b6..32e08021dc 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -103,7 +103,7 @@ def is_sequence(seq): NOTE(mrry): This differs from `tensorflow.python.util.nest.is_sequence()`, which *does* treat a Python list as a sequence. For ergonomic reasons, `tf.data` users would prefer to treat lists as - implict `tf.Tensor` objects, and dicts as (nested) sequences. + implicit `tf.Tensor` objects, and dicts as (nested) sequences. Args: seq: an input sequence. diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py index f66cefb427..7b87972d69 100644 --- a/tensorflow/python/debug/cli/curses_ui.py +++ b/tensorflow/python/debug/cli/curses_ui.py @@ -190,8 +190,6 @@ class ScrollBar(object): return layout def get_click_command(self, mouse_y): - # TODO(cais): Support continuous scrolling when the mouse button is held - # down. if self._output_num_rows <= 1: return None elif mouse_y == self._min_y: @@ -271,6 +269,10 @@ class CursesUI(base_ui.BaseUI): _UI_WAIT_MESSAGE = "Processing..." + # The delay (in ms) between each update of the scroll bar when the mouse + # button is held down on the scroll bar. Controls how fast the screen scrolls. 
+ _MOUSE_SCROLL_DELAY_MS = 100 + _single_instance_lock = threading.Lock() def __init__(self, on_ui_exit=None, config=None): @@ -855,7 +857,30 @@ class CursesUI(base_ui.BaseUI): except curses.error: mouse_event_type = None - if mouse_event_type == curses.BUTTON1_RELEASED: + if mouse_event_type == curses.BUTTON1_PRESSED: + # Logic for held mouse-triggered scrolling. + if mouse_x >= self._max_x - 2: + # Disable blocking on checking for user input. + self._command_window.nodelay(True) + + # Loop while mouse button is pressed. + while mouse_event_type == curses.BUTTON1_PRESSED: + # Sleep for a bit. + curses.napms(self._MOUSE_SCROLL_DELAY_MS) + scroll_command = self._scroll_bar.get_click_command(mouse_y) + if scroll_command in (_SCROLL_UP_A_LINE, _SCROLL_DOWN_A_LINE): + self._scroll_output(scroll_command) + + # Check to see if different mouse event is in queue. + self._command_window.getch() + try: + _, _, _, _, mouse_event_type = self._screen_getmouse() + except curses.error: + pass + + self._command_window.nodelay(False) + return x + elif mouse_event_type == curses.BUTTON1_RELEASED: # Logic for mouse-triggered scrolling. 
if mouse_x >= self._max_x - 2: scroll_command = self._scroll_bar.get_click_command(mouse_y) @@ -1677,4 +1702,7 @@ class CursesUI(base_ui.BaseUI): self._redraw_output() def _screen_set_mousemask(self): - curses.mousemask(self._mouse_enabled) + if self._mouse_enabled: + curses.mousemask(curses.BUTTON1_RELEASED | curses.BUTTON1_PRESSED) + else: + curses.mousemask(0) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 9b4b866697..347a760333 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1163,7 +1163,7 @@ class Estimator(object): model_fn_lib.ModeKeys.TRAIN, self.config) - # TODO(anjalisridhar): Figure out how to resolve the folowing scaffold + # TODO(anjalisridhar): Figure out how to resolve the following scaffold # parameters: init_feed_dict, init_fn. scaffold_list = self._distribution.unwrap( grouped_estimator_spec.scaffold) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 8e5d8141a1..8e2ec83020 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -52,7 +52,7 @@ def _fill_array(arr, seq, fillvalue=0): If length of seq is less than arr padded length, fillvalue used. Args: arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len]. - seq: Non-padded list of data sampels of shape + seq: Non-padded list of data samples of shape [batch_size, ..., padded_dim(None)] fillvalue: Default fillvalue to use. """ diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 9961fa74c2..7bcf3d84bb 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -74,7 +74,7 @@ def _any_variable_initalized(): """Check if any variable has been initialized in the Keras model. 
Returns: - boolean, True if at least one variable has been initalized, else False. + boolean, True if at least one variable has been initialized, else False. """ variables = variables_module.global_variables() for v in variables: diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 08fff3ba64..522662cd32 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -597,7 +597,7 @@ class _TrainingExecutor(object): # max_steps, the evaluator will send the final export signal. There is a # small chance that the Estimator.train stopping logic sees a different # global_step value (due to global step race condition and the fact the - # saver sees a larger value for checkpoing saving), which does not end + # saver sees a larger value for checkpoint saving), which does not end # the training. When the training ends, a new checkpoint is generated, which # triggers the listener again. So, it could be the case the final export is # triggered twice. diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index ede6e0d159..ffcb9990d5 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -48,7 +48,7 @@ should choose depends on (1) the feature type and (2) the model type. embedded_dept_column = embedding_column( categorical_column_with_vocabulary_list( - "department", ["math", "philosphy", ...]), dimension=10) + "department", ["math", "philosophy", ...]), dimension=10) * Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`). @@ -280,7 +280,7 @@ def input_layer(features, # TODO(akshayka): InputLayer should be a subclass of Layer, and it # should implement the logic in input_layer using Layer's build-and-call # paradigm; input_layer should create an instance of InputLayer and -# return the result of inovking its apply method, just as functional layers do. 
+# return the result of invoking its apply method, just as functional layers do. class InputLayer(object): """An object-oriented version of `input_layer` that reuses variables.""" @@ -834,7 +834,7 @@ def shared_embedding_columns( tensor_name_in_ckpt=None, max_norm=None, trainable=True): """List of dense columns that convert from sparse, categorical input. - This is similar to `embedding_column`, except that that it produces a list of + This is similar to `embedding_column`, except that it produces a list of embedding columns that share the same embedding weights. Use this when your inputs are sparse and of the same type (e.g. watched and diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx index 19928314ef..17d112a1ec 100644 --- a/tensorflow/python/framework/fast_tensor_util.pyx +++ b/tensorflow/python/framework/fast_tensor_util.pyx @@ -7,6 +7,18 @@ cimport numpy as np from tensorflow.python.util import compat +def AppendFloat16ArrayToTensorProto( + # For numpy, npy_half is a typedef for npy_uint16, + # see: https://github.com/numpy/numpy/blob/master/doc/source/reference/c-api.coremath.rst#half-precision-functions + # Because np.float16_t dosen't exist in cython, we use uint16_t here. + # TODO: Use np.float16_t when cython supports it. + tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.half_val.append(nparray[i]) + + def AppendFloat32ArrayToTensorProto( tensor_proto, np.ndarray[np.float32_t, ndim=1] nparray): cdef long i, n diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 80140e4063..9fc8136348 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2582,7 +2582,7 @@ def set_shape_and_handle_data_for_outputs(op): When _USE_C_API = True, this is lazily called when a tensor's shape is first requested. 
Usually this should work automatically, but some edge cases may - require manaully calling this first to make sure Tensor._shape_val and + require manually calling this first to make sure Tensor._shape_val and Tensor._handle_data are set (e.g. manually overriding _handle_data, copying a Tensor). """ @@ -5426,36 +5426,30 @@ def enable_eager_execution(config=None, device_policy=None, in which operations are executed. Note that @{tf.ConfigProto} is also used to configure graph execution (via @{tf.Session}) and many options within `tf.ConfigProto` are not implemented (or are irrelevant) when - eager execution is enabled. + eager execution is enabled. device_policy: (Optional.) Policy controlling how operations requiring - inputs on a specific device (e.g., a GPU 0) handle inputs on a different - device (e.g. GPU 1 or CPU). When set to None, an appropriate value will be - picked automatically. The value picked may change between TensorFlow - releases. - Valid values: - + inputs on a specific device (e.g., a GPU 0) handle inputs on a different + device (e.g. GPU 1 or CPU). When set to None, an appropriate value will be + picked automatically. The value picked may change between TensorFlow + releases. + Valid values: - tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT: raises an error if the placement is not correct. - - tf.contrib.eager.DEVICE_PLACEMENT_WARN: copies the tensors which are not on the right device but logs a warning. - - tf.contrib.eager.DEVICE_PLACEMENT_SILENT: silently copies the tensors. Note that this may hide performance problems as there is no notification provided when operations are blocked on the tensor being copied between devices. - - tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, raising errors on the other ones. execution_mode: (Optional.) Policy controlling how operations dispatched are actually executed. When set to None, an appropriate value will be picked automatically. 
The value picked may change between TensorFlow releases. Valid values: - - - tf.contrib.eager.SYNC: executes each operation synchronously. - - - tf.contrib.eager.ASYNC: executes each operation asynchronously. These - operations may return "non-ready" handles. + - tf.contrib.eager.SYNC: executes each operation synchronously. + - tf.contrib.eager.ASYNC: executes each operation asynchronously. These + operations may return "non-ready" handles. Raises: ValueError: If eager execution is enabled after creating/executing a diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 8cf24206ed..ca63efbc84 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -50,6 +50,13 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values): [ExtractBitsFromFloat16(x) for x in proto_values]) +def _MediumAppendFloat16ArrayToTensorProto(tensor_proto, proto_values): + # TODO: Remove the conversion if cython supports np.float16_t + fast_tensor_util.AppendFloat16ArrayToTensorProto( + tensor_proto, + np.asarray(proto_values, dtype=np.float16).view(np.uint16)) + + def ExtractBitsFromBFloat16(x): return np.asscalar( np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16)) @@ -64,11 +71,8 @@ if _FAST_TENSOR_UTIL_AVAILABLE: _NP_TO_APPEND_FN = { dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto, - # TODO(sesse): We should have a - # fast_tensor_util.AppendFloat16ArrayToTensorProto, - # but it seems np.float16_t doesn't exist? 
np.float16: - SlowAppendFloat16ArrayToTensorProto, + _MediumAppendFloat16ArrayToTensorProto, np.float32: fast_tensor_util.AppendFloat32ArrayToTensorProto, np.float64: diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 97cd22e47a..5b01df48fe 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -682,7 +682,7 @@ def run_in_graph_and_eager_modes(__unused__=None, Args: - __unused__: Prevents sliently skipping tests. + __unused__: Prevents silently skipping tests. config: An optional config_pb2.ConfigProto to use to configure the session when executing graphs. use_gpu: If True, attempt to run as many operations as possible on GPU. diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py index 7b5eecc153..69337b6a8d 100644 --- a/tensorflow/python/keras/utils/__init__.py +++ b/tensorflow/python/keras/utils/__init__.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras.utils.data_utils import get_file +from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras.utils.data_utils import Sequence from tensorflow.python.keras.utils.data_utils import SequenceEnqueuer from tensorflow.python.keras.utils.generic_utils import custom_object_scope diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 72cc357c71..3dfad9c130 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -741,6 +741,18 @@ tf_py_test( ], ) +tf_py_test( + name = "regex_full_match_op_test", + size = "small", + srcs = ["regex_full_match_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = 
"save_restore_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index e2e6205911..fcba456004 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -31,9 +31,7 @@ class Conv1DTest(test.TestCase): def testBasic(self): """Test that argument passing to conv1d is handled properly.""" - # TODO(yongtang): dtypes.float64 can only be enabled once conv2d support - # dtypes.float64, as conv1d implicitly calls conv2d after expand_dims. - for dtype in [dtypes.float16, dtypes.float32]: + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]: x = constant_op.constant([1, 2, 3, 4], dtype=dtype) x = array_ops.expand_dims(x, 0) # Add batch dimension x = array_ops.expand_dims(x, 2) # And depth dimension diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py index 8973a450fa..289ae29fce 100644 --- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py @@ -131,6 +131,23 @@ class Conv3DTransposeTest(test.TestCase): nn_ops.conv3d_transpose( x_value, f_value, y_shape, strides, data_format='NCDHW') + def testConv3DTransposeOutputShapeType(self): + # Test case for GitHub issue 18887 + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(): + x_shape = [2, 5, 6, 4, 3] + y_shape = [2, 5, 6, 4, 2] + f_shape = [3, 3, 3, 2, 3] + strides = [1, 1, 1, 1, 1] + x_value = constant_op.constant( + 1.0, shape=x_shape, name="x", dtype=dtypes.float32) + f_value = constant_op.constant( + 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) + output = nn_ops.conv3d_transpose( + x_value, f_value, constant_op.constant(y_shape, dtype=dtype), + strides=strides, padding="SAME") + output.eval() + def testConv3DTransposeValid(self): with self.test_session(): strides = [1, 2, 2, 2, 1] diff --git 
a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py index 8e5556d0a0..63d19c15cf 100644 --- a/tensorflow/python/kernel_tests/distributions/util_test.py +++ b/tensorflow/python/kernel_tests/distributions/util_test.py @@ -735,7 +735,7 @@ class FillTriangularTest(test.TestCase): raise ValueError("Invalid shape.") n = np.int32(n) # We can't do: `x[..., -(n**2-m):]` because this doesn't correctly handle - # `m == n == 1`. Hence, we do absoulte indexing. + # `m == n == 1`. Hence, we do absolute indexing. x_tail = x[..., (m - (n * n - m)):] y = np.concatenate( [x, x_tail[..., ::-1]] if upper else [x_tail, x[..., ::-1]], diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py index f31426713c..dc3ea38671 100644 --- a/tensorflow/python/kernel_tests/manip_ops_test.py +++ b/tensorflow/python/kernel_tests/manip_ops_test.py @@ -93,7 +93,7 @@ class RollTest(test_util.TensorFlowTestCase): def testNegativeAxis(self): self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1) self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2) - # Make sure negative axis shoudl be 0 <= axis + dims < dims + # Make sure negative axis should be 0 <= axis + dims < dims with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of range"): diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py new file mode 100644 index 0000000000..5daae1b79b --- /dev/null +++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py @@ -0,0 +1,54 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for RegexFullMatch op from string_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class RegexFullMatchOpTest(test.TestCase): + + def testRegexFullMatch(self): + values = ["abaaba", "abcdabcde"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + matched = string_ops.regex_full_match(input_vector, "a.*a").eval() + self.assertAllEqual([True, False], matched) + + def testEmptyMatch(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + matched = string_ops.regex_full_match(input_vector, "").eval() + self.assertAllEqual([False, False], matched) + + def testInvalidPattern(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + invalid_pattern = "A[" + matched = string_ops.regex_full_match(input_vector, invalid_pattern) + with self.assertRaisesOpError("Invalid pattern"): + matched.eval() + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 3bca5fadc4..794be096b7 100644 --- 
a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -91,16 +91,18 @@ class SegmentReductionOpTest(SegmentReductionHelper): ] # Each item is np_op1, np_op2, tf_op - ops_list = [(np.add, None, math_ops.segment_sum), (self._mean_cum_op, - self._mean_reduce_op, - math_ops.segment_mean), + ops_list = [(np.add, None, math_ops.segment_sum), + (self._mean_cum_op, self._mean_reduce_op, + math_ops.segment_mean), (np.ndarray.__mul__, None, math_ops.segment_prod), (np.minimum, None, math_ops.segment_min), (np.maximum, None, math_ops.segment_max)] # A subset of ops has been enabled for complex numbers complex_ops_list = [(np.add, None, math_ops.segment_sum), - (np.ndarray.__mul__, None, math_ops.segment_prod)] + (np.ndarray.__mul__, None, math_ops.segment_prod), + (self._mean_cum_op, self._mean_reduce_op, + math_ops.segment_mean)] n = 10 shape = [n, 2] diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 340c34fc5e..eda036ece4 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -191,6 +191,16 @@ class Layer(base_layer.Layer): RuntimeError: If called with partioned variable regularization and eager execution is enabled. 
""" + + def _should_add_regularizer(variable, existing_variable_set): + if isinstance(variable, tf_variables.PartitionedVariable): + for var in variable: + if var in existing_variable_set: + return False + return True + else: + return variable not in existing_variable_set + init_graph = None if not context.executing_eagerly(): default_graph = ops.get_default_graph() @@ -233,7 +243,8 @@ class Layer(base_layer.Layer): getter=vs.get_variable) if regularizer: - if context.executing_eagerly() or variable not in existing_variables: + if context.executing_eagerly() or _should_add_regularizer( + variable, existing_variables): self._handle_weight_regularization(name, variable, regularizer) if init_graph is not None: @@ -353,4 +364,3 @@ def _add_elements_to_collection(elements, collection_list): for element in elements: if element not in collection_set: collection.append(element) - diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index f08b552840..ab49e37b90 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -30,6 +30,7 @@ from tensorflow.python.layers import core as core_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import random_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -95,6 +96,21 @@ class BaseLayerTest(test.TestCase): regularizer=regularizer) self.assertEqual(len(layer.losses), 1) + def testReusePartitionedVaraiblesAndRegularizers(self): + regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3 + partitioner = partitioned_variables.fixed_size_partitioner(3) + for reuse in [False, True]: + with variable_scope.variable_scope(variable_scope.get_variable_scope(), + partitioner=partitioner, + reuse=reuse): + layer = base_layers.Layer(name='my_layer') + variable 
= layer.add_variable( + 'reg_part_var', [4, 4], + initializer=init_ops.zeros_initializer(), + regularizer=regularizer) + self.assertEqual( + len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 3) + def testNoEagerActivityRegularizer(self): with context.eager_mode(): with self.assertRaisesRegexp(ValueError, 'activity_regularizer'): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index ab5997e85c..3a31ef7f88 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1285,7 +1285,7 @@ def reduce_sum(input_tensor, The reduced tensor, of the same dtype as the input_tensor. @compatibility(numpy) - Equivalent to np.sum appart the fact that numpy upcast uint8 and int32 to + Equivalent to np.sum apart the fact that numpy upcast uint8 and int32 to int64 while tensorflow returns the same dtype as the input. @end_compatibility """ diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 1271ee5108..ae79c01949 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -39,6 +39,8 @@ from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export # pylint: enable=wildcard-import +# Expose regex_full_match in strings namespace +tf_export("strings.regex_full_match")(regex_full_match) @tf_export("string_split") def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=invalid-name diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 75580fc630..9e49188c1e 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -232,7 +232,12 @@ class PrintModelAnalysisTest(test.TestCase): self.assertLess(0, tfprof_node.total_exec_micros) self.assertEqual(2844, tfprof_node.total_parameters) - self.assertLess(145660, tfprof_node.total_float_ops) + #The graph is modifed 
when MKL is enabled,total_float_ops will + #be different + if test_util.IsMklEnabled(): + self.assertLess(101600, tfprof_node.total_float_ops) + else: + self.assertLess(145660, tfprof_node.total_float_ops) self.assertEqual(8, len(tfprof_node.children)) self.assertEqual('_TFProfRoot', tfprof_node.name) self.assertEqual( diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 4b3982677f..24a13c0f33 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -130,7 +130,8 @@ class SavedModelBuilder(object): if not file_io.file_exists(asset_destination_filepath): file_io.copy(asset_source_filepath, asset_destination_filepath) - tf_logging.info("Assets written to: %s", assets_destination_dir) + tf_logging.info("Assets written to: %s", + compat.as_text(assets_destination_dir)) def _maybe_add_legacy_init_op(self, legacy_init_op=None): """Add legacy init op to the SavedModel. @@ -461,7 +462,7 @@ class SavedModelBuilder(object): compat.as_bytes(self._export_dir), compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB)) file_io.write_string_to_file(path, self._saved_model.SerializeToString()) - tf_logging.info("SavedModel written to: %s", path) + tf_logging.info("SavedModel written to: %s", compat.as_text(path)) return path diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 6d05a2ee29..ab8b37bb65 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -750,7 +750,7 @@ class DistributionStrategy(object): `fn` may call `tf.get_tower_context()` to access methods such as `tower_id()` and `merge_call()`. - `merge_call()` is used to communicate betwen the towers and + `merge_call()` is used to communicate between the towers and re-enter the cross-tower context. All towers pause their execution having encountered a `merge_call()` call. 
After that the `merge_fn`-function is executed. Its results are then unwrapped and diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index fc89f88063..4d464135fd 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1743,7 +1743,7 @@ class Saver(object): return if save_path is None: raise ValueError("Can't load save_path when it is None.") - logging.info("Restoring parameters from %s", save_path) + logging.info("Restoring parameters from %s", compat.as_text(save_path)) try: if context.executing_eagerly(): self._build_eager(save_path, build_save=False, build_restore=True) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 5faf644c91..fbd6561767 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -232,7 +232,7 @@ def getcallargs(func, *positional, **named): it. If no attached decorators modify argspec, the final unwrapped target's argspec will be used. """ - argspec = getargspec(func) + argspec = getfullargspec(func) call_args = named.copy() this = getattr(func, 'im_self', None) or getattr(func, '__self__', None) if ismethod(func) and this: diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 2b33d106bc..0f465eda4f 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -320,7 +320,7 @@ void SetDifferentKeysError(PyObject* dict1, PyObject* dict2, string* error_msg, // Returns true iff there were no "internal" errors. In other words, // errors that has nothing to do with structure checking. -// If an "internal" error occured, the appropriate Python error will be +// If an "internal" error occurred, the appropriate Python error will be // set and the caller can propage it directly to the user. // // Both `error_msg` and `is_type_error` must be non-null. 
`error_msg` must diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index 9851c11c2e..70efc10c9a 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -97,7 +97,7 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types); // used instead. The same convention is followed in `pack_sequence_as`. This // correctly repacks dicts and `OrderedDict`s after they have been flattened, // and also allows flattening an `OrderedDict` and then repacking it back using -// a correponding plain dict, or vice-versa. +// a corresponding plain dict, or vice-versa. // Dictionaries with non-sortable keys cannot be flattened. // // Args: diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index be0b0bf5fb..ea87744b22 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -1083,6 +1083,13 @@ class BlasSupport { // This is a batched version of DoBlasGemm. // The batched GEMM computes matrix product for each input/output in a, b, // and c, which contain batch_count DeviceMemory objects. 
+ virtual bool DoBlasGemmBatched( + Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, + uint64 n, uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; virtual bool DoBlasGemmBatched( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, @@ -1945,6 +1952,13 @@ class BlasSupport { DeviceMemory> *c, int ldc, \ blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ + bool DoBlasGemmBatched( \ + Stream *stream, blas::Transpose transa, blas::Transpose transb, \ + uint64 m, uint64 n, uint64 k, float alpha, \ + const port::ArraySlice *> &a, int lda, \ + const port::ArraySlice *> &b, int ldb, \ + float beta, const port::ArraySlice *> &c, \ + int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \ bool DoBlasGemmBatched( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ uint64 m, uint64 n, uint64 k, float alpha, \ diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index 3e9a23c658..08fe153b59 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -286,6 +286,10 @@ STREAM_EXECUTOR_CUBLAS_WRAP(cublasGetMathMode) STREAM_EXECUTOR_CUBLAS_WRAP(cublasSetMathMode) #endif +#if CUDA_VERSION >= 9010 +STREAM_EXECUTOR_CUBLAS_WRAP(cublasGemmBatchedEx) +#endif + } // namespace wrap static string ToString(cublasStatus_t status) { @@ -2330,13 +2334,23 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( computation_type, algorithm, output_profile_result); } -template +template +struct HalfAsFloat { + typedef T type; +}; + +template <> +struct HalfAsFloat { + typedef float type; +}; + +template port::Status CUDABlas::DoBlasGemmBatchedInternal( 
FuncT cublas_func, Stream *stream, blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha, + blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha, const port::ArraySlice *> &a_ptrs_to_wrappers, int lda, const port::ArraySlice *> &b_ptrs_to_wrappers, int ldb, - T beta, const port::ArraySlice *> &c_ptrs_to_wrappers, + Scalar beta, const port::ArraySlice *> &c_ptrs_to_wrappers, int ldc, int batch_count, ScratchAllocator *scratch_allocator) { std::vector a_raw_ptrs, b_raw_ptrs, c_raw_ptrs; for (int i = 0; i < batch_count; ++i) { @@ -2345,7 +2359,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal( c_raw_ptrs.push_back(static_cast(c_ptrs_to_wrappers[i]->opaque())); } - typedef typename CUDAComplexT::type CUDA_T; + typedef typename HalfAsFloat::type>::type CUDA_T; const size_t size = batch_count * sizeof(CUDA_T *); @@ -2397,18 +2411,84 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal( "CUDABlas::DoBlasGemmBatched"); } - bool ok = DoBlasInternal( - cublas_func, stream, true /* = pointer_mode_host */, - CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, - CUDAComplex(&alpha), const_cast(CUDAMemory(a)), lda, - const_cast(CUDAMemory(b)), ldb, CUDAComplex(&beta), - const_cast(CUDAMemory(c)), ldc, batch_count); + cudaDataType_t data_type = CUDADataType::type; - if (ok) { +#if CUDA_VERSION >= 9010 + int cc_major, cc_minor; + if (stream->parent()->GetDeviceDescription().cuda_compute_capability( + &cc_major, &cc_minor) && + cc_major >= 5) { + bool use_tensor_ops = TensorOpMathEnabled() && data_type == CUDA_R_16F; + cublasGemmAlgo_t algo = + (use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT); + cudaDataType_t compute_type = + (data_type == CUDA_R_16F ? 
CUDA_R_32F : data_type); + const void **a_void_ptrs = reinterpret_cast( + const_cast(CUDAMemory(a))); + const void **b_void_ptrs = reinterpret_cast( + const_cast(CUDAMemory(b))); + void **c_void_ptrs = + reinterpret_cast(const_cast(CUDAMemory(c))); + bool ok; + ok = DoBlasInternalImpl( + wrap::cublasGemmBatchedEx, stream, true /* = pointer_mode_host */, + true /* = err_on_failure */, use_tensor_ops, CUDABlasTranspose(transa), + CUDABlasTranspose(transb), m, n, k, &alpha, a_void_ptrs, data_type, lda, + b_void_ptrs, data_type, ldb, &beta, c_void_ptrs, data_type, ldc, + batch_count, compute_type, algo); + if (ok) { + return port::Status::OK(); + } + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } +#endif + // either CUDA_VERSION < 9.1 or SM < 5.0 + if (data_type != CUDA_R_16F) { + bool ok = DoBlasInternal( + cublas_func, stream, true /* = pointer_mode_host */, + CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, + CUDAComplex(&alpha), const_cast(CUDAMemory(a)), lda, + const_cast(CUDAMemory(b)), ldb, CUDAComplex(&beta), + const_cast(CUDAMemory(c)), ldc, batch_count); + if (ok) { + return port::Status::OK(); + } + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } else { + // Fall back to a loop for fp16 + for (int b = 0; b < batch_count; ++b) { + const DeviceMemory &a_matrix = *a_ptrs_to_wrappers[b]; + const DeviceMemory &b_matrix = *b_ptrs_to_wrappers[b]; + DeviceMemory *c_matrix = c_ptrs_to_wrappers[b]; + bool ok = DoBlasGemm(stream, transa, transb, m, n, k, alpha, a_matrix, + lda, b_matrix, ldb, beta, c_matrix, ldc); + if (!ok) { + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } + } return port::Status::OK(); } - return port::Status(port::error::INTERNAL, - "failed BLAS call, see log for details"); +} + +bool CUDABlas::DoBlasGemmBatched( + Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, + uint64 n, uint64 
k, float alpha, + const port::ArraySlice *> &a_array, int lda, + const port::ArraySlice *> &b_array, int ldb, + float beta, const port::ArraySlice *> &c_array, + int ldc, int batch_count, ScratchAllocator *scratch_allocator) { + // Note: The func passed here (cublasSgemmBatched) is not actually called, + // due to special handling of fp16 inside DoBlasGemmBatchedInternal. + port::Status status = DoBlasGemmBatchedInternal( + wrap::cublasSgemmBatched, stream, transa, transb, m, n, k, alpha, a_array, + lda, b_array, ldb, beta, c_array, ldc, batch_count, scratch_allocator); + if (!status.ok()) { + LOG(ERROR) << status; + } + return status.ok(); } bool CUDABlas::DoBlasGemmBatched( diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h index 12dc5e47fd..42b3fde5b0 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.h +++ b/tensorflow/stream_executor/cuda/cuda_blas.h @@ -107,12 +107,12 @@ class CUDABlas : public blas::BlasSupport { // A helper function to implement DoBlasGemmBatched interfaces for generic // types. 
- template + template port::Status DoBlasGemmBatchedInternal( FuncT cublas_func, Stream *stream, blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha, + blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha, const port::ArraySlice *> &a_array, int lda, - const port::ArraySlice *> &b_array, int ldb, T beta, + const port::ArraySlice *> &b_array, int ldb, Scalar beta, const port::ArraySlice *> &c_array, int ldc, int batch_count, ScratchAllocator *scratch_allocator); diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 2bc9b6b798..4a98cfe164 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -4480,6 +4480,40 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, n, alpha, a, lda, b, ldb); } +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, float beta, + const port::ArraySlice *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + /*scratch_allocator=*/nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, float beta, + const port::ArraySlice *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl *> &, int, + const port::ArraySlice *> &, int, + float, const port::ArraySlice *> &, + int, int, ScratchAllocator *> + impl; + return impl(this, 
&blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + Stream &Stream::ThenBlasGemmBatched( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index c6e37da6d1..3da1b856d6 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -1471,6 +1471,13 @@ class Stream { blas::ProfileResult *output_profile_result); // See BlasSupport::DoBlasGemmBatched. + Stream &ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count); Stream &ThenBlasGemmBatched(blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, @@ -1503,6 +1510,13 @@ class Stream { std::complex beta, const port::ArraySlice> *> &c, int ldc, int batch_count); + Stream &ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count, ScratchAllocator *scratch_allocator); Stream &ThenBlasGemmBatchedWithScratch( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 880ec0523d..d71fd71bbd 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1300,7 +1300,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): native.cc_library( name=basename + "_gpu", srcs=gpu_srcs, - 
copts=_cuda_copts(), + copts=_cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]), deps=deps + if_cuda(cuda_deps)) cuda_deps.extend([":" + basename + "_gpu"]) @@ -1483,7 +1483,7 @@ def tf_py_wrap_cc(name, # This macro is for running python tests against system installed pip package # on Windows. # -# py_test is built as an exectuable python zip file on Windows, which contains all +# py_test is built as an executable python zip file on Windows, which contains all # dependencies of the target. Because of the C++ extensions, it would be very # inefficient if the py_test zips all runfiles, plus we don't need them when running # tests against system installed pip package. So we'd like to get rid of the deps diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index e58de5b63e..f46bb4b5fc 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -101,6 +101,7 @@ genrule( "api/profiler/__init__.py", "api/python_io/__init__.py", "api/resource_loader/__init__.py", + "api/strings/__init__.py", "api/saved_model/__init__.py", "api/saved_model/builder/__init__.py", "api/saved_model/constants/__init__.py", diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 74b1b39d9f..dc2bd40096 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -500,6 +500,10 @@ tf_module { name: "string" mtype: "" } + member { + name: "strings" + mtype: "" + } member { name: "summary" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt new file mode 100644 index 0000000000..a3fbe95bba --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.strings" +tf_module { + member_method { + name: "regex_full_match" + argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, 
defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 5aaf544afd..982161cefe 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -17,14 +17,9 @@ set -e # We don't apt-get install so that we can install a newer version of pip. -# Only needed for Ubuntu 14.04 ,and not needed for Ubuntu 16.04 / Debian 8,9 -if $(cat /etc/*-release | grep -q 14.04); then - easy_install -U pip==9.0.3 - easy_install3 -U pip==9.0.3 -else - pip2 install --upgrade pip==9.0.3 - pip3 install --upgrade pip==9.0.3 -fi +# Only needed for Ubuntu 14.04 and 16.04; not needed for 18.04 and Debian 8,9? +easy_install -U pip==9.0.3 +easy_install3 -U pip==9.0.3 # Install pip packages from whl files to avoid the time-consuming process of # building from source. diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index b9996395d0..406d134699 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -85,7 +85,7 @@ RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.g ENV CI_BUILD_PYTHON python RUN tensorflow/tools/ci_build/builds/configured CPU \ - bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + bazel build -c opt --copt=-mavx --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ # For optimized builds appropriate for the hardware platform of your choosing, uncomment below... 
# For ivy-bridge or sandy-bridge # --copt=-march="ivybridge" \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index c65e0b72bc..a6cd44ced1 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -35,10 +35,10 @@ ENV CI_BUILD_PYTHON=python \ PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \ CC_OPT_FLAGS='-march=native' \ TF_NEED_JEMALLOC=0 \ - TF_NEED_GCP=0 \ + TF_NEED_GCP=1 \ TF_NEED_CUDA=0 \ TF_NEED_HDFS=0 \ - TF_NEED_S3=0 \ + TF_NEED_S3=1 \ TF_NEED_OPENCL=0 \ TF_NEED_GDR=0 \ TF_ENABLE_XLA=0 \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 7e5e6ef2d5..2fe47f3356 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -98,7 +98,7 @@ ENV TF_CUDNN_VERSION=7 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \ tensorflow/tools/ci_build/builds/configured GPU \ - bazel build -c opt --config=cuda \ + bazel build -c opt --copt=-mavx --config=cuda \ --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ tensorflow/tools/pip_package:build_pip_package && \ rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md index 67badb4869..9f6f553ba1 100644 --- a/tensorflow/tools/graph_transforms/README.md +++ b/tensorflow/tools/graph_transforms/README.md @@ -388,7 +388,7 @@ input is collapsed down into a simple constant. Args: * clear_output_shapes: Clears tensor shape information saved as attributes. - Some older graphs containes out-of-date information and may cause import + Some older graphs contains out-of-date information and may cause import errors. Defaults to true. 
Prerequisites: None diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index b66d5bdd37..1a83c6e757 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -24,7 +24,7 @@ function real_path() { function cp_external() { local src_dir=$1 local dest_dir=$2 - for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do + for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*local_config_tensorrt*' ! -name '*org_tensorflow*'`; do cp -R "$f" "$dest_dir" done mkdir -p "${dest_dir}/local_config_cuda/cuda/cuda/" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f7385e5991..319878e1b5 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -31,7 +31,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0-rc1' +_VERSION = '1.8.0' _SHORT_DESCRIPTION = ('TensorFlow is an open source machine learning framework ' 'for everyone.') @@ -55,7 +55,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.7.0, < 1.8.0', + 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index 7f477d1920..fbb1fde837 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -70,7 +70,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth - After training, you may use the model to perform inference on input data in the SNLI data format. 
The premise and hypotheses sentences are specified with the command-line flags `--inference_premise` and `--inference_hypothesis`, - respecitvely. Each sentence should include the words, as well as parentheses + respectively. Each sentence should include the words, as well as parentheses representing a binary parsing of the sentence. The words and parentheses should all be separated by spaces. For instance, diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 50d1b778d9..c90c66912d 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -604,7 +604,7 @@ def _find_cupti_header_dir(repository_ctx, cuda_config): for relative_path in CUPTI_HEADER_PATHS: if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists: return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cupti.h under %s" % cuda_toolkit_path) + auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS])) def _find_cupti_lib(repository_ctx, cuda_config): diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index c2adf578c7..a058c46cc4 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -34,6 +34,7 @@ filegroup( "@org_tensorflow//tensorflow:windows": [ "@mkl_windows//:LICENSE", ], + "//conditions:default": [], }), visibility = ["//visibility:public"], ) @@ -54,5 +55,6 @@ cc_library( "@mkl_windows//:mkl_headers", "@mkl_windows//:mkl_libs_windows", ], + "//conditions:default": [], }), ) -- GitLab From 3d067dfdbd93f75caae4c0835eeee7cee10e10e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 19:20:31 -0700 Subject: [PATCH 141/902] Update ops-related pbtxt files. 
PiperOrigin-RevId: 197991672 --- .../core/ops/compat/ops_history.v1.pbtxt | 151 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 35 +++- 2 files changed, 185 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index c867674489..e5c568daa6 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -14194,6 +14194,92 @@ op { } } } +op { + name: "Conv3DBackpropInputV2" + input_arg { + name: "input_sizes" + type_attr: "Tshape" + } + input_arg { + name: "filter" + type_attr: "T" + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NDHWC" + } + allowed_values { + list { + s: "NDHWC" + s: "NCDHW" + } + } + } + attr { + name: "dilations" + type: "list(int)" + default_value { + list { + i: 1 + i: 1 + i: 1 + i: 1 + i: 1 + } + } + } + attr { + name: "Tshape" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "Copy" input_arg { @@ -42361,6 +42447,21 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexFullMatch" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_BOOL + } +} op { name: "RegexReplace" input_arg { @@ -55295,6 +55396,56 @@ op { } } } +op { + name: "SegmentMean" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + 
type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "SegmentMin" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index e45125a1e8..62aaedb470 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5927,7 +5927,7 @@ op { name: "Conv3DBackpropInputV2" input_arg { name: "input_sizes" - type: DT_INT32 + type_attr: "Tshape" } input_arg { name: "filter" @@ -5995,6 +5995,19 @@ op { } } } + attr { + name: "Tshape" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } } op { name: "Copy" @@ -21494,6 +21507,21 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexFullMatch" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_BOOL + } +} op { name: "RegexReplace" input_arg { @@ -26047,9 +26075,14 @@ op { type: DT_UINT8 type: DT_INT16 type: DT_INT8 + type: DT_COMPLEX64 type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_BFLOAT16 type: DT_UINT16 + type: DT_COMPLEX128 type: DT_HALF type: DT_UINT32 type: DT_UINT64 -- GitLab From d5a71ef8355ebb8d983c5e7f5b1e840cf2d00c17 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 24 May 2018 19:45:27 -0700 Subject: [PATCH 142/902] Initialize the score threshold to -inf to avoid filtering out negative logits PiperOrigin-RevId: 197993147 --- .../core/kernels/non_max_suppression_op.cc | 14 +++---- .../kernels/non_max_suppression_op_test.cc | 38 +++++++++++++++++++ tensorflow/python/ops/image_ops_impl.py | 2 +- .../tools/api/golden/tensorflow.image.pbtxt | 2 +- 4 files changed, 45 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/non_max_suppression_op.cc index 2b010f816d..23fdfe944a 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cc +++ b/tensorflow/core/kernels/non_max_suppression_op.cc @@ -117,10 +117,6 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& boxes, } } - auto suppress_func = [iou_threshold](const float x) { - return x <= iou_threshold ? 1 : 0; - }; - std::vector selected; std::vector selected_scores; Candidate next_candidate; @@ -134,14 +130,14 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& boxes, // Overlapping boxes are likely to have similar scores, // therefore we iterate through the previously selected boxes backwards // in order to see if `next_candidate` should be suppressed. 
+ bool should_select = true; for (int j = selected.size() - 1; j >= 0; --j) { iou = IOU(boxes_data, next_candidate.box_index, selected[j]); if (iou == 0.0) continue; - next_candidate.score *= suppress_func(iou); - if (next_candidate.score <= score_threshold) break; + if (iou > iou_threshold) should_select = false; } - if (original_score == next_candidate.score) { + if (should_select) { selected.push_back(next_candidate.box_index); selected_scores.push_back(next_candidate.score); } @@ -178,7 +174,7 @@ class NonMaxSuppressionOp : public OpKernel { errors::InvalidArgument("max_output_size must be 0-D, got shape ", max_output_size.shape().DebugString())); - const float score_threshold_val = 0.0; + const float score_threshold_val = std::numeric_limits::lowest(); DoNonMaxSuppressionOp(context, boxes, scores, max_output_size, iou_threshold_, score_threshold_val); } @@ -211,7 +207,7 @@ class NonMaxSuppressionV2Op : public OpKernel { iou_threshold.shape().DebugString())); const float iou_threshold_val = iou_threshold.scalar()(); - const float score_threshold_val = 0.0; + const float score_threshold_val = std::numeric_limits::lowest(); DoNonMaxSuppressionOp(context, boxes, scores, max_output_size, iou_threshold_val, score_threshold_val); } diff --git a/tensorflow/core/kernels/non_max_suppression_op_test.cc b/tensorflow/core/kernels/non_max_suppression_op_test.cc index c71aa23e01..ed7db313bd 100644 --- a/tensorflow/core/kernels/non_max_suppression_op_test.cc +++ b/tensorflow/core/kernels/non_max_suppression_op_test.cc @@ -86,6 +86,23 @@ TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostTwoBoxesFromThreeClusters) { test::ExpectTensorEqual(expected, *GetOutput(0)); } +TEST_F(NonMaxSuppressionOpTest, TestSelectWithNegativeScores) { + MakeOp(.5); + AddInputFromArray( + TensorShape({6, 4}), + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray( + TensorShape({6}), {.9f - 10.0f, .75f - 10.0f, .6f - 10.0f, .95f - 
10.0f, + .5f - 10.0f, .3f - 10.0f}); + AddInputFromArray(TensorShape({}), {6}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected(allocator(), DT_INT32, TensorShape({3})); + test::FillValues(&expected, {3, 0, 5}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + TEST_F(NonMaxSuppressionOpTest, TestSelectAtMostThirtyBoxesFromThreeClusters) { MakeOp(.5); AddInputFromArray( @@ -393,6 +410,27 @@ TEST_F(NonMaxSuppressionV3OpTest, test::ExpectTensorEqual(expected, *GetOutput(0)); } +TEST_F(NonMaxSuppressionV3OpTest, + TestSelectFromThreeClustersWithScoreThresholdZeroScores) { + MakeOp(); + AddInputFromArray( + TensorShape({6, 4}), + {0, 0, 1, 1, 0, 0.1f, 1, 1.1f, 0, -0.1f, 1, 0.9f, + 0, 10, 1, 11, 0, 10.1f, 1, 11.1f, 0, 100, 1, 101}); + AddInputFromArray(TensorShape({6}), {.1, 0, 0, .3, .2, -5.0}); + // If we ask for more boxes than we actually expect to get back; + // should still only get 2 boxes back. + AddInputFromArray(TensorShape({}), {6}); + AddInputFromArray(TensorShape({}), {0.5f}); + AddInputFromArray(TensorShape({}), {-3.0f}); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected(allocator(), DT_INT32, TensorShape({2})); + test::FillValues(&expected, {3, 0}); + + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + TEST_F(NonMaxSuppressionV3OpTest, TestSelectFromThreeClustersFlippedCoordinates) { MakeOp(); diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 52141ba24a..16aa85ca10 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1827,7 +1827,7 @@ def non_max_suppression(boxes, scores, max_output_size, iou_threshold=0.5, - score_threshold=0.0, + score_threshold=float('-inf'), name=None): """Greedily selects a subset of bounding boxes in descending order of score. 
diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index acc3fc4c5b..2f12d46cb1 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -110,7 +110,7 @@ tf_module { } member_method { name: "non_max_suppression" - argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'0.0\', \'None\'], " + argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'float('-inf')\', \'None\'], " } member_method { name: "pad_to_bounding_box" -- GitLab From 732a7e30e0be5c09fcc735e257965624a77b6dc5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 19:49:05 -0700 Subject: [PATCH 143/902] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 197993384 --- tensorflow/go/op/wrappers.go | 476 +++++++++++++++++------------------ 1 file changed, 238 insertions(+), 238 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 36db3dda6b..372f639fdc 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2674,29 +2674,50 @@ func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_uppe return op.Output(0) } -// Clips tensor values to a specified min and max. +// Returns the batched diagonal part of a batched tensor. // -// Given a tensor `t`, this operation returns a tensor of the same type and -// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. -// Any values less than `clip_value_min` are set to `clip_value_min`. Any values -// greater than `clip_value_max` are set to `clip_value_max`. +// This operation returns a tensor with the `diagonal` part +// of the batched `input`. 
The `diagonal` part is computed as follows: +// +// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +// tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where: +// +// `diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`. +// +// The input must be at least a matrix. +// +// For example: +// +// ``` +// # 'input' is [[[1, 0, 0, 0] +// [0, 2, 0, 0] +// [0, 0, 3, 0] +// [0, 0, 0, 4]], +// [[5, 0, 0, 0] +// [0, 6, 0, 0] +// [0, 0, 7, 0] +// [0, 0, 0, 8]]] +// +// and input.shape = (2, 4, 4) +// +// tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]] +// +// which has shape (2, 4) +// ``` // // Arguments: -// t: A `Tensor`. -// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The minimum value to clip by. -// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The maximum value to clip by. +// input: Rank `k` tensor where `k >= 2`. // -// Returns A clipped `Tensor` with the same shape as input 't'. -func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { +// Returns The extracted diagonal(s) having shape +// `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`. +func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ClipByValue", + Type: "MatrixDiagPart", Input: []tf.Input{ - t, clip_value_min, clip_value_max, + input, }, } op := scope.AddOperation(opspec) @@ -7310,6 +7331,47 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) + +// RandomPoissonSeed sets the optional seed attribute to value. 
+// If not specified, defaults to 0 +func RandomPoissonSeed(value int64) RandomPoissonAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomPoissonSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed2(value int64) RandomPoissonAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Use RandomPoissonV2 instead. +// +// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 +func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomPoisson", + Input: []tf.Input{ + shape, rate, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) @@ -7768,47 +7830,6 @@ func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf return output_indices, output_values, output_shape } -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) - -// RandomPoissonSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomPoissonSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed2(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Use RandomPoissonV2 instead. 
-// -// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 -func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomPoisson", - Input: []tf.Input{ - shape, rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. type ResourceSparseApplyFtrlV2Attr func(optionalAttr) @@ -10094,6 +10115,43 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o return op.Output(0) } +// Says whether the targets are in the top `K` predictions. +// +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. +// +// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// +// Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. +// +// Returns Computed precision at `k` as a `bool Tensor`. 
+func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "InTopKV2", + Input: []tf.Input{ + predictions, targets, k, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. type DecodeAndCropJpegAttr func(optionalAttr) @@ -10949,101 +11007,6 @@ func Fact(scope *Scope) (fact tf.Output) { return op.Output(0) } -// AngleAttr is an optional argument to Angle. -type AngleAttr func(optionalAttr) - -// AngleTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func AngleTout(value tf.DataType) AngleAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the argument of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the argument of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part. -// -// The argument returned by this operation is of the form \\(atan2(b, a)\\). -// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.angle(input) ==> [2.0132, 1.056] -// ``` -// -// @compatibility(numpy) -// Equivalent to np.angle. -// @end_compatibility -func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Angle", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// VarHandleOpAttr is an optional argument to VarHandleOp. -type VarHandleOpAttr func(optionalAttr) - -// VarHandleOpContainer sets the optional container attribute to value. 
-// -// value: the container this variable is placed in. -// If not specified, defaults to "" -func VarHandleOpContainer(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// VarHandleOpSharedName sets the optional shared_name attribute to value. -// -// value: the name by which this variable is referred to. -// If not specified, defaults to "" -func VarHandleOpSharedName(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a Variable resource. -// -// Arguments: -// dtype: the type of this variable. Must agree with the dtypes -// of all ops using this variable. -// shape: The (possibly partially specified) shape of this variable. -func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "VarHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Elementwise computes the bitwise XOR of `x` and `y`. // // The result will have those bits set, that are different in `x` and `y`. The @@ -18002,43 +17965,6 @@ func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padd return op.Output(0) } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. 
-// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InTopKV2", - Input: []tf.Input{ - predictions, targets, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Assigns a new value to a variable. // // Any ReadVariableOp with a control dependency on this op is guaranteed to return @@ -19594,6 +19520,130 @@ func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...Or return op.Output(0) } +// VarHandleOpAttr is an optional argument to VarHandleOp. +type VarHandleOpAttr func(optionalAttr) + +// VarHandleOpContainer sets the optional container attribute to value. +// +// value: the container this variable is placed in. +// If not specified, defaults to "" +func VarHandleOpContainer(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// VarHandleOpSharedName sets the optional shared_name attribute to value. +// +// value: the name by which this variable is referred to. +// If not specified, defaults to "" +func VarHandleOpSharedName(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a Variable resource. +// +// Arguments: +// dtype: the type of this variable. Must agree with the dtypes +// of all ops using this variable. 
+// shape: The (possibly partially specified) shape of this variable. +func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "VarHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AngleAttr is an optional argument to Angle. +type AngleAttr func(optionalAttr) + +// AngleTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func AngleTout(value tf.DataType) AngleAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the argument of a complex number. +// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the argument of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part. +// +// The argument returned by this operation is of the form \\(atan2(b, a)\\). +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.angle(input) ==> [2.0132, 1.056] +// ``` +// +// @compatibility(numpy) +// Equivalent to np.angle. +// @end_compatibility +func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Angle", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Clips tensor values to a specified min and max. +// +// Given a tensor `t`, this operation returns a tensor of the same type and +// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. 
+// Any values less than `clip_value_min` are set to `clip_value_min`. Any values +// greater than `clip_value_max` are set to `clip_value_max`. +// +// Arguments: +// t: A `Tensor`. +// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The minimum value to clip by. +// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The maximum value to clip by. +// +// Returns A clipped `Tensor` with the same shape as input 't'. +func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ClipByValue", + Input: []tf.Input{ + t, clip_value_min, clip_value_max, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Counts the number of occurrences of each value in an integer array. // // Outputs a vector with length `size` and the same dtype as `weights`. If @@ -21321,7 +21371,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color @@ -26649,56 +26699,6 @@ func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) { return op.Output(0) } -// Returns the batched diagonal part of a batched tensor. -// -// This operation returns a tensor with the `diagonal` part -// of the batched `input`. 
The `diagonal` part is computed as follows: -// -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where: -// -// `diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`. -// -// The input must be at least a matrix. -// -// For example: -// -// ``` -// # 'input' is [[[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]], -// [[5, 0, 0, 0] -// [0, 6, 0, 0] -// [0, 0, 7, 0] -// [0, 0, 0, 8]]] -// -// and input.shape = (2, 4, 4) -// -// tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]] -// -// which has shape (2, 4) -// ``` -// -// Arguments: -// input: Rank `k` tensor where `k >= 2`. -// -// Returns The extracted diagonal(s) having shape -// `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`. -func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiagPart", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes the absolute value of a tensor. // // Given a tensor `x`, this operation returns a tensor containing the absolute -- GitLab From a797ded69e4fb2d8e7cd23b5f73a09abaabb31c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 24 May 2018 20:36:45 -0700 Subject: [PATCH 144/902] Extracts the 'simplify tile node' optimization into its own method. 
PiperOrigin-RevId: 197996636 --- .../grappler/optimizers/constant_folding.cc | 70 ++++++++++++------- .../grappler/optimizers/constant_folding.h | 3 + 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8cd1968df7..a64e9a38fd 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1885,32 +1885,14 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsTile(*node) && - properties->GetInputProperties(node->name()).size() == 2) { - const auto& m = properties->GetInputProperties(node->name())[1]; - if (TensorShape::IsValid(m.shape()) && m.has_value()) { - Tensor multiplies(m.dtype(), m.shape()); - if (!multiplies.FromProto(m.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - m.value().DebugString()); - } - // The node is replaceable iff all values in multiplies are 1. 
- bool replaceable = true; - if (multiplies.dtype() == DT_INT32) { - for (int j = 0; replaceable && j < multiplies.vec().size(); ++j) { - replaceable &= multiplies.vec()(j) == 1; - } - } else { - for (int j = 0; replaceable && j < multiplies.vec().size(); - ++j) { - replaceable &= multiplies.vec()(j) == 1; - } - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } - } + bool simplify_tile_successful = false; + Status simplify_tile_status = + SimplifyTile(*properties, use_shape_info, optimized_graph, node, + &simplify_tile_successful); + if (!simplify_tile_status.ok()) { + return simplify_tile_status; + } else if (simplify_tile_successful) { + return Status::OK(); } bool simplify_pad_successful = false; @@ -1996,6 +1978,42 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +Status ConstantFolding::SimplifyTile(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, + bool* success) { + if (use_shape_info && IsTile(*node) && + properties.GetInputProperties(node->name()).size() == 2) { + const auto& m = properties.GetInputProperties(node->name())[1]; + if (TensorShape::IsValid(m.shape()) && m.has_value()) { + Tensor multiplies(m.dtype(), m.shape()); + if (!multiplies.FromProto(m.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + m.value().DebugString()); + } + // The node is replaceable iff all values in multiplies are 1. 
+ bool replaceable = true; + if (multiplies.dtype() == DT_INT32) { + for (int j = 0; replaceable && j < multiplies.vec().size(); ++j) { + replaceable &= multiplies.vec()(j) == 1; + } + } else { + for (int j = 0; replaceable && j < multiplies.vec().size(); + ++j) { + replaceable &= multiplies.vec()(j) == 1; + } + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + *success = true; + return Status::OK(); + } + } + } + *success = false; + return Status::OK(); +} + Status ConstantFolding::SimplifyPad(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index fa9249f50c..30e63544a7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -178,6 +178,9 @@ class ConstantFolding : public GraphOptimizer { Status SimplifyPad(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, bool* success); + // Simplifies a Tile operation to an Identity operation if applicable. + Status SimplifyTile(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success); // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From e7daf7fee5c5e1168fbaa4fdc2dc85bf1bb960c6 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 25 May 2018 01:36:23 -0700 Subject: [PATCH 145/902] Fix typo, fix build. 
PiperOrigin-RevId: 198017870 --- tensorflow/tools/api/golden/tensorflow.image.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index 2f12d46cb1..87543e374b 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -110,7 +110,7 @@ tf_module { } member_method { name: "non_max_suppression" - argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'float('-inf')\', \'None\'], " + argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'score_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'-inf\', \'None\'], " } member_method { name: "pad_to_bounding_box" -- GitLab From d4a4f1389f3cc48c81b6cd90cdd793d786195312 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 25 May 2018 02:23:06 -0700 Subject: [PATCH 146/902] eager: Update introduction notebooks. 
PiperOrigin-RevId: 198022387 --- .../python/examples/notebooks/1_basics.ipynb | 620 ++++++++---------- .../examples/notebooks/2_gradients.ipynb | 474 +++---------- .../notebooks/3_training_models.ipynb | 443 +++++++++++++ 3 files changed, 827 insertions(+), 710 deletions(-) create mode 100644 tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb diff --git a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb index 9fd2d8d125..51d10a7784 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/1_basics.ipynb @@ -1,495 +1,429 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Eager Execution Tutorial: Basics", - "version": "0.3.2", - "views": {}, - "default_view": {}, - "provenance": [ - { - "file_id": "0B0kLcpwLFwKEVm9XNkFueGk4bTg", - "timestamp": 1504118841551 - } - ] - } - }, "cells": [ { + "cell_type": "markdown", "metadata": { - "id": "U9i2Dsh-ziXr", - "colab_type": "text" + "colab_type": "text", + "id": "U9i2Dsh-ziXr" }, - "cell_type": "markdown", "source": [ - "# Eager Execution Tutorial: Basics\n", + "# An introduction to TensorFlow\n", "\n", - "This notebook introduces the basics of using TensorFlow's eager execution capabilities. It covers concepts such as:\n", + "This is an introductory tutorial for using TensorFlow. It will cover:\n", "\n", "* Importing required packages\n", - "* Enabling eager execution\n", - "* Creating and using TensorFlow Tensors and Variables\n", - "* Using TensorFlow interactively\n", - "* Using GPUs with eager execution enabled\n", - "\n", - "This notebook does *not* cover modeling topics, such as gradients." 
+ "* Creating and using Tensors\n", + "* Using GPU acceleration\n" ] }, { + "cell_type": "markdown", "metadata": { - "id": "z1JcS5iBXMRO", - "colab_type": "text" + "colab_type": "text", + "id": "z1JcS5iBXMRO" }, - "cell_type": "markdown", "source": [ - "# Step 1: Import Eager\n", + "## Import TensorFlow\n", "\n", - "The key imports for eager execution are the following:" + "To get started, import the `tensorflow` module and enable eager execution.\n", + "Eager execution enables a more interactive frontend to TensorFlow, the details of which we will discuss much later." ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "RlIWhyeLoYnG", - "colab_type": "code", + "cellView": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, - "cellView": "code" + "colab_type": "code", + "id": "RlIWhyeLoYnG" }, - "cell_type": "code", + "outputs": [], "source": [ - "# Import TensorFlow.\n", "import tensorflow as tf\n", "\n", - "# Import TensorFlow eager execution support (subject to future changes).\n", - "tfe = tf.contrib.eager" - ], - "execution_count": 0, - "outputs": [] + "tf.enable_eager_execution()" + ] }, { + "cell_type": "markdown", "metadata": { - "id": "H9UySOPLXdaw", - "colab_type": "text" + "colab_type": "text", + "id": "H9UySOPLXdaw" }, - "cell_type": "markdown", "source": [ - "# Step 2: Enable eager execution\n", + "## Tensors\n", "\n", - "All future TensorFlow calls will execute the\n", - "underlying TensorFlow ops immediately:" + "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `Tensor` objects have a data type and a shape. Additionally, Tensors can reside in accelerator (like GPU) memory. TensorFlow offers a rich library of operations ([tf.add](https://www.tensorflow.org/api_docs/python/tf/add), [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul), [tf.linalg.inv](https://www.tensorflow.org/api_docs/python/tf/linalg/inv) etc.) that consume and produce Tensors. 
These operations automatically convert native Python types. For example:\n" ] }, { - "metadata": { - "id": "WPTUfGq6kJ5w", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "cellView": "code" - }, "cell_type": "code", - "source": [ - "tf.enable_eager_execution()" - ], "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "twBfWd5xyu_d", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Step 3: Interactively Use TensorFlow!\n", - "\n", - "Now you can call TensorFlow functions and get results, immediately! No more `tf.Sessions`!\n", - "\n", - "TensorFlow will automatically wrap native Python types for you with operator overloading for TensorFlow Tensors." - ] - }, - { "metadata": { - "id": "ngUe237Wt48W", - "colab_type": "code", + "cellView": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "height": 125 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 320, + "status": "ok", + "timestamp": 1526420535530, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 }, - "cellView": "code" + "id": "ngUe237Wt48W", + "outputId": "b1a1cd60-4eb3-443d-cd6b-68406390784e" }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tf.Tensor(3, shape=(), dtype=int32)\n", + "tf.Tensor([4 6], shape=(2,), dtype=int32)\n", + "tf.Tensor(25, shape=(), dtype=int32)\n", + "tf.Tensor(6, shape=(), dtype=int32)\n", + "tf.Tensor(aGVsbG8gd29ybGQ, shape=(), dtype=string)\n", + "tf.Tensor(13, shape=(), dtype=int32)\n" + ] + } + ], "source": [ "print(tf.add(1, 2))\n", "print(tf.add([1, 2], [3, 4]))\n", "print(tf.square(5))\n", "print(tf.reduce_sum([1, 2, 3]))\n", "print(tf.encode_base64(\"hello world\"))\n", - "print(\"\")\n", "\n", - "x = tf.constant(2)\n", - "y = tf.constant(3)\n", - "print(x * y + 1)\n", - "\n", - "# Most TensorFlow ops are directly usable with eager execution, 
giving\n", - "# results immediately.\n", - "print(tf.contrib.signal.hamming_window(x * y + 1))" - ], - "execution_count": 0, - "outputs": [] + "# Operator overloading is also supported\n", + "print(tf.square(2) + tf.square(3))" + ] }, { + "cell_type": "markdown", "metadata": { - "id": "IDY4WsYRhP81", - "colab_type": "text" + "colab_type": "text", + "id": "IDY4WsYRhP81" }, - "cell_type": "markdown", "source": [ - "Numpy arrays are supported, too:" + "Each Tensor has a shape and a datatype" ] }, { - "metadata": { - "id": "lCUWzso6mbqR", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, "cell_type": "code", - "source": [ - "import numpy as np\n", - "\n", - "ones = np.ones([3, 3])\n", - "\n", - "print(\"numpy 3x3 matrix of 1s:\")\n", - "print(ones)\n", - "print(\"\")\n", - "\n", - "print(\"Multiplied by 42:\")\n", - "print(tf.multiply(ones, 42))" - ], "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "PBNP8yTRfu_X", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Step 4: Define and Print TensorFlow Variables\n", - "\n", - "To define TensorFlow variables, use the `get_variable()` function as follows:" - ] - }, - { "metadata": { - "id": "3Twf_Rw-gQFM", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "height": 53 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 215, + "status": "ok", + "timestamp": 1526420538162, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 }, - "cellView": "code" + "id": "srYWH1MdJNG7", + "outputId": "5e4ac41c-5115-4e50-eba0-42e249c16561" }, - "cell_type": "code", - "source": [ - "x = tfe.Variable(0.)" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 2)\n", + "\u003cdtype: 'int32'\u003e\n" + ] + } ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "45G7094TxsMb", - "colab_type": "text" - 
}, - "cell_type": "markdown", "source": [ - "## Printing TensorFlow Variables" + "x = tf.matmul([[1]], [[2, 3]])\n", + "print(x.shape)\n", + "print(x.dtype)" ] }, { + "cell_type": "markdown", "metadata": { - "id": "UJBJeZ5XxuwA", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "cellView": "code" + "colab_type": "text", + "id": "eBPw8e8vrsom" }, - "cell_type": "code", "source": [ - "# This does NOT print the Variable's actual value:\n", - "print(\"Printing a TensorFlow Variable:\")\n", - "print(x)\n", - "print(\"\")\n", + "The most obvious differences between NumPy arrays and TensorFlow Tensors are:\n", "\n", - "\n", - "print(\"Printing a TensorFlow Variable's value as a numpy array:\")\n", - "print(x.numpy())" - ], - "execution_count": 0, - "outputs": [] + "1. Tensors can be backed by accelerator memory (like GPU, TPU).\n", + "2. Tensors are immutable." + ] }, { + "cell_type": "markdown", "metadata": { - "id": "2njjWHcTpBEn", - "colab_type": "text" + "colab_type": "text", + "id": "Dwi1tdW3JBw6" }, - "cell_type": "markdown", "source": [ - "## Changing a TensorFlow Variable's value\n", + "### NumPy Compatibility\n", "\n", - "To change a TensorFlow Variable's value, use its `.assign()` or `.assign_add()` method:" + "Conversion between TensorFlow Tensors and NumPy ndarrays is quite simple as:\n", + "* TensorFlow operations automatically convert NumPy ndarrays to Tensors.\n", + "* NumPy operations automatically convert Tensors to NumPy ndarrays.\n", + "\n", + "Tensors can be explicitly converted to NumPy ndarrays by invoking the `.numpy()` method on them.\n", + "These conversions are typically cheap as the array and Tensor share the underlying memory representation if possible. However, sharing the underlying representation isn't always possible since the Tensor may be hosted in GPU memory while NumPy arrays are always backed by host memory, and the conversion will thus involve a copy from GPU to host memory." 
] }, { - "metadata": { - "id": "v3wr6Erbo_hB", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, "cell_type": "code", - "source": [ - "x.assign(42)\n", - "print(x)\n", - "\n", - "x.assign_add(3)\n", - "print(x)" - ], "execution_count": 0, - "outputs": [] - }, - { "metadata": { - "id": "uhtynjHVpTB5", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Use a Variable just like any other Tensor" - ] - }, - { - "metadata": { - "id": "7PbktdnHoehR", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } - } + }, + "height": 251 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 238, + "status": "ok", + "timestamp": 1526420540562, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "lCUWzso6mbqR", + "outputId": "fd0a22bc-8249-49dd-fcbd-63161cc47e46" }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TensorFlow operations convert numpy arrays to Tensors automatically\n", + "tf.Tensor(\n", + "[[ 42. 42. 42.]\n", + " [ 42. 42. 42.]\n", + " [ 42. 42. 42.]], shape=(3, 3), dtype=float64)\n", + "And NumPy operations convert Tensors to numpy arrays automatically\n", + "[[ 43. 43. 43.]\n", + " [ 43. 43. 43.]\n", + " [ 43. 43. 43.]]\n", + "The .numpy() method explicitly converts a Tensor to a numpy array\n", + "[[ 42. 42. 42.]\n", + " [ 42. 42. 42.]\n", + " [ 42. 42. 
42.]]\n" + ] + } + ], "source": [ - "print(x + 3)\n", + "import numpy as np\n", "\n", - "# This code will broadcast the value across the list of numbers:\n", - "print(x * [1, 2, 4])" - ], - "execution_count": 0, - "outputs": [] + "ndarray = np.ones([3, 3])\n", + "\n", + "print(\"TensorFlow operations convert numpy arrays to Tensors automatically\")\n", + "tensor = tf.multiply(ndarray, 42)\n", + "print(tensor)\n", + "\n", + "\n", + "print(\"And NumPy operations convert Tensors to numpy arrays automatically\")\n", + "print(np.add(tensor, 1))\n", + "\n", + "print(\"The .numpy() method explicitly converts a Tensor to a numpy array\")\n", + "print(tensor.numpy())" + ] }, { + "cell_type": "markdown", "metadata": { - "id": "GVChqwlwy1SI", - "colab_type": "text" + "colab_type": "text", + "id": "PBNP8yTRfu_X" }, - "cell_type": "markdown", "source": [ - "# Step 5: Debug Errors with Instant Feedback\n", + "## GPU acceleration\n", "\n", - "TensorFlow's eager execution helps you identify and debug runtime issues through interactive exploration of code snippets.\n", - "\n", - "Below, we'll define a length-4 vector, and attempt two `tf.slice()` operations,\n", - "one being legal and the other being illegal, leading to a runtime error that is\n", - "raised immediately." + "Many TensorFlow operations can be accelerated by using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation (and copies the tensor between CPU and GPU memory if necessary). Tensors produced by an operation are typically backed by the memory of the device on which the operation executed. 
For example:" ] }, { - "metadata": { - "id": "23ap04N0v4k0", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "cellView": "code" - }, "cell_type": "code", - "source": [ - "vector = tf.constant([10.0, 20.0, 30.0, 40.0])" - ], "execution_count": 0, - "outputs": [] - }, - { "metadata": { - "id": "FCUMsIYxxRRa", - "colab_type": "code", + "cellView": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "height": 53 }, - "cellView": "code" - }, - "cell_type": "code", - "source": [ - "# Works, because the values of `begin` and `size` (the 2nd and 3rd input\n", - "# arguments) are within the bound of `vector`.\n", - "print(tf.slice(vector, [1], [3]))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "T8me2oCNxpFp", "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } + "executionInfo": { + "elapsed": 340, + "status": "ok", + "timestamp": 1526420543562, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 }, - "cellView": "code" + "id": "3Twf_Rw-gQFM", + "outputId": "2239ae2b-adf3-4895-b1f3-464cf5361d1b" }, - "cell_type": "code", - "source": [ - "# The following does NOT work, because the value of `size` (the 3rd\n", - "# argument) causes the indices to go out of the bounds of `vector`. 
The\n", - "# error is raised immediately.\n", - "try:\n", - " print(tf.slice(vector, [1], [4]))\n", - "except tf.OpError as e:\n", - " print(\"Caught error: %s\" % e)" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Is there a GPU available: False\n", + "Is the Tensor on GPU #0: False\n" + ] + } ], - "execution_count": 0, - "outputs": [] + "source": [ + "x = tf.random_uniform([3, 3])\n", + "\n", + "print(\"Is there a GPU available: \"),\n", + "print(tf.test.is_gpu_available())\n", + "\n", + "print(\"Is the Tensor on GPU #0: \"),\n", + "print(x.device.endswith('GPU:0'))" + ] }, { + "cell_type": "markdown", "metadata": { - "id": "irxJhAgar84v", - "colab_type": "text" + "colab_type": "text", + "id": "vpgYzgVXW2Ud" }, - "cell_type": "markdown", "source": [ - "# Step 6: Using the GPU\n", - "\n", - "You can explicitly place Tensors on the GPU by calling a Tensor's `.gpu()` method. The `.device` property tells you whether the Tensor is backed by CPU or GPU memory.\n", + "### Device Names\n", "\n", - "The first operation executing on the GPU may be slow as TensorFlow initializes. Subsequent uses will be much faster." + "The `Tensor.device` property provides a fully qualified string name of the device hosting the contents of the Tensor. This name encodes a bunch of details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of TensorFlow programs, but we'll skip that for now. The string will end with `GPU:\u003cN\u003e` if the tensor is placed on the `N`-th GPU on the host." 
] }, { + "cell_type": "markdown", "metadata": { - "id": "7J4N9baqaKCL", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } + "colab_type": "text", + "id": "ZWZQCimzuqyP" }, - "cell_type": "code", "source": [ - "# Create some Tensors\n", - "SIZE = 1000\n", - "tensor = tf.random_normal([SIZE, SIZE])\n", - "print(tensor.device)\n", "\n", "\n", - "if tf.test.is_gpu_available():\n", - " gpu_tensor = tensor.gpu()\n", - " cpu_tensor = tensor.cpu()\n", - "else:\n", - " print(\"GPU not available.\")\n", - " cpu_tensor = tensor" - ], - "execution_count": 0, - "outputs": [] + "### Explicit Device Placement\n", + "\n", + "The term \"placement\" in TensorFlow refers to how individual operations are assigned (placed on) a device for execution. As mentioned above, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation on, and copies Tensors to that device if needed. However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager. 
For example:" + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "4E-2n7VbzY1n", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } - } + }, + "height": 53 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 1762, + "status": "ok", + "timestamp": 1526420547562, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "RjkNZTuauy-Q", + "outputId": "2e613293-ccac-4db2-b793-8ceb5b5adcfd" }, - "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "On CPU:\n", + "10 loops, best of 3: 35.8 ms per loop\n" + ] + } + ], "source": [ - "# Time a CPU-based matrix multiplication\n", + "def time_matmul(x):\n", + " %timeit tf.matmul(x, x)\n", "\n", - "print(\"Time to conduct matmul on CPU:\")\n", - "%time tf.matmul(cpu_tensor, cpu_tensor)" - ], - "execution_count": 0, - "outputs": [] + "# Force execution on CPU\n", + "print(\"On CPU:\")\n", + "with tf.device(\"CPU:0\"):\n", + " x = tf.random_uniform([1000, 1000])\n", + " assert x.device.endswith(\"CPU:0\")\n", + " time_matmul(x)\n", + "\n", + "# Force execution on GPU #0 if available\n", + "if tf.test.is_gpu_available():\n", + " with tf.device(\"GPU:0\"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.\n", + " x = tf.random_uniform([1000, 1000])\n", + " assert x.device.endswith(\"GPU:0\")\n", + " time_matmul(x)" + ] }, { + "cell_type": "markdown", "metadata": { - "id": "vbSFW-T5zhZF", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } + "colab_type": "text", + "id": "YEOJTNiOvnpQ" }, - "cell_type": "code", "source": [ - "# Time GPU-based matrix multiplications.\n", + "## Next Steps\n", "\n", - "if tf.test.is_gpu_available():\n", - " # First use of the GPU will be slow:\n", - " print(\"Time to conduct first matmul on GPU:\")\n", - " %time tf.matmul(gpu_tensor, gpu_tensor)\n", - " print()\n", - "\n", - " # 
Subsequent uses are much faster:\n", - " print(\"Time to conduct second matmul on GPU:\")\n", - " %time tf.matmul(gpu_tensor, gpu_tensor)" - ], - "execution_count": 0, - "outputs": [] + "In this tutorial we covered the most fundamental concepts in TensorFlow - `Tensor`s, operations, and devices.\n", + "In [the next tutorial](https://github.com/tensorflow/models/tree/master/official/contrib/eager/python/examples/notebooks/2_gradients.ipynb) we will cover automatic differentiation - a building block required for training many machine learning models like neural networks." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "default_view": {}, + "name": "TensorFlow: An introduction", + "provenance": [], + "version": "0.3.2", + "views": {} } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb index 1e65b27bc8..9c1af9c208 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/2_gradients.ipynb @@ -7,12 +7,9 @@ "id": "vDJ4XzMqodTy" }, "source": [ - "# Eager Execution: Working with Gradients\n", + "# Automatic Differentiation\n", "\n", - "This notebook demonstrates:\n", - "\n", - "* How to get gradients using TensorFlow's eager execution capabilities\n", - "* How to apply the gradients so you can update your variables" + "In the previous tutorial we introduced `Tensor`s and operations on them. In this tutorial we will cover [automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation), a key technique for optimizing machine learning models." 
] }, { @@ -22,7 +19,7 @@ "id": "GQJysDM__Qb0" }, "source": [ - "# Setup: Import eager and enable eager execution.\n" + "## Setup\n" ] }, { @@ -40,12 +37,10 @@ }, "outputs": [], "source": [ - "# Import TensorFlow.\n", "import tensorflow as tf\n", + "tf.enable_eager_execution()\n", "\n", - "\n", - "# Enable eager execution.\n", - "tf.enable_eager_execution()" + "tfe = tf.contrib.eager # Shorthand for some symbols" ] }, { @@ -55,28 +50,15 @@ "id": "1CLWJl0QliB0" }, "source": [ - "# Fitting a Simple Linear Model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-39gouo7mtgu" - }, - "source": [ - "## Step 1: Synthesize some data\n", - "\n", - "To demonstrate fitting a model with TensorFlow's eager execution, we'll fit a linear model to some synthesized data (which includes some noise).\n", + "## Derivatives of a function\n", "\n", - "In the code, we use the variable names `w` and `b` to represent the single weight and bias we'll use to fit our model." + "TensorFlow provides APIs for automatic differentiation - computing the derivative of a function. The way that more closely mimics the math is to encapsulate the computation in a Python function, say `f`, and use `tfe.gradients_function` to create a function that computes the derivatives of `f` with respect to its arguments. If you're familiar with [autograd](https://github.com/HIPS/autograd) for differentiating numpy functions, this will be familiar. 
For example: " ] }, { "cell_type": "code", "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, @@ -84,105 +66,53 @@ } }, "colab_type": "code", - "id": "rQsdCg9PfIL-" + "id": "9FViq92UX7P8" }, "outputs": [], "source": [ - "# The constants we'll try to fit our variables to:\n", - "true_w = 3\n", - "true_b = 2\n", - "\n", - "NUM_EXAMPLES = 1000\n", + "from math import pi\n", "\n", - "# Our inputs:\n", - "inputs = tf.random_normal(shape=[NUM_EXAMPLES, 1])\n", + "def f(x):\n", + " return tf.square(tf.sin(x))\n", "\n", - "# Our labels, with noise:\n", - "noise = tf.random_normal(shape=[NUM_EXAMPLES, 1])\n", - "labels = inputs * true_w + true_b + noise" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "cellView": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 347 - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 374, - "status": "ok", - "timestamp": 1525154227149, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "O4lsC4ckAcar", - "outputId": "f8becb3f-498b-4cb7-9ef3-608a68cb65d0" - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAecAAAFKCAYAAAAnj5dkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xt8VPWdP/7X3M5MkpkkM8mEAAER\nQoICgUBALkUEQ7FucekDEeWL3VZXu121dler39pu1Vbb77b+2m1/3277qNXa2kUptGttt/tDEWqp\nyDWBiC6ES8slXDJJJpfJ3C+/P8JM5nLOmTOTmWQm83r+RebMnJyTAO/z+Xzen/dbFQqFQiAiIqKc\noR7rCyAiIqJYDM5EREQ5hsGZiIgoxzA4ExER5RgGZyIiohzD4ExERJRjtGN9AWE220DWzm02F8Nu\nd2bt/LmukO+/kO8d4P0X8v0X8r0D+XH/VqtJ8lhBjJy1Ws1YX8KYKuT7L+R7B3j/hXz/hXzvQP7f\nf0EEZyIionzC4ExERJRjGJyJiIhyDIMzERFRjmFwJiIiyjEMzkRERDmGwZmIiCjHMDgTERHlGAZn\nIiKiJDy+ADrtTnh8gVH5fjlTvpOIiCjXBIJBbNt9Gq3tNvT0e2Ap1aOxzopNq2uhUWdvfMvgTERE\nJGHb7tPYdfhi5Ovufk/k683NdVn7vpzWJiIiEuHxBdDabhM91treldUpbgZnIiIiEX0OD3r6PaLH\n7ANu9DnEj2UCgzMREZGIMqMellK96DGzyYAyo/ixTGBwJiIiEqHXadBYZxU91lhXCb0ue20pmRBG\nREQkYdPqWgBDa8z2ATfMJgMa6yojr2cLgzMREZEEjVqNzc112LByBvocHpQZ9VkdMYcxOBMRESWh\n12lQZS4ete/HNWciIsqa0a6sNV5w5ExERBk3VpW1xgsGZyIiyrixqqw1XvDxhYiIMmosK2uNFwzO\nRESUUWNZWWu8YHAmIqKMGsvKWuMFgzMREWXUWFbWGi+YEEZERBk3VpW1xgsGZyIiyrixqqw1XjA4\nExFR1ox2Za3xgmvORESUMawIlhmKRs7t7e34x3/8R3zmM5/Bli1bcPnyZTzxxBMIBAKwWq34zne+\nA0EQYj7zzW9+E8eOHYNKpcJTTz2FhoaGrNwAERGNPVYEy6ykPzGn04lvfOMbWLp0aeS1H/zgB9i8\neTO2bt2K6667Djt27Ij5zMGDB3Hu3Dls27YNzz//PJ5//vnMXzkREeWMcEWw7n4PQhiuCLZt9+mx\nvrS8lDQ4C4KAF198EVVVVZHXDhw4gFtvvRUAsGrVKrz//vsxn3n//ffR3NwMAJgxYwb6+vrgcDgy\ned1ERJQjlFQE43R3apJOa2u1Wmi1sW9zuVyRaeyKigrYbLG/lK6uLsyePTvytcVigc1mg9FozMQ1\nExFRCjy+QFYzppNVBHt150mcPG/ndHcKRpytHQqFMvIes7kYWm320uytVlPWzp0PCvn+C/neAd5/\nId+/xVKCl3/3IfYfvwxbrwvW8iIsmTMR962bDY0mc4HRVFYEq7kInXZXwjG9oMW+41ciX4enu4uL\nBDywfm7GrkFMPv/u0wrOxcXFcLvdMBgMuHr1asyUNwBUVVWhq6sr8nVnZyesVvFqMWF2uzOdS1HE\najXBZhvI2vlzXSHffyHfO8D7L+T7t1pN+L+/ao3pDNVpd+HNvWfhdHkz3hmqYUZFzPcKC4WCou9/\n79glfGLxlKztfc6H373cw0Naj07Lli3Dzp07AQBvvfUWVqxYEXN8+fLlkeMffvghqqqqOKVNRDSK\n3F7/qHaG2rS6Fs1NNagoNUCtAipKDVg+pxpur3hwZgMMeUlHzsePH8e//uu/oqOjA1qtFjt37sQL\nL7yA//2//ze2bduGSZMmYf369QCAf/qnf8K3vvUtLFiwALNnz
8bdd98NlUqFp59+Ous3QkREw+z9\nyTtDZaI4SPR6dnxFMAA4cd6ObpHrYAMMeUmD85w5c/Dqq68mvP6zn/0s4bXvfe97kT8//vjjI7w0\nIiJKl7l0qDNUssCYLFlM6rjcvubooN9YZxWd7mYDDHks30lENA54fAHYel1AKASruRhWQSsbGLUa\nFbbuapcsGpKsqEh4X3NYONELQMx6tlgDjIYZFqxqnAyPL8AALYHBmYgojwWCQbz+zim898EVuL1D\n68gGQY3mxdfhzlumAxDvDJUsuMod37Byhsx6tg0bVs6IBN3oBhg9/W7sOnIRbae78MfWS9xWJYPB\nmYgoj23bfRrvHOmIec3tDeL3f/4L3G6faGeoZEVD1i2bJnv85nmTJNezu/s9eHXnSXz29lkxAVev\n02BPawf2tHTEvFdstE1sfEFElLfkgiwAtJy0RaaOq8zFkdFssqIhFzsdsscRCsFSKp3Mte/4lYSy\nnUqqiNEwBmciojE0krKWckEWAOwDHtHtSmVGvWRwNZsMqKkySh4XdBpYyorQWCdfuyI+4CZ7IOC2\nqlic1iYiGgPpdHGKz5wOB1mxjGwAMJv0otuV9DqNbLKYqViQPO72BvDG3rPYtLoWLrcf70VV/4oW\nv11L7lq5rSoRgzMR0RhQmu0MyAdyqSAKAAvqrZLZ0GJZ1OFkMQBYv+J6/LntciTJLFprexc2rJyB\nLWvr8T/netAz4E14T3zATfZAwKztWAzORESjLNn6a3S2MyAfyDetrkUoFIrL1tagefFU/O2y6ySv\nITqLWmwfs8Ppg0ckMAOxo+IF9VWKA26yBwIaxuBMRDTKlKy/hqeDlQTy/7WmHnfeUhuzz7lmUrmi\n2tLhZLF4SqehUwm4yR4IaBiDMxHRKEtl/VVpINfrNKixKu9hkKwymNJp6HQCrtQDAQ1jcCYiGmWp\nrL9mOpHK6fFh69uncOJcD+wDXlhK9WiorUTzwhpYSg0x3zuVUTEDbmYxOBMRjQGlgS9TiVThpLL4\nJK/ufg/2tAwVB6mIyxgXGxUDQHefO/JnTk9nB4MzEdEYSGU6eP2K6+F0+3HinB29Do9oIE82TR2f\nVCZGKmNcr9OgoswQyRjv7vfAIKgBqODxBliGMwsYnImIxlB4v7LSzk9LZ1fjnjV1KNZrJd/TWGfF\nw3c1Rs6TrJJYPCUZ49F9mlmGM/MYnImIRlH0CFerUaXc+em941fg8vrxd7fNgqlYkNxmVVwkYP3y\naQCSVxKLl0rGeDSxoE7pYXAmIsqAZNPKYiPcYoMOFzodkfco7fzU0t6F1vY/Y7K1BE63T/Q9+49f\nxicWT1FUSSye2aSH1xeI1OVWGtzjgzqlj8GZiGgElJbhFBvhSgXLZJ2fACAE4KJtUPJ4V68LZzv6\nMH1ymWxSmZhBtw9Pv3woci/rV0xXFNxZhjNzGJyJiEZASRnOVNd8ozs/KR3tJlAB33n9aCQDO7G3\nsx71U83QaVU4ftYO+4Abgk4DtzcQWU+OvhclwZ1lODOHwZmIKE1Ky3CmuuZbbtQDKhUaaitj+h+n\nIngtXyv+YUEsO9zjC8DW68K//eqoZC3tZ+9fhEAgiNZTXeh1eGEQhj7r9QVYhjMLGJyJiNKktHpX\nqmu+To8fT790EGaTgImWYlzucY74WsMPC2L0Og0ErRp2kQYWwNC9bH37FE6et6PP4YXZqMf8ukps\nWDkdDqeP+5yzgMGZiChNSqt3KV3zFbQqeP2hyOh1qNuTF3qdGh5fUPazydgH3Hh150mcPG8XXRuX\nuxdBp8G+qNaQdsdQ4RKNWsWtU1nC3eJERCny+ALotA+NZhvrrKLviV9/3bS6FsvmVMue1+sPib6u\nUqV5oVEEnRr7jl9Bd78HIQxPd7/+zikAww8Q4sSvq7W9Cx6feOcqGhmOnImIFBLLzJ4/sxKrF07G\nsVPdsmU4NWo17l1bj5Pn7
Sknebm9QSyfU40j7TbRNWElfH7xkfd7H1zBnbfUQq/TYNPq2si6cp/D\nC0upAbOmluO9qFFzNG6dyh4GZyIihcQys9850oHmpho898BNSctwprqlKUytGirh+T/netIKztWW\nIlzpcYkec3uHksEmVhRj2+7TaDvTjT6HF+VGPRpqK7Bh5QyckHig4Nap7OG0NhGNC+Gp5mxNsybL\nzAYQad0oJRAMIhQKRTKdlQqGgE67SzJhKxmXxy//hlAo8uARnvYOryu/sfes4ql7yhyOnIkor8kV\nAckkpZnZcrbtPo13jqS+Ncpi0qOmypj2vue+QR/0WjU8IlPbBmGogpjcg8ez9y+K/DlZ60jKDAZn\nIsprckVAHr1nYca+z0j7KqdaiCRaSZEOGo0K9VPNMVnTSpUbBcybWYl3Wy8lHFs2txouj1/2wcPh\n9CnuoEWZweBMRHkr2VSz25tkOjcFep0GDTMqsEckwCmZ3k21EEm0C50OPP7DffB4AzAIGoRCoZS2\nVjXOrMTmNXXQadRoOWlDz4AHZSU6LKiz4p5bZ8IfkK5GFr8ljMlfoyPt4Lx9+3a8+eabka+PHz+O\n1tbWyNezZ8/GggULIl+/8sor0Gj4pEVEmZNsqtne78nICCQ8dd52phvAUIJWMDQ03bygXnoKPboZ\nRqqFSOKFE8FSTQibaCnG5jV10KjVQ9nYwRCOtneh1+FB25luaDSnsWl1rWSiGteVx0baf283btyI\njRs3AgAOHjyI//7v/445bjQa8eqrr47s6oiIZCSbajaX6jHQJ56lrEQ4uO48dCGmjGbw2rbfeTMr\nRYtwSK2Dz59Zmdaas5jwA4Icg6DGV/6uKdKAY9vu0zH3Eb0EEH7A4LpybsjItPYPf/hDvPDCC5k4\nFRGRYnJbkxrrKmEQtBgQ+Vwq7R27+z1QSxQBaTvdDc+qQMI5tr7dHjP9HQ6CtyyYhBpriWw3KRWk\nSn7EShaYAeBjDZNQrB/6b37A6cXh/+kUfV+4tCfXlXPHiINzW1sbJk6cCKs1NtXe6/XiscceQ0dH\nB9auXYvPfvazsucxm4uh1WbvL4LVasraufNBId9/Id87MP7v/+G7GlFcJGD/8cuw2V0wl+qxZM5E\nPLh+LoDY+w8Egnj5dx8OvbfXBWt5EZbMmYj71s2GRjO8s/TFNz6ICfhSgdA+4IZG0MFaWRI5/0/e\n+ADvHktclwaAAx9ehcsjPy2tJDADgLXcgAX1VXj70PlIk4toRXotHlg/F3pBi5++eRxvHzwHj1d8\nnTr+PmoUXkOuy+e/+yMOzjt27MCnPvWphNefeOIJ3HHHHVCpVNiyZQuampowd+5cyfPY7SMv7C7F\najXBZhN7fi4MhXz/hXzvQOHc/7qlUzEw6MFRXxfs/R4cOH4ZXq8fD9/ViJ6e4VHq1l3tMUG30+7C\nm3vPwunyxrR3fO+Ysqlns8mAgNcX+RnHnz9essAMDK1jz5tZibbT3TFtHOPNq63E8jnVeOvAedHz\neLx+/OWCHbuOXExa9CT+PsaDfPi7L/fwMOLgfODAAXz1q19NeP2ee+6J/HnJkiVob2+XDc5EROmS\nWkstLhKwfvk0ANlp7xidLDWSrVLRFtRbsbm5Dp5VQ1PvxmIBb+w9G7MWPG9mBUKhEP7tV0clR9qV\n5UUo0mvRclJ8KlvqPig3jKhC2NWrV1FSUgJBEGJeP3v2LB577DGEQiH4/X60tLRg5syZI7pQIiIx\nckFx//HLkYphSoqIAMNJZmLUqqEmFBWlBjQ31WDT6tpIZTKb3Zk0qMtVBrOY9JFzAsPblor1Wmxu\nrsNzD9yEbz64BF/7TBM8ngDeOdJxrWuVuCVzJg7tX05SVWzZnGomfeWgEY2cbTYbLBZL5Ouf/OQn\nWLRoERobG1FdXY0777wTarUaq1evRkNDw4gvlojGv2TJWvHkgq7N7sLZjj5Mn1yWkfaOK+d
PwtrF\nU1Fm1EOrUSVkZAs6FTw+8bGsXqfGktlV+GPr5YRjy+dUY8vaetn71WpU2HXkIlpOdsoG3IprmeH3\nrZuNy1f7YTEJku+3lOpx79r6SDY35Y4RBec5c+bgpz/9aeTrBx98MPLnL33pSyM5NREVGLkynHLB\nQy7oqtTAC68fjZxr3sxK7BbZyiTW3hEQ31YUvpb49eVk+5c9viBuXVgDrUYje14p8ZXQxKgAPHpn\nA2qqTNBo1NDrNFhQXyX5uQV1Vk5n5yhWCCOinCBXhlNsL3GY3Eg3nMUcPtetCyejualGci9v9Kg9\nflsRAHT3uSN/Tmd9efeRDty7dlbS7UrxswceX0DR2rGl1ABrXAWvTatrEQyFsO+DK5HEMoOgwfK5\nnM7OZQzORDTmlCZrSYke6fb0u6GSKNBx9FQ3nnvgpoTgGAgGsXVXu+iovaLMkDCir59qTqsUZ9uZ\nHnh8AckymFKzB6saJyddOwbEE7s0ajW2rKnHxltqYbM7AZUK1vIijphzHIMzEY25kXZ80qjVkZHu\n2Y4+vPD6UdH39Qy4I2vQ0eeTG7UHAsGEgiL7jl+BRF0SWcnuRap4idcfkK0IJuhUWNEwSXYkrNdp\nUFOVv/t+Cw2DMxGNuZF2fArT6zSYPrlMeg0awHdePxpJmtq0uhb+QEhy1L637RK8EoU7lBYLiSZ1\nL0Mj91N496h48ZK2092yFcG8vhBUKhUTu8YR/iaJaMyF143FpLoHV+5c4QAXHpFu231adtTu8QbT\nCsJSpO4lvE9bKgD3ObwoNwriB69pbe+KbBsDALfXj067M+Y1yh8cORNRTshk44XwZ9rOdMPW64IK\n4lPCre1dWLds2oi6RSlRbhTQNKtK9F6UFC+xlBrQUFsRU2glXnjKPLxG3namGza7S3HWO+UWBmci\nygnR68YjbbwQPtfnNhTh4LEOfEdiDdo+4IbL45fM9s6EcqOAZ+9bDFOx+MhXSUWy6IeUd1vFR9jh\nKfN0s94pt/AxiohySjiTOZXAHK7SJTaFayrWoUKi4lc4oG1aXYtbF05GNgaWbq8fb+w9i8vdg6LX\nl6wi2c3zqrGqcTL8gRDuWlWLxTdMEH1vY10lAOktXvHT3pTbOHImorwltfXozlumY8cfz0amdvWC\neNSNXgMOBkOi3Z1SMVTeU4VA1NDW7R3K9t7TeikmES08xSy3T3tSZQk+/Isde49dgV7QAAjB7Q3C\nIKgBqOD1BWKm/7v73CPKeqfcweBMRHlLagr35PleXOh0RF53X8u41qiBwLUAbBA0CIVCCASDQxnb\np7pGdC2L6q24Z00dnvv5Ick9yVJTzGLr7cUGbdw9RCd7Dd3EsjnVuDeq7Gemst5p7DE4E1Fekkuk\n6rA5RF8PRI2M3d6h5hHBENBUZ0WvI3mRDznGEgFeXwB2BcVC4gurxK+3F+m1+Porh5Ke5+T53piv\n5Ubh7DyVXxiciSjjUm1ekY6efrdkhrXcnuB477Z2yGZBp3Ieh9MLs0yjiTD7gBu2XhcErTrmZxRe\nb+9U0OEKGPoZxE9VR2eqd/W6RpT1TmOHwZmIMibd5hXp2HVEOrtarppWvFQCebLzHDphg0bBbQo6\nDb63rRV2hw8Wk4AF9VUxPyO56eloekGTMFUdnal+5q/dWX1AouxhtjYRZUx4Dbi734MQYot9ZJLH\nF0Dbaek14urKsUt6CihIKnN7A7A7fACAngEvdh2+iNfeORXJOgcgWUhFKYOgTTnrnXIHR85ElBEj\nbV6RCrkpbQCwlhlwyebMyPfKJLUKQAgQi99/bO3A0XYb7ANeWEr1mDezErcunIyWk12wO8Tv1Xtt\n+YAZ2OMPR85ElBFKmldkytuHz0seU6uAY6d7Mva9MikoEZiBofaWPQPeyIzD7iMdUKlUeOa+RZKl\nO5mBPX4xOBNRRsgV05ALIh5fABc7B3DR5lBUJMPjC2D
/h9K9jTO1hpwLWtu7IOg0aJpVJXqcGdjj\nF6e1iQjAyDOsU93GEwgG8do7p7Dvg8uRfbsGQYPlc6tx960zJRPIbL2umD2/41l4xiGTdccpPzA4\nExW4TGZYpxJEtu0+jd1HYrcwhfceq1QqbG6uE39gCOXe0DiV7PBUhGccMll3nPIDgzNRgctkowSl\nQcTjC6DlpPTUdGu7DYFAEG1nuhMeGKzmYhgEdWS0nQuyNZUeP+MQ3gdN4x/XnIkKWLIMa6WNEuIb\nTyRrXtHn8MgW6uju92BP6yXRLVl6nQY33Sje/GG8ELRqNDfVcNq6gHHkTFTAlGRYy43U0p0SL9Jr\nIWhU8AbEh5xS08QtJ20IBEM4fjY3s7GTUauGZuXNJj2cHr/o2rleq8a3/mEpypmFXdAYnIkKWCqN\nEsTWf1OdEo8O5lKBGZCeJu4Z8GSk1OZYWXzDBKxfcT3KjHr8+t0zoslzK+ZPYmAmBmeiQqYkw1pq\ndLx+xfUpFx2JD+ZSykp06Bv0JbyercSr0WAQNNiyth7F+qH/dpmBTXIYnIkKXLIgITU6drr9KU2J\ny61vR6soNWDuDDP+2Ho54Vi+BmYA+FjDxEhgBpQnz1FhYnAmKnByQUIuoJ44Z1c0JR6eDvf6Aoo6\nLTXWVSKYg9ul0lVRKj8iZgY2iWFwJiIA4kFCLmGs1+HB4hsmoPujqwnH6qeWIxAMYeuu9pjpcL3M\nFihTkRY3Xl+B25dch+d/cXjkN5QDyo0CvvaZJpiKxctvEklJOzgfOHAAjz76KGbOnAkAqKurw7/8\ny79Eju/btw/f/e53odFocPPNN+Ohhx4a+dUS0aiSSxgTdBq0X+wFMLwWrNepgJAK7x+/gpb2zphA\nnKz94YDLjwMfXR1KFvPnzh7mkegf9MLl8TM4U8pGNHJevHgxfvCDH4gee+655/DSSy9hwoQJ2LJl\nC9auXYvaWiY6EOWD6MxsqYQxtzcQ2QoUXgv2+EIAQteOiwdYg6BGKAR4fOLHMxGY9Tq15PnV6qEm\nE+kyCBrF5UPZmILSlZVp7QsXLqCsrAwTJ04EAKxcuRLvv/8+gzNRjhPLzJ4/sxKrF07GsVPdsA+4\nUW6U3qOrRCYre5mNAubXWdF2ujsmmW353Go8+zPxqfFQEBC0Knj9qa1rq1XAyvmToFar8M4RZdu5\nGmZYmORFaRlRcD59+jT+4R/+AX19fXj44YexfPlyAIDNZoPFYom8z2Kx4MKFCyO7UiLKOrHM7HeO\ndKC5qQbPPXDTUGKXP4inXzo4hlc5zHXtAeHhDXOgUathLS+CVqPC1rfbJbddhQD4ZfZYSwmGgOam\nKagyF0GlUqG1vQs9A27ZUt/NTVNS/j5EwAiC87Rp0/Dwww/jE5/4BC5cuIBPf/rTeOuttyAI6a2t\nmM3F0Gqz94RptZqydu58UMj3X8j3Dii/f6fLiz+3JW5fAoDWU134zLo5qJlUDrfXD6u5CJ12VyYv\nMy1ubwB7Wjqwp6UDVnMR5s6ohKBTY0/rJdnPpbsl670Pr+LzG+bh0XsWwu3140r3IL7+0/2w9boT\n3qtWD73/wfVzodGMTaVk/t3P3/tPOzhPmDABt99+OwBg6tSpqKysxNWrVzFlyhRUVVWhq6sr8t6r\nV6+iqkq8H2mY3e5M91KSslpNsNkGsnb+XFfI91/I9w4ov/9AMIiv/fSg5FR1d58bD39nN5pmVWHT\n6lo0zKhQVExkNNnsLuw+nN0Zuv0fXMa6pddFpqpLtGrMq60U/VkEg8Af9v0VXq8/5QYimcC/+7l/\n/3IPD2k/zr355pt46aWXAAxNY3d3d2PChKFi9DU1NXA4HLh48SL8fj/27NkTmfImotwQ3axi665T\nuNwj/4Dc6/BGmk9sWl2L5qYaVJQaoFYNJUllmirjZxy5ngEP+hyxWeebVtdiVeMkqCUuOJUGIkRh\naY+cV69ejccffxz
vvPMOfD4fnnnmGfz+97+HyWTCmjVr8Mwzz+Cxxx4DANx+++24/vrrM3bRRJS+\n+KQvs0nAoMuv+PPh0pzRhUuMxTq8sfcvOHyiE70O6W5TqcjFMiRq1VDTjmgatRprF0/FHyWm0pU0\nECGKl3ZwNhqN+PGPfyx5fNGiRdi2bVu6pyeiLIlP+pJr3SgmOthEFy7Z3FyHdcum4emXD2YsQOea\nYAii+5ZTaSBCpAT7ORMVEKX1reWIdasKT4+bigUUG8Zv4cGKUr1ooA03EBETbiBClIrx+6+IiBLI\nleNUSq5bVZFei0td2UvuHGuNdVbJQMsuU5RJDM5EBaTMqIfZJIhOZet1ahiLdOgZ8KC8RI95MysQ\nCoVw7HQ3+hxeWEqTd6sCRhb4c9nK+ZNkAy27TFEmMTgTFRC9ToOSIvHgXGUuxlP3LoxJ8Gptt6HP\n4UW5UY+G2gpsWl0LjVqdkenxfPOJm6ZCo06+EsguU5QJDM5EBcTjC8Dp9okec7p9sNmdsJqL8et3\nz8SMiu0OD/a0dECjVmFzc11GpsfzidmoY1IXjSoGZ6ICIhdUu/s9+NrLh2AxCXB6xPfltrZ3Yf2K\n6fjDgXNQqSBbunI8MRbLT1FHNwrhVDZlAoMzUQGR2/ITJre1yj7gxvM/P5y0YMl4M+jyweMLJARe\nsaS4xjprZPqfKF3820NUQOS2/Cih1agKLjADQK8jsTIYMJwU193vQQhDsw/hKmpEI8HgTDTORO87\nFhNdelOVYo1Mf7odI/KcWCERuaQ4luykkeK0NlEOS2UtU2yKdfm8yVi3NDbLOLzlZ/2K6/HLnSdx\n4KNOxaUyg5lrxZxXxAqJyK3fs2QnjRSDM1EOSmctU2zf8Zt7z8Lp8op2RXpj71+w/6POrN1DPlOr\nhmp7W2QKibBkJ2UTgzNRDhILtOGvxQJtsinWDStnxIz8CnGfcipWzp+EtYunxsxYDDi9uNjpQE2V\nEaZiIbJ+L9YukiU7aaQYnIlyTKqBFkg+xWrrdUHQqiPBps/hkc3YLiRTqoxwuv0JJTfDMxRevx/P\n/6IFHTYHgqGhUfVkqxFf+fQCluykrGFwJsox6axlyk2xCjoN/u1XR2Ef8Eamx29fMhVq1VCXpZEQ\ntGp4/bm/EF1jLcGDd8zGntYOtJ3uTgik/kBIcm3/+V+04EKnI/J1MARc6HTg+V+04Nn7FrNkJ2UF\ngzNRjklnLVNuitXtDcDtHcocDk+PO93+EQdmAPjnTQ048FEn3j16KSPny4abGybi3tvqoVGrce/H\n6+FZlZhkp1FDNHlrwOlFh82R8DoAdNgcGHB6I1PcTP6iTOJWKqIck077wUAwiFAoBIMwfMwgqGEQ\nxP+Jnzhnh8UkiB5LxSt/OIlFCLMMAAAgAElEQVSb508as0phgjb5XjCNJvY94UCqZIR7sdMh+dAR\nDA0dJ8oGBmeiHBS9F1mtAipKDWhuqpFcy9y2+zTeOdIRGSEDgNsbhNsrPuXc6/DghussI77OK3YX\n/s8vWyDoUtwwnQHlJTo01FZCp5X/b2xP66W0i4LUVBmhlrg1tWroOFE2cFqbKAeF9yKvWzYtJkM4\nnscXgM3uTDnzWtBpcM+aOpzvdMSsp6bD4xubNefeQR8On1B231KJdMmYigVMthpFf0aTreK/E6JM\nYHAmyiHhoiPRLRvF9jlH74NOJ+s6FArhavcgBl3SdbTHk5EUBfnKpxdIZmsTZQuDM1EOiC86ohc0\nMVPU8fuc4/dBp8rjC+Ibvzgy4uvOFyMpCiJotXj2vsUJ+5yJsolrzkQ5YOvb7TENFKIDc7TW9i4M\nOL0sIJKiTBQFMRULuGGahYGZRgVHzkRjKBAMYuuuU3j36CVF7+/pd+Nip0NyH3Suq7YUodPuyvq2\nK/W1XtNWcxEaZlSwKAjlHQZnojHi8QXwy50n8d7xK4o/oxc0qKkyJu3JnItunj8Ra
xdNRZFei1f/\nvxM4cd4OlzeYkWIo8UIAHr97PhbPm4yBPldmT040ChiciUZZeH35yImrsDt8KX46hDf2nsWgO9XP\njb332i7jT0cvJ7yejVF0eYke0yeXwSBoMZD50xNlHdeciUZZOJkr9cA8tHd5T+ulhP3LGjWwsnEi\nKkpztxNSIMmOK71O+X9HS26sgtmokzw+n40nKM8xOBMl4fEF0Gl3wuMLiH6d6rmykcwVCAJqlVqy\nslg+ULJf2iBo0NxUg/s/eSMWzpog+p4pVUZsbp6Z6csjGlWc1iaSEL+9yWwSUFIkwOn2Ke6xHC+b\n3aBaT9owt9aSN80o4pmKdBB0atmfT4lBiw0rZ0CjVsd0hOrpd6PMKKBxZiU2r6lT/PsgylUMzkQS\n4vcS9wx40TMwXLQjWY/leIFgEDsPXchKAhQA9A56sfeY8uSyXNNYXwlBq5Hdv20f8ESKiYSrqLEj\nFI1HIwrO3/72t3HkyBH4/X587nOfw8c//vHIsdWrV6O6uhoazdA/lhdeeAETJohPQxHlmlSmn5WW\nhty2+zT2tHSM6LoErQpef462f7rm5saJOHuxHxdtg4o/o9WocO/H6wEAgWAI77Z2iD7AiBUTYUco\nGo/SDs779+/HqVOnsG3bNtjtdnzqU5+KCc4A8OKLL6KkpGTEF0k02uR6KsdTUhoyU2vNC+qtOH6m\nBw63f8TnyhatSoWnP7sIW99uR+upLvQ5vBB0atk15dJiHfyBELQaFTRqFXRa8fdnopgIUT5IOzgv\nWrQIDQ0NAIDS0lK4XC4EAoHISJkon8n1VI6npDSkXLBXqQBTkYB+p3yda4OggVarzunADADvHb+C\njatm4t61s3DX6qFa4V5fAF97+ZDkZ+wOL/ocHuw6clF0WnsoG30yi4lQwUg7OGs0GhQXD40UduzY\ngZtvvjkhMD/99NPo6OjAwoUL8dhjj0Glkm4rZzYXQ6vNXmC3Wk1ZO3c+KOT7T/fel8+bjDf3nlXw\nvkmomVQu+x5TWRGs5qHqWAnXV16EuuvK8WeRPcDRqiuKse+D3F9T9niD8KtUqCwrwmC3EyUmA2pM\nBljLDbD1ukU/Yy0vQs2kcrT96pjo8UAQMOh1qJ5QlvL18O9+4crn+x9xQtiuXbuwY8cOvPzyyzGv\nf+ELX8CKFStQVlaGhx56CDt37sRtt90meR673TnSS5FktZpgsxVuKYJCvv+R3Pu6pVPhdHnR2t4F\n+4Ab5UY9Sop0cLp9sA94YDYZ0FhXiXVLp8JmG4h0lJJKTGqYUSE6Kuwf9CQNzBMtxfjr5fz5Hf7i\nDx/hg9Ndkf3YBkGDynKD5PsbZlTg4qVe0YeXsPfbLmPd0utSmtbm3/3CvHcgP+5f7uFhRMF57969\n+PGPf4yf/vSnMJliv8n69esjf7755pvR3t4uG5yJco1GrcaGlTNwc8NEQKWCtbwIep0mIQgP1cdu\nl2zvGHbnLdNx8nxvpPVgWHxBkXhmowCvP/U91WPp0EedMV+7vQFc7BxEtaUI9gFPZD3ZIGiwfG41\nNq2uhT8QQrlRQK9DfHq/d9CTdttHonyTdnAeGBjAt7/9bbzyyisoLy9POPbFL34RP/rRjyAIAg4d\nOoS1a9eO+GKJRkv8HufogBufHRy/5Sp+i1U4mO88eB4XOh0pX8sN0yx4P4X627nsSo8LZpMejTPL\nsPam61BtKY6MhDVqoHFmJfa0ijcBsYyg7SNRvkk7OP/hD3+A3W7HF7/4xchrN910E+rr67FmzRrc\nfPPN2LRpE/R6PW688UaOmimvSAXcQDAU2fIDAANOLw6f6BQ7BVrbbQgEgmg7042efg9kUi4kLZ9T\njXvWzMTJ8/a8a3QhxT7gwf6POqFRq7FlbX3Msc1r6nC6o1/0IYaZ2lRIVKFQKCc2TWZzbSAf1h6y\nqZDvP5179/gC+OqL+0WDoVoFLLphAjavmYk39
v4FR050ot+ZnSYUpmItHr2zAZOtppS7V+ULi0nA\ngvqqmCWAcBvNo+1d6B30wHJtbT+VSmxh/LtfmPcO5Mf9Z23NmSjfhaeci/RauDx+lBn1stuegiHg\nwEdXceCjq0nPrbrWUzhdA04/nvtFCwyCGotvqIJBUCddn843PQPehCprGrUad62qxarGyUAoBKu5\nmCNmKjgMzlSQwmvKLSc70TPgjZTUrCjVY870CpSVCOgdlN93nEym5qTc3iD+dOwKplQZ01qzzgfh\nKmtajUpyrZ/1sqmQMDhTQYpfUw5nT3f3e/DuUfGEpLE2MDg+1pzFhKusxRchSbV+OdF4wUdRKjjZ\natuYbb2D2VnbzgVmkwFFeq3k76W1vSutFp1E+YrBmcalcM9ltzex1GUqdbMzqdwoYGXjJOi16f2z\ny9VlV4Mw8v9GGusq4fL4JX8v4ZE1UaHgtDaNK/H7k63mIjTMqIhZs0ylbnaYkh7J1RVF6OxxiXZT\nUqmAL909H5ayIrSf68XlntQr4uXqwLGi1ICOLvn7MQgaeH0BmE16FBt0GHT50OsYrrIWLkIi9XtR\nUr+caDxhcKZxJX4tudPuSliz1Os0aKyzyvYNjmYx6THnegsOfHQVnmsBWqMeanPo8YWgAhAC4HL7\nJfs0h0LAsz8/BBVUst2Z8pHD5cOqBZPRdrob3f3uoZkBNeDzBSPBd/2K6XA4vZGqamKlTjVqSP5e\nuMeZCg2DM40bcmvJ8T2Xw92NWk7a0DPgiWRriykp0uFPbbG1rwNBYIKlCJe6nAh/rC/JmrDXFwKQ\nE2UFMqp/0Ie1i6bgrlW1kYALICH4FuuH/7uR6sEc/r2E65lHj6yJCgmDM40bcmvJ8T2XNWo1NjfX\nYcPKGZF9zg6XD7sOX0DbmZ5IYGiYYUHbmW7Rc15KMpVbKCylhkgQjg646dTAjv+9SDURIRrvGJxp\n3JBbS5Zas4wOKKZiAfeunRUz5WqzOyVrPcspLdZlrXJYrsnGlLPUyJqoUDBbm8aN8FqymPgAEs7m\njt+eEw7MxmIBv373DL63vS3l67CY9Nh068yUP5ePjEVaTjkTZQFHzjSuxK9ZVpYPZ2sD0t2m7rxl\nOnb88Sxa223o7vcoys6W4nB58dPffZSxe8plTrcfTrcfpmJhrC+FaFxhcKZxJX7Ncsa0Cgz0uSLH\npbpNnTzfG1MaM93APPTZ8Zf0JSUYAi52OnDDNMtYXwrRuMJpbRqXwmuWBmH4+VMum3u81qzONrUK\nqKkyjvVlEI07DM5UMMaqMthYaaqvhEFQlqhVbhSgVg01/qixlkCvG/6vwSBoYCwSn2SbbDVySpso\nCzitTQWjzKiH2SSgZ2Bk3abyhVarwavP3ob/OW2D1+fHd147KloAxSBo8Ox9iyMtM8NFQmx2J6BS\nwVpeBJUqhOd/0YIOmwPB0NCIebLViK98esEY3BnR+MfgTAVDr9Ng1nUW7Dt+ZawvZVS0n+8FANRY\njRhwehGSqrICQNBpYkbAep0GNVWxjeCfvW8xBpxeXOx0oKaKI2aibGJwpoKyec1MtLTb4PbmaKHq\nDOp1eNDV68L2XSfx52OX4Q2IB2ePNxBToEWOqVhg8hfRKOCaM41LUl2p9DoNKssMY3RVo8tsMuB3\ne89i95EO2ezzcIUvIsodHDmTLLEGBbkm+hq1GpVsV6ptu0/jom0w49dQXiKgpEiLrj53zjS2mD3d\njP0fJK9uJlbhKx9+70TjGYMziZIq1hHdenGsiV1jsUEXsy0quivVhpUzJLdSjVTvoBcurz9nArOx\nSIu2093odcgnvy2bUx1T4Ssffu9EhYDBmURJFesAhlsvjjWxa5Tq0dza3oWlN07I6laqXAnMAOBw\n+ZO/CYBOp4r5Oh9+70SFgI/ClCBZ68X4etRjQe4axXT3u/GDX7eNw4aNI/Nu62Vs230aQH783okK\nBYMzJVDSe
nGspVNQJFm/5ULVctIWWWPO9d87UaFgcKYE4daLYqRaL442uWuk1NgHPJHkr1z/vRMV\nCgZnSpBK68WxIneNwFDVK7UKqCiQbVMjYTbpI1nZuf57JyoUDM4katPqWjQ31aCi1HCt5rIBzU01\nOdW7d/2K6yVrR5cYtHjms4vw/X++BRUcYctaUG+NBN58+L0TFYK0s7W/+c1v4tixY1CpVHjqqafQ\n0NAQObZv3z5897vfhUajwc0334yHHnooIxdLoye+9WIu7nd1OH3wSFT6Cq+dlhn1mDXVjPcKpGQn\nANRPLcPJ831J32cQNFg2N3YrVT783okKQVrB+eDBgzh37hy2bduGM2fO4KmnnsK2bdsix5977jm8\n9NJLmDBhArZs2YK1a9eitpZP3vko3HpxrMgVwwivkYptnwoB+P6ONiw/1Y31N08vmOCsVgGf/cQN\n+MqL+xEQ2dmlF9T40j2NEDRqWM3FkoF3rH/vRIUureD8/vvvo7m5GQAwY8YM9PX1weFwwGg04sKF\nCygrK8PEiRMBACtXrsT777/P4EwpUVIMI7xGGr0vN1p3vwdv7j2LQx8WRmAGhjpFVZmLcUvjZLxz\npCPh+MfmTsT0iWVjcGVElIq0gnNXVxdmz54d+dpiscBms8FoNMJms8FiscQcu3DhQtJzms3F0Gqz\nN31mtZqSv2kcy7f7f/GND0SLYRQXCXhg/dzI6w/f1YjiIgHvf3AJtl636Lku9zizfr1jTa0GplWX\n4juPrIAgaPHIpgUoKdZj//HLsPW6YC0vwpI5E3HfutnQaAor1STf/u5nUiHfO5Df95+RCmGh0MhL\nO9jt2fsP1Go1wWYbyNr5c12u33/81LXT48NbB86Jvve9Y5fwicVTYqZjP7F4CqZXG/Fv29tG65Jz\nyv23z0JDbSVMxQL6+lyR19cvn4Z7b78BZ/7aHfnZ9vRkvq54Lsv1v/vZVMj3DuTH/cs9PKQVnKuq\nqtDV1RX5urOzE1arVfTY1atXUVVVlc63oXFOaura4fZJtnQMF8OoMhcnfL4QVZQa0HTDBMm1Y4Og\n5doxUR5Ka35r+fLl2LlzJwDgww8/RFVVFYxGIwCgpqYGDocDFy9ehN/vx549e7B8+fLMXTGNG+E6\nzt39HoQwPHXderJT8jPlJj28vgA8vkDC5wsR9x8TjU9pjZwXLFiA2bNn4+6774ZKpcLTTz+N3/zm\nNzCZTFizZg2eeeYZPPbYYwCA22+/Hddff31GL5ryn1wdZ49POtQ6nD48/fIhmE0CnJ7Cq/WsUSOS\nhW0Q1AiGQggEg+wYRTTOpL3m/Pjjj8d8PWvWrMifFy1aFLO1igqT3DaodGpjA4DXPxSZegbkWyGO\nFwZBA68vALPJgCK9JqYXtdsbxO4jHVCrVOwYRTTOsGUkZZySbVBye5QNgkZyzXm8U6uG9mhbTAY0\n1lVi/Yrp6HN4sPPQefz52GXRz7S2d2HDyhmc3iYaRxicKeOU9ASW26NcWW5AV687EqD1WjU8/uz3\nSv7nuxrQM+DBG386g95BZf2QM23l/ElYu3hqzGzDG3vP4k9HxQMzEJskR0TjA4MzZZTcWvKREzas\nWzYNpmIBACJlI1vbu2AfcMNsMqDYoMWFTkfsOf1BGAQ13N7EAG0QNCjSa2Af4TS3SgW88t8n0DPg\nRaa3AWvUgFajhscn/4BhEDTYcEstivXD/yyV9K1mxyii8YfBmTJKtieww4OnXz6IpllVkSnu6DrO\nRXotvv7KIYkzq0RftZYXYVJlMQ58JJ3hrUQoNLyOLVb2MhUq1dD5wl2xnti8AIJWjadfPoheh/RD\nhNcXgMPpjQnOStbmmbFNNP4wxZMyKlmf5V6HF7sOX8R/vH0y8lq4jrPL45cMRF5fANWWooTXL3Q6\ncPRUl8gnxk64Jk8wBNh63fiXn+7H7/b9FQvrpVtcAuIjYLmfp1oFrGqcxI5
RROMQgzNlVLI+y2F/\nbL2MV986iUBweJhqLNZBL9ECUqdV42qPS/RYsuniseb2BrHr8EWEADQ31Ui2uWyYYUGfwwOPbzgZ\nTu7nubJxMu5dO4vbqIjGIU5rU0Z5fAGsapyMQDCElnYb+mSmcfe0dECjHt4G9Js/nZXM0s71AKzE\nsVPdeO6Bm7B+xfXY+vYpnDhnR6/Dg3KjHiVFOrSd6cYfWy8lZLeLrc031lVyxEw0jjE4U0ZEb5/q\n7vdA0Krg9Sev2xXeBgQA+z6QzkiWky9br6Kzqv/+kzdG9oHvPHQBe1qGO0jFZ7ezxzJR4eF8GGVE\ndClNAIoCMzAcsGx2p2g2thJL50xAjbUkrc+Opvg1Zb1OgzKjHm2nxdfMW9u7Eqa4q2R6MBPR+MHg\nTCOmZLuPlEjAUolnYyvh8wfh8ozNvuRUiGVVy2a3X3twIaLCw+BMafH4Aui0OyNTs+l2hQoHLGt5\nETRq8QCdLGy/13ZFtNKYEjoNUG4U0vqsHL1ODUE7fOUGQYPQtTrY0eSysbl/mahwcc2ZUiJWmrOh\nthJmk6Co3nV8ecropCadVoWAN3E6XC+oMb/Wiv0fXRU950g6UvkCkN17nK4qc3FMMRW3N4B3jnRA\nFVcHW6tRodigE3244P5losLF4EwpESvNuaelA1OqjIqC88caqnHTDdWoqTJGKoUBQ9O7UmvObm8Q\nK+dPwoGPruZ0a0iVauiho6G2AsdOiU/zRyfA9Tk82HnwfEJFNACYUmVkNjZRAWNwJsXk1padbh9W\nNU7C+x9eFc2cVquBSRUl+PAvduw9diVhu5CxWCebdf3j3x5XFJjD1blGm8WkxxfvmoeyEgEXOx0x\n2dfRevrd+OXOkzhx3o6efo/kUrvT7Yc/EMp4KVEiyg8MzqSYfPKSB2sXT8WGW2rx2tvtQ8FnwIOy\nEgGzppZDr9fi3dZLkffHbxd6Y+9fZLdD9Q36FF3jWARmAJhfV4k/HbsUme5Xq4YqhMXT6dR47/iV\nyNdS18tmFkSFjcGZFJNr8xhOXtLrNLj/2h5eW68LCIVQZtRL1sxube/CumXT0HJyZLWxR5v62gjd\nUjq0dh4KhWKm+6WCrldhMRUmgxEVNgZnUkyuzWN08lIgGMSv3z0TGUWWGQXJpCv7gBsXOx2K1qtz\nycrGyVg1fxKgUqGsRJB8+JAaQSfDZDCiwsbgTEmFt0uVGfWKSknGJ43JZUObTQZUmYvSDmIjoVYD\nQZmBrFoFlJYMPViEr89i0mN+XSVUAL6/oy3pw0cIQGmxgH6n/MNH/EicyWBEhY3BmSSJbZsKJ3FJ\nlZJMtSBJY10lXB7/qAdmYCgw11SVoMM2KDoNfcuCydh4S22knaXL40eZUY9fv3tG8cOHqViH/sHk\nswIrGydj7aIpLM1JRAAYnEmG2Lap6CQusWSlPodHtiCI2ahH36AHZpMBc6ab4XT78b3tbZm/eIVc\n7gD+n4eW4Ve7T+PDv3RjwBWA2ShgYVTP6fB9moqFlB8++gd9srMCFpMeC+qHs9aJiAAGZ5IgF4TC\ne3XjR3iBYBA7D12QDEYVpQZ87TNNcLh82HXkIt4/fjntetqZYh9ww+UJwFgsQNBpoXIFoJaoVAbI\nZ6wDQw8f9riSm1KBefmcamxZW8+RMhEl4KM6iVJa8zm6jOe23aexp6VDMhg11lXCVCxgT2sH9rR0\njHlgBoCyEj12HjofadoRwvAMwbbdp2PeGwgGsfPgecm9yRWlBjx17wLJcqBq1VAp0opSA5qbavCZ\n22cxMBORKI6cCUBs0le4W5LctiljsYCtu9qHM7JLBLi80s0nJltLsGl17YiaZGSD3eHBn4+Jt6qM\nnyHYtvs09kTt1Y7XWFeJQDAk2cM6BODxu+dj+uQyBmUiksXgXODkkr6ktk0V6TX4j7dO4v0Ph2td\n9yZJehp0+eDxBbD17VNpN6nIFqmRfnQ
hELmHCrVqKKFr0+pa+AMhyYcai8nAwExEijA457H40W46\n5JK+Nq2uxcnzvQm1ny/aBnHRNpjS9+lzeLH17VPYF1UdK9eVG/WRQiBy0/yhELB20RRo1Gpo1FC0\nF5yISA6Dcx6SG+2mkvGbLOlr3bJpcLqVlc1Mxlyqx4lzPSM6h0EYCmweXwAqZH9fdEmRLhJM5ab5\nLaWx1byU7AUnIpLD4JyHkm1xUipZ0tfFTkfafZrjOZw+eP3KE8AMggZeXwDma12emhfWwFJqiFz3\nHw6cw5+Oiq8VZ4rTPTQVr9dpFFdHAwCNWo3NzXWSe8GJiJJJKzj7/X585Stfwfnz5xEIBPDEE0+g\nqakp5j2zZ8/GggULIl+/8sor0Gj4H9RIpbPFSUqypK+aKqPkcTla9VBWcnQZaaWBWaUCbmmcjA0r\nZ8Dh9IoGtipzMdYumpr14Gwf8MQ0n0h1RKzXadi4gojSklZw/u1vf4uioiK89tprOHXqFL785S9j\nx44dMe8xGo149dVXM3KRNEzJFielASHZaFDQaVA/1ZzyOnEKA+QEEyxFuPfj9QCAYr30X09LqQEG\nQZ32dixBq4I/EJKdGo9vPpFsRJyJHAAiIiDN4HzHHXfgk5/8JADAYrGgt7c3oxdF0pR0hkqF2Ghw\n/swKBEMhfPXF/ejp90TWeuVaOmaKvd+DAac3UipTPshJFwvR69TwyHSA8vmTL1hLJXDFj4gzlQNA\nRBSWVnDW6XSRP//85z+PBOpoXq8Xjz32GDo6OrB27Vp89rOfTf8qKSKVtU8lxEaDv373DN6JOn84\nKC+bUw29To22Mz2wD7gh6DIftD2+IJ5+6SD6Br2yQa7P4YFH4vuqVEBTfVVM3+R4ZpMeKhVEH3LU\nKmDl/EmKE7gylQNARBSWNDhv374d27dvj3ntkUcewYoVK/Af//Ef+PDDD/HjH/844XNPPPEE7rjj\nDqhUKmzZsgVNTU2YO3eu5Pcxm4uh1WZvKtBqNWXt3KPt4bsaUVwkYP/xy+jqdaGyvAhL5kzEfetm\nQ6MRH6kpuf8aAG6vH21nukWPn+7oww+fWA0AuNLtRCAYwH/vO4cDx6+g15G5vcvhPdPhIFdcJOCB\n9bF/d0xlRbCai9BpdyV83lpehEfubkTFzpN4++A5uDyJQfxj8ycDAN7cezbh2G1Lp+HzG+Ypula5\nn1fbmW58bkMRDMLY5l2Op7/76Sjk+y/kewfy+/6T/q+xceNGbNy4MeH17du3Y/fu3fj3f//3mJF0\n2D333BP585IlS9De3i4bnO12p9JrTpnVaoLNNpC18482jy+AZTdW4dbGSTHTvz094nuPU7n/TrsT\nNpGABwBdvS60n+3CntYOtLbb0i4mkmp7yPeOXcInFk9JmBWYO92Cd450JLx/7nQLnA4P1i+fhs1r\n6/H/vt6KE+ftsA94Iklc65ZOBQA4Xd6EBK9PfWxaxn5eZ/7aPaZJYePt736qCvn+C/negfy4f7mH\nh7Qe6S9cuIDXX38dv/zlL6HXJ65xnj17Fj/84Q/xwgsvIBAIoKWlBbfddls634qiiK1tNsyoQHPT\nFFhKDRlJQkq2pr3r8AXZEpZKTLYaEwqbyOnpF090k4rv/mAQnXYnyox6WIsE3P/JGyWTtUa65SnT\nOQBERECawXn79u3o7e3Fgw8+GHntpZdewiuvvIJFixahsbER1dXVuPPOO6FWq7F69Wo0NDRk7KIL\nldja5p7WS9jTegkVGUpC0us0aKitxJ4WkRHpDEtMyc50LJ9TjXtvq8P2PWfw3gdXIuvVekENny8o\nOqLWC5qEIOfxBXBUYkvZ3qOX8afWy7CU6rF83mSsWzpVdlvTSLY8ZToHgIgIAFShkFib+dGXzemH\nfJjeAOS34nh8AXz1xf1Jp5Kbm2oSkpCU3n94ZN5yshM9A97I9HNFqR6zpprhDwZx4KPOpOdRqYZK\nWsa
zmPR4/sElkXvz+AKw9boQCASx52iH5L5lg6DB9x75WORzgWAQL/3+I+xXcC2A+M8kk4ZnNBL3\nP491tna+/N3PlkK+/0K+dyA/7j/j09qUWUq24iTrIxyWaiGSaPEj8/Ao1uHy4r3jV2Q2LsWqNhfj\nck9iDsGCemvMdel1GtRYjdi6q122oIj32kNLlbkYgWAQX3/lcErT4iP5mSjBimBElGnchJkDwkFR\nrp9weG0zmehey6mQqzzm8Q1F6WRTLAZBA4OgwZUeZ+TP4f7FqxZMxqrGyfD4YjOnlbSQjF673fp2\ne0qBGUj/Z5Kq8PQ4AzMRjRSD8xhLVo4zHMzCa5vJpJuEpHRkLkavU+OmG6vg9gbg9gYQAiJ/Xjqn\nGg0zLGg73YWvvngAX31xP7buakcgGFT8fcNrtx5fAK2nulK+PiZmEVG+4bT2GEulHGd0Na/ufrfo\nZ9JNQpLLOk5m2Zxqyb2+Le22mCIl8QU65L5vdJ9kYOhn1euQ7xstholZRJRvOHIeY3LT1VK1nZ97\n4CY8/8BNWLVgMipKDVCrhqaOm5tq0m5LqHRkDgwFTVXU92xumiL5gCFVPSw8KyD3fVfOn4R7P14f\nWXcv0mtRbhQUXWNYkXhq+nIAABFXSURBVF6D9Sump/QZIqKxxpHzGEtnK45ep8HEihLc+/F6eFZl\nrtlCfJ1tQacRDa4r50/C2sVTI9/T4wukPOruiZoVSNbtKTphLtWRs8cbgMPplW2iQUSUa/g/Vg5I\ntRVhtEy2JYzOOu7pd+Otw+dx4MOrkc5PBkGDJbOr0Nw0JeFhYNZUs2wt63gqADsPnsfmNXVJs53j\ns8hTUVlexPVmIso7DM45YKRbcTLdqlCv02BPawfebY3d3uT2BrD/w068e63Ax/yZlQgBOHaqC939\nHhgENQAVvL6A5Kg7LBgC9rRegkajjuxBFnvQkEuY0+vUKDFo0evwSn6/JXMmcr2ZiPIOg3MOSXUU\nnK1WhXIBMRwAu/s9CXWtwyPsJbMnoP28XVG3qmR7kOUS5nz+IL5413wIWjWMxQLe2Hs2YfbhvnWz\nJWuOExHlKgbnPJatVoUj2VYFACfP9cKucG04PiM9nnztaj2s5UWRwC42+yDVpYuIKJfxf648pXR/\ntNznO+1O0fcpLXgixZ5CwY9ke5D1Og2KDYldzwCg2KBLGHGzEAgRjQccOecpudFtd78bPf1uTKwo\nSTgWPxVebtRjfl0lNjfPjEyFy2WQK5FKS8hke5A9vgAGXeKj8EGXL7Idi4hoPOHIOU8lG93uOnxB\n9PX4UqF2hwd7Wjrw9VcOR6p2AUMZ5M1NNZF91AZBeQBUEpjVKmDVgslJM9L7HB7YB8SDc6/DMypl\nOYmIRhtHzjkqWQa2XGtHAGg705MwqpSbCr/Q6cDWt9tx79pZAGIzyMOdo/7Udhltp7sjCVfzZ1Zc\ny9Yefq2htgLHTtnQIxFQw0IhYO2iKUkT19gvmYgKEYNzjkklA7t5YY1kcBZLtEqW6PXe8SvYcEtt\npGBHIBjEr989E3MtDTMq0Nw0BZZSQyTwb7wl9kFCo1YlnRK3lIoH1vBDSZFeC5fHjzKjnv2Siajg\nMDjnmFQysC2lBlSkMKosM+pRbtRLJmx5fUG89nY77v/kjZLXEr83GUjcApZODXC5XtLzZ1Zi9cLJ\nMSN0pUVaiIjyEYNzDpGbdj58ohPrlk2DqXi4tnSqpT/1Og3m10lPhQPAifP2SAa3XDa43N7k+Epj\nuw5fQNuZHtnAKtVLOryfurmpBs89cBP7JRNRQWBwziFy0869Di+eefkQFs6KneJOtfTn5uaZOPFX\nOy73OEWP2weGk6yUdsuSEqkBvnaW7Bq6kp7O4QeCTJUqJSLKZQzOOSRZ20a7I3GKO9XSnxq1Gl/5\nu4V47P++B48vmHA8PB0eCIagF9SRql9i70mlbKhc9TMlRU+UPhAQE
Y0HDM45ROn+4tb2LqxbNi2S\nMKXXaVIq/Vms12HFvEmS0+EAsPXtdtHADADzZlYkJIqNpGyokl7SzMwmokLC4JxjwtPRh090SrZH\n7O534+mXD6LP4U07MIpNh8+bWYFQKISvvrhfMlAaBA2CwRB2tw6vW4+0bKiShxJmZhNRIWFwzjHh\naep1y6bhmZcPSWZWhwN3KoExfho6fjr81++eSTpq93gDOHaqW/RYskQxOeGHhZaTNvQMeGKytcMP\nH0REhYLBOUeZigUsnKW8hGZ8YIwOxIFAEFt3tYtOQ4enw5UkZQFAmVFAr8QDw0jWhePXzqP3OXPE\nTESFhsE5h8VPPZeVSO9RDgfGijJDQhGTMqMeZy/1R94rNtpW2omqcWYl2s50Z61iV/TaefS2MSKi\nQsLgnMPERpNff+WQbGAUKxwitX4cPdpOlpRlMemxoP7a2rbmNCt2ERFlEYNzHogeTcoVHQGkC4eI\niZ6GlkvKWj6nGlvW1kcCb6p7q4mIKDUMznkkEAzCHwhCr1XD4x/a5mQQNFg2txqbVteiu8+taGo6\nLH4aWi7oRmeCp7q3moiIUsPgnMOik7q0GhW+/sphXOh0xLzH7Q3A6fLjctegov3C0eKnoVMNuqns\nrSYiIuXSCs6/+c1v8P3vfx9Tp04FACxbtgyf//znY97z5ptv4uc//znUajXuuusubNy4ceRXWyDE\nOlMZ9Fp02AZF37//o6vY/9FVGAQ1KsuKACQG5ylVRjjdfkXT0Ay6RERjK+2R8+23344nn3xS9JjT\n6cQPf/hD7NixAzqdDnfeeSfWrFmD8vLytC+0kIgldYkF3HhubxAXbYMJgXj5vElYt3Qq/IEQp6GJ\niPJAVqa1jx07hrlz58JkMgEAFixYgJaWFqxevTob3y4vKK1D7fEF0HKyc0Tfa9Dlw9OfXRTZJ1wz\nqRw22wA0anBETESUB9IOzgcPHsT9998Pv9+PJ598EjfeeGPkWFdXFywWS+Rri8UCm00+i9hsLoZW\nm73RnNVqysp53V4/7P0emEv1MAiJP85AIIiXf/ch9h+/DFuvC9byIiyZMxH3rZsNjUad8N4f/Ooo\negbEy3YqZR/woKjEgOnXlURey9b954NCvneA91/I91/I9w7k9/0nDc7bt2/H9u3bY177m7/5Gzzy\nyCO45ZZb0NraiieffBK/+93vJM8RCoWSXojdLt7CMBOsVhNstoGMnlNsXVisxvXWXe0xU9Sddhfe\n3HsWTpc3odzm1l3t2K2wIpgcs0mPgNcXueds3H++KOR7B3j/hXz/hXzvQH7cv9zDQ9LgvHHjRtlk\nrsbGRvT09CAQCECjGRr5VlVVoaurK/Kezs5OzJ8/P5Vrznli68LxVbfkSmKKldtMtkc5vJbc0++G\nTqeGV6TlIwAsqLdyTZmIKI+l3t8PwIsvvojf//73AID29nZYLJZIYAaAefPm4YMPPkB/fz8GBwfR\n0tKCpqamzFxxDkgWdD2+AAD5kpjhAiBhycpnLptTja99pgnPPXATvvW5Jfjuwx/DrQsnwyAM/9wN\nggarF05mMRAiojyX1przunXr8KUvfQmvv/46/H4/nn/+eQDAT37yEyxatAiNjY147LHHcP/990Ol\nUuGhhx6KJIeNB0qCbpW5WHbfcXwBELn3VpTqce/aemjU6pikrv+1ph533lILW68LCIVgvVbpi4iI\n8ltawbm6uhqvvvpqwusPPvhg5M+33XYbbrvttvSvLIcpDbpyJTHjC4DIv1d6mlqv06DGakz3VoiI\nKAexQlgaUgm6qdShZs1qIiICGJzTpjSQplISkzWriYgIYHBOWzbrULN8JhFRYUsrW5uGhQNpvoxw\nPb4AOu3OSEY5ERHlHo6cC4TSoilERDT2GJwLhJKiKURElBvG7ZCJ07fD3F6/oqIpRESUG8bdyFls\n+nb5vMlYt3Rqzk/fKu1clSp7v
7KiKURElBvGXXAWm76VajSRK7K9HmwuVV6pjIiIxl5uDyVTpLTm\nda4JP1B093sQwvB68LbdpzNyfoOgRWOdVfRYfNEUIiIae+MqOKfSaCJXjNYDxabVtWhuqkFFqQFq\nFVBRakBzUw2rjxER5aBxNa2dSqOJXKG0icZIsfoYEVH+GFcj53DNazG5On0bfqAQk40HinwrmkJE\nVIjGVXAGxKdv71gxPWenb/PxgYKIiLJrXE1rA+LTtzWTymGzDYz1pUliNyoiIoo27oJzWD41j8jm\nerDHF8DlrkEEfAGOwomI8sS4Dc75KJMPFDF7pwc8sJhYS5uIKF8wOI9TrKVNRJS/OIQah/K1GAsR\nEQ1hcB6H8rEYCxERDWNwHodGe+80ERFlFoPzOMS900RE+Y0JYRmWrbaPqeLeaSKi/MXgnCHZbvuY\nqui90xpBh4DXxxEzEVGe4LR2hmS77WO69DoNJlaWMDATEeURBucM4NYlIiLKJAbnDODWJSIiyqS0\n1px/9KMfYd++fQCAYDCIrq4u7Ny5M3L84sWLWLduHebMmQMAMJvN+MEPfpCBy81N+dhHmoiIclda\nwfnzn/88Pv/5zwMA/vM//xPd3d0J77n++uvx6quvjuzq8kR461J0ucwwbl0iIqJUjShb2+/347XX\nXsMvfvGLTF1P3uLWJSIiypQRBee33noLH/vYx2AwGBKOdXV14Qtf+AI6OzuxefNm3HHHHSP5Vjkv\nm20fiYiosKhCoVBI7g3bt2/H9u3bY1575JFHsGLFCtx///149tlnUVNTE3Pc4XBg586duOOOOzAw\nMICNGzfitddeQ1VVleT38fsD0GoZzIiIiJIGZylOpxMbN27Ef/3XfyV976OPPop77rkHS5YskXyP\nzTaQzmUoYrWasnr+XFfI91/I9w7w/gv5/gv53oH8uH+r1SR5LO2tVCdOnMD06dNFj+3fvx/f+ta3\nAAwF8RMnTuD6669P91sREREVlLSDs81mg8ViiXnt+eefx4ULF9DU1IS+vj5s2rQJn/70p/Hggw9i\nwoQJI75YIiKiQpD2tHamcVo7ewr5/gv53gHefyHffyHfO5Af95+VaW0iIiLKDgZnIiKiHMPgTERE\nlGMYnImIiHJMziSEERER0RCOnImIiHIMgzMREVGOYXAmIiLKMQzOREREOYbBmYiIKMcwOBMREeWY\nggjO3d3d+Pu//3vce++9uPvuu3Hs2LGxvqRR4/f78eSTT+Kee+7BXXfdhcOHD4/1JY26gwcPYunS\npdizZ89YX8qo+uY3v4lNmzbh7rvvRltb21hfzqhrb29Hc3MzfvnLX471pYy6b3/729i0aRM2bNiA\nt956a6wvZ1S5XC48+uij2LJlCzZu3Ji3/+61Y30Bo+HNN9/E3/7t32LdunU4ePAgvv/97+Pll18e\n68saFb/97W9RVFSE1157DadOncKXv/xl7NixY6wva9ScP38eP/vZz7BgwYKxvpRRdfDgQZw7dw7b\ntm3DmTNn8NRTT2Hbtm1jfVmjxul04hvf+AaWLl061pcy6vbv349Tp05h27ZtsNvt+NSnPoWPf/zj\nY31Zo2bPnj3/f3v3D5JaFIAB/BNvRtHfK9ewLVqKIlqaoqJoimgTWguChhqL4g7NRrQooZiDQ2Bo\nBEFDEVE0BOGoREtLiFEXScqSQHhDcHnCe5EP3j3q+X7TuWf6DlzOxz2IB/39/VhYWEA6ncb8/DzG\nx8dFxyqbFOU8NzdnjjOZjFTXV87MzGB6ehoAoKoqXl5eBCeylqZp8Pv90HVddBRLXV9fY3JyEgDQ\n3d2NXC6Ht7c3NDU1CU5mDYfDgVAohFAoJDqK5YaGhjAwMAAAaGlpwcfHB4rFIux2u+Bk1piamjLH\n1bzfS1HOwNf904uLi8jn84hEIqLjWKaurs4cRyIRs6hl0dDQIDqCEIZhoK+vz3xWVRXPz8/SlLO
i\nKFAUaba3Ena7HY2NjQCAeDyO0dFRaYr5d7Ozs3h8fEQgEBAd5Z/U3Nsbi8UQi8VK5paXlzEyMoKD\ngwNcXl5ifX29Jo+1v1v73t4eUqlU1b6oP/Hd+mXHf+mVz9nZGeLxeE3udT8RjUZxe3uLlZUVHB0d\nwWaziY5UlporZ4/HA4/HUzJ3c3ODXC6H1tZWjI2NYXV1VVC6/+tPawe+Suv8/Bw7OzslX9K15m/r\nl5HL5YJhGObz09MTNE0TmIisdHV1hUAggN3dXTQ3N4uOY6lkMgmn0wm3243e3l4Ui0Vks1k4nU7R\n0coixa+1T09PcXh4CAC4u7uD2+0WnMg6Dw8PiEaj8Pv9qK+vFx2HLDI8PIyTkxMAQCqVgsvlkuZI\nW3avr6/Y3NxEMBhEW1ub6DiWSyQS5mmBYRh4f39He3u74FTlk+JWqmw2i7W1NeTzeXx+fkLXdQwO\nDoqOZYnt7W0cHx+js7PTnAuHw3A4HAJTWefi4gLhcBj39/dQVRWapklzzLe1tYVEIgGbzYaNjQ30\n9PSIjmSZZDIJr9eLdDoNRVHQ0dEBn88nRVnt7+/D5/Ohq6vLnPN6vSV7QC0rFArQdR2ZTAaFQgFL\nS0uYmJgQHatsUpQzERFRNZHiWJuIiKiasJyJiIgqDMuZiIiowrCciYiIKgzLmYiIqMKwnImIiCoM\ny5mIiKjCsJyJiIgqzC8iivHPF8qqogAAAABJRU5ErkJggg==\n", - "text/plain": [ - "\u003cmatplotlib.figure.Figure at 0x7f7a18dfb8d0\u003e" - ] - }, - "metadata": { - "tags": [] - }, - "output_type": "display_data" - } - ], - "source": [ - "# Plot the Data (Optional)\n", + "assert f(pi/2).numpy() == 1.0\n", "\n", - "import matplotlib.pyplot as plt\n", "\n", - "plt.scatter(inputs, labels)\n", - "plt.show()" + "# grad_f will return a list of derivatives of f\n", + "# with respect to its arguments. Since f() has a single argument,\n", + "# grad_f will return a list with a single element.\n", + "grad_f = tfe.gradients_function(f)\n", + "assert tf.abs(grad_f(pi/2)[0]).numpy() \u003c 1e-7" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "JaFHyAG9nDET" + "id": "v9fPs8RyopCf" }, "source": [ - "## Step 2: Define our TensorFlow variables\n", + "### Higher-order gradients\n", "\n", - "We'll use Keras's object-oriented [`Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense) layer to create our variables. In this case, we'll create a `Dense` layer with a single weight and bias." 
+ "The same API can be used to differentiate as many times as you like:\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, - "base_uri": "https://localhost:8080/", - "height": 34 + "height": 276 }, "colab_type": "code", "executionInfo": { - "elapsed": 332, + "elapsed": 730, "status": "ok", - "timestamp": 1525154229931, + "timestamp": 1527005655565, "user": { "displayName": "", "photoUrl": "", @@ -190,54 +120,61 @@ }, "user_tz": 420 }, - "id": "z9r-ZeyrXu3A", - "outputId": "e19a698e-5892-4fcd-80d3-1394605ee72c" + "id": "3D0ZvnGYo0rW", + "outputId": "e23f8cc6-6813-4944-f20f-825b8a03c2ff" }, "outputs": [ { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAEDCAYAAAAhsS8XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXd0HNX5sJ/ZXrTq3ZLV3IvcDdgGGwOm2WCbHhJa6C2B\nUBISQioBfoQPkjhACA4QCIQSDITQbGMbsHHvVbZ6s7q0vc18f4xmJVltJa0q+5zDOXhn9s7dqzvv\nfe/briBJkkSYMGHChBkxqAa7A2HChAkTJrSEBXuYMGHCjDDCgj1MmDBhRhhhwR4mTJgwI4ywYA8T\nJkyYEUZYsIcJEybMCCNkgl0URVasWMHtt98eqibDhAkTJkwvCJlgf+2118jJyQlVc2HChAkTppeE\nRLBXVlayceNGrrjiilA0FyZMmDBh+kBIBPvjjz/OQw89hCAIoWguTJgwYcL0gT4L9g0bNhAfH8/E\niRMJVycIEyZMmMFH6GutmGeeeYYPP/wQtVqN2+3Gbrdz3nnn8dRTT3X6HUmSwtp9CKittvH8UxsQ\nxZY/4aXXTGfa7PRB7NXAU1dj5y9PrIfmYUgeFcnya2aQmBI5uB0bYE5WNPHS/9uE6JcHYukVucw8\nPWOQezXw7NhcyCfvH0Bqfi+uumkO4ycnD3KvBpY+C/bWbNu2jdWrV/PCCy90e291tTVUj+03EhIs\nQ7qfWzfls2tzMTNPH01UrJEv/3eU5LRIVnx/5mB3rUP6azw3fnaMQ7vLOX1RNrVVNvIOVZGeFcPS\nq6YNmT6GmlP7KYoi/3ltF9WVNhacO4btXxfi9fi5+Mpc0jJjhkw/+5t9O0r5Zu1xDEYtpy/KZuOn\nR4mOM3HlTbNRqTo3UAynv3swhOPYhymSJJF3sAqtTs35l05mQm4K6VkxVJY2UVdtH+zuDRgOu4ej\n+yqIjDYwbW4a514yiYTkCMqKGnC7vIPdvQFjz9YSqittjJuSxNTZaVywcgoAX3xwCL9PHOTeDRyH\ndpej0ai47PqZTJyWwoTcFOprHBzdf3KwuzaghFSwz507NyhtPUzfOVnehLXRRdbYeLQ6DQATp6UC\ncGhv+WB2bUA5sLMMv19i2pz0gEaWNS4BUZQoOlE3yL0bGDxuHzu+LsRk1jH/nDEApI6OZtL0VFxO\nLyfLmwa5hwNDU4OT+loHozJiiIw2AjB7QSYajYrtXxXg9foHuYcDR1hjH6bkHawCYOzkxMBnmWPj\nMJq1HDtwEt93
YBJ7PT4O7CrDYNQwPrfFhpo1Lh6AgmPVg9W1AaWyrBG/X2JCbjIGozbweXqWbIIp\nLawfrK4NKMX58kI+Oic28FmERc/UOWnYbR7yDn53tPawYB+GiKLI8SNVGEzaNvZTtVrFhKkpuF0+\n8o+OfKGWd7gKt8vHlJmj0GrVgc9j4kxExRopzq/7Tixw5cWNAKSkR7f5PHV0NIIApUXfEcHevEMb\nnR3b5vPxU5IAqChpHPA+DRZhwT4MKS2sx+XwMmZCYjuH0MRpKQAc3lsxGF0bUJQXNWdiYpvPBUEg\ne1w8Pq9IyXdAW60oaUAQ5Gig1uj0GhJTIqkqb8Lj9g1S7wYGn89PWXE90XGmgBlGITrWhN6gobIs\nLNjDDGE6MsMoRMUYSUiOoLKsacQ7zaoqrGh1amLiTO2uZY1LAKDgWM1Ad2tA8Xr9VFVYSUi2oNNr\n2l1Py4xBkqC8pGEQejdwVJQ04vOKZJyirYO80CeNiqSpwYXD5h6E3g08YcE+zJAkiZKCOswWHUmp\nHcdpJyRbEEWJupqRGx3jdvloqHWQmGLpMCciMcWCOUJH0fEaRHHkLnBV5U2IokRKelSH10dlyOaZ\nkW5nD5hhcuI6vJ48Sh6fyrLvhiM5LNiHGQ67B6fDS2JyZKdJXgnJcqxrdeXQj8vtLcpv6ywJSRAE\nMsfF43L6RvTLXF4sa+Kn2tcVkkdFodGoKCvqu8b+zjtv8f3vX8Fvf/ton9sKNUX5tWi0KlLSOl7g\nFDPVSJ4LrWm/dwszpKk5aQMgLimi03u+C4JdCeFLTOk8YSMlLYqDu8qpOWkjtRPBN9wpb/YzpHai\nsas1KlLSoygpqMdhc2OK0Pf6WWvWvMsf//hnkpNTet1Gf9BY76Sxzknm2DjUmo51VXlnBye/I3b2\nsMY+zKitkgV7fGLngj02wYxKLVBdaRuobg04VRWyYO/MHAUQlyCPkTJmIw2/X+RkeRNxCWb0Bm2n\n943KaA577IPW/vTTf6C8vIyHH76ft99+s9ft9AeKUzQto/MMW61OQ1xiBFWV1hHve4Kwxj7sUDT2\n+C40drVaRVyCmdpqG36/iFo9stZvSZKoKrditugwWzrXQKNijahUwojLxH17/XF25VXj8fhx+Hzo\nGh1s/+vmTu8X/SJ2RA59egTDxhMd3jNnQiJXLh7TaRsPPPAztm79lj//+UUiI4dWDZ76Zl9SXBfK\nDshmqZqTNqpPWgM295HKyHrjvwPUVNnQGzRERHa9pU5ItiD6pREn1ADsVjcOu6fbIl9qtYqYeBN1\nNfYRWXlU0Ty7W7hVzddFf181VYlApbUhhDLHYxPMXd6XnCbPl5PfATt7WGMfRng9PhrrnM2JJ11X\nx5Tt7BVUn7QGbO4jhZPliuO0+98VlxBBbZWdpgYnUTHtwyKHI1cuHsNdV83g1b9upuhELdf/cG63\ntvN/v7ydpgYnN99xxoirrFpX48Bk1rXJuu2IlsiYRqYxsiughjX2YURts2bSlRlGocWBOvLsy4p9\nPZiyvLGJshZXWzXydi71tXYMJm1QDtHYeBM+r4itaWTFcXs9PqyNLmLiu1+0IyL1mCN0VJY2jcgd\nXGvCgn0YEbCvd2NLBIiNN6NSCdSMwMiYqoqeaeww8hyoPq9fFmixwe1CouPkBa6+ti8L3NDT9Otq\nHED3ZhiQQ2ATUyNx2D3YbZ7+7tqgEhbsw4hgHKcKao2K2AQztVWyA3WkIIoS1ZVWYuJNHWZankpc\n8wtfO8J8DbLfAKI7yLrtiNhmjba+WRD2hnfe+YDIyKHldAzY1+O7F+xAIEu5sa734zAcCAv2YURt\nlQ2VWgj6ZU5ItuD3S4GogZFAU4MTr8dPQlJwfgNThA6DUTPinMi11fIi31E5hY5Q5kx97cgSaMrc\nDkZjB7luDEBDnbPf+jQUCAv2YYIoitRW24mNNwcdvjgS7eyN9fILGR1r7OZOGU
EQiE2IoLFeXhBG\nCjXNpqXoYE0xMSYEoa+mmKGHUjYjJi44wR7VPG/CGnuYIUFDnRO/TwzKDKOg3DuS7MuNzZpWVJAC\nDVrMMSOpdk5AsAepsas1KiJjjNTXOEaU47Cuxo7ZokdvCC7AL6yxhxlS9MRxqqBotU0NI2cSN9bL\nmlZUTHAaO7Qkrijmi5GAYpazRBmC/k5snBm3y4fTMTKODHS7vNitnqDNMAAGoxaDUUNDWGMPMxRQ\nbMTdZde1Rm/QojdoaGxw9Ve3BhzFFNMTwa68+HUjJORRkiRqquxExciZtcESHXCgjoxxCETEBBHq\n2JqoWBNNDc4RFVRwKn0W7B6PhyuuuILly5ezbNky/vKXv4SiX2FOQdG6eyLQlPubGpyI4sjYfjfW\nOzGatEFFxCgoERMjJTLGYfPgcfuCdpwqxI4wB2rAcRpkRIxCdIwRSQJr48hReE6lz4Jdp9Px2muv\nsWbNGtasWcOmTZvYt29fKPoWphVNDU7UGhWmCF2PvhcZbUT0S9itwz8xxe8XsTa6Ag6wYNHq1ETF\nGKkbIaYYRTAHa19XiGkWgL0NeWxdtvebb77ijTdeDfq7lZUVfPHFp0Hd+/jjv2bjxvXd3te6lMCa\nNe/x2Wf/C6r9qICdXR6HTz75L9XVLUdJPvnk7ykqKgyqraFKSEoKGI3yi+bxePD5RvYRXINFU4OL\nyChDj9PBFQ2/sd7ZI3vsUMTa6EKS6FVpgMgYIyX5TjxuX4+0/aGIIpCCTU5SUByHvY2MObVs7/z5\nZ7a7x+/3o1ar231eXl7GF198xnnnXdCrZ3eE4gyPjDawfPllQX9PGQfFEf+//33EzJlTSUrKAODh\nh38esj4OFiGZ4aIosnLlSoqLi7n22mvJzc0NRbNhmnG7vLhdvnZnWgZDZLQszGVTTudlTYcDgYiY\nHpqjACKjlHFw9SiyaCjS0EuNXatTY4nU98oU07ps78UXX4LFYuHIkUPcd99DPP74r7FYIsnLO8r4\n8ROZP/9MnnvuaQRBQKvV8OyzL/Dii6soKirkppuu5YILlnLllde0af+ZZ55k9+6dpKSktonaOXr0\nCH/+8zO4XC6ioqL5+c8fIzY2jnvuuQ3RFUt1XSGxH5Zht9sxmUycccYCfve7x3jpJXk3UVlZwcMP\n38+rr77JK6/8nW+++QqHw4lWSmTS9HvYsGEdR44c5sEHH0Sj0fL886t54IF7ufvu+zh8+ADl5eXc\neee9gKzZHz16hB//+AE+//wT3nnnLfx+H5MmTeEnP/npkKrBExLBrlKpWLNmDTabjTvvvJPjx48z\nZkznJUDD9IymZufnqYf0BoMiBEdCZExDLyJiFJQFztroHPaC/Vv315ROK+RP+ZsRCnomTJxjPfh8\nInnfbGgjiGYkTmXlmKWdfu/Usr2ffPLfNt8vLS3mT396AYCHH76Pn/zkp0yZkktEhIamJg+33343\nb731Ok8++f/atb1x45eUlpbwz3++TU1NDd///hUsXXopPp+PZ599iieeeIaoqGjWrfuCF19cxc9+\n9kskScLpsHPdlT9l6VXTWL36bwBkZGTi9/uoqCgnJSWVdes+55xzzgPgssuu4oYbbsbn9XPj9+9k\n566t3P/Idbz33tv88pe/ICGhbWGwRYvO5fbbbwwI9nXrPuf6639IUVEh69Z9zgsvrEatVvPHPz7J\n559/wvnnX9Sjv0V/EtI9aUREBHPnzuWrr77qVrAnJAyPioNDoZ/VzdUMU9OiO+1PZ58b9HLFO5fD\nNyR+S1/64HHKCUaZ2fE9bidttLxb8fukbr87FMapK9xuH6oIAU0v6uyrNSp8PhEkUKtbBLPJqOv2\nd6tUEBdnJjragsViwNj8HYNBy8KFSwPfP/30uTz//HMsW7aMJUuWkJSURHS0CZ1O0+Ezjh07wIoV\nl5KQYCEhwcK8eWcQGWnEZquhoCCfBx+8F0
mSEEWRxMREEhIsCAhkpE4nMTmShAQLZrMes9lAQoKF\npUsvZuvWTdxyyy1s2rSeZ599loQEC7t2bebll1/G6XRSVV9FWXkaCQkWtFo1ktQyL7RaNTExJsaO\nTSczM4OKigJGjx5NeXkpixcv4I033uD48WPccceNSJKE2+0mLS15SM2bPgv2uro6tFotFosFl8vF\nli1buPXWW7v9XnX10C9OlZBgGRL9LC2WDyJWaYQO+9NVPyVJQqNVUV1pHfTf0tfxPFkhn5QjIva4\nHalZhlWUNnb53aHyN+8Mr9dPbN5YZo45gwvPn9Lj7x/aU87GT49x9sUTmDA1uc217n63KErU1trw\netVYrS6cTg/V1VZcLi8+X8vcXLHiGqZNm8uWLV9z5ZVX8swzq2hocODx+Dp8htPpwWZzB6653V6a\nmpzU1dnIysrm+edXt+uny+VFY9Gh0amorrZit7uRJDXV1VZOO+0sHn30p8yaNQ+/X8JojKGsrJZf\n/erXrF79OvHxCTx8/2+wNjgpL6vH6/W3+f1er5/6egfV1VYWLDibd99dQ0ZGJvPnL6S62orV6mTJ\nkou47ba7ejR+oSDYxaPPUTHV1dVcd911XHrppVxxxRUsWLCAhQsX9rXZMK1QzCi9McUIgkBktJHG\nBuewzzhsqHNiMut65fxsbYoZziip8PGJPQvxU1Ac6LZ+DPUrKyslOzuHa6+9nilTplBcXIjJZMZu\n79hpO23aTNau/RxRFKmpqWHXrp0AjB6dSX19AwcO7AfA5/NRUJAPtBwy0tE7MWpUGmq1ilde+TuL\nF8tmGI/HgyBAZGQUDoeD44W7geY5ZTJhs3UcMbVw4WK++mpDG5POrFlz2bBhHfX1ssLV1NREZWVl\nr8aqv+izxj5+/Hjef//9UPQlTCcoSTmW6N5FtcihfnacDi8mc8/CJYcKfr+IrcnV6yPN9AY59r1p\nmMcuK6nwSjninqIIdmtTb8YhOHv+O++8ya5dO1Cr1YwfP47TT58PgFqt4cYbv8eFFy5r4zxduPBs\ndu3azvXXX016egYzZswCQKPR8LvfPcmzz/4fNpsNUfRz5ZXXkJWVjd8vtfk9p7J48RKef/5P3HLL\nnYBsJl62bAXXXXcVKSmpZGeNw14vv1sXXbSMxx57DK1Wx/PPr27jO7BYLGRmZlNcXMiECZMAyMzM\n4pZb7uT+++9CFCW0Wi333/8QycnJHfZlMBCkQVLjhvJ2V2GobMtff/5b/H6R6++e1+H17vq5ef0J\n9m4rYcX3Z5CcNnhlV/synvW1dt56aTsTpiZz9sUTetXGO//YQUOtg5t/cmanEQxD5W/eGbu/Lebb\nDflcddOcwCEiPcHn8/PS018xKiOaS66Z3g89bEt/jeen/zlAwbEarr9nXq+UleL8Wj5+ez9zFmQy\ne0HmkP+7KwyYKSZM/6JoqpG91NahVSz7MI6MCZQS6GFyUmssUQZ8PhGnffgesqBkS0b38pg/jUaN\nyawb9lmX1kYXGo0Ko6nr4/A6I1AMrH5kZOGeSliwD3FsTW4kCSKjei/QomLkRUERjsORvsSwKyj2\n2OFsjrE1m1D6Mg4RUfrmeTV8fS7WRheWXiTsKUREGlCpBJrqh+9c6IqwYB/iBBynoRBoI0Fj78OB\n1C3JWsP3ZbY2udHp1d0e3NwVkVEGRFEatsfDuV0+3C5fnzKpVSoBc4QOm3X4zoWuCAv2IU5LclLv\nJ7GinQxrjT0g2Hs/DgHH4TBd4CRJwtroIiKyb6UhlO/3Z2RMf6LsWvpaIiMi0oDd6hmRVR7Dgn2I\n05dQRwWVSsASbRjW205rkwuDSYtW1/tAroDGPkwFmsftw+vxY4nU96kdRSAO13FQ+t3bKDGFiCh5\nHEdCgbxTCQv2IU6LYO/bJI6KNuJyyjVnhhuSJGFvchNhCZFAG6amGGujLIAi+qipBmLZexXyOPhY\nlV1sCD
R2kP1YI42wYB/iNDXI3v++xp8PZzu72+XD5xOJ6KOmqtGoMUcM34gQJfbc0kdTjPL94TYO\nu3fv5KGH7gv0uzNTzD333MbRo0e6bU/Z+diaXPzpT39i587tverX22+/idvdsjg89NCPsdsHt0R0\nWLAPYSRJoqnBiSW6995/BWXbaRuG207lRY6w9L3ssCXagK3JNSztqoqG3dcFztI8F4abYAcQBLoV\n7MGiaOyNDU7uvfdeZs2a06t23nnnTdzulrF86qlnMZsHt9Dc8C5MPcJxu3x43H5S0ntvX1dQzBjD\n0Z6oLEZ9FWggh41WljZht7r75LcYDBRTTF8FmlanwWDU9Eiwu1wufvnLn1JdXYUoilx//c0sXnxu\np2V1y8pK+b//exybrQlJEvjtb58gNXUUq1Y9x9atmxEEFddddxPnnHMeu3fvZPXqvxEVFU1BwQkm\nTJjIo4/+FoBvv93Mn//8DNHRMYwdOx6ApkYnGq0qEBnkdrt5/PFfU1RUSEZGBh5PS7TP9u3f8vLL\nf8Pr9TJqVBqPPPIYBoOBK664hLMXXcAXm7/Er1vGV9veZNas09HrDfzvfx/xm9/8AZB3Cf/+9xs8\n8cQzPP30Exw9egi3282iRedw00238u67b1FTU80999xOdHQ0zz33PFdccQkvv/xP3njjNZKTU1ix\n4nIAVq/+G2azmauuupZ//euffPnlF3i9Ps46axE33dR9fa2eEBbsQxjlxeurLRFaBPtwtCfam0In\n2C2tQh6Hm2BXNHb/hv+yY9WePu065tg8iH6J/IffBsAyew4JV1zd6f1bt24mPj6Bp556FgCHw95l\nWd1f//oXXHfdjaxYsZTy8jpEUWTjxvWcOJHHa6/9m/r6Om6++TpmzJgJQF7eMV5//R3i4uK4444f\nsn//XsaPn8hTT/2eP//5RUaNSuOXv/wZ0D6Gfc2adzEajbzyyr84ceI4N910LQCNjQ28+upqnnvu\nr+j1Bt5441Xeeut1brjhZvk3R5pZMu8uRqfHcrSkVB6XOafx9NN/wO12odcbWLfuCxYvXgLAbbfd\nhcViQRRFfvSjO8jPP87ll1/Nv//9ZqCcsYzcr3PPXcJzz/0xINjXr1/LM8/8me3bv6W0tJiXXnoN\nSZJ4+OH72bt3D9OmhS4TOCzYhzC2EAo087DW2BUTRN8XuMCBG43D7+ARa5MLlUpAq1XTVxe4oBKQ\n/CKSJJs3uiM7ewyrVj3HCy/8hTPOWMC0adPJzz9Bfv4J7rvvruayuhLx8Qk4HA5qaqpZsEAuBqjV\nypr1vn17OPfc8wGIiYllxoxZHD58CJPJxKRJk4mPjwdgzJhxVFRUYDAYSU0dxahRaQAsWXIhH3zw\nH3kXm9ayKO/Zs5srmhelnJwxjBkzDoCDBw9QWJjPHXf8EEmS8Pl8TJkyLfC9JUvO57//ymuj7KjV\nak477Qy+/vorFi1azJYtX3PXXT8CYN26z/jwwzX4/X7q6mopKCggO3sMIDX/pyD//9ix42loaKC2\ntob6+noiIyNJTEzinXfeYvv2bdx007VyXXmni9LS4rBg/66gCGFzH6NBWrcxHCMhrMoCF4JxaHEi\nD79xsDW6MVv0JF55NQl33dKn2ibfrDvOvu2lrLxuJkmp3Z/MlZ4+mpdffp0tW77hxRf/wty5p3PW\nWYvIzs5pV1bX4ei4iuOpma6t/60IfwC1WoXf3/HS5fPKu5RTzVGtfVBKu5IkMWfO6Tz22O86bMto\nNBIRaWj3TixefB7/+c/bREZamDhxMkajkYqKct566w1efvmfmM0RPP74r/F4uleSzj77HL78ci21\ntbWcc86SQL9+8IMbuOSSFd1+v7eEnadDmIBtOQQCTa2WD8Iejs5TW5MbQQCzpe+VKZXdj32YmaT8\nPhGH3ROyc2t7GvJYU1ODXq9nyZILuOaa73Ps2NFOy+qaTGYSE5P46qsN
AHi9XtxuF9OmzWTdui8Q\nRZH6+nr27dvDpEmTO31mRkYmlZUVlJeXAbB27Wf4mmuntx6H6dNn8PnnnwCQn3+cEyfyAJg8eSr7\n9++lrEw2s7jdLkpKits8IyJSj8ftlw8faWbGjFkcO3aUDz9cEyjVa7fbMRqNmExm6upq+fbbzYH7\nuypJvHjxeaxb9zkbN67n7LPPAeC0007n448/xOl0No9tdaAEcKgIa+xDmFBq7CAvEDVVNiRJGlLn\nM3aHvcmFKUKHStV3PSSwcxlmC5xijuprcpKCEvIYbJJSfv5xVq16DpVKQKPR8sADP+uyrO4vfvFr\n/u//HueVV15CENT89rdPsHDh2Rw8uI8bbrgGQVBx5533EhMTS2FhQZtnKXNTp9Px4IOP8OCDPyI6\nOobc3OmcrKiT+99KsC9ffjmPP/5rbrjhe4wdO45Jk+QDSKKjo3nkkcf41a8ewePxIggCt9xyB+np\no1Hs4Ip5T1kwQD7qc968BXzyycf84he/BmDMmLGMHTueH/zgKlJTR5Gb22LSueSS5TzwwL3Exyfw\n3HPP07q8cVZWNg6Hg4SEJGJj4wCYM+d0iooKuf32GwEwmUw8+uhviYkJnWkwXLa3Cwa7lOcH/9pD\neXEDtz54FuoujkELtp99LXXaV3oznqIo8dLTm0hIsbDyBzND0o9X/vQNOr2G7912Wkj6OBCUFtbz\n0Vt7mTUvg7lnZfW5nzUnrbzzj51MmZnKmUvGhbCnbQn1eH79RR77d5Zx+Q2zSEju+1F0u7YUsXVj\nAVf/cC4xCb2vQzRQhMv2jgDsVjdGs7ZLod4ThmPIo8PuQRSlkJijFMwWPXbr8KpuGKr6KAqBujnD\nLJY9lKGvcjtKlNTwS9zrirBgH6JIkoTN2vc0+tZERA6/kMdQJeW0JsKix+cTh1V5hUCSVojGQT5R\nSh1wTA8X7FY3KrXQp+qWrVHGczifVdARYcE+RHG7fPh9Ysjs69CqNsYwKlVqDziQQ6OpwvAM/VQW\n41Bp7CDb2a2NrmG1c7Fb3Zgj9CHzEQV8DcO48mlH9FmwV1ZWct1113HRRRexbNkyXnvttVD06zuP\nLYQhfgrDWaCFUmMfjg5UpU5MSOdDpB6vx4/X4+/+5iGAKMqRQaGIjlIwRegQhJGnsfc5KkatVvOz\nn/2MiRMnYrfbWblyJfPnzycnJycU/fvOEuqIGBie2afWfjDFBBY42zAah0YXRpMWjVYdsjbNES0L\nvU4/9APkHHYvktTS71AghwHrh/VZBR3RZ409ISGBiRMnAmA2m8nJyaGqqqrPHfuuE8oYdgVThKzp\nDCfB3qKxh84EEXAiD5NxCPhbQjgGMPwWuP5QdkAOIW1qdCGKw8ck1R0htbGXlpZy5MgRcnNzQ9ls\nv2I/sA9nfv5gd6Md/TGJ1WpV83Fg7V9kT2UFjsOHQvasUKE4y3p7aHFHKFv51uMgSRKi14vo9SL5\nhpZT1enwIvqlkO5aAMzNC73d2lI0S3S7se3Zjd/WtuyszWbj/fffDfxbKaHbEU8++XuKigq7fX5X\nbbRGKcMbeCeC0NhffvnFoMvwRkQakEQJR/MC9/bbb+JyOrHu2IansmJIlOHtKSHbf9ntdu69914e\neeQRzGZzt/cHG4/Z3xT+8xU8dfWkLL2IjB9ci1rfdtIMVj+V1OnRmXHExoduPKNiTVSWNRIfFwGS\nSPlHH1P15QYchUUApF9zFaOvvrL3HQ9RPxUcNg9R0UYSE7tPew+WSItcVsDr8ZOQYEH0ejn8uz/Q\nsGcvxwFUKjK+/z3SLuu/lO+eUOlpBCA+IaLN+PV1bqamRcv/I8lt+Z1ODj3zJE2HDiOo1URNyyV1\n6UXEzJqJ293IRx/9h1tvlZNqoqNN6PWaDvvw9NNPtPm3co8oim2SzLpqozVarZqYGBP2etlhmjIq\nqsvviKLIT3/6QPcD0ExisoXjh6vQ
qNUkJFh49+03mJF/HOn4CZIvWMI//vFy0G0NFUIi2H0+H/fe\ney+XXnoGQVm+AAAgAElEQVQp5557blDfGSpJIEm33U3l6r9R8dHH1Gzbwagf/wRdQiIwuMkqNVXy\nc90eb7d96Ek/DUYNol+iuKgW19frqHn3bVCrMU+bjqesjJI3/43D4SFu2aV9/g196SfIafQ2q5vU\n9KiQ/x10ejX1tQ6qq61UvfUGDXv2ohuVhikhDmt+AUX/fANfXDLmyVNC+tzeUFoip5urNEJgHEIx\nN33N1SGrKps4WVpD2XPP4Dx2FOOEiYgOBw27dtOwZy8Zj/2Gx//2V4qLi1m27BJmzz6NM86YT0ND\nE7fddme7Urv33HMbd999H+PHT2DJkrO46qpr2bbtW+6++8fY7fY2ZXg9Hl+733FqGV673Ul9vYP6\nCh8V1cf42aOrEVRSuzK8F198Cdu3b2XlyivZunUz8+efGVQZ3sYGG3GWCZQUTeTdF/8fVSdP8ugX\nnxEdHcOq85exaNHZg16GVyHYxTwkgv2RRx5hzJgxXH/99aFobkAxZmeT8cvfUPOfd2hY+wU1775N\n6h13D3a3sFvdGIyhdZZBS9hgQ1k19o8+QB1hIePXv0MTFYW3tpbS/3uC2g/eR9DpiD3/wpA+u6co\ntt9Q25ahJUnJumsnDWu/QJeSyuhHHiUpLZ6SbXspfuL3VP79RTIe+y2a6OiQP78nKONgajZBbF5/\ngsK8GsQ+HhaimJSP7KvEsXsHWXlHiZg9h5RbbkdQq7Ht3kX5qj9R9cY/uf32uykszGf16jcAWUB2\nVGp36tRpbZ7hdDrJyRnDD394Gx6Ph6uvXtGuDO+pdFaGt7qqhgN5a3nxpb+RkBTdrgyvTqdn1aqX\nALnMMARXhvfEkZM89PC9HDt8mNOLi3lPq+WPj/6G1IVnN4dVDn4Z3p7SZxv7zp07+eijj/j2229Z\nvnw5K1asYNOmTaHo24Ch0ulIuOp76DOzsO3cgfuUQkEDTX8kJykoNvvyz9Yjud3EX34lmqgoALRx\ncaQ9+DDqqGhqP3i/nZ11oOmPUEeFCIset8tH+auvIOh0pNx+F6pmM5whK5uEK67Cb7VS8dILSOLg\nnrak2MAVm3ioUELBRb+Ir64Oc+40Um6+DUEtKxMRM2ZinjET57Gj2Hbvavd9pdSuIAiBUrunotFo\nWLhwMQBFRYXtyvB2xJ49uwPXWpfhPX7iCI22kzz007u48cbv8emnH3Py5MnA95SCXa1pXYbX7/ez\nZcvXnHmmXE543brPuOmm7/PL395Do/Ukx3ftQLTZEIwmLDNntYqVb1+G9/jxvEAZ3m3btgbK8N50\n07UUFxdRWjq4MqTPGvusWbM4fPhwKPoyqAiCQPzyFZQ9+wy1H35A6l33DFpfPG4fPm9ok5MUApl2\nReUk5owhct78Nte1cfHELDmfmnf+TeNXm4i98KKQ9yFY+iPrVEFxwDk9kPW9a9GPGtXmevQ55+E4\nchj7nt3Y9+4hYkZo6tT0BsWpp8yHeYtzuPSq6SExT73+/Ld4GxsZW7uDhB8/jqBpKxISr7qGwoMH\nqPv4w3YLXDCldnU6Xa+SiToqw+tyekhLnsA//vFih98xGjs+OKW7MrySX8Odt/4Ya1UtKrMZVSft\nwOCV4e0p4czTVpgmT8WQnYNt905cxUWD1g8lWsPcLwJN1vpcmggSr/0BQgcVE6POPAtBr6dh/dpB\njRCx2xRNNfTjYDLJWqkvbhSR889sd10QBOIvXQlA49eDuwPtL40dwKgRcUtajJNz0aWktruujU8g\n9qKlaB0ObLW1PW6/dVZrR2V4O6KzMrwW4yiq6gq6LMPbEd2V4XW6rZRXH8EraIg9/0LM5oghV4a3\np4QFeysEQSDuUnnVrf1wzaD1w94PMewK2qZqAPyJ6RhGZ3R4j9pkJmr+Anz1dR1uwQeK/opbBlDX\n
yMJFGJ/b4eIGoE9PR5+ZhX3/PnwNDSHvQ7DYbW40GlW/JBFprDVIggrDWZ0HPcScfwGRlkhydDqu\nu+5q/vrXP7W7p7WG3dn/63Q6Hnro5zz44I+4665bSOlgIQG5DK/D4eCGG77Hm2++zqRJU/B6fKgF\nI8vOv5lf/eoRrr/+Gm677SaKAwpY57sCpQzv1q1bmDdPXsRbl+F96onfkBSdgU+tJ3rxOYEyvD/6\n0R3t2u6sDO95553P7bffyPXXX82jjz6M0+notD8DQbhs7ylIkkTJE7/HdeI4s/72PFbVwJ+LeWhv\nORs/OcbZF09gwtTkbu/vSYTEybfe5D8FSSTGaLns9vaaqoLnZCWFP/8phpwxjP7ZL4Lue6j6CfDZ\n+wfJP1rNdXefEXKtffsfVrFDmMycuUnMXjyx0z42bFhP1euvEb/ycmIvWhrSPgTLK3/+Bp2ubZnh\nkETFNNTz6ZNvURI1kcuun0liSuchpZWvrKbp602kPfAwpgkTO73vVEIVWVZfY+etv29n4rQUFl04\nvs/ttWl7/Vo+3tSI0xTLzQ8uGtJnFYTL9vYSQRCInLcAgLqt2walD/Z+qBMDIIki9p3b0IsunGLX\n2p8uKRlz7jRcJ47jzD8R0n4Ei8Mun5xkNIXWBOEuL0dVIv8mp6/rqCPL3NMRtFoav/lqUIpl+f0i\nTrs3kDUcShq+XI/eKzvIFbNXZ0SedjoA1m1bQ96PYLDb+m/3Ztu1E53fgU8Uhk3dnO4IC/YOiJg+\nAwSB2i3fDsrzbf1kgnDmHcNXX49Rr8Jh93QrqKIXy9tza6tjwAYSu9WDyaxDpQqtBtX09Sb0Pnvg\nGV2hNpmImDUb78mTOPOOhbQfweC094+fQZIkmrZuwaCSfSjdFYYzjp+AOioK687tg+J3USKkQlkA\nDMBvteI8dpSICNkRPFzKK3RHWLB3gCYqCuOYsTQdPoKvqWnAn99iYw/tJLZukxeqiPhI/H4Jj7vr\nF9Q0YSIqgwH7/n0Drq1KkoTD7gm5pir5fDRt+Qa9SYtaLQRV4TFqwVkANH61MaR9CYYWB3Jox8FT\nUY6vpoao0SmAnOHbFYJKhWX2XES7HfuhgyHtSzD0lyPdtncPiCIxaXJSYncL/XAhLNg7IWLGTJAk\n7Ht2D/izbVY3Or0arS50zjLJ58O6cwfqqCgik2SnT3eTWNBoME2egre6Gm9l+xjl/sTjluvRm0L8\nIjvzjuG3Womae7qcpBSEhmYcPwFNfDz23bsGXFvtLweyfd9eAGInjmnznK6wzJVt/IqCMJD0V0CB\nbfdOABInZAItoaXDnbBg74SIGbOAlj/8QOKweUKumdgPHUS02bDMnoup+eVw2LufxObmTEJbsyAY\nKPorxM9+8IDcbm4uZoseh82Dv5sMTkEQME/JRXS5cBUMbME4RZMO9c7Fvm8vCALxM+SSCcEscIbs\nHLTxCdh270Z0D6wA7I8FTnS5cBw8gG5UGjHpSfJzutm5DBfCgr0TtAkJmLMycRw+hN85cLWa/c1H\ntoX6Rbbtkhcoy9zTWqr6BTGJzVOnyvfu3xfS/nSHsuiEWmN3HDyAoNFgHDs+ICQUO3ZXmCdPBhhw\nM0TAaRjCcfA77DiP52HIysIQG41OrwlqLgiCgGXuaUhu14BXArXb3KjVAnpD6Hax9gP7kXw+ImbM\nDJykFLaxfweIPf00JJ8P+/6B01Zb6oKEVrA7jxxGZTJhyMoOtN2dXRVAExWNPjNLNmEM4ALXHxq7\nr6kJd0kxxrHjUOn1LQePBGGGMI6fCCoVjmaNf6DoD03VcfAgiGJgN2a26II+VcvUXBTNcWSABbvV\ng9kSuiPxoGU3HjFzVuDIwWDeieFAWLB3QdzpcwGwD2CSjqNZezSZQ/cie2uq8dZUYxw3HkGlajk5\nJ0jtxDw1F/x+HIcGTqgFxiGEgt1xWNa2TZNk4WQyB7/AqU0mDN
k5uAry8XeSldgf2PvBFKPY1825\nzYI9Qq6b4/N2H+pnyM5B0GpxHDkSsv50h9/ffCReCHctkt+Pfd9eNHFx6NNHN5+jGjbFfCcwZWSg\njo7GcfTIgEWFOPohCkJ5CZXEkp4INICIZgFg3zdw5pieHKoQLIq2bWo2q/Rk5wLIJXwlaUC1VbtN\nPrZOG6Iqn5IoYj+wD3VUNPrmzOOWk5S6HweVVotxzFg8pSX4rAMTMRYI+QzhrsVdXITodGKeMhVB\nEFCpBExmXdh5+l1AEARM48bjb2rC26qKXH/SH84yx1G5SJsi2I1mbY+0E31GJmpLJPb9ewes0mGo\nw/wkScJ+8CBqiwV9Wnpz280CLQgnMoBpkrwgOAbQzu6whfbwZldhAX6rFfPU3IBZo+UkpeDGwdg8\nj5xHj4asX13RktcRwnfimNx347gJgc9MEXrstu7zO4YDYcHeDcaxcvqy89jATGJFyChadV+RJAnn\nkcOoLRZ0qXIFQ5VKhdEUvHYiqFSYp0zF39SEp6wsJP3qDiXr1BCirFNPeRn+xgZMkyYHasP0VGM3\nZGahMhqxHzwwIC+/z+vH7fKFdtfSvCgpTnHo+dmnioLgODIwVV0d/RDDrrzPxrHjAp+ZI3T4fWK3\n+R3DgbBg7wbjOFmwO/IGRrAHJnGItp3eqpNytun4CW2KXZkidEFlnyooL4DzeF5I+tUdoc46DZhh\nJrWciNRTk5SgVmOaOAlfTQ3eATiwvT+Sk5S/n6KwyO03C/Ygk3MMGZkIegPOgRLsIfa3SKKIM+8Y\n2oQEtLGxgc+VMOCRkKQUFuzdoEtJQRURMWAae8AUEyKNXdGqTi3cZI7Q4fOKeNzB1cYwjh0LDIxg\n74+sUyVMUQlbBNDpNWi0qh5FQgSiQgbAkRyIkArRIi+JIq4Tx9EmJaGJbCn4pZg4gtXY5XDRcXgq\nK/A19H952lC/E56yMkSHo83iBq1MUiPAzh4W7N0gqFQYx47DV1uLt7am35/nsHnQaENXotXZiWBX\n4sODSVIC0CY3L3An+l+whzrrVBJFXMfz0CYno4mOaXPNZNYFbWMHMI1vti/n9f84hNqR7ikvQ3Q6\nMeaMbfO5orH3xHFomiDbph1H+z865tSjAfuKsvtWduMKph7kdwx1woI9CEwBO3v/F4Gy290hsyVK\nkoTjyBHU0dFok9qW/+2xGUIQMOaMwVdT0+9aWqhj2D3lZYguF8bsMe2umSL0uBxeRDE4k5Q2KUle\n4PKPh6RvXRHqyKCAGWZMW8FuNMsFsHq0c5kwSf7OAJye5rSHVmMP2NfHnaqx93yBG6qEBXsQKBPA\n2c92dlFsLtEaqi1nRTl+axOm8RPbJXa0bL+Df5kVgdDf5phQZ50qZYcNOe0FuzlChySB09GDBS47\nR17gGvv38I1Ql6pV/m6GUwS77EzXYg8iA1dBP3o0KpMJ59H+F+x2m6f5oJG+h3xKkoTz2FFZ2UlI\naHOtJToorLED8MgjjzBv3jyWLVsWiuaGHPr0dFQGQyBEqr9w2r1A6JxErhOyVqnYx1ujJED1RDsZ\nKMEeao3ddUIW7MacnHbXerpzATlJB8DVz3XqQ+08dR0/jspsRpfc/vAWU4QuqNIKCoJKhTFnDN7q\n6n6vgKr4W0KRdeo9eRJ/UxOmcePbtWfqYeLeUCYkgn3lypW8/PLLoWhqSCKo1RjGjMVbWYmvsbHf\nnhNq779SsEoRRK3paagfgD4zE0GjwXm8f80QIR+H/BOoDIZAuGdrejMOxmbN33mifwW70idjCHZw\nvoYGOfs4Z0yHRwGazDo8bj/eILJPFQxZ2QD9WhhNFCWcdk/ozTCnOE4BjCYtKpUwIsoKhESwz549\nm8jIzo/VGgmYAuaY/rOzh7rgkzM/H0GnQz8qrd21nhQCU1BpdegzMuWsvX6s7hdK27LfbsdTUY4h\nK7tjgdbDJCUAQ1YWCEJgR9
RfOOweDEYtanXfX9PO7OsKyjj0RGs3ZCuCvf8WOJfTiySFbpFX3l/j\nuHHtrgmCIIcBjwCNPfSn445QAtvvgnwss+cAYPPa2V65G5WgIjMyndSIFLSq4IZUkiRKqmwcKqzH\n6/Oj16pxVcs1SEKhnYhuN56yUoxjxiKo29smjQETRM8msXHMGFwnjuMqyA9E2tS7GjhQewS7186M\nxFySTAndtNKCy+OjqNJKQYUVm9NLXJSB6pPyGZmhMEEoQqejXUvrZ/RES1MZjOhSR+EqKkTy+RA0\nGlw+F2W2SmpddVg9NqbGTySxB+NQ3eCk+KSVmkYXjXYPybEmbFY3lsj+ta8rKHPObvMQGR3cOb+G\nTEWwFwQ+a/JYOVhzBLVKjU6tY5pxLALB/4ayGjvHShpweXx4vCKG5jyLUNVOchacQGU0ouvkIG1T\nhI6aShuSJA3ps0+7Y9AEe7CHsg42Sj995qmUCgL+smI0ESJrDn/G+vxvcPtbBIJereOHs65mUdYZ\nnbbncvt4d30ea7cXU9voanMtFRiFig0HK7GkRzNtbPCC4dTxbDxYDJJEzKTxnY61KUKH2+Xr0d9C\nNWsa9Z99iqqiGPuUFJ7f/k8K6ksC1z/K/4zxcdlcPuVipiVP6rSftY1O3vz8KGu3FeM/JSJlAgIR\nCHyxr4JLzxpDQkzvDxR3VpYCkDRzKrEd/E7RKz9b8kuBvgUzHo1TJnLys1JM9joKLV6e3foyTW5b\n4PoHJ/7HOTkLuHzyxUQbOt7NSpLEoYI63t9wnG2HKmmdKyYAs1FRWu9k8+EqLpqXiVbTdoHuyd+t\nvCgfQaMhbfZU1Pr2QjIxSW5Lq1YF326ChbKUZNyFBcTGGvmycAtv7H0fu7elCqjmoIZrc5dz4biz\nUQkd7zx8fpEvthbxxbZi8kraOqQjgfGo2Ha8moRJSSyYntprgeuz2zlWWUlU7lQSk6La/5wECzGx\nZqrKrUSY9CEvGT2QDJpgD8XJ5f3NqSes65JTsB4/zs8+e4I6dwMx+miWZi3BrDVT2FTCjpO7+eu2\n1yisKueirPPaTEBJktiTV8O/1h6jtsmN2aDh9MlJ5GbHYTHpcHv9HPy2GFu5lX2FdWx9YTNn5qZw\n9TljMXYT097RSfB1u+WEHCk5vdOxNpq0NDW4evS38CXIduqSndt5Ub0Zl8/FxNhxTImbiElrZGvF\nTo7WHucPm1Zx69TrmBrfItwTEixUVDby4TcFfLatBK9PJCnWxPQxcWSlRBJl1lHX5GbfF3l4PH4+\n2JTPf78uYMVZ2Vxw2mhUvXiha/fLBbs8cakd/k63V3ZY19bYqa62djiWHZI6GoAvv3iP12MLUQkq\nFqbNJ9mUiFqlYm3RRj4/vomvCrfx4xm3k2ZpqyHaXV5Wf3yY3XlybkRWSiRzJiQSH2Ug0qyjsKSB\nE5sK8UgSf//gAO9/mcfV54xj1viEwFgG+3cT3W5s+QUYMjKpa/IA7XcnIvKqUlHeSHxK8AuGdnQW\nrq1b+MM7v2efUIlBrWdZ9gVEaE04vE6+LPuKV/e8y9aivVw/+WoidW3bLqux8/f/HqKo0oogQG5O\nHLPGJWAx6dBpVRzeW0HV4WqqrW6een0H//06hmvPG0dKnDnoPiooNeRVqe3fCWU8NVp58SkuriMu\nIaLHz+hvgl10QybYR0LhnO7QjE7HU1GOVFXDBdPO56LMc1GrZC3qtJRZLEybx1/3ruZ/hWupdzdy\n7YTLEQQBUZR4Y+0xvtxVhlolcPEZGSw9IxO9rq0GdnJfJTbgnqum8eaXJ/hqXwWHCuu5fflkclLb\naxhdoURsKHbQjjBF6KmtsuP1+II+hk9jiUSKjcZZkI97ZgLXTbqKuckzA9fnJs8krz6fv+59mb/v\n/ye35t7A5DjZP9Fk9/D/3t7L4aJ6Yix6li/IYt7UZNStbN+SJLH/02MkJ0bww9mjeG/jCd7d
cIJj\nJQ3cvHQSEUZt0GMgiSKu/BNok5JQR3T8khqMvXOYGZtNO1VH9hC5KI2bp/6A7KjMwPXTk2ezsWwz\n7+V9xPP7/sFDs+8hSi9r7gUVTTy/5gA1jS7GpUez8qxsxqZFtVEEIlUCJyhk/oxR5Khh3c4yVr2/\nn0vmZ3LJgqwe9dVdUgx+f9dzQTHN9cDGDqDPysK6dQuewgJy58zmqvHLida3zNWLpy7iua//wcHa\nI/xt32vcN/P2wDuzbmcp/15/HJ9fZP6UZFYuzCHmlNBOZ7mVqsPVfO+C8aw7Ws3+/FoeW72dW5dN\nYvaExB711VUom4wMWZ2PnzIOTrsHgt8wDzlC4jz9yU9+wtVXX01BQQGLFi3ivffeC0WzQwqv38s2\nXSUAC8VMlmYtCUxQhWRzIg/MvovRllFsqdjOloodeLx+Vr2/ny93lZGWEMGvb5rLZQtz2gl1kF8q\ntVpgfGYsj14/m6XzMqizunj6zT0cKqzrUX9dBfmoLZFoYuM6vcds7rkDtdZZzwmLG6Nb5Oa0S9oI\ndYWxMdncnnsjgiDw0v5XKWgspqzGzk+e28jhonpmjI3ndzefxpnTUtsIdWjJOjVb9MyfmsKvbpzL\n5KxY9p2o5TevbKemIfjDPjyVFXKmZQeJSQqCIGDsRbnWfK0Nl05gVK3Iw3N+3EaoA6hVahann8ml\n2RfS4G7kxX2v4vF72Hm0isf/uZPaRheXzM/koWtmMC49up15QVlooqMMXLV4LI/dMJuEaAMfflPI\nX98/gKsHhapcRYUAGDK6EGi98DUA7NLLO46JNjO3TP1BG6EOEG2I5I7cG5mdNJ2CpiI+yP8ESZJ4\nb+MJ3vjiGEa9mrtXTuWHSye1E+rQ4sxNSbLw4ytyuXP5FNRqgefXHGDtjpJ293dFQLBndqXs9G4c\nhhohEex//OMf+frrrzlw4AAbNmzgsssuC0WzQ4qPC77ggFGO1811xXRq54vUWbhl6nUY1HrezfuQ\nJ975ht15NUzMiOGn184kNb7zLaTdJod1CYKARq1i5Vk53L1iKn5R5Nl39rEnL7iSBr6GBnx1dRiy\ns7u0R5osPZvEkiTx5tH3qIyRp022tXPn5vjYMdw85Qd4RR+vHnybJ/+1g8paB8vmZXLXyqmdmpdO\nTaOPNOu478ppLJ2XSU2ji6fe3E1NY3DCPbBr6SB+vTXmCB32HhREs3nsvHbk31TGa7FYvZjdnX/v\nvIxFnJ48myJrCX/Z9i9e+OAgGo2K+66axvIzszstcnZqyOeohAgevX4OE0ZHs+tYNb//xza8vuBC\nE92FhYBcfrkzeqOx76s+yIeu3fhVkNOk79SGLggC14xfSaIpnnXFm1i1di0fbykiMcbIo9fPZua4\nzlXj1rH8giAwe0IiP/3eTCLNOv61No/3NgYfkeMqKGhWdmI7vUcJKuhJstZQJJx5GgQn7VWsL/kK\nX1IcqFS4iwq6vD/WEMOKMctw+92UGzczd1Ii9105DVMX5zVKUnO87ikOmxnjEvjRFdNQqWDV+/vZ\nd6K22/4G4tezOtdMAMzmniVkbKvcxeG6YwGNx11U1OX9U+InMit+FtWuKlxRedxxWS4rzsru0lau\nCJbWsdsqQWDlWdmsODNLFu7/Ck64K9Ea3Y2DyaxD9Eu4Xd1rwZIk8caRd2n0WIkeI0cFKZpgRwiC\nwDUTVhKvTeaE8xCaqHruv3IaU7I630lBx4WvIoxa7r9qOtPHxLMnr5oXPjiIr5uDuEHW2AW9ocPE\nJIWeFkRz+py8ceRdVFodmrQ0vKWliN7Ov2vQGPjh5O+jktQckjaQkqziZ9fOJD6qa8d4R+WbM5It\n/PwHs0iKMfLxliI+3VrcbX99TU346moxZGV1qewoCoUzrLGPbCRJ4p28D/FLflZOvBR9Wjru4mIk\nX+dCQJQkDuw04a9PQB1Vx4QZTWi6iUV2OeV6JR3F607O
jOX+K6ejUgk8/8EBik927TQLVrD3ZNvZ\n5LHybt6H6NU6zjvjGvk5XQg0gEa7h6Nbk5G8OvTp+czO7d7x4+iiLsiy+Vksbxbu/+/tvTi6EcTu\n4iJQqzuM429NT8Zhb81B9tUcZGx0NhNzF7Y8pwsKym1U7pPNIElTCsgZ1X3OR2dJWhq1ijuWT2ba\n2Hh259Xwj/8d7nKnIbrdchz/6NEdxvG3xmTWBa2xf160AZvXzgWZ5xA1Zjz4/biLuxawhw77cBWN\nQ9B4mTCnhqggok4cNg9GU/vyzfHRRh64egbRETre/vI4Ww5UdtmOMle72rVA730NQ42wYO+GvTUH\nOVx3jImx45iWMAVDZhaSz4e7vPMDJ9798gTbDlUxyn0GBrWeTwq/aBMW2RHdnZw0Lj2aW5ZOwuPx\n8+w7e6lrcnV4H7QW7F072QICLYhJvOb4/3D4nFyacxEJcaloE5PkOO5OhIrXJ7Lq/f1U1/qZrJ+P\niI+Xd/272+d0V6L1kvlZLJmTTkWtg+c/OIC/kxOdJJ8Pd0kx+lFpCJquHcPBVroUJZGP8j9DQDYt\nKDZrxYbdETUNTv7yn/34bdGMi5hMtfsk31bs6PI50PU4aDVqfn7jaeSkRrLl4En+u7nz57uL5bBX\nfWb3DldThB6n3dNtQbQ6Vz1flnxFtD6KxekLMGQpOR6dL/Q7j1bx7/XHMTtyiNPHsa1qBycd1V0+\np7vyzXFRBu6/ajomvYbV/zvMwS78UO4gHKcARlNYsI94vH4v7+V9hFpQc8XYSxAEAUPzC9LZJN5y\nsJJPtxWTEmfivhWncXb6mdi8djaVbu7yWQFbYhfJSbMnJHLl4jE02Dw89+4+3B2kf0uShKuwAG1S\nMmpT1yFhwdZJqXLUsK1yF6nmZM4cdToAhsxMRLsdX017u78kSbzxxVGOlzYyd2Iid5x1PuNixrC7\n4gDHG7rW8oMpJ3Dl2WOYlhPHwYI63lrbcfanp6ICyefDkJnZ5fOgbXJOV+w4uYdK+0lOS5lFkjkR\nTXQ06sjITk1STreP597bh9Xh5drzxnL9tOXoVFo+PPEpTl/nCzO0ONI7K99s1Gu457Jc4iL1vP9V\nAbuPdSwkXUWKwzCzy+eBPA6SJO8eu+Kj/M/wij4uyb4AnVrXUlqgsOPSAkWVVv720SF0WjX3XT6D\n5WMvDCySXeH1+PF5xS7nQlpCBPdenosgwAtrDlDdiXM9GMcpgFqjQm/QhJ2nI5mvirZT56rnrLQz\nSN51YmQAACAASURBVDLLoVXKit/RJC6qtPLqJ0cw6tXcc1kuEUYti9PPxKgxsLZ4Iy5f5xqhI8ia\n00vmpLNoeiolVTZe+7T9IdvemmpEpxNDN1tOCH7b+VnReiQkLsg8J+AgU7a0rg78Det3lbFpbwUZ\nSRZuvGgiKpWKZdnny20Vru/yWcEcqqBSCdx6yWRGJZhZt6uUTXvL292jaNHKgc1dEczOxS/6+Tj/\nc9SCmosyzwVk+7l+dCa+ulr81rbmMUmS+Pt/D1FWbeecWWmcPTONaH0USzIWY/XaWF+8qcs+OZr9\nLV3ZgyPNOu65LBedVsXf/nuI0mpbu3sCAi2I+dCShdv5PC2xlrG9cjdpEanMSZ4BgDYxEUFv6NAU\nY3V4WPX+frw+kdsumUxGsoUZCVPJsKSzu2ofRU2dR7Z0ZZZrzbj0aK49bxx2l49V/9nfTuGRJAlX\nQQGa2Lg2B4x0hnK62HAmLNg7QZREPjwiv8jnjl4Y+FyXOgpBpwts7RRsTi+r3t+Pxydy89JJJMea\nADBpjUFp7cEWvhIEgWvOHUd28zb8y91tTUKK9qgfPbrb36jRqtHp1V1O4hpnHdsqd5FkSmRGYss5\nmYqgcDVHXCjklTbw5to8Ik1a7rlsKnqtHNaZHZXB5MRxHKo7SnFTaafPC/ZlNuo1/OiyXMwGDa9/\nfoyiyraC1V0s90s/
OrPLdiC4sgJbKrZT46pjfuppxBlboioMGfLC4TrFzv7p1uJANNTV57SEWy4e\nfSZmjYmNZZvxdGKeCzjSgygtMTrJwg8vnoTb42fVf/bjPCUM0l1UhMpgQJuY1G1bxiAW+k8L5UV+\n+ZiLAou8oFJhGD0aT0V5mxpCoiTx9Bs7qWkO7Zw+Nl6+XxC4NOdCAD488Wmnz+rJwe4Lp49i4fRU\niqtsvHqKwuOrq8NvberWDKNgMssZ2X7fwBzc3h+EBXsn7Ks5RLn1JHOTZ7aJzRXUavTpo3GXlSF6\n5IknShIvfXQoMIFnnFIKYHH6AowaY7PW3vEWvCfHf2k1Ku5cPoUIo5Y31+ZxpJVtUXHkBaOhKc/r\n6kX+vOhLREnkgszFbcLZFE3Y3cq+3OTw8MIHB5GQuGP5FGIjDW3aWjHxAgA+K/qy0+c57B50ejUa\nbfe1t+Ojjdy8dBI+v8hf1+zH4WoxIbiKikClQp/eteMUujdJ+UU/nxauR6vSckHm4jbXlJ1L63E4\nWlzPuxtPEB2h47ZLJreJ1derdZyZdgZ2r6NTW3tXjvSOmDMhkQtPG83Jeif/+KRFqIkuJ57KCvSj\nM7p1nEL3C1yNs5a91QcYbRnFhJi2NWf0ozNAknCXtSzaH35dwK4jVUzJjm2XVDU+dgzjonM4Up9H\nqbX9jgtaFcULsk7M984dR05qJN8ePMmGVgpPixkmSMHeA9/TUCUs2DtAkiQ+L/oSAaGNtq5gyMgA\nUcRdKk/iT74tYn9+LZOz2k9gAKPGyDnpZ2L3Odhcsb3DZ/a0VG1spIHbL52MKEk8+c8d2Jrtoorm\nqE/vXmMHWai5HF78HYTN1bsa+LZiB4nGeGYlTmtzTW0yoU1KDjhQlcWt3upm5VnZjB8d0669qUkT\nyLCks7f6AJX2kx32x9HDEq3TxsSzdF4G1Q0u/v5fOUJEEkXcJcXoUkeh0nbfVncF0fbWHKTe3cAZ\nKbMD2aMKp2rsjTY3z39wEAGB2y+dQmQHv2Vh2jw0Kg3rSr5ClNqPe7C7ltasaM5e3XGkivW7ypr7\nJDtOgxVoxm58DV+WfI2ExOL0s9qZiJQdorJjPFhQx0ffFJIYY+TWZZM7DHFdPPpMud3Srzt8Xkeh\nr12h1ai4Q1F41uUFdnGKshOMWQ5GRmRMWLB3QF7DCYqaSpgzahrJ5vZpywFttaSIYyUNvL+pgBiL\nnluWTeo0RvvMUWegUWnYVLq5y5fZaAo+ZX5SZizLF2RR0+Dk7/89hF8UcRcVoYmL6zSF/lSUhcTl\naO8w21S2Bb/k57yMRe2ybEHeFYgOB97qaj7eXMjBgjpyc+K48PSOXyBBEDg/82wkJD4v2tDuut8v\n4nL0/ASp5QuymZgRw57jNXy2rQRPZQWSxxP0rkWtVmHo4gShDSXfALAwbX67a5rYOFQREbiLChFF\niRc/PEiT3cPli3IYlx7dYXuROgunJc9s1oAPtrvem8ObNWoVt186BYtJy1vr8sgvbwoqMak1gRju\nDsbB4ZWVkmh9FDMTc9tdNzSbvNwlRdRb3fzto4OoVAIPXzen0zIQk+MmkGiMZ0flbqye9v6B3pz5\nGhtpaN7FSc27OF/LLjZowa4cQhMW7COKdc2OrUsnLunwuiLYrScKeOED+bT62y6ZTKSp8wkYoTMz\nO3E61c5aDte1P4HIYfc0F/rv2Z/k4jMymT4ugX0nalm34SB+a1PQmgl0blf1ij42l2/DrDExO2lG\nh99VIi3yt+9nzdcFxEbquXlp54sbwNT4SSQa49lZtReb197mmtOhnCDVs6p6ijM1yqzjvY0nKN4j\nH9emzwh+HMzmjk8QKrGWcaKxgImx4zpc5AVBwDA6A+//Z++9oyS560PfT3WOk3ty3JyjNiqsJAQS\nCiRjHgbDRRhjHDg8Xb/jc1+wr6/TxX6PCxiuMRgso4vBZIQQKGu1knalzTnvTs6xez
qHqvdHdfX0\nzHRPV3XXzG6P+nMO54jpqq7f/vpX39/3942jo/zqlYtc7pli++oaHtzdsuDz3tVyDwICL/W8Ns8B\nnm+jkUq3lc8+thFRlPjGL87jV8JeVUTEwMLRQW8OHCWaiHJv850ZN3lLQ4Ncvri7i2/98gLTwRgf\nuX8VazKc3BQMgoF7W+4iLiV4vf/IvM+12NjT2bKymkf2yae4J399iXBvD6bKKoxudQW0SqaYZch4\naIIL41foKGtldXXmI6y1sQmMRgbOX2HKH+VDB1Zk1c7SOdCyH4BDfW/O+yzfLjEGg8Cffmwn5S4L\nxw+eBtRrJpDdvnxq5Cz+WIC9jXdgMWbWuJQN5PQbZzAIAn/4/k05i3QZBAN3Ne0lLsbn2ZgLaVpc\n7rTw2ffJpqmzb+QxD65kB6HobOfjweRvdW8GbV1BmYdTh85QU27j04/M7zE7lzpnLZtq1tPl66Fr\nTmRIPqYYhY0dVTx2ZzvjvjCjF6/JjlOPumJZNocFQZgv0BJigoN9b2I1WrizcU/GewWTCUtzC6He\nPq71TLBzjYcHdub2b+yp34ndZONQ/xFi4uy5L2QePnB3B2tbKrh0sYfE1JSqYAIFRw7TXDFQEuxz\nODxwFAmJu5Lx2pkQTCZC5R5c02NsX1HFQ3vULZpWdzMdZW1cGL/CaHCmNEAsliAaSeTdJabCbeVz\n79tIXVj+zkRt5iYCmZhJzpn9Mh/qO4KAwN2N2WvLm5plrbQiMMZv37eKlU3qKlDubbgDs8HE6/1v\nzTJL5auhKaxvq+T9d3VQ4RtBQsDctLDWnI5ycvGnNTKejvo5PnyaWnsNG6rnt1JTiNfKpYwbouP8\n4Qc24bSpM6cdaJI3+jcH3p7190Ln4X13drCp2YUjMEmgok6V4xRkJcHumF8Q7ezYRaYiXvY27MJh\nzl4CIFBei0FMsMYa5vGH16mqm24zWdnfuJvpqJ+Tw2dmfabFkT4Xo8HAH7x/Ix2CbGcPVOSOClIo\n2diXGQkxwZuDR7Gb7OyY4yxM5/zNca7HnJilBJ/YVampTviB5v1ISBzqnwl9DGl0EmVibWslO8rk\nF/IHF0JZMzLnkmkR90730+nrZn31GjyO7DVNfnFsiCmTi6b4FA/snN9PNBtOs4OdtdsYC41zZWIm\nwUirsywTj+xppSE2yZiljGeOZY62yIQjJdhnopYODxwlLsY50Hxn1gJXsbjIDy7K9+yqiNHRoL5F\n5NqqVVTbqjgxfJpQfCaxphBNFWQB/cntZRiQuBiyc6l7UvW9mWK4lY3nrizaOsDIZJBDI7IA/vB6\nKw6VmxvIG5yAwBsDb836e9BfWK/TCpeVRzrkMT3fk8AXVCeoS6aYZcaZsQtMR/3srd+Z1fwwPBHk\nn5++wIhdFniG4eylBTKxvXYzZRa3XNI3IduU83GWZaJ8eoSIxcGZ4Rg/Oaiu6l0mU8yhPtneqWiU\nmXjr4hDPvd2D112DNRpE1Nip/u5m+USUblstVKABJMZGMSViTLk8PHO4S3VFzJR9OdlvVZREDg8c\nxWIws6dhfmlihe+/dJXzkxA3WamYHtE0VoNg4M7G3UTFGMeGTqX+rkcTa9PYIAAjtiq59rvKcscO\np4V4TCSajIcfD01weeIaK8rbaHRlLiIWiSX4p5+fp9con9hck5kjnrJRba9iXdVqbnq7GUxGSyUS\nIuFQrOAuRmU+OSP3pljGN35+XlXRNKvNJNfoLwn25cGb/UnNpCmzZhIMx/jqT84SjMTZcfc2gJyF\nj+ZiMpjY23AHoXiI06Pn5O/N01mWTsLvJz4+TvmqFdRVO3n+aC+vn82tsc7VTkLxMMeHT1Ftq8xq\nfugemubffn0Zm8XI6js2AvMTdHLR5m6hxd3E2bGLTIbldmip7NsCBFqkV/491u/ZjNlk4F9+dYHB\n8UCOu2bmwZ8U7NcmbzIWnmB77Rbspszmh4On+3
nt9ACtdW6cHe3ERoY1N/ne27ALg2DgjYG3U05U\nPZpYK+ty54Ht+EMxvp4hIzMTc9fD4cFjSEhZbeuiJPHtZy7SM+Jn3a4NcvXTXm3vBMD+xt3y8waO\nAoX5W9KJ9HZjcDhZtbGdK71TfO+FqznLM880tS4J9qJnJDjG5clrrKrooN453x4nihL//MsLDE0E\neXB3C3fcK0eKaBVoAPsa5GbYR5LOQz00VeVlcrS384UPyxmZTz13Jecx3GY3z3KYnRw5Q1SMsa9h\nd0bzw4QvzNd/dpZoXOSzj22kZu2qWc9XiyAI3N20FwmJI8nYfj02OGUc9RtW86n3riMUSfA/fniG\nqRyOsJQpxidfd3hQFjCKwJnL2RtjfO/5q7jsZrm+fFurnKDTp635Q7nVzZaajfT7B1NO1KA/e+Er\ntUR65cqW++/blsrI/PavLuYs8JV+gkuICY4MHMNusmUMcQT46cEbnLg6yrrWCn7noY1Y6hsI9/Qg\nqTQFKmyp2YDL7OTtoRPExLgu74QYDhEbGcHa2spnHt1Ia62LQ2cGePlE9sxnBSVxr1g7w5UEexJF\nuNzVON9pKkoS//aby5y/OcHmFdX89r2rMNrtmGvr5BK+Gn/8WkcNqyo6uDp5nbHQuC6mmHBaEkZ9\nlYM/+ZCc/v8/f3aOgbHsGqviMFM0pCMDxxEQ2Nuwc961/lCM//GjM4z7IvzWgRVsW12TSoTKVbo2\nEztrt2IxmHlr8ASiJBIMROXa2xra380lPUFr38Z6Pnh3B+O+MF/58Zl56fbppNvYg7Egp0fPU+fw\nsHJOZySQW9v90y/OYzQKfOHDW/BU2LG2JHMbNJ7gYMZ2/cbAW8RjCaKReEFrQUomz1kbGzGYzXz8\n3WtY21LBiSujPPX8lQXXa7rGfmH8Mt6oj11127EY54/ntdP9/ObtHuqqHPzRBzdjMhqwtrUhRcLE\nRrSZY0wGE3sadhKIBTk7ekGnTb5PTtBqacVqkes3lTkt/ODlaxy9tPD4lBr9UQ2dqm4nSoId2Z76\n9uAJ7CYbWz2bZn0mSRLfe+Eqb5wbpL3ezR+8b2OqNrS1pQUxGCA+kbv5xVz2N8ia4JHB4/os4jkZ\np2tbK/nUe9cRjMT5hx+con8B4a5oJ0OBYTp93ayrWk2lbXb4Zjga58s/OsPAWID37Grh4WQSkqmq\nCoPTSaRXm6YKcvOFHbVbGQ9PcH3qplx72zm/9rYWIr09mKpmErQe3d/OPVsb6Rn28z9/fo5INLM5\nIt0Uc3T4FHExzr6GXfMiO/pH/Xz1x2eIxUU+976NqUggm5J5mYcZQnaiVnJy5CyTPjlRpxDBHh0a\nQopGU2vBZDTw+d/aQmudrLH+7FDmKozpzw0GoryZNItkMsO8fnaAp567gtNm4n//7S2pMFdbARuc\n8k4cHjiqi8Ye7p1dN6m63MYXPrwFq9nIvzxzMWtFTCj+FnklwQ5cmriKN+pjZ922WU5TUZT4wUvX\nOHiqn5Zal1z7Oa0LUioDNQ9tdXvtZmxGK28NHk/VAS/UFCPHLM/UqblzcwMff/cafIEo//D9k/SN\nzM/uA7C7LMSiCd7slU1DiqlIwReM8qUfnqZz0Medm+r5yP2rUgJPEASsLa3ERoZJhNT3I1XY23AH\nMLPBFTIHce8UCa93VsyyIAh84sE1bFtVw8WuSf6/H55KlV9Ix2I1YTAK+H0RDg8cxSAY2DPn1HKj\n38sX//0kvmCM333PWrantXSzNDSC0ZiXYDcIBvbU7ySaiHKm/zKgjzkqPVHNYTPxnz+yLdV16Iev\nXEPMoLkr8z/pnebixBVa3U00u2eHzx483c+Tv76Mw2bi//joduoqHanPlLkP5zEP9c5aVpZ3cHny\nGmNTXnk8eig7afPQ0VDGEx/Zislo4J9+cT6rcz1XeYXbHV0E+6FDh3jooYd48MEH+da3vqXHVy4p\niq17X1LIAA
TCMf76X9/mpRN9NNY4+dOPbpuXfKMkwITz0E4sRgs767YxFfEy4Z1esPZ2LhKRCNHB\nQawt87vkvGtnM598cC3TwRh///2TnL0xfyErNeBP9VzAaXKwxbMx9dnAWIC/+e5xbvT72Luxjk89\nvG5eeKcyD1GN9mWAVRUdeOzVnB68KNfeLmhzk58/t06O0WDgjz64ib0b67jR7+Pv//0k497ZxdgE\nQcDhtOD1Buj3D7K5ej1llplMxbM3xvh//+MUoUiC33tkPfdtnx3eKZhMWBubiPT1IiXU9SJNZ09y\n7V3ol7OSC5qHLPWCypwW/vSj22iodvD80V65xO2cE4wiSPvGhxElkb1pm3xCFPnF6zd56rkruB1m\n/uxjO2irn53NaU3mNuSzwQHsa5Sf1z0qO/4Lm4ceBLMZS33DrL+vbq7gCx/egtEg8LWfnuVXh7vm\n+R6cRR7yWLBgF0WRv/7rv+Y73/kOv/rVr3j22We5cUN9g9lbTSAW5NzoBeqddbS55UV5Y8DLX/3b\nMY5fGmZjRxX/5eM7MpYLSBU+ykNjB9ifXMTT06FUE+t8CPb0yl1yWjIn5Ny7vYnfe2Q9kViCr/z4\nLN9/8eqsRsj25CKOhBLsqt+O2WAiIYocPNXP3/6vmbKrv//ohlmVChUUAZKPI1kQBFlrj8jfq4um\nmqEAmslo4DOPbuCBO5rpHwvwF//6NgdP9c/SWh1Oi6yhSTMCJhiO893nLvOVH59FkuBPPrSZOzc3\nzPt+5blSLEZ0WJt9GaDGXsWaipWMTckRQos1DzXldv6vT+xkXWsFp66N8bf/6wRXe6dSnyuCdGzK\ni0kwckfdtuT/D/H3/36KX77ZRXWZjT/72A5aaufXIzK6XJiqqvMW7Ns9m7EYLYwm5yHfkE8pHic6\n0I+lqRnBOD/BaV1bJX/2sR1UuK387NBN/vt3j+JNc7Ar85CpzEQxkJ+KmMbZs2dpa2ujqUnWYB55\n5BFefvllVuboDH+7cGz4FHEpwd76ndwY8PGrw12phtEfeWAN79nRlNXmayqvwFhenpd9GeSQv3pH\nHVLEgLUy/58ikOzmtFBFxzs3N9BS6+Kbv7zASyf6OHVtjPt2NHHXlobUIjbFrGyr3s6JKyM8/UYn\nfaMBrBYjv//oBvZtyt4I2VqAfRnktPJXzsjOaz00VVuWeTAIAr/zrtU0e1z88JXrPPX8FY5cGOL+\nHc1sXVWN3WkGUaDcUEGdqY3nj/bw/NEepvxRmj1OHn94/YIJSNbWVjgsR6RYG9Vn/yrsbbiD586f\nBPKfB0mSiPT2YK7xYHQ4Ml7jtMlNsb//4lUOnh7gi/9+kl3rannPrhbaG9wYTQKJMGz2bGRiUuSn\np65w5PwQkViC3etr+eSDaxdMQLK2thI4fYq4dwo86uqzKNhMVnZ4tjB8TijIkR4dHJA7aC1QVmJF\nYxn/9VO7+Oenz/PW+SFOXh7hwLYmHtzdoqo2/e1MwYJ9eHiYhoYZDaauro5z584V+rVLxuHXbuK2\n1fLTX0SIhk4AsKa5nA/cvYK772hldHThxtHWllaC58+R8PtVV1RUEASBXVU76ZQgYgzm/W8I3OyS\nx5KjNkprnZu/+NQufn7oJgdP9/OTgzf4+aGbNDsEagFLqIIvfvs6kgQCcNeWBj50zwoqciSJWOrl\nAlD5OMwAKm0VtFrlscfN+dfnCPf2YLDbMdXUZL1GEATu2drI5hXVfO+FK5y6Nsa1Pi9mk4GV9ghu\nrIhDTfyXf5ZzGkxGgQ/e3cF797blbEg+43PpgT3ZSzFkY1vtZl6NyzZ2m4Yqn+nEp6ZITE9jX71m\nwetMRgOffGgd+zc38IOXrnHs8gjHLo9gtRhZb4hgilk5fzzB4SHZgVpdZuV337OG/Zvqc54srS2y\nYI/09sIq9WUdFPY27OTZ2GWwJPJ2pCvm0VzlqxXz1MkbE/zwxSu8eLyXF4/3
Umkzsgq41jfIPopD\nSU2nYMGeb5ynR+NOvliU9zVisVThrK6mbUMZ79ndxuZVM4Ih1zgDa1cRPH8O2/QYFR2Zj+gLsT+4\nk05OMMl43nMyeLMTwWikactaDJbcmt7nP7qDx9+/mVeO9/DayT68kWvgr0OYqmJ9exWbV9Zw59ZG\nOhrV1X4BGGxvI9DVTXWFDYM5u1DK9m9cX76Wq/gYZgCPJ3Ps+EIkwmGuDg9TtnEDtbW50/o9Hjd/\n9bkauod8vHlmgDfODBCMD+CmkcRoLVtX13Dn1ib2b26gXGX2Y9yxnj5AGh7I+7f0mGqJAn7HOOs8\nC6+nTM+Y6L4KQNW61arG4PG42bOliWMXhzhxeYSzN4eJ+idwBMqxhl3csb6Sh/a2cceGeowqhaxh\n01omngHT+FDWcS5Edc0WXox3EbIFcFeYsZltuW+aw/SY/Oy6LesoU/H8h+vKeffuVl4+1suxi8Pc\nnOwkOi4QIXbbyCotFCzY6+vrGRiYyXAcHh6mtjZ3NblcmvBS4XY5KBMcfOKTM45TZWwejzvnOMUa\n+eUbOXeZWEO75uf7RuQ42QlxnDOd17KmbWdDEkUC3d2Y6xsY90YA9RrvvnW17F3r4YsHj8BwHfva\nV/LgYzPt77T8RoaGJqTrNxg4dzWrlrTQfNojZYCPs5PnGRrOXP99IUI3roMkYahv1DRuh1Hg3Tua\n2L3RzZd+fhTGG/n9d+1gzUY5SS0aijIaUn8cN9d4mL5xk5ERX14+E1vcSVgI8XLnm7Q5s5/Ass3l\n+DlZ449X1WmahxV1Lvl/66Z58RdhhEAl//UTu1MmoYnxzBFVmYiVy9FCE5ev0Yz2dz0WjSMkjMRM\nYV64eDjl79DC1JVrIAiEnFVEVDzf43EzNRlk56pqdq6q5t8vXeTwwDH+eNunbxtZBeo3yYKdp5s3\nb6anp4f+/n6i0SjPPvss73rXuwr92iXD6ZKTc/I9eaQch3nalxUbXswS4a2hzK3SFiI2MoIYDmsq\nS5pOr7+fgbhc7yYRzj/LrpAIIYBIQN7gvMIklyauar9/AYehGo4NnyJqliNlCnGYWVtaSUxPk/BO\n5b44A2JIQLLEOTt2nmBMe/io1m5BczkyeCxlDss3httUXYPBbi/4nYibI6mINS2k/Ax1dRhs2rX9\nSCLKyZEzVNrKWVe1OvcNtyEFC3aj0cif//mf8+lPf5pHH32URx55pGgcpyB73RMFZJjJHdqteduX\nlZfHZIWjQydJiNpC5RSBls1hmIu3Bk8gGuIYjIU5itK7SuVD+sv81tAJzfcXItglSeLI4HEkc2zW\nWPIhFcedx3qQJEmO5XdZiIlxToyc1vwdkd4ejC43psrsDS6yMRme4vLENdxuuTZOvvOQym0YHiYR\nztzjdyGUd6LM7eCGt5ORYPZEokzEx8cQQ6G834nTI+cIJyLsadiZtarn7Y4uo77nnnt4/vnneeGF\nF/jsZz+rx1cuGWo61C+EYDBgbW6RO7THtH+H8vKsbmhjOurn4sQVTfcXItBiYpzjQ6dwW1w4XbaC\nsuysTc0gCPlvcIEoJrOB2rIazo1eIBDT5kyO9PSA0Sg3QdFIl6+HocAwq+rlzamgeSigxEIkHEcU\nJWoqKhAQNGuriWSbQmtLa15moLcGTyAhsaJWbpBR8AYnSQS7ta8H5bntHvm31DoPhZ7elAYwe+vv\nyHHl7Utxbkc6okdYk7W1FUSRaL/6+t8KyrH/jhbZtq2kcatFrfc/E+fGLhKIB9lVvx2nq7CiRwab\nDXNdHZFe7bVzYKb29r6GO4hLCY4Pq9dWpUSCSF8v1sYmBJN2t5FSUXBfm1yeVw+NPZ/QT+W55W4H\nG6rX0u3rZcA/pPr+mYxT7WtBlETeGjyGxWBmbf0KoHCTFID/Zqfme5WNdVVdG3aTnbcHj2s6yabe\niTzMUWOhCa5O3ZAT5xboRXC7844X7Hp0
S0lpaXmYIZTnrqxrodXdxIXxy0xFvKrvj/T2YPXUaA61\nhJkyxXc27sbutCBJEM6Qbq8WW2sbYihEbEzb0VkUJULBKA6XlV11OzAIBt5MK2Obi+jQEFIslteL\nHI6HOT5yhipbJRtqV2O1mQpaC6bKKowud14ae3oxOKWsw9z2gQtRiGC/PtWZKlNcWe6aNZ58UHwu\ngc4uzfcq819WZmdX3Ta80WlNJ9lCNPa3FW29QbvD9naiJNiz9PzUQiGOQ7n9lwmTycj+xt2pgmRq\niHu9JLxTODsy92ZdiLHQOJcnr7GyvJ16Z50uRY9mzBDa5iEciiFJ8m9RbnWzuWYD/f5BeqZzl1eV\nn9clP19D82qFkyNniSai7G24A4NgwOW2FiTYBUHA2tpKbHSURDB3Hfh00ovBba5Zj9Ps4O2hE6q1\n1ZlSAtrnQaluuq9hly6nWKV2TiAfjT2tAJgSEXNk4Jjq+yM9PRjLyzGVqw/XBbmD2uHBY1iNw/az\nyAAAIABJREFUFrZ7Nue+4TamJNh1qAlhaWzKu8FAeu3tO+q2YTaYOTxwdFYv0GwoJwRnR7vm5x5O\nvihK5T5dTi55OlDnNthQxvRG/1tZ70lH2VBteQi0wwPHEBBSdYJcZTbCwRgJFZ12sjErUUkDMxq7\nFZPBxO76HfhjAc6MXVB1f7inB8FiwVKvLWQ2FA9xauQcHns1qyo6sCeTowra4JK1c4Ld3Zpr56QL\n9hZXE02uBs6NX8IXzR12mPD7iU+M56WtX5y4wlTEy676HdhMhXVuutWUBLsOGrvBYsFS30Ckt1dT\ng4FU+6/kGOReq1sYC09wbTJ7aVWFcHdSsK9coWm8CTHBW4PHsJvsbE82ULiVGvvcssXrq1ZTZavk\n+PBpQvHcURWRnm4QhKy1crIxmFamuMomR5G43PILHQ7mb5KaqSFU2DwovQFe7zuS9R4FMRYjOjiA\ntblZdfNqhbcGTxATY+xv2I0gCBiNBmx287ym1lqxtrUhRqNEhwY13Rf0y450s8WIIAjsb1B/kk1F\nieVhlns9qUjcnaEnQ7FREuw6VXGztrbKDQZG1duXQ0nh4XDOZGoq2qrSwWchlKO3a4U2wX5+/BLe\n6DS767enyhTrobGbysowVlRoPrnMbTSS3gv0+PCphW6diVmu1R6zrPgY0rskKYK9kHlImeY0nlzm\ntoOrd9aypmIlV6duMBRYuLBYdKAfEgnNZhhJkni9/wgmwTgrEShTU2utKPMQ6dY+D+lF8XbXb8ds\nMPN6/5GcJ9l87eujgXEujl+ho6x1XpniYuQdL9hNJiMWa2EOM8jPgTpjgpg59q0ob6PeUcupkXN4\nIwsfPSM93Rhdbiw12rz3mRooOJNp84U2FrC1thGfnCQ+rb65daZGI/uUXqD9CztR42NjiMFgqtGF\nWsLxMEcGj1NucbOlZkPq704dBLu5tg7BastbY7enbfR3N8s1Z17PYZbK13F6ZfI6w8FRttduxW2Z\nccA7nBaikQRxFX1Ss2FtawcgnPSBqCEVy59WBM1hdrC7fjvj4UkujF9e8P5wlpLFuXj55htyb9em\n4tfWoSTYAXRpXGtTFnFXl+p7UppqmkATBIEDzXeSkBK80Z/9CJ4IBOSY5bY2TTHLI8HRlGbS5Jqp\nRTLjMCvw+J2HGSJTa8ByaxmbazbQ5x9I9QLNhCI0tEbEvDV0gnAizN1N+zAZZkIkXW7brDHlg2Aw\nYG1J5jZE1X9PwB9JOdIVttZspMzi5u2hE0QS2b8rX8fpoeQaO9A8u2iZLj6X5hbZ96RBY1cc6XPL\n9R5ovhOAg71vLnh/pLtbDr1VUdZEISEmePnmYewmOzuz9HYtNkqCHXkRh0M6Ocw0LOJsLfH2NOzE\nbrJzqP8IsURmW2+mLjlqeLVX1kzua7l71t9TDrMCN7h8en9mm4d7mmRh82rv61nvjeQRsyxKIq/1\nvYlJ
MHLXHA3NVVa4xg7JVnnJ3qNqCQWiqYQ5BaNBjpYKxcOcWCC2P9zTI/sZmptVP28yPMXZ0Qu0\nuBppL5ut4ephojRYrdibGjU1t86k7AA0uRpYVSF3VxoKjGS8VwyHiQ4NYm1t0+RnOD16Dm/Yx976\nnRl7uxYjJcGOPkX1jQ4H5to6wt1dquOvszWxthot3NW4B38skDVRJ9zdBYBNQ4ifPxbgyOBxqmyV\nbJvT29VoNGBzmAno4GsArSappAliTqnatZWraHY1cnLkLGOhiYz3ztRGUX/0vjRxjZHgGDvrts0y\nP0CaYC/UcagxQkh2pMczNpa4q3EPAgIH+97MuLYkUSTS24uloUFVdU+FN/rfQkLinub98059ejWa\ncK1ckWxunVkYz2WhXqeK1n4oy0k20tsjN5xJnp7VIEkSL/a8hoDAPc3aSy3frpQEO/o5UG1tbXJz\n67HMfRTnEsiiqQIcaN6PQTDwat8bGV/mGYHWrnp8b/S/TUyMcV/znRmrJzqdloJfZHONB4PDkYrY\nUUMwEMXuNGOYo2UJgsC7Wu9BQuKVLFp7uKcHU2UVJnfuUr0KB/veAODepKBIJ2WKKXiD09YPN7TA\nWqi0VbCzbiv9/kHOj1+a93lsZBgpEtZkhgnHw7ze/xYOkz3VJSkdvRpNOJOOfbV29oUE+9aajZRb\nynh78HjGaKl8lJ0rk9fpne5nT/N2ah2e3DcUCSXBjj72REhzFiUXWC5CSU3VmaHed6Wtgu2ezfT7\nB7k2Nb/VYKS7G4PdPqt59ULExDiv9b2JzWhjX2PmeucOl+wwixXgMBMEAVtbO7HhIRJBdfVeFmpi\nvbN2K5XWCo4MHMUfm53wIzevntKkrQ/4h7g4foUV5e20ls03WzidFgRBB5NUY5Pc3FqlSWohgQbw\nnrb7AHi+65V5G324S04CsmlIVDvUf4RAPMj9LXdnND/oEQYMssYO6k2UC82D0WDkQPN+wolIRlv7\njGBvVz2+F7pfBeD969+j+p5ioCTY0U+w2zQK9kAggsEgYLVlrm9yX8tdAPxmzssshsNEh4dkW6JK\nx+nx4dP4otNy+QBT5rBAvY7fyganRluNRePEogkcWZpZGA1G7mu5i6gY4/W+2ZEh+djXn+18AYD3\ntN2b8XPBIMz0Pi0AwWTC2tSsurl1LsHe5Gpgc80GOn098zZ6xWFva1Mn2COJKC/3HMJmtKXMG3PR\n6xSrJM+p3uCy2NgVDjTvx2ly8HLvoXlljSPd3QhWG+Y6dQla3b5erkxeZ23lKlZW5Vfm+HalJNjR\nJzkH0h2oXaquV7JOswnnjvI2NlSt5erkdS5PXEv9PdIrN69Wm4QRS8T4deeLmAQj97ZkfpFhZh4K\nFWqK5hjuzJ1OHgwosfzZbcPKZnSw7w3CaUfwlIamUmPv8fVxevQ87WWtbKpen/U6R4EF0RSsrW1y\nc+vB3MXhsvlb0nmw7X4Anu96ddbfw12dsuNU5Ty80f8W/liA+1ruxGG2Z7xGL43d5HTKvqcedb6n\nXPNgM9l4oPUAoXiIV5MmNQAxEiE6OICttVW14/TF7oPAzGloOVES7OinsRudTswejyoHaqZ43Uy8\nb+V7AXj6xq9TyRlhjbVRXus/zER4kgPNd6YyLDOhxNMXHPrZnhTs3SoE+5xyAhm/z2Tj/pa78ccC\nPN89I9RmTBDqErSeufk8AI+teHDBk47DaSURF4lG8jdJyePqmDXOhcgWGZROR3kraytXcXnyGlfH\n5MxkKZEg0tONpbEJgzV3Gnw0EePFnoNYjZZ5kVHpWG0mDEZBl2bO1tY2xECA+MR4zmuV9ZDJiaxw\nT/N+XGYnr/a+ntLatTpOu329nB49T6u7ibWVq1TdU0yUBDv6aewgmyHEQID4+MIO1Eg4jpiQcgr2\nFncjd9Rto9c/wMmRs/K93eodp/5YgOe6XsZhsvNQ+/0LXjtz/C4sIs
RUVS1XOFQR069GoAE80HqA\nSmsFr/S+zlhoAkmSCHfexFhRgakid1OJ61OdXJy4wpqKlTm74ug1D6kNrjN3eYhcphiFhzveDcC/\nnvwhoiQSHRpEikZTz8rFq72vMx31c6D5TpxmR9brBEE2Sekh2BVnphqHeiAQxeYwY1ygcbjNZE1q\n7WFe6T2U/O6u5LPacz5DlER+dPVpJCQ+uOqRvGrX3+6UBDtgs5sRhMJty6Dezp7LlpjOYysexCgY\neebm88TFOOGebtXFnp7rfJlQPMx729+FY4EXGfQ7uQiCgLW9ndjYKAn/wr0y1ZggACxGCx9Y9TBx\nMc7Prz9LfHKShNerSlsXJZFfXP81AI+tfDDn9XqZIaxNzQgmkzqTlMr1sKqig931O7g52cOhviMz\np5b29pzPGA6O8uuul3CbXTzQeiDn9Ypg18MkBRBRc3LxR3HmWAsga+1ui4sXe15jKDCcMn+q0djf\nGjxOl6+HnbVbWbMMtXUoCXZgRjsp1LYMaY7DHNrJjKaa+/hcY6/m7qa9jIXGefbys0T7+7C1tee0\nJfb7BznUf4QaWxV3N+/P+Rw9Ty6KoMm5wanUVEGOkFlR3s7p0XN0npdjme0qBPvzXa/S6etmR+0W\nVpS357xeL1+DYDJhbWsn0tebMwM1FIgiCLKSkYsPrXoUp8XBMzefw3dD7g9rzeE4FSWRf7/0E+Ji\nnI+s/cCC2rqCw2VBTEhEwvm1jVRIKTs5NrhYNJF0pOdeC1ajhY+u+SBxMc5TF39EuKsLwWrNqewE\nY0GevvEbLEYLH1z1iOp/Q7FREuxJHAU2tVZIFYDKqbHnti2n89iKB6m113DhzKuy4zRH4a9ALMi3\nzn6XhJTgw2veh9mQu7OQXho7gK09Gb+cQ0tTa4oBeQP+8OrHEBA4d+ol+Tk5BHunt5tfd71IhbWc\nj679kJqhF9wuMR1be4ecgZqjMJrib1FjFnBbXHx8ywcJJyIMXz0jtwTMUdnyzYG3ueHtZKtnk+pa\n44rSESgwWcvocmGuqyfcdXPBDFTF9KVG2QHYVruZXXU76J/sITI4ILcEXEDZkSSJH1/7Jf5YgIfb\nH6DSVqHtH1JEFCTYn3vuOR599FHWr1/PhQvqakbfrjicFuJxkVi0MIeZ0eXCVFOT04G6UHJSJmwm\nG7+36XdpnJDHF2/OrpmIksi/XfgBY+EJHmq7n81pRa4WwmwxYjIb9NXYcwl2laYYhbayFt634iEq\nRmQTj9CcvRJfOB7m3y78AEmS+E8bPqpKS4UZwVKojR3SI4Sy29klSSLojy7oMJzL/Sv2s8rVimPE\nR6imbMGWgDemuvj59Wexm2z8b2s+oNqmrOcGZ1+xEjEUWrCEb0CDeVLhI2veR7vfgiBJRBqqFrz2\nmZvPc3ToJK3uplQo8XKlIMG+Zs0avv71r7NrV3G3kQL9Mu1A1lZFv3/BEr4zyUnqF3Gzu5GdIbmS\n43+E3s7YvT0hJvjZtV9xceIKG6rX8sgK9YkXejrMTBWVGMsrcjpQlSbWFqv6XqUPtNxDw6TERJmR\n73U9k7HD0Hhokq+c+iZj4Qne3XYvaypXqv5+XU8uyRPFQoI9GkkQj4ua1oJBMPCJqvswiXDdHeLZ\nzhczXnd54hpfP/0vxMQ4v7vutym3qs/Q1cskBaROmOGb2edB2UDU2NgVHGYHDxnWAfBi4irnxi5m\nvO5g75s83/0KHns1f7T192YVfluOFCTYV6xYQXt7e8Hmi9sBPe3L9lWyQyZ841rWawIabMsKkiTh\nGJwk6rJxnXH++7Gv8ubA28QSMSRJotPbzd8f/0de7XsDj72axzf8DgZB20+smKREsfDf1NbeTnxy\ngrh3Kus1akI+5xIfGcYUjROsr+T06Dn+7uiXOTt6QY6UiYc5N3aRvz/+VXqn+9nXsItHO7RlFerl\nPAW5hK/B4Vjw5JIyy6k0QSiYBu
SNPVBXwW+6XuKpiz+k0ys3E58IT/JKzyG+ceZfEZH47OZPsq1W\nW7s3p1OfujkAthXyxhq+OT+LWkFLQEE65UNyiejBWgvfPPtdXuh+lfHQJJIk0Tc9wJMXvs9Prv0S\nt8XFn2z7zLz6QMuR5b1taSC1iHXQ0uwrZcEeun6dsn2ZE4JSha80CLX4xAQJr5eqHTt5fOPd/MeV\nn/H9yz/l+5d/ikEwpOLc72zczftXPpwzCiYTDqc11dRaq8Cdi629g8CZ04S7unBtnV+PRBQlQoEo\ndU3qtUiYMe9s2v4uhhoDHB44xjfPfReb0Uo4IQsho2Dkd9Z+iDsb92gOZzOaDHJTax0EuyAI2No7\nCF68QMLvz9h0PJDH6Q1InYYevPPjdE78hreHTvD20AncZhfTMdlUZTGY+YMtn8oZ4pkJXcOAm5oR\nzGbCndkFeyCPDU6SJELXr2EsL+f37v5j/vncv/H0jd/w9I3f4DQ5CMTlshaNznr+04aPUmPX1rug\nWMkp2B9//HHGMhS1euKJJ7j//oXjohfC43Hnfe9iUN8oCxdBmj22fMYpVmygz2Ih1n0z6/3RcByH\n00J9vfqGu2NXzwFQvXkDWzfdza6Ojfzowq+YCE4RSUSxGM18eOPDrPdof4kVqmuc3LwyitVsKvg3\nMm3byPjTP8cw1IvnATkZJv07/dMRJAkqq5yanjU9JJfCbb1jO19Ys5rf8j3ED889Q59vkFpnNR5H\nNfd27GNVdXte4/Z43JRV2Jn2hnVZp8GN6whevIB1apjKjoZ5nw/2eAGoayjT9LxY900MFgsb9uzm\nHw17OTt8iYOdR7gwcpXtDRvZ0bCZXU1bqXLk5yS0W+UInXhMLGgelHuHV6/Cd/kKVS4TRvv8jFcx\nLp8SW1orqax2qvruyOgoiakpqvbuYf2qjaxs/L95vfsoNya6uTnZTXtVM4+tfTfbGzbm3OBvN5lU\nCDkF+5NPPrkoDx4dzd2YdimJJ731I8PTqbF5PO68x2ltayd4/RpDPSMZF7HPG8JVZtP0/aOnzgOQ\nqGtO3mfmtzs+OG+chcytYJQXf3/fJEZLYUFTiepGEATGz5zH8eD0vHGODcv/bTQZNI158uIVMBoJ\nuqoJj05jxcUn1/zO7IvE/OZBGaPVZmJ0KMbgwBQm8/xKmFoQa5sAGD59gXjzfFv/0IAs2EVJUj3m\nSrtAsKcX+9p1jE/K2ZdNplY+vroV0vb1RABGA/mtB1GUEASYnAjkvabSf3NjcxtcvETf8XM41s0v\n6TAxLhd5C0diqp83ffQMAIaW9uQ9RvbX7GN/zewSvGNjC+dTFPKuLyVqNx/dwh2L3c7u1Cm0S8G2\nchUksyPnEo8liEYSmk0doZs3wGDQVL1OK3ral40OB9bmFsKdNxFj8xuGaAl1VJDicSK9PVhbWjGY\nc8d858tSOlDzsS37Ll8BScK+Kv/TWS4MSkG06cLnANLs7FnmIdVBSsNGGrpxHZgxf5aQKUiwv/TS\nSxw4cIAzZ87wuc99js985jN6jWvJ0VOgAakXLpxceOnkLdB6urE2NauqCZIvelX1U7CvXo0Ui2Us\njKY11BFk+7oUj2PX2MBbK3rOg6miAlNVNaEb1zPGcSvKRKbyzdnwXZCjP+yr1xQ8voXQqyAazETG\nhLI4UIP++R2kchG6cT2ZCLa8qjMWSkHO0wceeIAHHnhAr7HcUowmAza7WZfQLgDbSlk7CV2fHxmT\nj0CL9PUixWIprWex0H2DW72WqVdeJnTtKuzbMeuzlNPQrX4eglfkZsb2Net0GV829HQcAtjXrmX6\nyGGiA/1yL9A0gn456zS9iXUufJcugyBgX7nI68FlZXTITzQSx2or7IRkqqzCWFFB+OYNJEmaZfNO\nxEUi4Tg1deojVsRIhEhPN7aOFRjMy6OlnV6UMk/TcLosuoR2AZjcZZjr6uRFPEdLyycRQ9FycmWc
\nFopTx9hlkDV2QBbsc8hHUw1dvSJ/75q1OowuO8qY9BLsjrXyRhRMjj+dgD+C3WGZ10EqG2Isiv/a\ndaytbRhsmcvu6oWe60EQBOwdK0l4vfMqPeZzig13dYIolswwGSgJ9jQcbqvcQShaWG0MBfuKVXK2\n3eDsbDslo1GTQFM01UW0qQLYHMkOQjpkXYKcqGT2eAhdn2+GCE5re5mleJzQtatYGpswlWkLkdSK\ncnIJ6DQP9qRgV35HBSXrVJNA60yao1Yv7lqAxTjBJTf6K7M3uFSoo1P9O6GYOW2LfGopRkqCPQ29\ntVVbMlEpNCdRSUvhK5CbFQcvX8JUVY25tk6XsWXDYBCwOy26vcgA9lVrEIMBgr19s/4e8EcwGAVV\nha9Arr8jRaPY1y6utg76m2LMNR5MlVWErlyZZa/OJ+s0nDTvLbZ9HcDp1i9JCcCxXi5vEbw0O0M0\nmEcsf8lxmp2SYE9D7+O3suDC1+YIdo2mmEhPD2IggGPDhiWpHe10yZUu9Yp0UgSQ7+Lslzngj+J0\nWVX/mxRtVzFrLCZ6a6qCIGBfu5aEf5rowExHpXyyToNXZbOWfdXiC/bUyUWnebA0NWN0uwlcujBr\nfWk1xUiiSOjGdUzV1arq8b/TKAn2NGZqY+ijnVgam+RFfPH87EWs0XmqaDeKtrPYOF3WlDNLD5Tj\nt+/ijBlCFCWC/ogmDW2pHKdAMuzOoFt0EMxsSKErl1J/05p1Koki4RvXsDU2YCpXn9yWL3qfXASD\nAce69SSmpoilFQTT+k5EeroR/f4leyeKjZJgTyNlitEpblcwGHBs3ETC6yXa15v6e9AvF74yW9TF\n6wYvyZUzHeuWSLAnj9+BaX02OHN9A0aXG9/FGYEWDkaRJPWaqhSPE7p+DUtD46Lb1xWcLqu+Jqk1\n8x2oWjX2aH8fYihE2frsPVv1xKljpUsFx/qNAATSzDFaywkEzstZ2M5N2urfvFMoCfY0UuVaddLY\nYWbhKQsRwO+PqDZBiLGoLNCampdEQwP9fQ2CIGBfs4bo2BjRoaFZ36021DHc3YUUiaSckEuBw2kh\nFNSnIBqAubYWU2XlLDu7Vo1dOb2VbVwawa6EYOql7EBmO7tWG3vg/DkQhNQmUWI2JcGeht4CDcCx\ncRMIQkqwJ+Ii4WAspRXnInzjBlI0uqRHTr01dgDnlq0A+M+ckr9bY6ijEua4FPZ1BYfLgiRBKKjn\nBreOxLQvFSml1d/iP30KBIHKnTtyX6wDBoMBu9Osq0nK7PHIkVKXLyEl5JLLWk6xiUCA8I3r2Fas\nxOhUV1PmnUZJsKeRqsmuo8ZucpdhbWsndP0aYjg0I9BUaqpLbV8H/TrnpOPcvFXe4M6clr97WqOm\nelk24yx2/Ho6etuXIS2e/bL8u2rZ4BJ+P6Hr17CtWImlYum6/zhdVgL+iK5lQxzrNyCGQqkG14FA\nRHUHqeClCyBJODdv0W08y42SYE/DaJS1Ez01dkiaYxIJgpcupR291WmqwUsXwGDAsQQhfgrKpqPn\nPJjKy3GvWU3o+jUSfr8mm2oiECB4+RLW1rYlM0dBWv0gHU8ujqRpzn/yhPzdGrJOA+fPgihmLIG8\nmDhcFuKxwruLzfrOpAkleOkCoigSCsRK9nUdKQn2OSyGdpJuZ1eEhBpTTCIYINzZKadML3KGYTqu\nRTDFAFTt3gWiSODc2Rmbqop58J8+CYkE7juWtlNXyiSl48nFXFWFbeUqQlcuE/f5CGrIOvWflk87\nzq3bdRuPGmYK5OnoSF6XPLlcukgoEEs+J/fpTZIkAufPYXS5sbaW6sNkoyTY5+BcBO3E1rECg8NB\n4MI5/Elh6VIj0E6evCVHTovVhNFk0NUkBVC1+w5AtrPPmCByv8z+48cAcO1cWsGu/EZ+nTc4985d\nIElMnzyhOutUiscJnj+L2ePB0pi9z+ti4FgkE6WtYwWhq1fw
DcvlBdTMQ7S/j8TUFI6NmxZsXP1O\npzQzc1gM+7JgNOLYsJH42BjTQxOAOk3Vd+RNAMr27Mtxpb4IgiAnKekYCQFgb2nB7PEQPH+OgC+C\n2WLM2es0EQwQuHgBa0srlrrFzbqdy4wTWd95cN0hb3BTx0+qzjoNXrmMGA7j3Lp9SZLU0tGz92k6\n7n37QRQZPymH86oxTwbOlcwwaigJ9jnoHcuu4Eoen729Q7Oek43Y+BihK5exr1mL2ePRdSxqcLqt\nBANREon5ZWbzRRAEnFu3I4bDBLxBVRqa/9QpSCRwLbEZBtJ8DTpr7OaqamwrVjJ1U85tUGNbDiSj\niVzbltYMA/pnZCuU7doDRiMTV+X67K6yhedBkiR8bx0GoxHHpk26jmW5URLsc9C7NoaCa+cdGBxO\npsenEYTcx07fW0cAKNu7X9dxqEV5mUM6hrmBLJhEDISjkioNzX9CNsMstX0dwGQyYrObdDfFgLwe\nIkbZb5Jrk5dEEf/p0xgcjkUvApeJmdr0+s6D0e3GuXkLfp86v1P4+jWi/X24tu/E5F6aJLVipSTY\n57BYx06DxULZnXcRESzYzCzoLJMkCd+RNxFMpluiqcLiRMaAXJ0yXlkLgMO+cMxyIhggcOE81pYW\nLHX1uo5DLU63VXeNHeSNShHsuTT2wNkzxCfGcW3fiWBa+v7zi3WKBSjbt5+ISW66nsvvNHXwFQAq\n7r1P93EsN0qCfQ56t8hLp/yee4kYnViiC/dfjHR1EhsawrV9B0aHQ/dxqGExQv0ABJMJy54DAJgm\nBhe8dvrYMdkMs8RO03ScbiuxaIJoRJ+6OQrm6hrE2mYArGSfY0mSmPj1rwCofM9Duo5BLQ6XFUEA\n/3RY9+92btlGxCr38XQ4sod8xqd9+E8cx9LQuKTZx8VKSbDPYTGSUhSkihpEgxGzf4LIQH/W6xSn\nqXvfrTHDwOKE+ikIa2THl3TzEmI08zyLkQgTv3oawWymbP9duo9BLYsV+gkgJRtbx46+kfWa0LWr\nhG/ewLltO9amJt3HoAaDQcDhshLw6T8HBrOZmKMKczxE5NrlrNf53ngdKR6n/MB9S+48LkZKgn0O\n9mSjCb1NEEDKlmiNB/AefDXjNdGhQbyvH8JYXoFzw61zEC3m8Tuc/EqzfwLf4cxCbfKlF4hPTlL5\n7gcxV1XpPga1KCeXxbCzx9zVAMRPHSHS25PxmolfPwtA1Xsf0f35WnCVWQn49auboyBJEiEs2OIB\nxn72E6T4/JORJIp4XzuIYLFQtv/WKTvFREGC/R/+4R9473vfy/vf/34+//nP4/cvbGIoBpTO7Ho7\nT2FG+7WbJbxvHCLS2zvrc0kUGXryO0ixGLUf+/gtsacqLKbGrmyaNqJMPv+bVL0QhbjPx+RvnsXo\nclP50MO6P18Li1E3R8E/HUEQwBIPMfqTH837PNLbQ/D8Wexr1t7yZhIutxVRlHR3pkfCcRIJCWeZ\njUh3FxPP/XreNVMvv0hsbBT37r0YHaXaMGooSLDfddddPPvsszz99NO0tbXxzW9+U69x3VIcLquu\njSYUFOHg2b0NKRql/2tfJj41lfp88sXnCd+4jnvXbjmJ5RaSciIvgkBTNovqbZuIjY4y+sMfzGqb\nN/7M04jhMFXve/8t8zEoLKZgD/giuMpsODdsIHjh/KwKoHHvFEPffRK49do6LF6yljJpmH0fAAAa\nGklEQVSvVWtXYKyoYPyZp4mklbj2nznN6I/+A2N5OdXve7+uz17OFCTY9+/fn4ru2LZtG0PJkqzF\njtNlIREXCQVjun6vsohrNq+j5kMfJj4xQf/XvkLg/Dkmnv8N47/4GUa3m9qPfULX5+aDEuq3GCYp\nZR4aH3svloZGpl55iYGvfYXg1Sv0f+0reF99GXNdHRX33Kv7s7WSEmg6z0MiIRLwR3G5rdR8+CMg\nCAz809cY/fF/ELx0kZ6/
/SsiXZ2U7b8zVV/mVuJMxpj7dbazKxuFu8pJ3Sc/BYkEg//yTbyHXmP6\nxDEGv/UNBLOZpj/5Auaqal2fvZzR7az/k5/8hEceufWahR64ymwA+KZCGC36uSHSC4BVvPcRosPD\n+N58nf6vfEm+QBCo/cSnMLrduj2zEBwuK36f/pEQQX8Um92EraaKlv/z/2Hwm/9E4NxZAufOAnIr\nvdqPfeKWmqIUUmGfOgs0xTnvKrNia22j/vd+n7Gf/pjJ559j8vnnAKj+4G9R9fCjt4WzcLGcyOm1\nk1ybtlF+zwG8h15j+KknU9c0/OGfYOtYoetzlzs535zHH3+csbGxeX9/4oknuP/++wH4xje+gdls\n5rHHHlP9YI/n9hBemahvLOP8yX68UyHWbtQvfjoWkW3JbR3VWG1map74Y3qb5DR5Z1srrlUrsdXn\n97zFmM/KagcTowHKy+w5U//V4vG4CQailFfak2N2U/fXf0H3975P4GYnTR98P+Vbt9xSYZY+l5Ik\nYbYYiYRius5xKOmU9tSV4fG48Tz2IB0P3sfwiy8x+tobNH3wfVTv26t6nItNJCg7NRNxUfNzF7pe\nTMjmzqaWSjweNzX/+fP4H32IYE8Pwd4+XCtX4jlwd/4D12mcxUbOt/XJJ59c8POf//znvPbaazz1\n1FOaHjw6Oq3p+qVEMMpCxTcZ0nWck+MBzBYjvukwJGOCHe95FAAJmAam83iex+NelPlUmh50dY5T\nWV24rdvjcTPQP0UkHMdqM80as/PhD+AEYsDY2K1zwmeaS4fLwtSUvmuht2cSAKNZmPW9pt1307D7\nbkQWfkcW6zfPRizp4B4dntb03FzjHB2SP4snEjPXVTVgqGrAtW2PfM0S/DuXej7zRe3mU5Cd4dCh\nQ3z729/mG9/4BhaL+qbEtzvKsdM7FdL1ewMamzffapyL0CpwOmnaUcxdxYDLbSUcjJGI61c3J6Ch\nyuftgMNpwWAQdDfF+DWUsS6hnoLO13/zN39DLBbj05/+NABbt27lL//yL/UY1y1FKUbkndRPsMfj\nCcKhONW1Lt2+c7FZjIgQxWbvLi8ewZ6ejVxWoU9dfH9qgysOgSYnKVkWJSrGZjdhNqtr7F5CHQUJ\n9hdeeEGvcdxWKCnUemrsWhpL3C4ojkM9X+Zpb1JTLRKBBmkRIdN6CnZlHopng3O5rQwP+BBFCYNB\nHx+IPKfFMwfFQinzNAMGg4DLbcWno8ZejEdOd1Lo6Bnipphi3MUk0Bahbo5/OoLJbMBqu/WRP2px\nlVmRJHRrbB2NxIlFE0VjjiomSoI9C64yG9O+sG71yFM2VZV9HW8HFG1y2qtfyGNRmmIWySTlcltv\ni1BGtSjzoFcIbDEqO8VCSbBnwVWe1E50SkyZidctHuep1WbCYjWltGw9mPZGVNWjv53Q2yQVi8n+\nlmIywwC43PJ49drgis2BXEyUBHsWUtqqTkJN0XqLSVMFKCu3Me0N61Zewe8L43RbMRqLZ+nNJOfo\nu8kXm0Bz6Zx9qrbBRgntFM/btcS4dV7ExSrYXeVW4jGRcKjw8gpiQiQwHSk6TdWuc6hfSqAVkQMZ\n0kwxemvsRTYPxUBJsGfBlXIc6qOx+7xhLFYjVlv2ZgK3I8pGpMcG5/OGkaSZTbNYUJp769Vowl+E\nDmSYEcC6bXAlG/uiURLsWdDz2ClJEtPeMGXl+oTKLSXuVN2cwoWaEj7qKrJTC8gbXGA6qkuS0kyo\nY3EJNLtDPrnodYpN+Z2KKKCgWCgJ9iwojiI9NPZwKEY8JhadGQbSNXYdBHsyfLTYNHYgFb+uh8/F\nX6Q2doNB55PLdASL1ahbHaISM5QEexasNhNWm4lpHbSTYrWvw8yY9Qh59E4GgeJKylFwV+h3cim2\nrNN0nGU2gv4ooljYyUWSJDnkswjXQjFQEuwLUF5h10VTTQn2Isyw01ewh2Z9ZzFRVj5Tyr
lQ/NMR\nrDYTZkvxaaoutz5hwJFwnGgkUco6XSRKgn0ByirtRCMJIuHCOtQrWl4xCjRZABl1MUEUsynGrZhi\nCtzgZE01UnRmGAW9fE/KWtCrREOJ2ZQE+wKUJxddoTZFRRiUFaFgFwQBV5lVN429WDXVMp1MMak0\n+iLc3CDNmV7gelBOPiWNfXEoCfYFKK9MCvYCtZNitrGDvCEVenKRJAnvVKho58DhtGA0GZj2FmaK\nmYlhL855KEu+E4XWUVI2yJLGvjiUBPsCpDT2As0QPm84lZ5fjLh0iIwJh2JFrakKgoC73Fawxp4K\ndSxSU0x5pbwWCq18OqOxlwT7YlAS7AugaCeFRMYoMezFqqmCPsdvRaAVW1JOOmXlNiLheEEnF8W2\nrJwGiw1XmQ1B0FFjL+L34namJNgXQA+NPZTsvFPMtsRULHsBgl0xRxVzeJvyGxZijil2wW40GnCX\n23TR2F1lVoymkghaDEqzugDu8qR2UsDxu9jt66BPyGOqDnt5cZogANzJzOFC1oMSy1+sgh3ksYcC\nMaKR/E4uibiI3xcpaeuLSEmwL4DRaKCswo53In/tRLElLgvBXsDJxZ/snFTM86BHZIx3MoTdaS5a\nfwvM2MXznQdlHZXs64tHSbDnoKLKTjgUy7u64XLQ2O0OczIiJH9fQzE2sZ7LzMklv40+kRCZ9oYp\nr3ToOawlRzlt5NsTuBTquPgUpDZ89atf5eWXX8ZgMFBdXc0Xv/hFPB6PXmO7LSivcsCNCbyTIWx2\n7ZUZZ2LYi1c7EQQBd4Gx7FMTQSxWE3ZHcVW3TCelqeY5D9PJ6pbFbIaBdI09T8E+mXwninwebmcK\n0tg/85nP8Mtf/pJf/OIX3HvvvXz961/Xa1y3DRVV8uKbGg/mdf+Mxl68tmWQtVUlZFEroijhnQxR\nU+sqqlZwc0nVD8rTBKGY9IpesCshjwVr7MU9D7czBQl2p9OZ+u9QKITBsPwsOxVV8rF5ajI/we7z\nhrHZzUWZbZmOklKfz8s87Q0jJiSqa525L77NcZfbknXltXeUUtaQoiwUKwVr7KnkpJIpZrEoWNp8\n+ctf5umnn8btdvPUU0/pMabbivKkYM/HgSpJEn5vmOpal97DWnIqq+V5mBwPUFOn7d+jnHZqlsE8\nlFXYGBv2EwxENdcR9y2T+ihmsxGny5J3LLtvKoTZYszLtFlCHTkF++OPP87Y2Ni8vz/xxBPcf//9\nPPHEEzzxxBN861vf4nvf+x6f//znVT3Y43FrH+0toL2jGrPFiN8X0TxmnzdEIiFRU+ta9H/vYn9/\n+4oa3uQ60VBC87OuXxgBWJJ50IOFxljXUM7NK2MYMWj+twT9sgN+5eparLbCT3C3ci6ra130dE5Q\nWenAZDIueG36OCVJwucNU1XjpLa2bLGHqYliWJtqybm6nnzySVVf9Oijj/IHf/AHqgX76Oi0qutu\nJR6Pm7ExP+UVdsZH/YyM+DTZiHs7JwCwuyyL+u/1eNyLPp8Gs/zv7uuZ1Pysvp5JAKprF3+chZJr\nLk0W2dzY0zWOzaVN4xwdnsbhtOCbDkGB07AUv/lCOJwWkODm9bHUaS4Tc8cZDESJRRM4Fvmd0Mqt\nnk+1qN18CjKKd3d3p/775ZdfZsWKFYV83W1LeZWdeEzU3OtxYjQAQLWn+G3LTpcFs8XI5HhA871T\n40EEAapqijvMD/KPZU8kRPy+cNE7ThXyLQZWcpwuDQWdB7/0pS/R2dmJwWCgsbGR//bf/pte47qt\nSDlQJ0Ka4rAnxmQhWFlT/IJdEAQqaxyMDfkRRVGTo3xyIoi73JbzyF4MKGtB6wbnm1oeoY4KqVh2\njQ7UkuN0aShIsP/jP/6jXuO4rSlXQh4ngjS3V6q+b2IsgMEgLJuXubLaycjANN7J8ILH73TCoRjh\nYIy6huVhv3SX2zBbjIyPaBPsqVICRR4Ro1Be0thva5
ZffOIiUJFHZIwkSUyOBamodmA0Lo9pTkXG\njKkXalMTyRA/lRvB7Y4gCFTXOpmaCBKPq4/pXy4x7AqKxq1VY1fWTrGHfN7uLA+Js8ikkpQ0xLL7\nfRFi0QRVy8AMo1BZo5gh1M+DEuqobI7LgSqPC0mCyTH186AIwOUi2K02M1abSXNew9iwH4vVVNQl\nNoqBkmBXgdVmxuYwa9LYFcfpcnAYKlRWy5uUFvuysgksF40dZpzhym+shuWmsYN8gvNNhojH1J1c\nYtEEUxMhauqKOwO5GCgJdpVUVNnxTYVIJERV1yuO06plEBGj4C63YTQZNGmqisau1iZfDCgJZ+Oj\nftX3eCdDOFyWos9ATsdT70aSYFzlBqfM13JIVLvdKQl2lVRUOpAk9WFuyykiRsFgEKiosjM1HlSd\nUj81EcRqMy2rLEPFvKbWgRoJx5n2qnc4Fws19bJDfHRIXfz32LAs2Ks1Zi6X0E5JsKskPTJGDROj\nAYwmw7Lz/lfWOInHRVWVHhMJEd9UmIpqx7I6elttJtxlVtWmGEXw1TbcXpmWheKplwW0WsE+PlLS\n2JeKkmBXiWJSGVOxiEVRYmo8SGW1A4Nh+Qg0SK8Zk3uD802FEEWJymXkOFWoqnURDEQJBaM5rx0Z\n9AFQu0xCPhUqqx2YTAZNGrvBIKSc8CUWj5JgV0ldo6xtDfX7cl477Q0Rj4vLKiJGIeVAVWFnV65Z\nTo5TBcWBqsYcMzwgrxllDS0XDAYD1XUuJsdyh36Kosj4aICqGueyCf+9nSnNsErsDgsVVXaGB3yI\n4sL25YlRWaAtJ8epwkzIo3qBphzZlxNqHaiSJDEyMI3TbcHpLu6a/Jnw1LkRRSnnBuedCJGIiyX7\n+hJREuwaqG8qJxZN5EzQmXGcLj9NtbzSjsEgpOylCzHQO4XBIFDXWL4EI1taUiGPOQRaYDpCMBBd\ndvZ1BbV29jHFvl4S7EtCSbBroK5ZMcd4F7wuFeq4DE0xRqOB2sYyxob9RMLZ+8DGonHGhvx46t2Y\nLcVfI2Yu5VV2jEYhZ6jfyKDiOF1e9nUFj8rIGCUipuQ4XRpKgl0D9U2y5jnUl93OLkkSw33eZZ1d\n19xWgSTBQM9U1muG+mWTVUPL8tPWQbYvV9Y4mRgLLGiaW672dYXKGtmBOja08AkuFepYEuxLQkmw\na6Cy2oHFalpQY58cDzLti9C6onJZhfiloxRC6+uazHrNYK88R40tFUsypltBtcdJIi4u6G9QNHZF\ns11uGAwGqmtdTIwFsjpQJUlibMSPu9ymS4ORErkpCXYNCIJAfVMZvqkwwUDmMLeeG+MAtKyoXsqh\nLSm1jWWYzAb6urNr7AO98mf1zctTYwdobJM3uO7r4xk/F0WJ0aFpKmtkhWC54ql3IYpS1rj+oD9K\nOBgr2deXkJJg10h9k3ykHs4S9thzU+6a1LqiasnGtNQYjQYaWyuYGg/iz9B8JB5PMDLgo6bOtaw1\ntPZV1QgCdF6d3zoS5HIKsWhi2TpOFXLZ2ZV3Yrmao25HSoJdI3WKnT2DOSYaiTPY68VT75Jbhy1j\nmpPaan8Gc8zIwDSJxPK1ryvY7GYaWysYGZzOuMEp9vXl6jhV8CT/fdlMc9cuDgOwan3tko3pnU5J\nsGukrtGNIGROVOrrmkQUJVqXsRlGoSkp2Pu657/Mg0kzzHK2ryt0rKkBoCuD1t6f7PW63DXVqhon\nVR4nXdfG55kofd4Q/d1T1DeXL9tggtuRkmDXiNliorrWxeigj3BodrhfygyzcvmaYRSqa53YHGb6\nuybnFQQbSDpOl7vGDtCxWhbsN6+Ozvq7fzrCjUujVFTZl71tWRAENmxrQBQlrpwbmvXZhdMDAKze\nUNLWlxJdBPt3vvMd1q1bx9RUdmfacmLNxjoSCYmTR2aaeUuSRM/NcWx207K3qYL8Mje3VRDwR2cV\nRgsGogz1e6msdm
B3LG9zFICrzEZtg5uBnqlZG/25432IosTWPS3LNjoqnTUb6zCaDFw6Mzhroz9/\nsh+DQWDlOs8tHN07j4IF+9DQEIcPH6axsVGP8RQFm3Y04S6zcu5Ef6rK4cRogMB0lJaOqmVX+Csb\nTcmwx7PH+1N/e/2Fa8RjIhu3v3PWQ8eaGiRpJjomEo5z8fQADqeFNRvrbvHolgarzcyqdR68k7Lp\nBeTQ38E+Ly0dle+ITf52omDB/nd/93f82Z/9mR5jKRqMJgO77ulATEgcfb0T72SIF56+CEB78mj+\nTmD1+lqqPE4unhrg3Ik+blwe4eaVUeqby9m0s+lWD2/J6Fgja6PnT8kb/cUzA0QjCTbf0YTJtPyy\nbrOxYZu8mV86M5A0ywwCsGrDO2Nzu50oKBbtlVdeoaGhgbVr1+o1nqJhzca6/7+9u4tpMkvjAP6v\ntIDDOKaK06DD6CwOG4gFRhPdgURtbeSjVlFRboymDUZvrCB+hKJGA8aAqJekxAjRZDTK2myI0Wym\nWiEIIsYFN6Q6bHAcjAVRMhSj9OvZC9dO2NJqzOgp5fndnSYn+acfT09P3/c56Or4DY/+PYBfe19g\n7I0H6Uu/mVI/OWXRUuQVKPH3c/fQ+nMvZNFSREmnQZX31ymx/fCOfPYXSPxOjt/6hvGT+Q6ipNMg\ni46aUr9aAEAx7yvI47/Af+zP8fiXFng8Psiio/Dd95F/MUG4eW9h1+v1GBoK/Me/uLgYZrMZZ8+e\n9T/2oafqRAKJRIK/rfwLrl56ALfLixU5yf4Vy1QyY2Yscjcq8Y+f/gXXmAc/qpIi6uDqD5W3KQ29\nPQPobP0Vvw+/RvrSRMTERs6pUR9CIpFg8Y/z0fLPX/DVzFjMmhOHH5Z+G1HHAU4WEvrIavzo0SPo\n9XrExsa+7Y8yMACFQoHLly9j9mz+hmaMMVE+urD/P7VaDYvFgpkzI/8SN8YYC2d/2nXsEolkSm3F\nMMZYuPrTVuyMMcbCA995yhhjEYYLO2OMRRgu7IwxFmGEFXa73Y7CwkLk5+ejoKAADx48EBXlvc6f\nP4+cnBzodDrU1NSIjhNUuPfsqa6uRm5uLtatW4ddu3ZhdPT9B2J/Ts3NzcjJyUF2djbq6upEx5mQ\nw+HA1q1bkZeXB51Oh3PnzomOFJTP58P69euxc+dO0VGCcjqdMBqNyM3NhVarRVdXl+hIE2poaMCa\nNWug0+lQWloKl2vig378SBCDwUAtLS1ERGSz2WjLli2iooTU3t5Oer2e3G43ERG9ePFCcKKJPXv2\njAwGA6lUKhoeHhYdZ0Ktra3k9XqJiOjEiRNUU1MjONEfvF4vaTQa6u/vJ5fLRWvXrqXe3l7RsQIM\nDg5ST08PERGNjo7S6tWrwzInEVF9fT2VlpbSjh07REcJ6sCBA9TY2EhERG63m5xOp+BEgRwOB6nV\nahobGyMiot27d5PFYgk5R9iKXSKRwOl8e+KK0+mEQhGe/SQuXLiA7du3Qyp9e/fcrFnh2ZJ3MvTs\nyczMxLRpb99yGRkZcDgc75nx+XR3d2P+/PmYN28eZDIZtFotrFar6FgB5syZg5SUFABAXFwckpKS\nMDg4KDhVIIfDgVu3bmHTpk2iowQ1OjqKzs5ObNy4EQAglUrx5Zfh2WLZ5/Ph9evX8Hg8ePPmDb7+\nOnQbZGH3+paVlaGoqAhVVVUgIly8eFFUlJAeP36Mzs5OnD59GjExMdi/fz+USqXoWONMxp49jY2N\n0Gq1omP4DQwMICEhwT9WKBRhvT0IAP39/bDb7UhLSxMdJcC7hca7xVs46u/vh1wuR1lZGex2OxYt\nWoTy8nLExobXgSAKhQJ6vR4rV67E9OnTkZWVhczMzJBzPmlhD9ZnpqSkBLdv30Z5eTk0Gg2uX78O\nk8mE+vr6TxknqFD9cLxeL0ZGRnDp0iV0d3ejuLhYyEpusvTsCfWaq9VqAEBtbS1k
Mhl0Ot3njheU\nyOfsY7x69QpGoxEmkwlxcXGi44xjs9kQHx+PlJQU3LlzR3ScoDweD3p6enD48GEolUocO3YMdXV1\nMBqNoqONMzIyAqvVips3b2LGjBkwGo1oamoK/fn55BtEQSxZsmTcePHixYKShFZUVEQdHR3+sUaj\noZcvXwpMNN7Dhw8pMzOT1Go1qVQqSk1NJZVKRUNDQ6KjTejKlStUWFjo3y8MF/fv3yeDweAfm81m\nMpvNAhMF53a7yWAwUENDg+goEzp58iStWLGC1Go1ZWVlUUZGBu3bt090rADPnz8ntVrtH9+9ezcs\n/w+4du0alZeX+8cWi4WOHj0aco6wPXaFQoGOjg4AQFtbGxYsWCAqSkgajQZtbW0AgL6+Png8Hsjl\ncsGp/pCcnIzW1lZYrVbcuHEDCoUCFoslLBuxNTc348yZM6itrUV0dHgdvKBUKvHkyRM8ffoULpcL\nV69exapVq0THmpDJZMLChQuxbds20VEmtGfPHthsNlitVpw6dQrLli1DdXW16FgB4uPjkZCQgL6+\nPgBAe3s7kpKSBKcKNHfuXHR1dWFsbAxE9EE5he2xV1RUoLKyEj6fDzExMaioqBAVJaQNGzbAZDJB\np9NBJpOhqqpKdKSQwrlnT2VlJdxuNwwGAwAgPT0dR44cERvqf6KionDo0CEYDAYQEQoKCsLyQ37v\n3j00NTUhOTkZ+fn5kEgkKCkpwfLly0VHm5QOHjyIvXv3wuPxIDExEcePHxcdKUBaWhqys7ORn58P\nqVSK1NRUbN68OeQc7hXDGGMRhu88ZYyxCMOFnTHGIgwXdsYYizBc2BljLMJwYWeMsQjDhZ0xxiIM\nF3bGGIswXNgZYyzC/Be68EGj7hfMcwAAAABJRU5ErkJggg==\n", "text/plain": [ - "[]" + "\u003cmatplotlib.figure.Figure at 0x7f385e198650\u003e" ] }, - "execution_count": 48, "metadata": { "tags": [] }, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "# Create TensorFlow Variables using Keras's Dense layer.\n", + "def f(x):\n", + " return tf.square(tf.sin(x))\n", "\n", - "wb = tf.keras.layers.Dense(units=1, use_bias=True)\n", + "def grad(f):\n", + " return lambda x: tfe.gradients_function(f)(x)[0]\n", "\n", - "# We can access the underlying TensorFlow variables using wb.variables.\n", - "# However, the variables won't exist until the dimensions of the input\n", - "# tensors are known. Once the dimensions of the input tensors are known,\n", - "# Keras can create and initialize the variables. 
Until then, Keras will\n", - "# report the variables as an empty list: [].\n", + "x = tf.lin_space(-2*pi, 2*pi, 100) # 100 points between -2π and +2π\n", "\n", - "wb.variables" + "import matplotlib.pyplot as plt\n", + "\n", + "plt.plot(x, f(x), label=\"f\")\n", + "plt.plot(x, grad(f)(x), label=\"first derivative\")\n", + "plt.plot(x, grad(grad(f))(x), label=\"second derivative\")\n", + "plt.plot(x, grad(grad(grad(f)))(x), label=\"third derivative\")\n", + "plt.legend()\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "docKLUaonYG_" + "id": "-39gouo7mtgu" }, "source": [ - "## Step 3: *Define the loss function*\n", + "## Gradient tapes\n", "\n", - "Our loss function is the standard L2 loss (where we reduce the loss to its mean across its inputs)." + "Every differentiable TensorFlow operation has an associated gradient function. For example, the gradient function of `tf.square(x)` would be a function that returns `2.0 * x`. To compute the gradient of a user-defined function (like `f(x)` in the example above), TensorFlow first \"records\" all the operations applied to compute the output of the function. We call this record a \"tape\". 
It then uses that tape and the gradient functions associated with each primitive operation to compute the gradients of the user-defined function using [reverse mode differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation).\n", + "\n", + "Since operations are recorded as they are executed, Python control flow (using `if`s and `while`s for example) is naturally handled:\n", + "\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, @@ -245,125 +182,42 @@ } }, "colab_type": "code", - "id": "0_w8ZJSCtuY7" + "id": "MH0UfjympWf7" }, "outputs": [], "source": [ - "def loss_fn(predictions, labels):\n", - " \"\"\"Calculates the mean L2 loss for our linear model.\"\"\"\n", - " return tf.reduce_mean(tf.square(predictions - labels))" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "cellView": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", -
"height": 51 - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 418, - "status": "ok", - "timestamp": 1525154260083, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "K_7beXoHOU7t", - "outputId": "8f55c028-fe2b-4edb-ad68-a849afc60623" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "w: -0.311619\n", - "b: 0.000000\n" - ] - } - ], - "source": [ - "# At this point, the variables exist, and can now be queried:\n", + "def g(x, y):\n", + " # Return the gradient of `f` with respect to its first parameter\n", + " return tfe.gradients_function(f)(x, y)[0]\n", "\n", - "w, b = wb.variables\n", - "print(\"w: %f\" % w.numpy())\n", - "print(\"b: %f\" % b.numpy())" + "assert f(3.0, 2).numpy() == 9.0 # f(x, 2) is essentially x * x\n", + "assert g(3.0, 2).numpy() == 6.0 # And its gradient will be 2 * x\n", + "assert f(4.0, 3).numpy() == 64.0 # f(x, 3) is essentially x * x * x\n", + "assert g(4.0, 3).numpy() == 48.0 # And its gradient will be 3 * x * x" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "JVDWpL9VYWdP" + "id": "aNmR5-jhpX2t" }, "source": [ - "## Step 4: Create an optimizer\n", + "At times it may be inconvenient to encapsulate computation of interest into a function. For example, if you want the gradient of the output with respect to intermediate values computed in the function. In such cases, the slightly more verbose but explicit [tf.GradientTape](https://www.tensorflow.org/api_docs/python/tf/GradientTape) context is useful. All computation inside the context of a `tf.GradientTape` is \"recorded\".\n", "\n", - "We'll use a `GradientDescentOptimizer` to fit our model."
+ "For example:" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, @@ -371,36 +225,48 @@ } }, "colab_type": "code", - "id": "DudNEebMKDWN" + "id": "bAFeIE8EuVIq" }, "outputs": [], "source": [ - "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)" + "x = tf.ones((2, 2))\n", + " \n", + "# TODO(b/78880779): Remove the 'persistent=True' argument and use\n", + "# a single t.gradient() call when the bug is resolved.\n", + "with tf.GradientTape(persistent=True) as t:\n", + " # TODO(ashankar): Explain with \"watch\" argument better?\n", + " t.watch(x)\n", + " y = tf.reduce_sum(x)\n", + " z = tf.multiply(y, y)\n", + "\n", + "# Use the same tape to compute the derivative of z with respect to the\n", + "# intermediate value y.\n", + "dz_dy = t.gradient(z, y)\n", + "assert dz_dy.numpy() == 8.0\n", + "\n", + "# Derivative of z with respect to the original input tensor x\n", + "dz_dx = t.gradient(z, x)\n", + "for i in [0, 1]:\n", + " for j in [0, 1]:\n", + " assert dz_dx[i][j].numpy() == 8.0" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "YBeJYxY8YaiO" + "id": "DK05KXrAAld3" }, "source": [ - "### Step 5: Define a training step\n", - "\n", - "To fit model variables to the data we'll need to:\n", + "### Higher-order gradients\n", "\n", - "1. Calculate the gradients of the loss with respect to the model variables.\n", - "2. Use `optimizer` to compute updates to the variable values based on those gradients.\n", - "\n", - "To calculate the gradients, we use the [`tf.GradientTape`](https://www.tensorflow.org/api_docs/python/tf/GradientTape) context manager\n", - "and its `gradient` function to compute gradients through computation conducted within its context:\n" + "Operations inside of the `GradientTape` context manager are recorded for automatic differentiation. If gradients are computed in that context, then the gradient computation is recorded as well. 
As a result, the exact same API works for higher-order gradients as well. For example:" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { - "cellView": "code", "colab": { "autoexec": { "startup": false, @@ -408,163 +274,37 @@ } }, "colab_type": "code", - "id": "diDZfrMJM3OC" + "id": "cPQgthZ7ugRJ" }, "outputs": [], "source": [ - "def run_step(inputs, labels):\n", - " with tf.GradientTape() as g:\n", - " loss = loss_fn(wb(inputs), labels)\n", - " # Compute the partial derivatives of loss with respect to the variables\n", - " grads = g.gradient(loss, wb.variables)\n", - " optimizer.apply_gradients(zip(grads, wb.variables))\n", - " return loss" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1WWepgmJQOzc" - }, - "source": [ - "Repeatedly running the training step will nudge the variables towards the values that best fit the data (i.e., \"w\" will move closer to 3.0, while \"b\" will tend to 2.0):\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 51 - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 380, - "status": "ok", - "timestamp": 1525154412590, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "ya5Qxz5XQlhU", - "outputId": "8dd47155-a6c1-44c5-c279-617c803f1723" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Values of w, b BEFORE applying gradients: 2.725763, 1.894334\n", - "Values of w, b AFTER applying gradients: 2.774932, 1.922555\n" - ] - } - ], - "source": [ - "w, b = wb.variables\n", - "print(\"Values of w, b BEFORE applying gradients: %f, %f\" % (w.numpy(), b.numpy()))\n", - "run_step(inputs, labels)\n", - "print(\"Values of w, b AFTER applying gradients: %f, %f\" % (w.numpy(), b.numpy()))\n" + "# TODO(ashankar): Should we use the 
persistent tape here instead? Follow up on Tom and Alex's discussion\n", + "\n", + "x = tf.constant(1.0) # Convert the Python 1.0 to a Tensor object\n", + "\n", + "with tf.GradientTape() as t:\n", + " with tf.GradientTape() as t2:\n", + " t2.watch(x)\n", + " y = x * x * x\n", + " # Compute the gradient inside the 't' context manager\n", + " # which means the gradient computation is differentiable as well.\n", + " dy_dx = t2.gradient(y, x)\n", + "d2y_dx2 = t.gradient(dy_dx, x)\n", + "\n", + "assert dy_dx.numpy() == 3.0\n", + "assert d2y_dx2.numpy() == 6.0" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "61TgeLVlKEQp" - }, - "source": [ - "## Step 6: Create a training loop\n", - "\n", - "Of course, now we can simply turn all of this code into a self-standing training loop. We'll also capture our loss and approximations of `w` and `b` and plot them over time." - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 364 - }, - "colab_type": "code", - "executionInfo": { - "elapsed": 580, - "status": "ok", - "timestamp": 1525154278709, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "VukGe-huNaJ4", - "outputId": "c79c8e63-c781-451e-f74f-20815d8da49f" + "id": "4U1KKzUpNl58" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0.9409681558609009, 1.3733772039413452, 1.7128530740737915, 1.9793939590454102, 2.188689708709717, 2.3530514240264893, 2.4821391105651855, 2.583533763885498, 2.6631851196289062, 2.7257626056671143]\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd4U2X/BvD7ZLRpumlLS6EDgbKh\niIggU7aAgPhDRKsIUoYgiK++ioAguBARXmZBEARFUBGhiChIEQcqe+/RMlpGd9KRcX5/nDZtaFra\nkuY07f25rlw5zXmSfPMk5OY5Oec8giiKIoiIiMhhFHIXQEREVN0wfImIiByM4UtERORgDF8iIiIH\nY/gSERE5GMOXiIjIwVSOeJJbtzLs/pi+vlqkpOjt/rhkjf3sGOxnx2A/Owb7WRIQ4FnsOqcd+apU\nSrlLqBbYz47BfnYM9rNjsJ/vzWnDl4iIyFkxfImIiByM4UtERORgDF8iIiIHY/gSERE5GMOXiIjI\nwRi+REREDsbwJSIih/vxx61YtGi+3GXIhuFLRETkYA45vSQREZEtGzeux65dPwMAOnbsjOeeG45/\n/tmHFSuWwNVVA1/fGnjnndk4eHB/kdtUKueNMKesPDZ2C7p37wSNxkfuUoiInN6MGVOxdetmuz2e\nQiGgb98BmDFjdontbty4hgMH/sGKFV8AAKKjX0DXrt3x3XcbMH78q2jZshX27PkVaWmpNm/z8/O3\nW82O5nSbnTMy0jFixHN48cUX5S6FiIjuw9mzZ9G0aXOoVCqoVCo0b94S58+fRdeu3fHxxx/giy9W\noUGDhvDz87d5mzNzupGvp6cXOnTohF27duHkyRNo0qSp3CURETm1GTNm33OUWhYBAZ6lms1OEABR\nFC1/GwwGCIICvXv3Rdu27fDbb3H4739fxezZc2zeFhYWbreaHc3pRr4AEB09DgCwYsVSmSshIqLy\niohoiOPHj8FoNMJoNOLkyROIiGiI1as/g1KpwoABT6Jbt564fPmizducmdONfAGgR49eqFevHr79\ndgPefnsG/P2de/MDEVF1FBQUjFatHsKECdEwm0X07z8AQUG1EBgYhEmTxsHT0wuenp4YOvQ56PX6\nIrc5M0EsPOavIKXZ/FBW69d/jokTJ+LNN6di8uQ37P74JCnt5iO6P+xnx2A/Owb7WRIQ4FnsOqfc\n7AwAL774Ijw9vbBq1Qrk5ubKXQ4REVGpOW34enp6YtiwKNy8mYQfftgkdzlERESl5rThCwAvvTQa\nCoUCMTFL4ICt50RERHbh1OEbFhaO3r374ujRw/j7731yl0NERFQqTh2+ADB6tHTY0fLlS2SuhIiI\nqHScPnwfeaQ9mjdviR9/3Ir4+Ctyl0NERHRPTh++giAgOnoszGYzVq5cLnc5REQkk/Pnz1kGYe+8\n8xZycrLL/ViHDx9ESkqyvUorwunDFwAGDhyMgICa+PLLL5CZmSl3OUREJIM9e35FQkI8AGDmzA/g\n6qop92Nt27alQsPXKc9wdTdXV1e8+OJLmDPnfWzY8BVGjoyWuyQiIrqHYcMGY+3ajRBFEX36PIaF\nC5ehUaMmmDx5PN54420EBdWCyWTCnDnv4fr1azAajXjppTFo3boNtm+PxaZNG6FSqVG/fgQGDhyM\nH37YhD17foWvry+mT38LX3yxAZ9+Oge+vr44c+Y0UlNT8OyzL2Dbtq1IS0vFokXLIQjAzJlTkZWV\nhezsbLz66uvQ6TKxd28cLl26iNmz5+DMmZP4+ut1UCpVaNiwMSZMePW+X3uVCF8AeOGFkZg/fy5W\nrFiKF198CQpFlRjUExFVOPcZU+FqxykFoRDg3ncAdPeYrKFhw8a4ePECjEYDGjVqjOPHjyIiohGS\nk5MRFFQLAPDLLz/Bz88fb701HampqZg4cQzWrPkaX3+9DnPmz
EdgYBC2bduCOnXqoG3bdujSpRua\nNGlm9TxKpQoLFizFzJlTcezYUSxYsASzZk3DwYP7ER5eF/36DUSnTl1w4MC/+PLLNXjvvY9Rv34E\nJk9+A15eXlizZiWWLfscLi4umDbtTRw9ehgtWkTeVxdVmfANCAjA4MFDsH79Ouza9TN69Ogtd0lE\nRFSCyMgHceLEMeTm5uCpp57Gnj270bLleURENLS0OX78KI4cOYSjRw8DAHJycmAwGNC9ey9MmfI6\nevXqg+7de5W4iblxY2n2Oz8/f8tMSL6+ftDpMlGjhh/WrPkM69evhcFggEZj/TiXLl1EUlIiJk8e\nDwDQ6TKRmJiIFi3u77VXmfAFgFGjxmL9+nWIiVnK8CUiKiXdjNn3HKWWRUCAJ3SlOLdzq1atsW7d\nauTkZKNfvwHYtm0rjh07ggcffMjSRqVS4/nnRxT5To+KehE9evRBXNxOvPLKWCxeXPwOt0ql0uay\nKIrYuPEr+PvXxLRps3D69EksWjTf6r5qtbSped68Rfd8PWVRpbbNNmvWHB06dMJvv+3GqVMn5S6H\niIhKEBoahqSkJGRm6qDVusPPzw9798ZZhW+TJs3w++97AAApKcmIiVkMs9mMmJjF8Pf3x9Chz6FZ\ns+ZITEyEIAgwmUxlqiEtLRW1a9cBAOzZsxtGoxEAoFAoYDKZEBoajsuXL1l2vlq5Mga3bt2879de\nqvA9e/YsunfvjnXr1gEAbty4gaioKAwbNgwTJ06sVBMbcK5fIiLn4evri6CgIABS0N64cQM1awZa\n1j/2WHe4uWkxZswIvPHGq2jRIhIKhQJarTtGj34REyeOhSAIaNAgAi1btsL8+R9j//5/Sv38vXv3\nxYYNX+LVV19G06bNcOfOHWzbtgWRkQ9i6tT/4vr1a5g48TX85z8TMXbsCKSlpcLfP+C+X/c9pxTU\n6/UYPXo0wsPD0bBhQzz33HN466230KlTJ/Tp0wfz5s1DUFAQhg0bVuxjVMTUUsVNWWUymdCu3YO4\nceM6Dh06xbl+7xOnBnMM9rNjsJ8dg/0sua8pBV1cXLBixQrUrFnTctvff/+Nbt26AQC6du2Kv/76\nyw5l2odSqcSoUWOQk5ODtWs/l7scIiKiIu4ZviqVqsjeX1lZWXBxcQEA+Pn54datWxVTXTk988xz\nnOuXiIgqrfve27k0U/n5+mqhUinv2a6sihvSBwR44qWXRuLTTz9FXNxPePbZZ+3+3NVJSZtOyH7Y\nz47BfnYM9nPJyhW+Wq0W2dnZ0Gg0SEpKstokbUtKir5cxZXkXr8pDBv2IhYsWIC5cz9Bjx79IQiC\n3WuoDvjbjWOwnx2D/ewY7GfJff3ma0v79u2xY8cOAMDPP/+Mjh07lq+yChQWFo5evR7H4cOH8M8/\nf8tdDhERkcU9w/f48eOIiorC999/jy+++AJRUVEYP348Nm/ejGHDhiE1NRUDBw50RK1lxrl+iYio\nMrrnoUb24MhDjQoTRRHdunXEyZPH8e+/RxESEmr3Oqo6bj5yDPazY7CfHcPe/RwXtwtdunSz2+M5\nit03OzsLzvVLROTcbty4jp07d8hdht1V6fAFgEGDnoK/fwDWrVvDuX6JiCqRYcMGw2QywWg0okeP\nTjh9Wjot8OTJ45GYeAMAMG/eRzh8+CA+/3wFVq6MwaxZ0zFu3EvYv/8fTJ36huWx+vaVRsaXLl3E\nK6+MwcSJY/HWW68hI6Nybumo8uGbP9dvenoaNmz4Su5yiIgqpRqtm9m8aAptNfQcN8pmG8/o4ZY2\nmrWrgfDwUj1n/pSC586dsUwpaDabraYUfOaZKERGPogXXxwFADAaDViy5LNip42dP/9jvP76FCxY\nsBRt2jyCTZs2lqs/KlqVD19AmutXOlPXUpjNZrnLISIiFEwpeOzYETz11NM4efIELlywnlLwbvnT\nAxbn5MkT+Oij2Rg/Pho7d
vxomRChsqlSUwoWp2bNmnjyyf/D119/ybl+iYhsSD5w/J5tMpasuGeb\n7Kjh8Jw8AbDTlIJ3U6vVAFDk3A35sxFpNBosXBhT6c/tUC1GvoA01y8AxMRwtiMiosqgNFMK5k/t\ndzd3d3fcuXMbAHD+/Dno9dLJnOrXb4B9+/4EAOzcuaNMMxw5UrUJ3+bNW+DRRztyrl8iokrkXlMK\nhoXVxZkzp/G//31idb/69SOg0bhhzJgR2LHjRwQFBQMAJk78D9au/Rzjx0fjxx9jS9yELacqfZzv\n3bZv34YXXngGzz33AubNW2j3mqoiHhfpGOxnx2A/Owb7WVJtj/O9W8+evREWFo5vvvkat2/flrsc\nIiKqpqpV+HKuXyIiqgyqVfgC0ly/Hh6enOuXiIhkU+3C19PTC88+G4WkpERs2fK93OUQEVE1VO3C\nFwBGjhwNQRCwfPkSOGB/MyIiIivVMnzDw+uid+++OHz4EP79t3IeA0ZERFVXtQxfgHP9EhHJ6ccf\nt2LRovl2eSydLhP//LMPALB27WocP3603I+VmJiIkyfvfbav+1Vtw7ddu0fRrFkLxMb+gISEeLnL\nISKicjpz5rQlfKOihqNZsxblfqyDB//FqVMn7FVasarFuZ1tyZ/r95VXxmLVqhV4551ZcpdERFSt\n3LhxDf/5zyu4eTMJQ4YMQ79+A6zWf/fdRuzc+RMEQYGOHbvgmWeew9mzp/HJJx9BrVbDxcUFM2d+\ngHnz5kCv1yEkJBTHjx9Fly7dkJaWisOHDyI1NRWXLl1EdPRY7Ny5A5cvX8L06bPRtGkzLFw4DydP\nnkBubi4GDhyMDh06Y9Wq5VCpVAgMDELt2iH49NM5EAQBWq0WU6bMgKdn8SfOKItqG76ANNfvu+9O\nx7p1a/Daa/+Fh4eH3CURETncjBmu2LrVfnGgUAB9+7pixoycEtslJMRj1aovodNlYvjwYejb9wnL\nhAjXr19DXNwuLFmyEgAwduxIdO3aHT/+uBWDBj2F3r374sCBf5GcfAfDhkXh4sULGDDgSatNzgkJ\n8Viy5DNs3boZ69atxqpVX2L79q3YuXMH6tdvgKCgYEyYMBk5OdkYMmQg+vcfiD59+sHHxwcdOnTG\nxIlj8frrUxASEopNm77Bpk0b8cILI+3SR9U6fPPn+v344w+wceN6jBgxSu6SiIiqjRYtIqFSqeDt\n7QN3d3ekpaXBx8cHAHDq1AlcvZqACRNGAwD0eh0SE6+jQ4fOmDv3QyQkxKNbtx4ICwvHiRPHbD5+\no0ZNIAgC/Pz8Ua9eAyiVSvj6+kGnOwJXV1ekp6dhzJgRUKlUSE1NKXL//OkJAcBgMKBx4yZ2e+3V\nOnwBaa7fBQs+wYoVSzF8+MhiJ2gmIqqqZszIuecotSykczuX5vGsp/0rPAugSqVGu3aP4o033i5y\nr88++wJ//rkXs2fPwPjxk4p9dKVSaXNZFEUcOnQABw/ux6JF0mbmHj06Frl/RU5PWO2TJn+u3wsX\nzuPXX3+RuxwiomrjxImjMJlMSElJQVZWFry8vC3rGjZsjIMHDyA7OxuiKGL+/LnIycnGd99tQHp6\nGnr27IOnnx6Gs2dPQxAEm9MOliQtLRU1awZCpVLh99/3wGQyw2AwWE1hWJHTE1b7kS8gzfX79ddf\nIiZmCbp37yV3OURE1UJoaDimTXsT164lIDp6nNUIMygoCEOGPIOXXx4FhUKBTp26wNVVg9q1QzBt\n2pvw8PCAWq3GlCnvIDU1BcuWLURAQM1SP/dDD7XFl1+uwfjx0ejYsTPat++AuXM/QPfuPTF79gz4\n+Phi4sT/YM6c9/Dll2vg4uKKGTNm2+21V6spBUsyaFBf/PHHXvz2299o1Kix3R7X2XFqMMdgPzsG\n+9kx2M8STilYCtHR0kk3VqxYKnMlRERU1TF88xSe6/fOnTtyl0NERFUYwzdP/ly/2dnZnOu
XiIgq\nFMO3EM71S0REjsDwLSR/rt/ExBvYunWz3OUQEVEVxfC9S/5cvzExiznXLxERVQiG71041y8RUcUr\nzZSCu3fvdFA1jsfwtYFz/RIRyW/dujVyl1BhGL42cK5fIqKKlz+l4PPPP43Y2B+s1n311Rc4f/4s\npkx5HQcP7scbb0zC+PHROH36FPr27WZpN3XqGzh4cD/0eh2mTn0DEyeOxfjx0Th//pyjX06ZMHxt\nyJ/r12w2Y9WqFXKXQ0RU4Vq3drd5WblSbWkzbpzGZpvoaI2lzdq1aoSHl+45ExLi8eGH87BwYQxW\nroyx2s9m2LDn4eHhgfff/xgAcOHCecybt6jYMxBu3Lgebdu2x4IFS/Haa29i0aJPy94JDsTwLcbA\ngYPh7x+AdevWIDMzU+5yiIiqHFtTChanfv0GcHFxKXb9sWNHsXnzdxg/PhqffPIhdLrK/b3NiRWK\nodFoMHz4SMyd+yHn+iWiKu/AAd092yxZkn3PNlFRBkyerMGtW6V51uKnFLybWq22ebvRaMxbr8Kr\nr76OZs1alOaJZceRbwleeGEkXFxcsGLFUpjNZrnLISKqUkqaUhAAzGbbh3sKgoDs7GxkZ2fj7Nkz\nAIAmTZrht9/iAACXLl3E11+vq9Da7xfDtwSBgYEYNOgpzvVLRFQB8qcUnDRpbJEpBQEgIqIhRo16\nvsj9Bg58CtHRL+D992eiYUPpN+Cnnnoa164lYNy4l/DRR7MRGfmgQ15DeXFKwXs4duwIunXriM6d\nu+Kbb3649x2qGE4N5hjsZ8dgPzsG+1nCKQXvQ/PmLdG+fQfs2bMbp0+fkrscIiKqAhi+pcC5fomI\nyJ4YvqXQq1cfhIZyrl8iIrIPhm8pSHP9jkZ2djbWrVstdzlEROTkGL6lNGxYFDw8PLFy5XIYDAa5\nyyEiIifG8C0lT08vDBv2HOf6JSKi+8bwLQPO9UtERPbA8C2DunUfQK9ej+PQoYPYv59z/RIRUfmU\nK3x1Oh3Gjx+PqKgoDB06FHv37rV3XZVWwVy/POyIiIjKp1zh+/3336Nu3bpYu3YtFixYgPfee8/e\ndVVa7dt3QNOmzREb+wOuXk2QuxwiInJC5QpfX19fpKamAgDS09Ph6+tr16IqM0EQMHr0OJhMJs71\nS0RE5VLuczuPHDkS8fHxSE9PR0xMDCIjI4ttazSaoFIpy11kZZOdnY2wsDDk5ubi6tWrcHd3l7sk\nIiJyIuWaz/eHH35AcHAwVq5cidOnT2PKlCnYtGlTse1TUvTlLrA4cp+4+/nnR2Du3A+xePFyvPji\nS7LVUdHk7ufqgv3sGOxnx2A/S+w+scLBgwfRoUMHAECjRo1w8+ZNmEym8lXnpDjXLxERlVe5wjcs\nLAxHjhwBAFy7dg3u7u5QKqvOZuXSyJ/r9/z5c9i9e6fc5RARkRMpV/g+/fTTuHbtGp577jm89tpr\nmDFjhp3Lcg7R0WMBADExS2SuhIiInEm5fvN1d3fHggUL7F2L08mf6zcu7lecPn0KjRo1lrskIiJy\nAjzD1X0qmOt3mcyVEBGRs2D43qeCuX7XIzmZc/0SEdG9MXzvU+G5fteuXS13OURE5AQYvnbAuX6J\niKgsGL52wLl+iYioLBi+dsK5fomIqLQYvnbCuX6JiKi0GL52xLl+iYioNBi+dsS5fomIqDQYvnbE\nuX6JiKg0GL52NnDgYPj7B2Dt2tXQ6XRyl0NERJUQw9fONBoNhg8fibS0VGzcuF7ucoiIqBJi+FYA\nzvVLREQlYfhWgMDAQAwcOBjnz59DXNwuucshIqJKhuFbQTjXLxERFYfhW0FatIhEu3aPYvfuXThz\n5rTc5RARUSXC8K1AnOuXiIhsYfhWoN69H0doaBjn+iUiIisM3wqkVCrx0kujkZWVhXXr1shdDhER\nVRIM3wo2bFgU3N09ONcvERFZMHwrmJeXN4YNew43blx
HbOwPcpdDRESVAMPXAV56aQwEQcAHH8xC\nZmam3OUQEZHMGL4OULfuA3j55Ym4fPkSpk79r9zlEBGRzBi+DvLmm1PRokUkvvpqLbZu3Sx3OURE\nJCOGr4O4uLhg2bKVcHNzw+TJr+Datatyl0RERDJh+DpQ/foNMGvWh0hLS8X48aNhMpnkLomIiGTA\n8HWwqKjh6NOnH/74Yy8WL/6f3OUQEZEMGL4OJggC5s1biMDAIHz44SwcPnxQ7pKIiMjBGL4y8PPz\nw6JFMTAajRgzZiR0Op3cJRERkQMxfGXSuXNXjBv3Ci5evIBp096UuxwiInIghq+M3nprGpo1a4F1\n69Zg61ae/YqIqLpg+MrI1dXVcvjRa69NwPXr1+QuiYiIHIDhK7OIiIaYOfN9pKamYsKEMTCbzXKX\nREREFYzhWwm88MII9O79OPbu3YMlSxbKXQ4REVUwhm8lIB1+tAg1awbigw/exdGjh+UuiYiIKhDD\nt5Lw9/fHwoXLYDAYePgREVEVx/CtRLp27YbRo1/G+fPnMH36FLnLISKiCsLwrWSmTp2Bpk2bY+3a\nz/Hjj7Fyl0NERBWA4VvJ5B9+pNFoMHnyeCQm3pC7JCIisjOGbyXUsGEjzJjxHpKTk/Hyy6N5+BER\nURXD8K2kXnzxJfTs2Rt798Zh2bLFcpdDRER2xPCtpARBwKefLkZAQE28994MHDt2RO6SiIjIThi+\nlVhAQAAWLlxqOfxIr9fLXRIREdkBw7eSe+yxHoiOHotz587inXfelrscIiKyA4avE5g6dSYaN26K\nNWtW4qeffpS7HCIiuk/lDt8tW7bgiSeewJNPPom4uDg7lkR302g0WLZsJVxdXfHqqy8jKSlR7pKI\niOg+lCt8U1JSsHjxYnz11VdYtmwZdu3aZe+66C6NGzfBjBmzcefOHc5+RETk5MoVvn/99RfatWsH\nDw8P1KxZE7NmzbJ3XWTDiBHR6N69J+LifsXy5UvkLoeIiMqpXOF79epVZGdnY8yYMRg2bBj++usv\ne9dFNgiCgAULlsLfPwCzZ8/AsWNH5S6JiIjKQRBFUSzrnZYvX46DBw9i0aJFuH79Op5//nns3r0b\ngiDYbG80mqBSKe+7WJJs374djz/+OBo3boz9+/dDq9XKXRIREZWBqjx38vPzQ6tWraBSqRAaGgp3\nd3ckJyfDz8/PZvuUFPsfnxoQ4IlbtzLs/rjO4KGHOuCll0bjs89iMH78RHz00bwKe67q3M+OxH52\nDPazY7CfJQEBnsWuK9dm5w4dOmDfvn0wm81ISUmBXq+Hr69vuQuksps+fRYaN26Czz//DD//vF3u\ncoiIqAzKFb6BgYHo1asXhgwZglGjRmHq1KlQKHjIsCNpNBosXSodfjRx4jgkJSXJXRIREZVSuX7z\nLauK2PzAzRqSFSuW4u23/4uuXbth/frv7P6fIPazY7CfHYP97BjsZ4ndNztT5fHSS2Pw2GPdsXv3\nLnz22TK5yyEiolJg+Dq5gsOP/PHuu9Nx4sRxuUsiIqJ7YPhWAYGBgZg/fzFyc3MxduxIZGVlyV0S\nERGVgOFbRfTs2QcjRozC6dOn8O670+Quh4iISsDwrULeeWc2GjZshJUrl2Pnzh1yl0NERMVg+FYh\nbm5uWLZsFVxcXPDKK+Nw8+ZNuUsiIiIbGL5VTNOmzTBt2kzcvn0LEyeOhQOOJCMiojJi+FZBo0aN\nRZcuj2HXrl+wcmWM3OUQEdFdGL5VkEKhwMKFy+Dn54eZM6fh1KmTcpdERESFMHyrqMDAIHz66WLk\n5ORgzJgRyM7OlrskIiLKw/Ctwnr3fhzDh4/EqVMnMWvWdLnLISKiPAzfKm7GjPcQEdEQK1Ysw65d\nP8tdDhERgeFb5Wm1WixdutJy+NGtW7fkLomIqNpj+FYDzZu3wNtvz8CtWzcxadI4Hn5ERCQzhm81\nMXr0OHTu3BW//LI
Dq1atkLscIqJqjeFbTeQfflSjRg3MnDkVp0+fkrskIqJqi+FbjQQF1cKnny5G\ndnY2xowZycOPiIhkwvCtZvr06Yvnnx+BkyeP4733ZspdDhFRtcTwrYZmznwP9es3QEzMYuzevUvu\ncoiIqh2GbzXk7u6OZctWQq1WY8KEMbh9+7bcJRERVSsM32qqRYtIvPXWdNy8mYRXX32Zhx8RETkQ\nw7caGzduAjp27IIdO7Zj9eqVcpdDRFRtMHyrMYVCgUWLlsHX1xfvvDMFZ8+ekbskIqJqgeFbzdWq\nFYx58xYhOzsbo0ePQE5OjtwlERFVeQxfQt++/REVNRwnThzD+++/K3c5RERVHsOXAADvvvsB6tWr\nj6VLFyIu7le5yyEiqtIYvgSg4PAjlUqFCRPG4M6dO3KXRERUZTF8yaJly1Z4881pSEpKxKuvjufh\nR0REFYThS1bGj5+IDh064aeftuGLLz6XuxwioiqJ4UtWpMOPYuDj44Pp09/C6dOn5S6JiKjKYfhS\nEcHBtfHJJwuRlZWF/v374+LFC3KXRERUpTB8yab+/Qdg8uTXcf78eTz+eDf8/fc+uUsiIqoyGL5U\nrDffnIbly5cjLS0Ngwf3w/fffyt3SUREVQLDl0o0atQorF//HVxdNRg9egQ+/fRj7gVNRHSfGL50\nT126PIbY2J9Rp04IPvhgFiZNehm5ublyl0VE5LQYvlQqjRs3wfbtuxAZ2Qrr16/DM88MRlpaqtxl\nERE5JYYvlVpgYBC+//5H9O7dF3v37kHfvj1w5cplucsiInI6DF8qE3d3d3z++TqMGTMeZ8+eQZ8+\nj+HAgX/lLouIyKkwfKnMlEol3n33fXz44SdITk7GoEF9sXXrD3KXRUTkNBi+VG4jRozCunUboFSq\nMHJkFBYtWsA9oYmISoHhS/ele/de2LLlJ9SqFYx3352G//xnEgwGg9xlERFVagxfum/Nm7fATz/9\nimbNWmDt2s/x7LP/h/T0NLnLIiKqtBi+ZBe1agVjy5af0KNHL8TF/Yr+/Xvh6tUEucsiIqqUGL5k\nNx4eHlizZj1GjozGqVMn0bv3Yzhy5JDcZRERVToMX7IrlUqFDz6Yi9mzP8StWzcxYEAfbN++Te6y\niIgqFYYvVYjo6HFYvforAMDw4cMQE7OYe0ITEeW5r/DNzs5G9+7dsWnTJnvVQ1VInz598cMP2xEQ\nUBPTpr2FKVNeh9FolLssIiLZ3Vf4Ll26FN7e3vaqhaqgli1b4aeffkXjxk2xcuVyvPDCM8jMzJS7\nLCIiWZU7fC9cuIDz58+jS5cudiyHqqI6dUIQG7sDXbo8hl9+2YEnnuiNGzeuy10WEZFsBLGcP8RF\nR0dj2rRp2Lx5M2rXro0nn3xblRT0AAAgAElEQVSy2LZGowkqlbLcRVLVYDAYMH78eCxfvhy1a9dG\nbGwsIiMj5S6LiMjhVOW50+bNmxEZGYmQkJBStU9J0ZfnaUoUEOCJW7cy7P64ZM3e/Txr1seoVSsU\nM2dOxaOPdsBnn61G9+697Pb4zoqfZ8dgPzsG+1kSEOBZ7LpyhW9cXBwSEhIQFxeHxMREuLi4ICgo\nCO3bty93kVQ9CIKAl19+BaGhYXj55VF47rmn8f77H2PEiFFyl0ZE5DDlCt/58+dblhcuXIjatWsz\neKlM+vcfgODgYERFDcWbb76GS5cuYsaM2VAq+fMEEVV9PM6XZNO6dRts374LERENEROzGC+++Bx0\nOp3cZRERVbj7Dt8JEyaUuLMVUUnCwsKxbdsv6NixM376aRsGDnwcSUmJcpdFRFShOPIl2Xl7+2D9\n+u/wzDPP4ciRQ+jTpxtOnTopd1lERBWG4UuVgouLC+bPX4wpU6bj6tUE9OvXE7t375K7LCKiCsHw\npUpDEARMmvQfxMSsQm5uDoYNewpr166WuywiIrtj+FKlM2jQU/j2263w9vbGa6+9g
lmz3oHZbJa7\nLCIiu2H4UqXUtu0j+PHHXahXrz4WLvwUo0YNR1ZWltxlERHZBcOXKq0HHqiHH3/ciXbtHsXWrZvx\n5JN9cevWLbnLIiK6bwxfqtR8fWtg48bNeOqpp3HgwH706dMNZ8+ekbssIqL7wvClSs/V1RWLFy/H\n66+/hfj4y+jbtwd+//03ucsiIio3hi85BUEQ8Prrb2HRohjo9ToMGTIQX3/9pdxlERGVC8OXnMqQ\nIc/gm29+gIeHB155ZSw+/HAWyjkrJhGRbBi+5HTat++AH3/chbCwcMyb9zHGjh2J7OxsucsiIio1\nhi85pfr1G2D79l/Rpk1bbNr0Lf7v/wbgzp07cpdFRFQqDF9yWv7+/vjuu60YOPBJ/P33X3j88W64\nePG83GUREd0Tw5ecmkajwbJlqzBp0n9w6dJF9OnTjXtCE1Glx/Alp6dQKDBlynTMn78YGRkZePLJ\nfnjhhWE4ffqU3KUREdnE8KUqY9iwKGzZ8hPatGmL7dtj0aVLO0yYMAbx8VfkLo2IyArDl6qUhx56\nGLGxP2Pdug1o1KgJNmz4Cu3aPYgpU17HzZs35S6PiAgAw5eqIEEQ0LNnH/z66+9YuvQzBAfXxmef\nxeDhh1vigw/eRVpaqtwlElE1x/ClKkuhUGDw4CH4888DmDPnU3h6euLTT+eiTZsWWLhwPvR6vdwl\nElE1xfClKk+tVmP48JH4++/DmDp1JgBg1qzpaNs2EqtXr4TBYJC5QiKqbgTRAefmu3Urw+6PGdCm\nOUzmoqXrx72C7JHRAADPcaOg/vuvIm0MrR9CxvLVAADN2tXQzp9r8zmS/zoIuLhAee4svIc+abNN\nxryFMHTuCgDw6dUFitu3i7TJHvIM9P99GwDg/s7bcI39oUgbU2gY0r7fBgBw2b4NHlP/a/P5Urfu\ngDm4NoTUFPh262izjW7KdOQMHgIA8Hr2/6CysddvbtfuyJw7HwDgtnA+3FZ/VqSNqNVCdfoUbt3K\ngGr/P/AaPcLm86WvWgtjy1YAAN+2kRCMxiJtsqLHImv0ywAAj0kvw2XvniJtjM1bIn21dL5m16+/\nhPvHH9h8vuQ9+wAPDyguX4LP4P4222TOmYfcbj0BAD79ekJx47plndlsRkZ6OlZl6fG60Yjw8Lr4\nrmEjtDxxHBAEq8cx1wpGauzPAACXXT/D443JNp8v9butMIfXBTIzUaPzIzbb6F5/CzlDnwUAeA1/\nFqpjRyzrlAoBJrOI3I6dkTl/MQDALWYx3JYvLfI4okqFlL8PAwBURw7Ba0SUzedLj1kF40MPAwB8\nOz4MwcZIP2v4S8iaMAkA4PGfSXDZvbNIG2Ojxkj/8hsAgOt3G+H+/rs2ny9l116IPr5QXL8Gn/69\nbLbJnP0Rcvv0BQB4D+oLpY2d4XL6DYBu5nsAAO1H70GzcX2RNmZ/f6TuiAMAqPfshufkCTafL+3r\nTTA1iAByc1Gj3YOWfi5MP+k/yI4aDgDwjB4O9YH9RR7H0LYdMpasAABoVi6Hdsn/bD5f8oHjAADl\nyRPwjnraZpuMRTEwtHsUAODb9VEI6WlF2mQ/+zz0k98AALhPeR2uO7YXaWOqVx9pGzcDAFy2bobH\njKk2ny9l+68Qa9aEcPMmfPs8ZrNN5ozZyO0/EADgPWQglBeKHi+f06sPdO9/DADQzpsDzZdfFGkj\nenkjZfcfCAjwROqWn+A5frTN50tbuwGmJk0BADVaN7PZRs7vcnsJCPAsdp3Krs9E5AQUCgW8fXzw\nwpBncBoivvjic+y4fAmBajW8vX3g5uYmd4lEVMU578g3wLNCHpesVYd+vnLlMj7++AN8883XEEUR\nDz/8CKZOnYFHHmnvsBqqQz9XBuxnx2A/S0oa+fI3X6r2wsLCsWhRDPbs2Yc+ffrhn3/24YknemPo\n0CdxrNCmYSIie2H4EuVp1Kgx1qz5Ctu370KHD
p3w66870a1bR0RHD+c5o4nIrhi+RHdp3boNvvtu\nKzZu3IzIyFbYvHkTHn20DV577RVcv35N7vKIqApg+BLZIAgCunR5DDt2xGHlyrV44IF6WLt2Ndq2\njcQ777yN5GROX0hE5cfwJSqBIAjo338A9uzZhwULliAgoCaWLl2Ihx5qgblzP0RmJncqIXJqZjOg\n0wGZmQ59Wu7tTCViP1vLycnBmjUrMX/+XNy+fRv+/v6YOPE1vPDCSGg0mnI/LvvZMdjPjnHf/SyK\ngMEAITsLQlYWoNdDyM6GkKWHkJUFITsL0GdJfxe6HdlZEPRZBW2yCrXRF2pT+PbsbOkpFQqkffUt\nDI91t1MvlLy3M8OXSsR+ti0zMwMxMUuwZMlCZGSko3btOnj99bcwZMgzUKnKfvg8+9kx2M92IoqA\nTgdBp4Ogy4Sg00Ghy4SgywR0OngrTMhISraEoJCVBdwVggXhWNBG0OuB/DA1mexbsloN0U0L0c0N\n0GggarUQNRrLbaKPL3RvvwNznRC7PSfDl8qN/Vyy5OQ7+N//PsWqVcuRnZ2NBg0i8Oab09Cv3xMQ\n7jpbVknYz45RLftZFKWQKxSUQmZmwbLuruXM/GUdhMyMguXC6/Q6CHaIDlEQADc3KfzcCsIQbm4Q\nNW4QtXnrNG557Qq3KRScGqkd7g5UjRugzbsux3+K7xfDl8qN/Vw6169fwyeffISvvloLk8mEyMhW\nmDLlHXTu3LVUIcx+dgyn6Oe8sFSkp0FIS4OQnlZ8GBYXmlZ/Z0Iwm8tfjiBAdPeA6O4uXTw8Cy17\nWK9zl9Z5BvkhzaQoCNG84LQEZn6AuroWOaVrVcLwpXJjP5fNhQvn8NFH72Hz5k0AgA4dOuHtt99B\n69ZtSrwf+9kxHNLPZjOEjPS84Ey3CtGC5XTp70LLVuttnB+9tEStuyUMzR6eQKHl/Nvh7gGzR35o\neuSFqDtErUeRUIVWW+aA5OdZwvClcmM/l8+xY0fw/vvvYteuXwAAvXv3xVtvTUPjxk1stmc/O0ap\n+jk3F0JaGhQZxQVlqnVopqVBUWhZyMwo8yZZUauF2csbopcXRC9vmL3zl32k68JBefeIs/Cy1h1Q\nyH8QCz/PEoYvlRv7+f7s2/cnZs+egX/+2QdBEPDUU0/jjTemICws3Kod+9nOjEYIKSlQJN+BIvkO\nhDvStWeuDvrrN4sP0Yx0aeegMhAFAaKnF0Rvbyk4ve5a9vKG6C2FqNnLO29d/rJ0O9TqCuoIefDz\nLGH4Urmxn++fKIrYuXMH3nvvXZw8eRxqtRpRUcPx6qtvIDAwEAD7uURmM4TUFCiSky0hWjhQFcl3\nICTfgeJO3nVyMhRpqaV+eFGthujtDbMlQPOCMi9ArZel8LSMUr29IXp4VorRZmXCz7OE4Uvlxn62\nH7PZjM2bv8OHH87G5cuXoNVqER09Di+//Arq1w+pHv0sitIo0yo4k6G4Yx2iluWUZAgpKaXaYUhU\nqWCu4QfRz0+6riFdm/1qWJa9wmsjRVRLI868wIWbW5Xe6UcO/N6QMHyp3NjP9mcwGPDVV2sxd+6H\nSEpKhI+PDyZNmoQePfqhQYMIucsrPVGU9qS1MRq1CtSUZGn9HWm5NDsTiQoFxBo1pPDMD1K/QoFa\no4YUsr41LIErenrdM0T5eXYM9rOE4Uvlxn6uOHq9HitXLsfChfOQmiptJm3YsBH69n0C/foNQNOm\nzcp0rLDdmM0Qbt+GMukGFEmJUCQlQZFYaDnphnR96yaE3NzSPaSvr1WIWo9M85Z9a0D0ywtTb58K\n2ZTLz7NjsJ8lDF8qN/ZzxcvISMcff/yKr77agLi4XcjOO91deHhd9Os3AP36PYFWrVrffxAbjVDc\nvpUXoolQJCbeFah5t926WeLZhUQXF5gDg2AOCIDZz79oiNbwsx61+vjIcoIDW/h5dgz2s4ThS+XG\nfnaM/H7Oz
MzErl0/IzZ2C375ZQf0eh0AIDi4Nvr27Y9+/Qbg4YcfgVKpLLizwQDFrZt5o9OkvBC9\nAcXNJOuQvX2rxN9ORY0G5ppBMAcGwhxUC6bAQClk8y9BtWAODIToW8NpfyPl59kx2M8Shi+VG/vZ\nMWz1c1ZqKvbH/oCD27bg8l9/wFuvRy0AdTUaNK/hh1C1Gp46HRR3bpd4XKmo1cJcMxCmoFp5IRpk\nFbJSuAZKm3qdNFRLi59nx2A/S0oK38qxLYiousnKgjIhHsqEK1DExwOpt+B58Yr1ZuDkZIQCePLu\n+2ZnA9evIQPARYUCBv8AuNVvAP9mzSEE15HC1TJaDZIOhanioUrkbBi+RBUhOxvKawlQXLmSF7Lx\nUMRflpbj46G4dbPIXfInJDR7ecMcGAhj0+Yw1wwsGK3mBaohoCb+jr+CH3b9jG3btuLGjevArZvw\nOHYUPXr0RL/QAXisVWu4u7s79jUTUalxszOViP1cjNxcKK4m5IXpFSjyrqWQvQJlUqLNu4lqNcy1\n68AUGg5TaCjMIaEwhYbBq2kE7rh6wRwYJJ1Lt5TMZjMOHtyP2NgtiI3dgvj4ywAANzc3dO3aHX37\n9kevXn3g5eVtj1ft9Ph5dgz2s6RCfvOdM2cODhw4AKPRiNGjR6Nnz57FtmX4Oq9q288GAxTXrlqP\nWuPzlhPiobhx3ebvrKJSCXPtEJhCpVA1h4TCFBIKU2g4zKGhUrgW3lkqjz36WRRFHD9+DNu2/YDY\n2C04e/YMAECtVqNTpy7o128AevfuCz8/v/t6HmdWbT/PDsZ+ltg9fPft24eVK1dixYoVSElJwaBB\ngxAXF1dse4av86qy/Ww0QnHjuvWoNX85IR6K69ds7hksKhTSyDUktFCwhsEcGibdViu4XIfVVEQ/\nnz17BrGxUhAfP34UAKBUKtG+fQf07fsEHn+8H4KCatn1OSu7Kvt5rmTYzxK7h6/JZEJOTg60Wi1M\nJhPat2+PP//80/rwh0IYvs7LafvZZIIi8YYUpFcuW0asls3E167aPJZVFASYawVbNgebQkKlYM1f\nDq5dISfBr+h+vnz5ErZt24rY2B9w4MC/AABBEPDQQw+jX78B6Nu3P0JDwyrs+SsLp/08Oxn2s6RC\nDzXasGED9u/fj48//rjYNhXxJrRp4wmzjZHJuHG5GDnSkLeswd9/F/0PQevWJixfLp3IYO1aNebP\nd7H5HH/9pYOLC3DunAJDh7rZbDNvXjY6d5a+xHv10uL27aJ7lQ4ZYsB//yudCeidd1wRG1t0ZBQa\nasb330uzqWzfrsLUqa42n2/rVj2Cg0WkpgLdutneoWbKlBwMHiydwu/ZZ91w+nTRMwV17WrE3Lk5\nAICFC12wenXRQNFqRZw+rcStWxnYv1+B0aNt98GqVVlo2VJ6L9q2dYetswdGR+di9GjpfZk0yRV7\n9xbtg+bNTVi9Wnpfvv5ahY8/tt0He/bo4OEBXL4sYPBADWA0QDAYAEPetdGIJRiLx02xAIAO2Iur\nqFPwAAoloFLh/8L3YUbfP2EOCcP033rhu31hgEpptWdwrVpmxMZK78uuXUq88YYGtnz3nR7h4SIy\nM4HOnW2/L6+/noOhQ6XOGT5cg2PHCj6bCoUCZrMZHTsaMX++9L7ExKixfHnRz6ZKBfz9t3T875Ej\nCowYYft9iYnJwkMPSe9Lx45a6PXS6zIaTcjK0iMrS4/c3PkQxTkAAD+/b2A0doebmxvUhf6D0aiR\nGV9+mZX3OlV4/33b78uuXTr4+ADXrwvo39/279azZ+egTx+pDwYNckN8fNHPZr9+RsycKfXBRx+5\nYOPGop9Nf38RO3boAQB79igxebLt9+Xrr7PQoIEZublAu3buln4ubNKkXERFSZ/N6GgNDhwo+p3R\ntq0JS5ZIn82VK9VYssT2d8aBA9L7cvKkAlFRtt+XRYuy0a6d9J3RtasW6el
FvzOefdaAyZOl74wp\nU1yxY0fRfy/16pmxcaP0vmzdqsKMGbbfl+3b9ahZU8TNmwL69LH9vsyYkYP+/aX3ZcgQN1y4UPR9\n6dXLiPffl96XefNc8OWXRd8XLy8Ru3frERDgiS1b9Bg/3vb7snZtFpo0kd6H1q1t/3uR87vcXirs\nUKOdO3fi22+/xapVq0ps5+urhUple1R8PxQ2Tj/n6alBQID0hms0ts9Q5+qqQECAOq998WexCwjw\nhIsLcOdO8W18fLQICJCWVSrb7dzdXREQIP3D0Gptt1GrFZY3ytu7+Ofz8/NAQEDxzwUAXl5ulppc\nXGy3c3NzQUCA9EH18LDdJn9DRkCAJ3x9i38+X193y/MplYCt8zh4eNzP+yICJhOQKwVswKyp8Dh/\nGBnH9VCkfFv0gRQKCA3qA62GAuHhwDf1gMy8sywpVZZwVT05CO4fDJJqugkoDhV9qLK+L25uxbfx\n9Cx4X1xdi7ZTKBTQaEr3vuTXVJb3Jb+di4sCLi7e8Pb2xvPPT0OdOvWwadMm/PxzMkQxFWlpqVCr\n1dBqtXB3d4eLi9ryfF5exT+fv7/0OcnJKb6Nt3dBH6jVtttptQV94F7M9LQqVUEf+JRwJsoaNaQ+\nyM0taHP390bh7wxb7wsAaDSl/86Qntd+3xnFfaZcXBSlfF+kz6bZbL/vjNK9L9p7vi9ASf9e5Psu\nd4Ryj3z37t2LBQsW4LPPPoOPj0+JbbnZ2Xk5tJ+NRigvX4Ly7Bkoz52BKu9aee4cFLpMq6aiQgFz\naBiMEQ1himgkXTeIgKlBBEQn3LO3MnyeU1NTsGPHdmzbtgW7d+9CTo40yqlb9wHLaS4jIx+U53zT\ndlIZ+rk6qMh+FkXAaATyNnbBYBBgMEj/wTIagdxcoci6/PWF/zYYhEL3kf7TMXSoAV5e9qvV7pud\nMzIyMGzYMKxevbpUe04yfJ1XhfRzVhaU589Bde4MlGfPQHXurBSyFy8UOVG/6OICU70GBeEa0RDG\nBg1hqldf+u9wFVHZPs+ZmRnYuVM6zeXOnT9bTnNZu3Ydy2ku27RpW+x+HpVVZevnyshkks7jkpsr\nBVlODpCTI11b3ybdLt0GZGdLyzk5AtRqV6Sm5tgIwMLhd3cAFl1XEKrW7SrKypVZls3v9mD38N2w\nYQMWLlyIunXrWm776KOPEBwcbLM9w9d53U8/C2mpBeFaaDSriL9S5DAds4cnTBERMDXIC9eIhtLf\noeGV5qT8Fakyf56zsrIQF/crYmN/wI4d25GengYA8PX1RYsWkWjZshVatoxEixaRCA0Nq9Qj48rc\nz2YzkJUlhVh2trSclSXkBaFgFXZSEBaEoK1l27cVF6QFj2syyfP+qdUi1GppBKpSiXnXgIuLdHvB\npaBd/nL+xcVFzLtP0XVqtfVjFrST1nl5AW3amGwdCVhuPLczlds9+1kUobiZJIXr2TPSaDY/bG2c\naMLsH5A3ipXC1digIUwNG8EcVKtanwLRWT7Pubm5+P33PYiN3YK9e/fgypXLVut9fHzQvHkkWraM\ntARyeHjdShPIZelnUcwf0UmBqNcXBGN2tmAVjkWvC9rq9db3KQhX679zciq+jwRBhKurFDouLiI0\nGmnZ1VXMuw3QaETL+vy2rq62lotfX7OmFnq9/q4ALByu1gGoUlXNf/4MXyo3Sz+bzVDEX8nbVHy2\n0G+yZ6FISy1yP1NIKEwNCsLVmBe2om8NGV5F5eesn+fU1BQcPXoER44cxtGj0uXSpYtWbby9fdCi\nRUs0b97SEsrh4Q/Y3GGyLERRGqnpdAJ0uruvbd8mii5ISTFYQq9wgNoKUbPZ/ong5iaFnkYjXbu5\niXBzk/7Ovy58e36wFQ486/C71/qCx3BUyDnr59neGL5UJkJKMlRHDkN15BA8Lp6F4dgJqM6fhZA3\nz2w+UaWCqe4DeTs8ReSNZhvCWK+BtDs
klVpV+jynpaXi2LGjeYF8CEePHsGFCxcAuAPwAOABrTYI\nDzzQEqGhTREcHIGaNetBqw1EVpaiSGjq9cWHqz02kapURcPPVgjmL+cHp5tb6f8uuL1qjvDuVpU+\nz/eDsxpRsYSMdKiOHoHq8CGojhyE+tBBKO/alKjSamGMaGS9w1NEQ5jqPlAhJ5ygysFgANLTBaSn\nAxkZAtLSBMvf6ekCMjKKjjCloNRCpwuGTtfHchtgnTh6PXD8uHQpLa1WhLu7CHd3oEYNs2XZ+rrk\n22rXdkdWViY0GunxNJpqsUsBVUL82FUnej1Ux49BfeQgVIcOQnXkEJTnz1nt/GSuUQO5XbvB0OpB\nGFs+CO9Oj+C2WwkHk1KlZDYDmZn54WkdmmlpUnCmp8OynB+sGRkFt+WflKOslEoRHh5S2Pn6iqhT\n5+5QlJbV6hxkZNzAnTuXkZh4Htevn8aNG+cgiukAMgFkws1NRNOmddGqVSO0bNkSLVu2Qv36Dcq9\nl3VAAHDrVoVv7CO6J4ZvVZWbC9XJ49KI9vBBqA8fgvLMKatTKpo9vWB4tCOMkQ/CENkKxsgHYQ4J\ntd4uFuAJcPORQ4mi9Ptj4dC0DkncFZgC0tJQaFkKUVEsW3hKe3yK8PQEgoLM8PIS8y4otFxwm6en\nCA8PEVqtdai6uJRl02qtvEs7AIBOp8Px48dw9Oghy+/IBw/GYf/+Xy330Gq1aNq0uWWHrpYtW6FB\ngwioOIQlJ8LffKsCoxHKM6ehPnIob0R7EKqTJ6yOmRXd3GBs3jJvRCsFremBevcc0bKf7092NpCc\nLFhd7tyRrlNSCv7OzFQhOdlsGZ0aDGULTkGQQlMKTxHe3sWHZuG/vb0L7uPmVjl/j9Tr9Thx4hiO\nHj2MI0eky9mzp2Eq9B9JNzc3NGnSLG+HrlZo0SISDRs2KhLI/Dw7BvtZwh2uqhKzGcoL56E6fNAy\nolUdPwohK8vSRHRxgbFps7wRrRS2poiGlWa2HWeVk1NykOYvF/67tJtu3dwALy9zMSPNoiHq7Y1C\nISsWeyrKqiorKwsnTx63jI6PHDmMM2dOwVjoxOIajQZNmzbL28taCuSOHR9Gamp2CY9M9sDvDQnD\n11mJIhRXLhca0R6C6shhKDILXreoVMLUqIlls7ExshWMjZtK2/7soKr2c04OLCPPwkFaeDR6d5Dq\ndKULUq1WRI0a0sXXV4SfX/F/+/lJt4WEVM1+dqTs7GycOnXCKpBPnz4Jg8FgaaNUKlG7dh3UqRNi\nuYSEhFqua9euA1dX2xMUUOlV1e+NsuLezs5AFKG4cd0SsurD0rUiJaWgiSDA1CACuS1bFWw+btZC\nGjZVczodkJQkIClJgdu3bQdp4dFpZmbpglSjkQLygQfMRYLz7kt+kPLtkIdGo0GrVq3RqlVry205\nOTk4ffqkZXP1hQtncOnSZfz11x8obtwRGBiUF8YhCAkJsyzXqSOFtIeHh6NeElVhHPnKRLh1C+rD\nB6x2iFLcumnVxhReN29E21oa0bZoCdGj+P9JVQS5+zkzsyBUExMFJCUJSExU5N0mWNZlZNw7TF1d\nC8KzpCAt3EZrewY2u5O7n6uL/H7Ozc3FtWtXcfVqAq5eTUBCQrxlOT4+HtevX7XahF2Yr6/vXaEs\nBbMU1qHw8fGtNGf0kgs/zxKOfOWWlQX1/n+gOrgf6vxDfK5dtWpiql0HOY/3LxjRtoyssmeDEkUp\nVPNDND9Uk5IKQjV/3b029fr7mxESYkZgoIigIBGBgWYEBBQfpNX8O5HyuLi4oG7dB1C37gM215tM\nJiQlJSIhIQFXr8YjISHesnz1agLOnTuDo0cP27yvu7tHoVCWRs/5f4eEhCIgoOZ9n92LnB/DtyIY\njVAdPgiXvXug3rsH6n//hpA3PRsgnd84p0cvy2+0hpYPQqxZU8aC7UMUgfR0WI1MExMVuHlTsBq1\n3rx
Z8o5IgiCFZt26+aEqXdesWRCwQUEiAgJEe/20TWRFqVQiOLg2goNro23bR4qsF0URd+7cQULC\nlbyRc0Ewx8dL16dPn7L52K6urggOrm0VyvnBHBISilq1gnnYVDXAd9geRBHKUyfhsjdOCts//7Da\nKcrYtDlyO3aG4eFHYGz1IMzBtZ1qCCaKQGoqrDb9Wo9SC/7Ozi7+dSkUIvz9RdSrZ7aEaGCgaBWw\ngYFSqPLEWVSZCYIAf39/+Pv7W/3GXFh6elpeKCcgIeGKZVkaSSfgt99227yfUqlErVrBVjuF1ahR\nA76+NQpd+6FGjRrw8vLmKNpJ8TffclJcvpQ3so2Dy++/QXH7tmWdse4DMHTsgtxOnWFo3xGiv79s\ndZaG0Qhcvy4gPl6BhAQBV64oLMtJSSrcuCGWOOOKQiGNSvM3/dasab0ZWLqWgpf/obdN7s9zdVGZ\n+lmv1+Patat3/d58xbKcmHgDZrO5xMdQKBTw9fWFr68Uyn5+fpbl/KDOX65RI3+dL1wqeJNRZepn\nOfE3XzsQkpLg8ru0Gdnl99+gjL9iWWcKDEL2U08jt1MXGDp0grlOiIyVFmU2SzstXbkiBWp+sMbH\nSyF77Zpg8wT1CoWIWolNSOEAAAkTSURBVLWAJk3Md41SrUet/v6iXefAJKoOtFotGjSIQIMGETbX\nGwwGXL9+DdevX0NycjJSUpILXd+x+jslJRmXLl20OvFISTw8PG2MpouGduEwd3d3r/Y7ktkTw7cY\nQloq1H/+IY1s9+6B6sxpyzqztw9yHu8vbUru1AWm+g1k3YwsisDt24JVoMbHFyxfvSogN9d2fUFB\nZjz4oBmhoWaEhZkREiIiNFT6OzhYRHCwJ27d0jv4FRGRWq1GWFg4wsLCS9XebDYjPT3NKpALL9+5\nU/T2s2dPI6vQCXpK4uLiYmMUbTu869ULQW6uAHd3d2i17uU+F3dVxvDNl5UF9T/7LJuSVUcOQ8jb\n5CO6uSG3y2PI7dgFhk6dpWNrHfxhSk0FEhIUVqPXwiPY4nZg8vc3o2lTsyVQ88M1LMyM2rWlWV2I\nyPkpFAr4+PjCx8cXQL1S30+v1xcJ6sIj7Ltvv379Ok6dOlmm2jQaDdzd3eHu7gGtVpsXyh5511q4\nuxddzg/u/PvZauvMv3dX3/A1GqE6dMB6j+S8cyGLKhWMDz1sGdkaHnxImp26AmVmSuEaHy9YQjZ/\nOT5egfR02+Hq5SWdACI/WMPCCpZDQszg+QCIqCRarRZarRa1a9cp9X2MRiNSUlKKDe2srAwkJ6dC\np9PlXTKh1+uh0+mQlJQInU6H3ELnnr+/2u8O6sIhbyvIrf8TkL/s6+sLb2+f+66ptKpP+JrN1nsk\n//WnZY9kURBgbNYChg6dYOjUGblt28PeqZWTgyKbhfODNT5ewJ07tv8Hp9VKI9VHHpHCVBrBFmwa\n9va2a5lERPekUqkQEBCAgIAAm+tLs8OVwWCAXq+zBHTBcmbe33rLsvV66zDPb5OamorMzIxS/+59\nN4VCga+++haPPda9XPcvq6obvqJYsEfy73uK7pFcrz5yBg+R9kh+tCPEGn52eVq9Hjh/XoEzZxQ4\nezb/WonLlwWYzUVHry4uIkJCRDRvbiwSrKGh0vGu3MeBiKoatVoNb28fu442RVFEbm6uzXC2DvOi\n6wEgIqKh3Wq5lyoVvoqkRGlUm79HckK8ZZ2pVjCyhzyD3A6dYOjYGeYybGKxJTMTOHs2P2CVlqBN\nSBCKzKNao4YZbdqYUK+eFKjSCFbaRFyzplitZqMhIqoogiDA1dUVrq6uqGGnAVVFcerwFdJSof7j\nd2lT8u+/We+R7OuLnH4DpLDt1AWmevXLtUdyaipw5owS584VjGbPnlXg2rWiiVmzphkdOpgQEWFG\nRIQZDRtK1/7+FX4oNRERORHnC19RhNvC+cCOWPgdOFCwR7JWi9zHu
kt7JHfsJO2RXIYh5e3bQqHN\nxAWbjG/eLPoYwcFmdOlitISrdDHB19dur5KIiKow5wtfnQ7un3wIGI0wPPwIDB07S5cHH7rnHLai\nCNy8Kdz1e6x0sbXDU2ioGd27G/NGsdKItkEDM7y8KurFERFRdeB84evhgTv7j8M/LBBpetunXhNF\n6XSJ1qNY6XfZtDTrTc+CICI8XESbNgarzcX165vh7u6IF0RERNWN84UvADEgAHB3hzkzAwkJgtVe\nxfnLd09Fp1RKx8N26GC22lxcr56Zk58TEZFDOV34iiIwfbor/v0XOHXKA1lZ1iGrVouoX99cZKen\nBx4wc/o5IiKqFJwufPV6YMMGNbKzYQnZ/IBt2NCE8HDOnENERJWb08WUuztw4kQmAgM9kZzME/4T\nEZHzccrTO6jVDp/XgIiIyG6cMnyJiIicGcOXiIjIwRi+REREDsbwJSIicjCGLxERkYMxfImIiByM\n4UtERORgDF8iIiIHY/gSERE5GMOXiIjIwRi+REREDiaIoijKXQQREVF1wpEvERGRgzF8iYiIHIzh\nS0RE5GAMXyIiIgdj+BIRETkYw5eIiMjBnC5833//fTz99NMYOnQojh49Knc5VdqcOXPw9NNPY/Dg\nwfj555/lLqdKy87ORvfu3bFp0ya5S6mytmzZgieeeAJPPvkk4uLi5C6nStLpdBg/fjyioqIwdOhQ\n7N27V+6SKi2V3AWUxT///IMrV65gw4YNuHDhAqZMmYINGzbIXVaVtG/fPpw7dw4bNmxASkoKBg0a\nhJ49e8pdVpW1dOlSeHv/f3v398r6H8Bx/LkzubBxzDJaIblRSigXWHJBLlz7kRa3cqVc0FKUq7lS\nKAp/gLZwI0pZuZgr5UJRXGExy8evxgU6d6fOt9x8a3vbp9fjbrt61i5ee38+n7bfpjNsy7IslpaW\niEajpNNpFhYW6OjoMJ1lO5ubm1RXVzM+Ps7d3R3Dw8Ps7u6azvqRcmp84/E4nZ2dANTU1PD09MTr\n6ytut9twmf00NzdTX18PQFFREW9vb3x+fuJ0Og2X2c/l5SUXFxcagwyKx+O0tLTgdrtxu93Mzs6a\nTrIlj8fD+fk5AM/Pz3g8HsNFP1dOXXZOpVL/fJglJSXc398bLLIvp9NJQUEBAJFIhPb2dg1vhoTD\nYSYnJ01n2Nr19TXv7++MjIwwODhIPB43nWRLPT09JBIJurq6CAaDTExMmE76sXLq5Ptf+mXMzNvf\n3ycSibC+vm46xZa2trZoaGigoqLCdIrtPT4+sri4SCKRYGhoiIODAxwOh+ksW9ne3sbv97O2tsbZ\n2RmhUEjPMXwjp8bX5/ORSqX+vk4mk5SWlhossrfDw0OWl5dZXV2lsLDQdI4txWIxrq6uiMVi3N7e\nkp+fT3l5Oa2trabTbMXr9dLY2EheXh6VlZW4XC4eHh7wer2m02zl+PiYQCAAQG1tLclkUrervpFT\nl53b2trY29sD4PT0FJ/Pp/u9GfLy8sLc3BwrKysUFxebzrGt+fl5otEoGxsb9Pb2Mjo6quHNgEAg\nwNHREV9fX1iWRTqd1v3IDKiqquLk5ASAm5sbXC6XhvcbOXXybWpqoq6ujoGBARwOB9PT06aTbGtn\nZwfLshgbG/v7Xjgcxu/3G6wS+X/Kysro7u6mr68PgKmpKX79yqmzR07o7+8nFAoRDAb5+PhgZmbG\ndNKPpb8UFBERyTJ99RMREckyja+IiEiWaXxFRESyTOMrIiKSZRpfERGRLNP4ioiIZJnGV0REJMs0\nviIiIln2BzQKNGAGnBgwAAAAAElFTkSuQmCC\n", - "text/plain": [ - "\u003cmatplotlib.figure.Figure at 0x7f7a18df6b50\u003e" - ] - }, - "metadata": { - "tags": [] - }, - "output_type": 
"display_data" - } - ], "source": [ - "# Train our variables.\n", - "\n", - "# numpy is used for its asscalar() function.\n", - "import numpy as np\n", - "\n", - "num_training_steps = 10\n", - "\n", - "def train_model(inputs, labels, wb, optimizer, num_training_steps):\n", - " loss_at_step = []\n", - " w_at_step = []\n", - " b_at_step = []\n", - " for step_num in range(num_training_steps):\n", - " loss_at_step.append(run_step(inputs, labels))\n", - " w, b = wb.variables\n", - " w_at_step.append(np.asscalar(w.numpy()))\n", - " b_at_step.append(np.asscalar(b.numpy()))\n", - "\n", - " print(w_at_step)\n", - " t = range(0, num_training_steps)\n", - " plt.plot(t, loss_at_step, 'k',\n", - " t, w_at_step, 'r',\n", - " t, [true_w] * num_training_steps, 'r--',\n", - " t, b_at_step, 'b',\n", - " t, [true_b] * num_training_steps, 'b--')\n", - " plt.legend(['loss', 'w estimate', 'w true', 'b estimate', 'b true'])\n", - " plt.show()\n", + "## Next Steps\n", "\n", - "train_model(inputs, labels, wb, optimizer, num_training_steps)" + "In this tutorial we covered gradient computation in TensorFlow. With that we have enough of the primitives required to build an train neural networks, which we will cover in the [next tutorial](https://github.com/tensorflow/models/tree/master/official/contrib/eager/python/examples/notebooks/3_neural_networks.ipynb)." 
] } ], @@ -572,7 +312,7 @@ "colab": { "collapsed_sections": [], "default_view": {}, - "name": "Eager Execution Tutorial: Working with Gradients", + "name": "Automatic Differentiation", "provenance": [], "version": "0.3.2", "views": {} diff --git a/tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb new file mode 100644 index 0000000000..d9a9bffbb4 --- /dev/null +++ b/tensorflow/contrib/eager/python/examples/notebooks/3_training_models.ipynb @@ -0,0 +1,443 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "k2o3TTG4TFpt" + }, + "source": [ + "# Training Models\n", + "\n", + "In the previous tutorial we covered the TensorFlow APIs for automatic differentiation, a basic building block for machine learning.\n", + "In this tutorial we will use the TensorFlow primitives introduced in the prior tutorials to do some simple machine learning.\n", + "\n", + "TensorFlow also includes a higher-level neural networks API (`tf.keras`) which provides useful abstractions to reduce boilerplate. We strongly recommend those higher level APIs for people working with neural networks. However, in this short tutorial we cover neural network training from first principles to establish a strong foundation." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3LXMVuV0VhDr" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "PJ64L90aVir3" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "tf.enable_eager_execution()\n", + "tfe = tf.contrib.eager # Shorthand for some symbols" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eMAWbDJFVmMk" + }, + "source": [ + "## Variables\n", + "\n", + "Neural networks are characterized by a set of parameters (sometimes called \"weights\", sometimes called \"variables\") with fixed shapes and types, where the actual values are computed and adjusted during the training process. The `tfe.Variable` object encapsulates such parameters.\n", + "\n", + "Recall that `Tensor` objects are immutable, i.e., the underlying value of the `Tensor` cannot be changed. 
`Variable` objects act like `Tensor`s but are mutable via calls to `assign`, `assign_add` etc.\n", + "\n", + "For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "itxmrMil6DQi" + }, + "outputs": [], + "source": [ + "v = tfe.Variable(1.0)\n", + "assert v.numpy() == 1.0\n", + "\n", + "# Re-assign the value\n", + "v.assign(3.0)\n", + "assert v.numpy() == 3.0\n", + "\n", + "# Use `v` in a TensorFlow operation like tf.square() and reassign\n", + "v.assign(tf.square(v))\n", + "assert v.numpy() == 9.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BMiFcDzE7Qu3" + }, + "source": [ + "## Example: Fitting a linear model\n", + "\n", + "Let's now use the few concepts we have so far --- `Tensor`, `GradientTape`, `Variable` --- to build and train a simple model. This typically involves a few steps:\n", + "\n", + "1. Define the model.\n", + "2. Define a loss function.\n", + "3. Obtain training data.\n", + "4. Run through the training data and use an \"optimizer\" to adjust the variables to fit the data.\n", + "\n", + "In this tutorial, we'll walk through a trivial example of a simple linear model: `f(x) = x * W + b`, which has two variables - `W` and `b`. Furthermore, we'll synthesize data such that a well-trained model would have `W = 3.0` and `b = 2.0`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gFzH64Jn9PIm" + }, + "source": [ + "### Define the model\n", + "\n", + "Let's define a simple class to encapsulate the variables and the computation."
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "_WRu7Pze7wk8" + }, + "outputs": [], + "source": [ + "class Model(object):\n", + " def __init__(self):\n", + " # Initialize variable to (5.0, 0.0)\n", + " # In practice, these should be initialized to random values.\n", + " self.W = tfe.Variable(5.0)\n", + " self.b = tfe.Variable(0.0)\n", + " \n", + " def __call__(self, x):\n", + " return self.W * x + self.b\n", + " \n", + "model = Model()\n", + "\n", + "assert model(3.0).numpy() == 15.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xa6j_yXa-j79" + }, + "source": [ + "### Define a loss function\n", + "\n", + "A loss function measures how well the output of a model for a given input matches the desired output. Let's use the standard L2 loss." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "Y0ysUFGY924U" + }, + "outputs": [], + "source": [ + "def loss(predicted_y, desired_y):\n", + " return tf.reduce_mean(tf.square(predicted_y - desired_y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qutT_fkl_CBc" + }, + "source": [ + "### Obtain training data\n", + "\n", + "Let's synthesize the training data with some noise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "gxPTb-kt_N5m" + }, + "outputs": [], + "source": [ + "TRUE_W = 3.0\n", + "TRUE_b = 2.0\n", + "NUM_EXAMPLES = 1000\n", + "\n", + "inputs = tf.random_normal(shape=[NUM_EXAMPLES])\n", + "noise = tf.random_normal(shape=[NUM_EXAMPLES])\n", + "outputs = inputs * TRUE_W + TRUE_b + noise" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "-50nq-wPBsAW" + }, + "source": [ + "Before we train the model let's visualize where the model stands right now. We'll plot the model's predictions in red and the training data in blue." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "height": 293 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 1210, + "status": "ok", + "timestamp": 1527005898290, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "_eb83LtrB4nt", + "outputId": "3873f508-72fb-41e7-a7f5-3f513deefe38" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEDCAYAAAA2k7/eAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXlgU1X2xz/pAhRautCWUsCwWVlcUHHGBUFQcSg7uM8P\nFLUICo4VpygObihI3UdmUHBB0IGZQbEgFNGCqKgMolV2pKylCy1pukDp+n5/3LxmaUsDTUjSns8/\nbZKXd09C+b7zvvfccw2apmkIgiAITR4/TwcgCIIgnB9E8AVBEJoJIviCIAjNBBF8QRCEZoIIviAI\nQjNBBF8QBKGZENDYE+Tk5JCUlER+fj7+/v7cdtttTJgwgcLCQhITEzl27BidOnXijTfeICQkxBUx\nC4IgCOeAobF1+Hl5eeTn59OrVy9OnjzJ2LFj+ec//8mnn35KWFgYCQkJLFy4kKKiIh5//HFXxS0I\ngiCcJY22dKKioujVqxcAbdq0oXv37uTm5pKWlsaYMWMAGDNmDF999VVjhxIEQRAagUs9/MzMTPbs\n2cNll13GiRMniIyMBNRFoaCgwJVDCYIgCGeJywT/5MmTPPLII8ycOZM2bdpgMBhcdWpBEATBBbhE\n8CsrK3nkkUcYNWoUN910EwDt2rUjPz8fUD5/REREg+eRtj6CIAjuo9FVOgAzZ86kR48e3HPPPTXP\nDR48mE8//ZRJkyaxcuVKbrzxxgbPYzAYyMsrdkVIbiUqKkTidCESp2vxhTh9IUbwrTidodGCv23b\nNlavXk1cXByjR4/GYDCQmJhIQkICjz76KJ988gmxsbG8+eabjR1KEARBaASNFvwrr7yS3bt31/na\n4sWLG3t6QRAEwUXISltBEIRmggi+IAhCM0EEXxAEoZkggi8IgtBMEMEXBEFoJojgC4IgNBNE8AVB\nEJoJIviCIAjNBBF8QRCEZoIIviAIQjNBBF8QBKGZIIIvCILQTBDBFwRBaCaI4AuCIDQTRPAFQRCa\nCSL4giAIzQQRfEEQhLOk0GTi84R7+XbIDXyecA+FBSZPh+QULtnTVhAEoTnx7YzHuDflUwyAlv4z\nizEwfNFiT4fVIJLhC4IgnCWhhw9hsPxusDz2BVwi+DNnzuTaa69lxIgRNc/Nnz+fAQMGMGbMGMaM\nGcM333zjiqEEQRA8TqHRiGb5XQMKjV08GI3zuMTSGTt2LOPHjycpKcnu+YkTJzJx4kRXDCEIguA1\nXJ/8OosxEHr4EIXGLlyf/JqnQ3IKlwh+v379OHbsWK3nNU2r42hBEATfJjQ8wic8e0fc6uF//PHH\njBo1iqeeeori4mJ3DiUIgiA0gNsE/+677+arr74iJSWFyMhI5s6d666hBEEQXMLRjAwW9u3FWmN7\nFvbtxeGMDE+H5FLcVpYZERFR8/vtt9/O5MmTnXpfVFSIu0JyKRKna5E4XYsvxOmNMb53xQhmZh1T\n5Zalx5h3ww08cfSop8NyGS4TfEe/Pi8vj6ioKAC+/PJL4uLinDpPXp73Wz9RUSESpwuROF2LL8Tp\nTTEWmkx8O+MxQg8fIjory67cMtZk8po4z4SzF0+XCP706dPZsmULZrOZG264gWnTprFlyxZ2796N\nn58fHTt25Pnnn3fFUIIgCC7FdhHVXFSZpcHyM8vGqWgKuETwX3311VrPjRs3zhWnFgRBcCu2i6ju\nBp4JDKR7QACZ4RH839dfezAy1yMrbQVBaNbYLqK6AOgaP4L4w7lMSt+NsXt3T4bmcqSXjiAIzRpf\nXUR1LojgC4LQrPHVRVTnglg6giA0WXy1jbG7kAxfEIQmi6+2MXYXIviCIDQZbGvqC41Ggg9k+GQb\nY3chgi8IQpPBMaOfE9vRrq7eV9oYuwsRfEEQfB49s/dbn2qX0V8QEcHiq/7odAWOyWRmxoyNHD7c\nFqOxkPffHwX4uzv884YIviAIPk2hycS/B1/HJVnH2In9StnK7
heelWc/Y8ZGUlLGAwbS0zWmTFnO\n/PnD3RK3JxDBFwTBJzmakUHquOFE5mTTpbqaAcD1wDygQ1AQ1UOGnnVN/eHDbcHmHuHgwWDXBu1h\nRPAFQfBJUscNt3a2BJYDdwG9gRNDhp5TNY7RWEh6uvUeoWvXEhdG7HlE8AVB8BkKTSY2PDqVgB+/\nI8ZstvPrg1HCvz22I3ec42rZ5OTBwFKLh1/EggUjqapyTezegAi+IAhejz4pq23aQBuzmWHAAuz9\n+t8CA8mPH8Edya8RGn5uXS7tu7w3vS1aRfAFQfBq9ElZR/vmbuBZwGgwkN0hlqEr19C5a7dGjdXU\nJ22ltYIgCF7NtzMe4xKL2IPVvrkAuAgwjBzDpPTdjRZ7aPqTtiL4giB4NaGHD1GC1WDR7ZvktqFs\nbH85r2eMJiHhUwoKzI0ey2gstBtJJm0FQRDciF5u2anARGZ4BC169eIBlI3TBsuk7MbNPJ60Sdkv\nuQa279CApSxaNOasx7NdbNWhw0mGDn2P7OxImbQVBEFwF/rEbNba1cysqKjZSPyF6mo+GzWW0MOH\nOGHsUjMp62i/HD7cttZK2eTkwYSHh51xXEffftSopaxffyMAERHes/euKxDBFwTBK9D74HwO9u0R\nCs3E11FT71gzbzQW1RJvZ7L+ui4cTRWXCP7MmTP5+uuvadeuHatXrwagsLCQxMREjh07RqdOnXjj\njTcICXFuZ3VBEJo+u7Zt48vRQ+ladpqDBgNRrVtjAIqxL7fMrKfE0rFmPjl5EHfcsY2zFe+6LhxN\nFZcI/tixYxk/fjxJSUk1zy1cuJBrrrmGhIQEFi5cyDvvvMPjjz/uiuEEQWgCfDkmntllp5XMahpP\nnzyJBsQDy4BC/MhoFcawxcvqfH94eFit7P1cxLuuC0dTxSWC369fP44dO2b3XFpaGh999BEAY8aM\nYfz48SL4giCwa9s20sbG0+10qZ110x14JSwMf8LZZO7HKt6G0+Hs/8dSFi3q69S5z0W867pwNMS5\nzBV4A27z8E0mE5GRkQBERUVRUFDgrqEEQfAQZyN8+qTs6VUruUjTOIS9dZMNxAwczN8Pjyc9fXTN\n+87GUz8X8T4XzmWuwBvwuknbqCjf8PklTtcicbqW8xXn1Kmf2wlfy5bL+fe/76p1nPnECVbc1J/e\nmZmUAEOBpajOltHAAYOBsBtvZMz7i1g3ZZ2dLRMXV4qfXxUPPZTKwYPBdO1azIIF8UREnJ+Muq7v\nMisrHNu5gqyscJ/423Cb4Ldr1478/HwiIyPJy8sjIsK53ha+UAIVFeUbpVoSp2uROGuzb18QtsK3\nb18QeXnFtTL/+PIVzMjMtGuN0BUYDsxqFcRfjuQCUFEFs2dfT1mZ1ZaZPXsQ99+/qubCsnWrRlnZ\nUubNG+R2W6W+7zI21oTt/UlsbIFH/zacvdi4TPA1+65DDB48mE8//ZRJkyaxcuVKbrzxRlcNJQiC\nl1DfJKlueRgwcUH6FPBbb+fXtwF+A7a0CuLmVevszlmXLVNX6aQnbRVfneh1ieBPnz6dLVu2YDab\nueGGG5g2bRqTJk3iL3/5C5988gmxsbG8+eabrhhKEAQvoi7hKzSZCN34D17hUfIoYS4VLKu29+t3\nderEHWnfOd3Vsq4Liyfr58/XXIGrcYngv/rqq3U+v3jxYlecXhAEL8VW+ApNJj57YAKl337NDSgp\n7mD5GY+yccotO1FNfn8RuXknSUhY6ZQlU9eFJSlpQ7Opn3cVXjdpKwiCb/LtjMeI/fZr7sKayb9k\n+RkG3AkstuxEFRYRwr33fe60JaNfWPS5gTvu2Far742v2CqeRARfEASnqasMs9BUwOJxCVyanU4+\nUIgSeAOqffFLQPuwMAwDB3N98muYTGamTv2c9evB1pLJyPBvMOOvr++NyWQmKcn36uLPNyL4giA4\nja3g/pqeT+DqP3BJ9UHmY
83ql6E2J9GAn4Hc9pezLCqRblRzHX4251iGrbNvMh1mx44nOVPGX59v\n76t18ecbEXxBEJxGF1wDJ5jM5fyjOrNWs7NC4G3gd/zZenUS3/74ol0LY6toK2c/KKiCIUPgwIE4\nsrLOPAnrOHl7/PguDhzowaZNucDnqE488U26AVpjEMEXBKEW9a2g7djhGMb0eG5mHa3R6mx2to6r\nWcUPwCrC9uRbXjEDqaxfD+HhO4GBNe8oKytl69Z8evUKtjuTPglr36++nPbtnyY39yrgJFlZUxg7\ndgFm85PY3mMYjZVn/BzNFRF8QRBqoSySEcA60tPD2bp1CY/cW0nv1GfpDeQBuWDX7Kwc2A2s4l+W\nV04C+ZbfU4E7KS01UFqqERT0DKWlLYGZVFcbyMrSqK5+gVGjate2O9o1YWGvACNrYi0o6ITtPUZY\n2GmSk2+u873N3eoRwReEZsDZZrrKElkH3Ik/WxmSNYaCOdX0B0qAB4BVwNNAJ9QFIB9Y1vZeKNoO\n/Aj8iWuuWU6LFktZvx5KS62iXFraDyXS1ucKC411irGjbw/tsL0TCA8/Smmp9fHAgQE1n6059bp3\nBhF8QWgG1JXpnqk1gdFYyK/pp4nnEv7ATiKBUGCA5edyIALoDOwliNfIJCzsM7744g/MmfOz5Zyr\nSU4eTnh4GAkJn5KSYmv8nLT8tBXuzDpjd/Ttr7mmmhYtrHcCM2eOYs6cule9Nqde984ggi8ITRDH\njP7AgTY01JrgxImX+emnYsrKuhKifcxf2EAw0BdqGp6lAnehWiMUA7tpxRtsB8Ixm1vx7LPf0aJF\na8s41nYrtgunjh/fRVbWFEs8y/DzKyYm5gQrV1ptGltqL7q6pdbdyaJFRiff27xr9UXwBaEJ4ijm\nsbFzcJwQdbQ7Nm8uAm0ioxjFzeykEHgCa06+HNCnVf8H/MKlrGUssMvyTDybNy+kqOhBHD1z2xW5\nBQVXMmvWOvbtC8JorCQ5Of6M9lJj2hj4agsEdyGCLwhNEEcxj4jowlVXnbk1gb92kkR6MM/yzCrs\nnfM2wK/Atxh4mXuB14A1qJ6X6hynToXSkGceHh7Gv/99l090Hm1qiOALQhNEedcFqInXlvz++06O\nHGmFn18nOnSoAuztjuh2+7k07Q36Y5XrEuzLLb8DXmYDMAhQ1TLV1aUUFS1BOfoltG5toqiocZ65\nlFK6DxF8QfBRMjIOM27cKgoKOhEefpSVK0fRtavyspOTB7N16wKyslR9elnZGMrKlgHxpKauZfPm\nLwgOziW8bTsuP/4SxvTddMZe5IcCs4COwC78mc8e4D+Wo4rp1CmW7t0rSUmZgC7w/fq9xZ49cy0x\nZTJzZm1fXm+toCyd2oIupZTuQwRfEFzI+cxOx41bVSPopaUaI0c+w9VX9yArK5zYWBMREUa7lasQ\nhFoDO4PiIhN9i/7ENVk/0Qb4G/AKcDvKq28DbAPKgHXczKqaupyLgRGAxv79s3jvvTuxnRQtLw+0\ni2nOnKW1JlQbEnQppXQfIviC4EKczU6dvTDUd5zJZCYnR8O2nUBeXsuasdUuTHOxN2X2AH3wYz/j\nuYgYNHoDpZYjwlBVOCGWM5qBv/Moyqu3LacEMHD6tCrBtP18Q4akoZorpALBbNqUQ0GB2e6z2Qt6\nIZs25TJkSFrN55NSSvchgi8ILsTZ7NTZC4PjcWVl7wGQlpZLdfVMbNsJaFqE3dgFBbG0ajWL06fb\no0Q4GH/SmcooWgNXo8wZ/Qy3AWuBTGB/y1DSus7B//dDVFUtAyqBY8Bky/mV+Dt+PiXWa8HSJNls\nHk5S0lK71saHDlUCHwPDgLWYzY+Tnm79HqSU0n2I4AuCC3E2O63vwmCb0cfE5PH99/arUX/80Q+z\neSLUallWTnCwieJi69iqdcFsYmJe4FTO10xhA3HAPuBFrEK/BJiLMmwOGAL5vPsLxPVpz6f
Jg3n0\n0dWkpgLkoMR+Hcrw2QU8SEzMJ3YtjWfOvJJNm/6H2Xzmjpb6pC+0q3WslFK6DxF8QXAhzmanHTpk\nk57+L5SBUkSHDrZ7waoeNtAeVd9uFfGiohzL744ty/IpKTlOy5azKC/vhqYFoaZdDbSqzOMeNjCX\nusstw1E9cF7yv4viqo9hv4Hd+zVSU5+ksjIEg8FAy5a5lJXNQ9PiwLLQKiZmPgZDJCkp92N7pzJw\noL/dqlr9oud4kevS5UKMxsI6jxXcgwi+ILgQ57PTQLDbG+o9TCazpc1viuX1AcD1wDwgBmhBdXWU\n5fjrUHl5NKqTzd1o2mbKyu5CtTK7k0BWkMjtdM2HFtRfbvk9MI+HoeoKm6MKKS9vA1wClHD69KXA\nBJt3LeP06WNkZ3fA8U7l3/++krouenXd/Yh9c35xu+APHjyY4OBg/Pz8CAgIYMWKFe4eUhC8nuzs\nSGyFMjs7khkzNmI2P255vgBVUdMH8ENJdjzqYvAekAHMwX4dbIjl8XX48xBTeJvLLM9uwb7c8kng\nAiCdIBaRALyB/YYka1G1O/r5P8T+viAEaFeniNd30bMV97i4UmbPHiT2zXnG7YJvMBhYunQpoaGh\n7h5KEHwG+4VRbTh+fCdVVRdhFdV1wAzL4+G0aJFEefkBVG/KAqAH9gIcDBThzxb+j6uJRTnt+j1E\nf+ApoCfKfd9LIPN4CUjEKub6VuOlqEla2/PnYX9fUMw111Rb2hA7l6HbintUVIistPUAbhd8TdOo\nrq529zCC4HHOpgbfcWFUVtYITKZZwDisjQysgtuyZSTl5UlYBVffHlx/vJcW/EoiH2EE2qLuC/Qz\nhANGlNjPYxgwGnXXsAw4hf1W48ss77I9fy7qziIAP78sbrklnDfeGC4Zuo9xXjL8+++/H4PBwB13\n3MHtt9/u7iEFwSMkJq4hNbUt4E96egDl5Z/z4Yf/V+ex4eFhREf3tlsYdfp0F5RfHw38jvLvwwGN\n4uJg7DPuzsALQCcCWMOdfEJHqJmYreuSsA94jf0og8d2/mA2yh5S7REgwTKOyvZbtjxAWdlUoAug\nMWKErHz1Vdwu+MuXLycqKgqTycTEiRPp1q0b/fr1q/f4qKgQd4fkEiRO1+LNcZ44Yeahh1I5eDCY\nrl2LWbAgnoiIsFrHfPVVNqA6RYLGjz++WvO5Tpww88ADKWzapAF5DBgQRocO2PnfaguRGTaP56E8\n/BIgG3v5Pgp0pxWf8hc+IRi4FPtLQk9Urm4GdhDCAn4BuqPyfNsjL0c1QHseqEB1vDcAd9Kp0zx+\n/fVxpkxJZd++X8jP38vhw0amTl1d5/dwNnjzv7ktvhKnM7hd8KOiogCIiIjg5ptvZvv27WcUfF/w\n9XzFf5Q4XUNCwqqaUsmtW4P57rt/sHHjBMLDw2r62eTktKO6ugu2QlpcHMK+fUctG4Cssus5k5Ky\nBPgNeBWIRAl4H+yFuDd6GwNYgLVBcQkBHGIqM7kQ5ei3pnb1zS6gCEjmJ9Qq226Wcxc5HKkvv7rC\n8rt1nLCwzlRV+TN//nASElaSnj6DzEx9Edi5Z/re/m+u40txOoNbBb+0tJTq6mratGnDqVOn+O67\n75g6dao7hxSEs8IZ3z0jwx94B1XXspOsrF4MGrSEjRsn2PWzUatHrUJaWdmKQYOWEh3d27K61FbM\nNVT3Guvyp8DAX6ioGIO9ZBuAdJSFcyd+7Gc0/ehOUU0bYw1l7tyLtQ/O98AxgviI7aisvrvlqF7A\nXtQU7oVAS9RkrS781cDdNWfu3n1pzfcgPW58H7cKfn5+PlOnTsVgMFBVVcWIESPo37+/O4cUhLPC\nmRYHJtNhVCHjcvQtQbKyNJKSllJQYFuHPgyVscehWo/dR1bWf8nK8kdNehage/JwANs+OFBJUFB7\n2rV7kZycSNS0613AZtQdwAECuZ1EVnARqqmZ7eWjI+o
eIBz4BQMvk4Ta+1XP6kNRtf0VwHPAEWAp\nEAXMx8+vkN69+9K5cxHwHtnZkbJdYBPErYLfuXNnUlJS3DmEIDQKZ7LWdu3iLJOr9hOnq1ZVomm7\nsWb1eulxgeXnRqADavJ1OJCEqoTRd4PNRU3QLgBKKSp63tJL/hmU4P8XmI4BE4MZSD920hvV0cYP\ne1PGhDJqnuIOVOb+PKp/zjKgHFWRU2zzGb5HZfnqDDExc9mwwdrKWL/zueOObTV3PrJIyveRlbZC\ns8aZrLVbt5Ns365qz21lVrUviEOJahCqQUEgyjL5K9a+MwuAKajFSvYNz2AkyqefZxnNgLJdDgNt\n8eN1pjKDi6gkFHUPEYqa2l2GtbNlJvAmM1C1OXrzhDCUPbPaMsYCwsJ2YzYPx/Hi1a5dnN1nru/O\nR6pzfBsRfKFZo2etGRmtMZn2kZFhJCHhUzsv33qMH/v3P83p00bUQqRhwHpU24NiVLZ+P8qqWYeq\naTegxHYJyj5xXK2q/x6MkvA2qJLMSIJYwiNs4VpqbyLeFdiBWob1Bd1YxR2oOwioPX2rHgcGmtiy\nZQJJSUvZtCnHIvzqmG7dTtl9L+LXN01E8IVmjb5w6J57PmbHji5kZYWwY0cOP/zwHuXlFwD5XHNN\nMG+8MYJZs75jx47nsbY+eB3ohxL7oSj/Xq+jz0ZZKmGWn0eAVjiuVlVoKKPmYcBAC/L4Cw/QAlUh\nb9s8Qd9E/DCQg4G5bEU1MzuFaocQgrJwnkDdfRxH1c8vY8CAkJrPW1BgJimpfntG/PqmiQi+0OQ4\n212nTCYzX32VhbWG/l8cP/4Mutilpi6jRYuNZGWFY93cIwNV6a7L8XxUhYttHf0ylKWi96XRPfVi\nlOtegrJgwlGZfSEB/I9HeYCXUFOqtvcDbVDSvhmYxzxURq8Bn6CqbabYjP0CMBbdVoqN3cE//zm+\n5jM3tEJW/PqmiQi+0OQ42z1RZ8zYSEVFP6zyGoKj9ZKSchz4BiW5T6KyedvVqi+gJktt31cIvI+q\njLH11FehBD8Y/QLhzxbGE04HrF1yjlG7q2UJLfgHPwArLec5iTJ42tuN3bZtB6677hNLtY2Z5OTx\ntS56Z7owSsuEpokIvtDkcPSfMzL87TbpePLJK5k792ebTUayUTX2+i5MjguTTKiKmj4o774QVSrp\n2Opgj8P7TqKmWG1ragpQ9fVRwDEMHORmRtGXHXRB1ebonW3uRuX/eqOFDYRyqtfrxBauo23bzhQV\n7aBduzgOHy6nqGgnaq5AjT1oUIsGBVs2C29+iOALTQ6r/1wIrGXv3kPs2KGqY9LTNdasmUVl5eya\n15V4ZwMXofZvLUJZNsrDV4+fw75LDdiLeybKdHkS1aYsFHjA8vM9VK+aXqhFVOpcLXmTKXSnDfZe\n/RKsPStPAj8Bb/MhsbGZpG+6tdbn7dv3LYqKpqAvuwoK+onk5IRaxzkiE7PNDz9PByAIrsRkMlNe\nXkFY2AcEBLwCDKWiwr7LTGVlZ8vjVNRkaxFqknMsSozboiY6W6BE+yrss/k+KL9cL4FcjppwDUDZ\nQS1R+XmY5fho1H+1/6F8/+W04s88yqNcBfzB4ewRqPqeA0ApLXibn4AJhIZ2r/Mzq5LKcJTFNJKe\nPS8/45yFjtFYiLrEgEzMNg8kwxe8nrOZhH300S9Yt05tuWetbdFw3A5Q/QxGTWr2xl5y+6Hq4/X3\nV1PbqgkDLkbZKDp6q4IdDsf/BDwGrCKA9fyFJfRE4xDKvsHh6L3Ad0AyM7Dtf1lYmFHnZ7auE1DH\nXXjh6TN9nTXIxGzzQwRf8HrOxmv+8UfbLvB6bcsAVHVMIcq6Kbc8PoaycRzr1k/avL8Y5bnvRmX9\nB1CLqqC21/+75Zi7gVmoydTjwP34cZw7uI8LqKpZLfsAyux5DGsPnP8BvxDLWraj7gqWoyZ9Aykp\naUtBgbnWxc5RuBc
sGElVVcPfq0zMNj9E8AWv5+y8Zj1710V4p817v0cJcg9U1h2E6lLZBiW90ZZj\nJlse630oo4CHULZJAaqRWm/LuZdg7SMfClwLfInK9A1AJa14iGmspSWq4YFt82Mj8E/LmX/AwFv8\nRHT0ajiul4BqqN2n/CkqaklS0sZaIu0o3BERvtHhUTj/iOALXo+zi4BMJjMtWxZjbTlcSfv2p+jQ\noYqYmFOsWxeNmjgNQVXbPIG1cuYN1H+HauADVNOx6Vjl+VUgFtXorA/KytmL/cbeT6IuAN2Bv+HP\nVhKYRDBVhKHW49ree8SiMv1iYBVhFF74MqN676CkJJy0tGVAlkMMS2RiVWgUIviC1+Ho2c+ceSWO\nXrPtMR06ZAOB/PCDH2ZzT/SOM4GBc7jiigt4440refTRNahM3LZ2XpffdcCzNs/PcXjdgLqABKP6\n4kRZXg/HWk+Ti6rq6QQYaM10pvI6ccAh1KXAcQeqXagcftfVf+XzVbNqPv+QIWmoLQhXO8QQjtFo\nbvwXLDRbRPAFr8Pesy9g69YFREf3tpuwTUhYaXPMv7AX8uXAXVRUXEpqan9++WW+peVwFUpiQdkx\noGrsq7AX1hhqL3tqgbXR2WzUHMCtKBtHb5u8kAC+ZzjzuAhqvPo4y1nuxtp4YR/wHZEcaD+Jrz+c\nbPf5rXc09s3aYmN3kJw8HkE4V0TwBa/D3rNfR1bWk2RlWSds580bxKZNucBnqLr2AOBDy/GjUR0r\n56JWn75ETs5L2Fe5t8Bq59S1+2suKovXNwjMBfoC/0JZOlGoOv3lqHmAUYCBEN5iCjsJwbbxMDxt\n+WlEratNAl7hCoYOfYCvLRuB22Jt1uaPyTSXdu3i6NbtVJ2rZQXhbBDBF7wG3aZRu0Ppq17bYJt9\n792rcdllb1NW9kfURGknVL2LrdeeidqnNQJrWwMsP09TO6PvgrU12V7L43hUnX4R9nbPMlRWfxKV\nvz+PH/uJJ5o+VDAX1SvT9uzdUReAjqgan9dYQlBQFR9+OK7O70GqZwR3IYIveA22Vg5otG37EuXl\nJzl9Wm8ZUMC+fXuprn4Re4G3ldeLUNOhQ1HevGPVTjFqktb2Ob2RgV4FvxtlEd2Ftbe8fv5y1F3E\nt0B/gunNFPbQBWtdTrHD2fcAJ4C5PI1a2KURGvqi6744QXASEXzBa3AsvywpiaK6uhxYCORjMBRS\nXd0fewFuR+3e7yFY+9G/i/1WIUUoF/0pVO69H2XLrEb5+WGoCdqXUP89CrHtUaNkPRQD2VxPF66h\niDhUBX6X0UVuAAAgAElEQVRLyxHxWHtiHgR+IJhv+NYS0xLgd/r0EWtGOP9IawXB45w4YSYhYSUH\nDuzFdql/dfUhVOVLS+AhNK071kVSYF3s9CyqNn4Z1lYJuhV0G9bSS/189wDXAPcB/ijhH2455/2o\n7cCfQOXl01F2zyrURSKHAJ5kEg9yDUX0Rjn8D6LuJf6Gala8C7WI6oer/8qivbsIC/sSVc4ZCEzn\nxIm62yQIgjtxe4b/zTffMGfOHDRNY9y4cUyaNMndQwpegG3ZZExMHgZDJdnZHepsjfDQQ6kWK8ex\nX/x0rJt+L0fVzt+OypL1TUNOowTeH+XNL0CJ/SFUZh6GyvR160evrNmCEnRQUv0qtdsi2/aoAX/2\ncieP0Qkl246LqK6wRP078D3h7G8/hc/evIvw8DAGDowmJcW6w5T0rRE8gVsFv7q6mtmzZ7N48WKi\no6O59dZbufHGG+neXbKbpo6jH6+EfDTp6Rrl5e/QokXrmjr7I0d0K0fvF78ENXG6DjWRql8AilCZ\nfABqQrYUJdIXUbsscwLwIsryse1cqS+gMqIyeX3B1KWoUk1be2h/zWMD+dxOEp1Qa2mzqb2Iapcl\nor/zHPA05GrMmbOURYuM0rdG8ArcKvi//fYbRqORjh07AjBs2DDS0tJE8JsBGRn+W
CtfirGVx82b\n8ygq6g74k54eQIcOv6AmQnWhPWY51rZ0ch5KmF8GrkZZO9NRG4E4ZubBKHHvbvndtsGZXrnTEmuZ\nZXeUXD+OtavNVmASBhYxhme4kZyada/hqBoix0VUR4BlrETdbahY9JWxUnkjeANuFfzc3Fw6dOhQ\n87h9+/Zs377dnUMKHka3cvbu3U99PeSLiqqwzchPnnyRsLBXMJs7oCpkOlN7pWs0ag9Z2wqd5aiL\nQ0vs5fc3lGUzHXVn8aHl+TxUM7OHgR9Qwr4AlZf/EVv7BvJoxTKmMZN5DiPehSoYnYOq9P8dSOav\nQDLWuxn1WcW6EbwJtwq+pmkNH+RAVFSIGyJxPRJn3Uyd+rnFyvkMW8E2GNqiaUtQAh2DdW/YYIqK\nqhk6tA2pqX6orQIN1M6hg1Btix07YZajBFvvn3MUlbFnoCyhLOy3F1mG6pXzrOW5EahGadZiSj/2\n8Sce4BKUfeM4Iqj7h2LgZ+C+las5tKyYgwdX07GjCU2rICtrNV27lrBgwUgiIs7/34ov/H36Qozg\nO3E6g1sFPyYmhqysrJrHubm5REdHn/E9vtDlLyrKN7oReiLOffuCUNKod3pUQqtpLVFTnX1Q2fda\nrFn+cNLSnqBt21YUFenyOgwl4hEosR9qeY/tRWAr1lYJlUCZ5fl01DKnO1HzAbaSHYK6IDjePagW\nyi1ZwZ9ZSRRK7B0bJ+9EXbIOA/P4KzCPqsX1t2uuqjr/f9O+8PfpCzGCb8XpDG4V/EsuuYQjR45w\n7NgxoqKiWLNmDa+99po7hxRsUOWOq5zaOMRVqD4wBajVrh+ibJTTqHLIO1HSeT3wH2xFt7z8QgwG\n6ySpyqFjUcuWdGtoKMoaCkNV1kSiBL81+mbg1sVYRcBbqAzfceGVfZ8cP78f8av+HxN5kWiUQXQZ\nSuyHYu/qlwDbCWAZe1AXDqSDpeAzuFXw/f39mTVrFvfddx+apnHrrbfKhO15xFrueP42qU5OHszW\nrQvIyrLtJvMSyh/XbZxAVAXMIpS9UwSUU1b2V1Stew+sE7ftUNXtF6JEvhR1Z6B78Ccs4ziutu1v\nGddoOacR5d/HoCqBVJ+cmBgThTmnmcrrNVO8rbGK/TqsG5OUAIb7JnHqxLWQ0s0ynvj0gu/g9jr8\nAQMGMGDAAHcPI9TBwYPBOL9xyJlxdpvB8PAwoqN7k5VlK8DtgV9RkqnbOONQojsCdVF4EXVR6Im6\nIPwN6wVjBipTvxh157Ac1YuyBEgE3qb2att1KMG3nW69HVXW+RtgwJ9D9Mx5mb5Qk9lXAL9YzqqL\n/fdArrErr//8ExVVgRQUmJESS8EXkdYKTZiuXYvZurXhjUOcwXGbQb2WPiOjNSbTXiIiutC9eyXJ\nyYOJicnDXoBboeri11HbT9d/jwIWo5oRnLa8pxRVNtkVtSh8JKoLpm255nLUBWW25Ryhlvd84zBW\nBaq0cwYQTkve5EFeJghVgW9bxT8Pa9u0X4G+H3zMjcNGEGbZSUpKLAVfRQS/CbNgQTxlZWfORPXM\nvS7hts3gHfvc/PBDMWbzg+gymZX1Pjt2BLFmzVpURfoLqJbCe1GLnlRFTm0/HcvvIVgbmC1Bib6+\n4YgJlYNrqDsAx7qZwyg//3eU7/8ZyhKy9sDx89tD9+4XcOD3KQzj31yE2tMqHzUlbHvGCNQ9QC6w\ns/f/MX2YbR2/IPguIvhNmIiIhjNRxxWxWVnL2bFjZK1NRxy3GSwpsb0AFKJE/lkqKx27wFdYfgaj\nJmuXo7L3LagFSgtQ3vpfLOcyoKpt9K0D9bJJtayp9sYkO1Fi74eaatVQ62BNGAwLMBgKCAgopLz8\nSTJ/f5XH+DctsC/UdOyGvxdY3fmv9L7iYj4Su0ZoQojgN3McM3clzKvsNh3ZsuUFIiO70bLlLMrK\nugIFVFaWohqSrUMJdBeH8/RC9YzvgzJJ/FENy
u5CyeoW1MpWfd1qqOW9GsqnL0RV46iWxMHBwbRu\nncHJkyGcPDkLuBJ1FzAFa0Y/E1sZ17TOaFoorQK2MLG8KxEUcrHlXbaRhqIWUYUDu6KiefS7//FE\neIQLvl1B8C5E8Jsp1s1Gcqg94Wm/yjUnpx05OX7AH1AZ9RSsbvdc6l4odQRrqeQIm2MvRpVq9gJS\nULtP9QdmWc5/EjVluhblxa8FWtO2bQGXXRZJaupk9L48+lixsVmUlMTY1PAb0Dcab8F7TDr1Fn1Q\nS7JKLKPbRhqGarV22YrV3DZgoAu+XUHwTkTwmxG2lTbHj+8kK+shlOwtIyTkFOXlBygr80ctWtJ3\nnApFudlTsIq33mCgN9YLg75QKhrlpV+OfR6t7yk7EiXYek2+vvq1o+U1nWLgH+hZe1aWRnb2U8BS\nlGe/gKCgIMLDs4mIMFJdfYCiIpvaevZzEy25iHIuR80QBAKnLL+/iJrizQUyW7Zk8jdb6Ny1G4LQ\nlBHBb0bY+/WjgPctrxRQXNwG5YPbNv3VO0teQG3bR29yZrtQqhglpzEoJ9w2j24DVGP1823PV4i6\nSNger0u09ThNuxp1UVCWTXi4ucZ6ggJiY+cSHd2bnN8/5s8nV9ADlc3bVuC8iqrSH44ylHq/9TZT\n7ri7MV+rIPgMIvhNGMeVthkZAdgLbQFK0O+zPLbfzi8oKJLQ0AxycsB+Zep2AgK+oby8M0pC26EE\nXpU8qvLKu7GuUf0NdRFohVoEFYSSXF2GQ1H5ti7HJSg7Zz72F4GdqBYIYfj5taekRP8cAOFEtA1j\n4P4JtDhZXNPw7ANq32f8AmwA/mgptxSE5oIIfhPmgQdSSElR1S7p6RrR0S9gK6ABAcFUVtq2Frbv\nHBMenkV09CXk5NyAEu8yoAXV1X+mvPxfqIlafU3qfJTYg8r030bZNDstj5+ynONFVEbvKO6foTJ6\n2wtBEQbDU5bM/iQwGVXeeSctWhykqKhnTbxBPMWf9syhB8qmsZ3yrVXT87fnmPlIoku+Y0HwJUTw\nmzCbNtlPvppMkSi/3AAcoqoqFNiBVWSHoiZXewO7aN26DXv2bANyUPZNN5QL/jFK7HeiRPtt7Gvs\ny7BfHDUHqxVkQElxLPbibsDffzdVVXrXSwNt215ARUUwpaW23n4psbFzadu2M3v2DMOfF3iAp4lE\nTfmWoNbTrkXdY4xCFYgagX1Az7feZqRYOEIzRQTfx6ivxUFdzzvWo1RXF6AmX5cBT6BpytYJCHgG\n6EBl5WGUtbINuICMjN/RtBmo0kt9kdW/UBuRLMde1Gdh3SzcfkMSgyHC0iq72CaeocTEvMjp07EY\nDCauvroNYCQ19YGacw4atJStW49SWmr9DLGxOaSnTyMh4VP279nCgzxNR2qvvT2NMpbyLaMa3nqb\nv4rQC80cEXwfo74WB5s2VWI2twRuID09FFjK1Ve3JDX1JZS1coyIiALy8x0nTcMJDu6C2TwRtcI1\nEHgMNUmql17G2hyvi7njxGtfVG/6LNRCKqtI33ijgV275pKV1cVyvjhiY/ewceM9hIeH1bSgLSgw\n06KF/cpgs7mQMWPmUlDQifDwTFauHMnRjAx6bnyEKyiqKcB0XHt7CDUzkN82lAlfbpIKHEFABN/n\naKjFgV4yefhwW7p00YBpNa/17fsOO3a8YKmpt9opRUU5qOy8LepPwlY+9SZluoAXYW2LYOuOH0Jd\nWKpQ3S5fom3bKAYNakFy8jAAkpI2cvhwT4uYj6/VfK2uHjXh4WGkp0+refzh669wdO7zRKNaIDiu\nHNCAHwEzEP3W20yXrF4QahDB9zEcWxyoCpnaJZNGYxHHjkXYvZafH0OfPhXk5LRCTZqGAK2orn4I\nlQ8/i6qksfXWT6ImVZfTqlUZoaEZ5OYuQS2YeslyjmKUp1+NukNQq2mvu+49OwFvTMOxXdu28Wn8\nYDprGiGWT
51nGd22Z/1m4GhQax7/+nvJ6gXBARF8H0H36A8caENs7BxLk7MqysurSE21XgDCwvYw\ncGABTz55Bbfeuhpb8TYai9i06TQwFaugv4+qfAFlyVyCtY/8LtS+sGHAnUREzKWg4EJUnxtFQMBz\nVFY+jaqLWYtqoaA2B8/OjnTJZ1/98VL2JD7MGyhhn24T/VxUfVBHVGfLuLfe5nHJ6gWhTkTwfQTH\nJmeXXvoe0IKjR1sTGzuXdu3i6NbtFMnJdxIeHkZCwkoyMyej574xMb9SXh5JUVE7lH0TjxLyAlQd\n/nKs1TS6NdQH+CcBAZFERBwnK2sq6uKgoQt8dXVny/kqsDY8U6tnjcZKwPle+nXxzovPU/LmK/S0\njOLY2bIT6rK0u20od4lXLwhnRATfwzgrho7e/Y8/+mE2Wy8AV11l3c3KZDKzaVMlqi7+LgCOH99h\n6UNjK+h3Uv8kbAVwjKFDI/jww7sZMiSN48f15z9E7Vg1nerqcMv5PrR7f1jYaZKTbwZqTzQ7s/NW\nWspnbEmYQBuse8sOpfZWJzuB61es5o/SA0cQGkQE38M4K4a1vXt9az8A+92sZszYiNlchbJWQoAi\nqqvD7I4PCqrA3382JSX+qBW2O7H37gNRxY7v2Yy/FvssXu+pY8DP7xjV1db4Bg4MqLlwOV6sGtp5\n6z8LF3D0bzO4Cvu2CMtRl6VZqLqhA35+jFi3kd59Lz/j+QRBUIjgexhnxVDV1VtLFsvL29h590Zj\nUc3dwvr1oNab2u4rOwfb3HjIEPjii3KsneGvB55BbczdApVPG2p8+OTkwWza9CVms2MBJIDGLbdE\n1Cqp1HG8WNW389aWDRv45s7RdEXtcWVfza9G247aB6vlW28zQ7x6QTgr3Cb48+fP5z//+Q/t2rUD\nIDExUfa2rQNnxdCxZLGumvWkJFuf374vjiqvXEZY2GkGDgwgOXkQ69dX2RwTDlyFqrixdrLU4wkP\nD2PgQH9SUmwXQe0gOrqamJh8gHptKceLVV07b+kWzlUood9B7f2xvgcKWrfmwY1SgSMI54JbM/yJ\nEycyceJEdw7h8zgjhnWhXwD0rP6OO7ZZetvrXWRKsG5Q0gb4BX//Mq65xkhy8gjCw8MID8+yW8Wq\nO+V610nHeGrHOr5mgjgl5X7qs6XOtAfsrm3bWDlyCK0qKojCauH0x9pBPwzVmu1SaYsgCI3CrYKv\nVmoKZ+JMYujMhO6jj37BunVKbK37wd4DDMVgeBlNe9Hy2giqql4lNXUKLVooQV65cpRlFWsHNO0A\nXbv2IC5udZ2LovRY580bVBNTUtIGkpMHn7VHD8q++e6uMcRpGq1QbdG+xv5+oyewBwiZ+wp/u39S\ng+cUBOHMuFXwP/74Y1JSUrj44ot54oknCAkJcedwTQZd6Otql+B4cfjxRz9sxTYg4DQXX/yZpea+\nu4PnrpqS6YLctavRbhWrM9Q1yWw0ak7ZUjqrP17KvsSHa/bK0uvpY7G3cHYAwdNncKeIvSC4hEYJ\n/sSJE8nPz6/1fGJiInfffTcPP/wwBoOB119/nblz5zJnzpwGzxkV5RsXBXfFeeKEmZtu+pjMTH17\nQGs1TFZWeK1xDYYT2MpkSEgxv/zyIACjRy+289z1n3FxpWcV/4kTZh56KJWDB4PZv78a2wtMVlY4\n69Zdz5Qpyzl4MJiuXUtYsGAkERG1z3/49995+7rr0PLyuBb7GYYY1KaFy1BtEQ4FBjLhhx+49Mor\nnY7zfNDc/z5diS/ECL4TpzM0SvA/+OADp467/fbbmTx5slPH5uUVNyak84Le7MsdJCSsIjPTdutA\na7uE2NiCWuNefXUbUlP1LpXFXHGFH6NHL7HYQBUMHfoemZlhnDixj4gII927L2X27EHk5RU7vQYg\nIWGVzWSw/d61sbEFVFX5M3/+8Jrjq6pq/ztu2bCBNXeOph2qyfJOVF2QXsV
/AHVZy42MYsSaL7nN\nMinrTX8P7vx3dyW+EKcvxAi+FaczuM3SycvLIyoqCoAvv/ySuLg4dw3VpFB2i307ML1dgu0Eqi7W\nmZnRxMbutWm10NbOchk1ailpabcAt9Qay9k1APYe/TDCwl6hS5cLnZpkLjSZ+PD2MZz67Rcisd9A\nUe+8vxXVxrjrW2/zkEzKCoLbcJvgv/zyy+zevRs/Pz86duzI888/766hmhQxMXnArVhbIvzGpk33\n1Mq8HVst6CtthwxJw9kJVGcnW+1LR0MZOLA9ixbd2OBnKTSZePe6fgSeyKcDEIf9fUsUkI5aQnaD\nbDcoCG7HbYKfnJzsrlM3aQyGSlS/GmXRXH55O6daLehi7Wxd/9kce7alo4UmEx/eOoLAHdvpgloC\n1prabYx/B4au38TAmwf4xG2zIPg6stLWy8jO7oCavtQff1bncfWJta04x8WVMnt2/eLsrJCfqXTU\nkS0bNrDqztG0R7VAsF3nexfWNsbfA/1XrJa2CIJwHhHB9zIam3XbinNDE05nI+QNUWgy8cn/3U7Z\nT/8jGrVm19a+CQcWoBZRHbj8Sh5Y/gmh4REuGVsQBOcQwT+POFMV8+STV7J1q76l31FmzhxV57lc\nKdaNpdBk4tUr+hB56iRdgQyUjWNr32QBtGzFdau/kKxeEDyECP55xJmqmLlzfyYr60nAQGmpxpw5\nS1m0yOiJcJ1CX0TVDvsKnGewt286zn1FFlAJgodp8oJfV1ataZzzhhyNwZmqmHNpU+AJ0lI+Iz1h\nAj1QmyN2wN7CuQDV1fJXoK9U4AiCV9DkBb+urBo46w05XIEz/vzZVNl4gkKTifWJD3MkdY1da4SZ\n2Fs4+1FCP12EXhC8hiYv+PVnzOc/i3amKuZcu2eeD45mZLDwun6EVVfVqqmPRO2EG4US+/C/PSdZ\nvSB4GU1e8OvOmM+u2ZercGai1ZsmY3UKTSa+eHgSpWnrCUNtObgT1XxZb41QBJQBmZf2JfG/n0kF\njiB4IU1e8OvPmL0zi/Y2jmZk8J8Bf2RuRTnLgenozZZVa4RoVEYfcNUfeOCj/4jQC4IX0+QFv76M\n2duyaG9k17ZtpA4dRFfq3ua8N/CjwY/79hwQoRcEH6DJC75w9hzNyODzUX/C73guc4FXULZNMbW3\nHBz6xUYRe0HwEUTwBTv0rL43qtfNEdTq2GUooX8JaAvkRbfn9tVfyN6yguBDiOALgEXoRw7hgooK\nLgGGoerrXwKmAGtRk7SFLVpy7efrZbWsIPggIvjNHL0C50Taeru6erXHFrRHZfeHUZ0tbxOhFwSf\nRQTfDTi7k5SnSUv5jG8SJhAFGKlrjy3YB1RFRnHXmi/FvhEEH0cEvwHqEu+GthNzdicpT1JoMvFz\nwgQ6A0+gsnjbCdm9wGZUVi/2jSA0DUTwG6Au8f7sswlnfI8398PRWyPkfLWei4BAVKTxKBvnJJCN\n2nLwZulXLwhNCj9PB+DtnIt4G42FqDwZvKkfztGMDN699CJOpK7huYoKgoBjqEjDgDtRi6hCbhzC\ntL2H+OOAgZ4MVxAEFyMZfgOcSzMzb+yHU2gy8emga7m2vAwT1qx+CfA00BXYHxjI7d9tFa9eEJoo\njRL8devWMX/+fDIyMlixYgV9+vSpee2dd97hk08+wd/fn6eeeor+/fs3OlhPcC7i7S39cMwnTvDf\ne+6hcPO3VBYXM1vTMAAfY83qpwFzAgOpvOkW7ntjviyiEoQmTKMEPy4ujvnz5/P000/bPZ+RkUFq\naipr164lJyeHiRMnsn79egwGQz1n8l68RbzPlqMZGSy89goiNI0eqEVU24FLUTX2r6I6XO5v2Yp7\nf9sjQi8IzYBGCX63burWX9M0u+fT0tKIj48nICCATp06YTQa+e2337jssssaM5zgJLp9E6VpdrtQ\nPY0S/FDADJyIiua2z9eL2AtCM8EtHn5
ubi59+/atedy+fXtyc3PdMZTgQKHJxL8HX0e306VUYF9b\n3xVYDByL7ci9GzeL0AtCM6NBwZ84cSL5+fm1nk9MTGTw4MF1vscx4wectnMaqnH3FrwtTvOJE6Q8\n8ACZa9Yws6LCzqvXM/x9QI9Ro3j4/fcJi/Ausfe277M+JE7X4Qsxgu/E6QwNCv4HH3xw1ieNiYkh\nOzu75nFOTg7R0dFOvTcvr/isxzvfREWFeE2cu7Zt48sx8XQ9Xcpx7FfMDgNeADqixF7fW7aiyru+\nZ2/6Ps+ExOk6fCFG8K04ncFldfi2Wf3gwYNZu3Yt5eXlHD16lCNHjnDppZe6aijBhi/HxDP7dCn3\no1bM7sW6AiAU8IvtyIC9h5h+vEi2HBSEZk6jPPyvvvqK2bNnU1BQwOTJk+nZsyfvvvsuPXr0YOjQ\noQwbNoyAgACeeeYZn6zQ8WaOZmSQOm443U6X2vn03VFtEsqBnE6duCPtO/HqBUEAwKDVZbh7EF+5\nffJUnIUmE9/OeIyDa1fzXEUFy1BdLXWffhYQGhZG62uu488fLaGiKtAjcZ4NvnTbLHG6Bl+IEXwr\nTmeQlbY+gi702qYNtDSb6YZ9D5xS4ECrIG5eta6m/01YhG/8sQqCcH4QwfcRvp3xGPemfGqXydv2\nwJkT25G/pO/2ZIiCIHg5IvhejJ7Vhx4+hHbogJ1X3xO1G1V7Pz+yYzowdOUazwUqCIJPIILvxdhm\n9Y419dlhYcQMHMz1ya/JpKwgCE4hgu9l7Nq2jS9G/wljWRm5wALgblRN/SthYXTv0o1CYxfGiNAL\ngnCWiOB7GV+OiefFsrKaTH4ZkIry6SMHDub6RYs9GZ4gCD6MCL6X0a3stJ1XHwKYgoJYPGQo1ye/\n5sHIBEHwdUTwPYztxGyh0cjuwBZo5dYMvxioHjKU4ZLZC4LQSETwPYxduWX6z/x94CCe+vF7jGVl\nHDcYaHX9QMZIZi8IggsQwfcwoYcP2Vk4nQsLuftonidDEgShiSKbmJ9HCk0mPk+4l2+H3MDnCfdQ\nWGCi0Gi02e4cCo1dPBihIAhNGcnwzyOO9s1iDFyf/DqLMVg8/C4yMSsIgtsQwT+PONo3oYcPERoe\nIROygiCcF8TSOY+IfSMIgieRDN8NOJZaXp/8OqHhEWLfCILgUUTw3UBdXv3wRYvFvhEEwaOIpeMG\n6vLqBUEQPI0IvhsQr14QBG9ELB03IF69IAjeSKMEf926dcyfP5+MjAxWrFhBnz59ADh27Bjx8fF0\n69YNgMsuu4xnn3220cH6CuLVC4LgjTRK8OPi4pg/fz5PP/10rdcuuOACVq5c2ZjTC4IgCC6kUYKv\nZ/CapjVwpCAIguBp3DZpm5mZydixYxk/fjw//fSTu4YRBEEQnKTBDH/ixInk5+fXej4xMZHBgwfX\n+Z7o6Gi+/vprQkND2blzJw8//DBr1qyhTZs2DQYUFRXiRNjnD/OJE6Q+9BDBBw9S3LUr8QsWAN4X\nZ31InK5F4nQdvhAj+E6cztCg4H/wwQdnfdLAwEBCQ0MB6NOnD507d+bQoUM1k7pnIi+v+KzHcyef\nJ0yyLqLaupXFZZVM/OwTr4uzLqKiQiROFyJxug5fiBF8K05ncJmlY+vjm0wmqqurATh69ChHjhyh\nc+fOrhrqvCKLqARBaCo0atL2q6++Yvbs2RQUFDB58mR69uzJu+++y08//cTf//53AgIC8PPz4/nn\nn6dt27auivm8Umg0oqX/XLPloCyiEgTBV2mU4N90003cdNNNtZ4fMmQIQ4YMacypvQZZRCUIQlNB\nVto2gCyiEgShqSC9dARBEJoJzVLw69pbVhAEoanTLC2d+vrVC4IgNGWaZYYvpZaCIDRHmqXgS796\nQRCaI03e0qlrf1kptRQEoTnS5AW/Pr9ePHtBEJobTd7SEb9eEARB0eQFX/x6QRAERZO3dMSvFwRB\nUDR
5wZfWCIIgCIomb+kIgiAIChF8QRCEZoIIviAIQjNBBF8QBKGZIIIvCILQTBDBFwRBaCY0SvCT\nk5MZOnQoo0aNYtq0aZSUlNS89s477zBkyBCGDh3Kd9991+hABUEQhMbRKMHv378/a9asISUlBaPR\nyDvvvAPA/v37SU1NZe3atSxatIjnnnsOTdMaOJsgCILgThol+Ndeey1+fuoUffv2JScnB4ANGzYQ\nHx9PQEAAnTp1wmg08ttvvzU+WkEQBOGccZmHv2LFCgYOHAhAbm4uHTp0qHmtffv25ObmumooQRAE\n4RxosLXCxIkTyc/Pr/V8YmIigwcPBmDBggUEBgYyfPhwgDrtG4PBUOs5QRAE4fzRoOB/8MEHZ3x9\n5cqVbNq0iSVLltQ8FxMTQ3Z2ds3jnJwcoqOjnQooKirEqeM8jcTpWiRO1+ILcfpCjOA7cTpDoyyd\nb775hnfffZcFCxbQokWLmucHDx7M2rVrKS8v5+jRoxw5coRLL7200cEKgiAI545Ba0T5zJAhQ6io\nqIMzjrUAAATvSURBVCAsLAyAyy67jGeffRZQZZkrVqwgICCAp556iv79+7skYEEQBOHcaJTgC4Ig\nCL6DrLQVBEFoJojgC4IgNBNE8AVBEJoJXiv47733Hj179sRsNns6lDp58803GTlyJKNHj+b+++8n\nLy/P0yHVyZn6HXkT69atY/jw4fTq1YudO3d6Ohw7vvnmG/70pz9xyy23sHDhQk+HUy8zZ87k2muv\nZcSIEZ4OpV5ycnKYMGEC8fHxjBgxwq6c25soLy/ntttuY/To0YwYMYL58+d7OqR6qa6uZsyYMUye\nPLnhgzUvJDs7W7vvvvu0QYMGaQUFBZ4Op05KSkpqfl+yZIn29NNPezCa+tm8ebNWVVWlaZqmvfzy\ny9orr7zi4YjqJiMjQzt48KA2fvx4bceOHZ4Op4aqqirtpptu0jIzM7Xy8nJt5MiR2v79+z0dVp1s\n3bpV27VrlzZ8+HBPh1Ivx48f13bt2qVpmvo/NGTIEK/9Pk+dOqVpmqZVVlZqt912m/brr796OKK6\n+eCDD7Tp06drDz74YIPHemWGP2fOHJKSkjwdxhlp06ZNze+lpaU1PYW8jfr6HXkb3bp1o0uXLl7X\nZO+3337DaDTSsWNHAgMDGTZsGGlpaZ4Oq0769etH27ZtPR3GGYmKiqJXr16A+j/UvXt3jh8/7uGo\n6iYoKAhQ2X5lZaWHo6mbnJwcNm3axG233ebU8Q2utD3fbNiwgQ4dOnDRRRd5OpQGef3110lJSSEk\nJMRrb01tWbFiBcOGDfN0GD5FXX2htm/f7sGImg6ZmZns2bPHaxdlVldXM3bsWI4cOcKf//xnr4xT\nT46Li4udOt4jgl9ff55HH32Ud955h/fff7/mOU9mfA31EUpMTCQxMZGFCxfy0UcfMW3aNA9EeXb9\njjzp7zoTp7fhbXccTYWTJ0/yyCOPMHPmTLu7ZW/Cz8+Pzz77jJKSEh566CH2799Pjx49PB1WDV9/\n/TWRkZH06tWLLVu2OPUejwh+ff159u3bx7Fjxxg1ahSappGbm8u4ceP473//S7t27c5zlA33EdIZ\nPnw4Dz74oMcE/1z6HXkCZ79PbyImJoasrKyax7m5uU73hRLqprKykkceeYRRo0Zx0003eTqcBgkO\nDuYPf/gD3377rVcJ/s8//8yGDRvYtGkTZWVlnDx5kqSkJJKTk+t9j1cZz3FxcWzevJm0tDQ2bNhA\n+/btWblypUfEviEOHz5c83taWhrdunXzYDT1U1+/I2/Gm7LqSy65hCNHjnDs2DHKy8tZs2YNN954\no6fDqhdv+u7qY+bMmfTo0YN77rnH06HUi8lkqrFJTp8+zQ8//OB1/8cfe+wxvv76a9LS0njttdf4\n4x//eEaxBy/08G0xGAxe+wf86quvcvDgQfz8/IiNjeW5557zdEh18
sILL1BRUcF9990H2Pc78ia+\n+uorZs+eTUFBAZMnT6Znz568++67ng4Lf39/Zs2axX333Yemadx66610797d02HVyfTp09myZQtm\ns5kbbriBadOmMW7cOE+HZce2bdtYvXo1cXFxjB49GoPBQGJiIgMGDPB0aHbk5eXxxBNPUF1dTXV1\nNfHx8TX7ffgy0ktHEAShmeBVlo4gCILgPkTwBUEQmgki+IIgCM0EEXxBEIRmggi+IAhCM0EEXxAE\noZkggi8IgtBMEMEXBEFoJvw//5K32R/vBHAAAAAASUVORK5CYII=\n", + "text/plain": [ + "\u003cmatplotlib.figure.Figure at 0x7f5be3c99f50\u003e" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current loss: 9.48636\n" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.scatter(inputs, outputs, c='b')\n", + "plt.scatter(inputs, model(inputs), c='r')\n", + "plt.show()\n", + "\n", + "print('Current loss: '),\n", + "print(loss(model(inputs), outputs).numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "sSDP-yeq_4jE" + }, + "source": [ + "### Define a training loop\n", + "\n", + "We now have our network and our training data. Let's train it, i.e., use the training data to update the model's variables (`W` and `b`) so that the loss goes down using [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent). There are many variants of the gradient descent scheme that are captured in `tf.train.Optimizer` implementations. We'd highly recommend using those implementations, but in the spirit of building from first principles, in this particular example we will implement the basic math ourselves." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + }, + "colab_type": "code", + "id": "MBIACgdnA55X" + }, + "outputs": [], + "source": [ + "def train(model, inputs, outputs, learning_rate):\n", + " with tf.GradientTape() as t:\n", + " current_loss = loss(model(inputs), outputs)\n", + " dW, db = t.gradient(current_loss, [model.W, model.b])\n", + " model.W.assign_sub(learning_rate * dW)\n", + " model.b.assign_sub(learning_rate * db)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RwWPaJryD2aN" + }, + "source": [ + "Finally, let's repeatedly run through the training data and see how `W` and `b` evolve." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "height": 446 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 569, + "status": "ok", + "timestamp": 1527005915434, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "XdfkR223D9dW", + "outputId": "c43591ae-d5ac-4f2b-a8e7-bfce607e0919" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: W=5.00 b=0.00, loss=9.48636\n", + "Epoch 1: W=4.58 b=0.42, loss=6.28101\n", + "Epoch 2: W=4.24 b=0.76, loss=4.29357\n", + "Epoch 3: W=3.98 b=1.02, loss=3.06128\n", + "Epoch 4: W=3.78 b=1.23, loss=2.29721\n", + "Epoch 5: W=3.61 b=1.39, loss=1.82345\n", + "Epoch 6: W=3.49 b=1.52, loss=1.52970\n", + "Epoch 7: W=3.38 b=1.62, loss=1.34756\n", + "Epoch 8: W=3.30 b=1.70, loss=1.23463\n", + "Epoch 9: W=3.24 b=1.76, loss=1.16460\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW0AAAEDCAYAAAD+/1UIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4VOXdPvD7zJZ9XwmELQkQIAELsiTsi6xiEBGXAiIW\nbV8WBY2K0tLa4lbsr283qxURtIoioAi8SpFNg6whi0FJKAoJBgLZt5k5c87vj5OZLIRkgEnOGXJ/\nritXJsmZyT0sN1+enPOMIMuyDCIicgs6tQMQEZHzWNpERG6EpU1E5EZY2kREboSlTUTkRljaRERu\nxODMQePGjYOvry90Oh0MBgM2b97c1rmIiKgZTpW2IAjYuHEjAgIC2joPERG1wKnlEVmWIUlSW2ch\nIqJWCM5cETl+/HgEBARAEATMmTMH9957b3tkIyKiJpxaHvnggw8QFhaG4uJiLFiwAD179sTgwYPb\nOhsRETXh1PJIWFgYACA4OBgTJ05EVlZWi8fL3t6AIADdugFvvglYrTeflIiIWl8eqampgSRJ8PHx\nQXV1NR5++GEsXrwYI0aMuPadCgtRvfoFeL2zDkJtLWxdu6NqRSrMs+8DDE4N9y4XFuaHoqIKVb73\ntTCTc7SYCdBmLmZyjlYzOaPVSfvy5ct44IEHkJKSgjlz5mDcuHEtFzYAREai6oWXUHwkA9WPPApd\n4QX4L/sVgpMGwWPTvwFRdCocERE15tQPIm9Ew3/FdBcK4P3ntfB89x0IVivEmFhUP/kMzCmzAL2+\nLb79VbT6LysztU6LmQBt5mIm52g1kzPa5YpIKaozKl9+DcWHT6Jm7gLof/wB/r98BEGjh8Fj28cA\nTyckInJKu17GLnWJRuXaP6P40AnUPDgP+jN58F+0AEFjhsO0fRvLm4ioFarsPSJ1647KP/0VxWnH\nUTvnAehPf4+AhfMQNG4ETDs/A/hiOkREzVJ1wyipR09U/OV1lHx9FLX3zIH+uxwEPPQAAieMgunz\nXSxvIqImNLHLny0mDhV/fxMlB4+g9u57YMjORMDcOQicNAamPV+wvImI6miitO1scb1Q8fo6lOz/\nBrUzZsJ4Mh0B99+DwKnjYdy7h+VNRNftL395DR999IHj4+XLl2DVqlWOj//61/+HDz/8txrRboim\nStvO1iceFf96B8V702CeNgPG48cQOGcmAu+cBOOBfSxvInJa//6JyM7OAKBsfldWVorc3FzH17Oz\nM5GQMECteNdNk6VtZ+vXH+Vvv4uSPQdhnjwVxiPfIPCeGQhImQpj2ldqxyMiN5CQMBBZWZkAgLNn\nz6Bnzxj4+PigsrISVqsVP/74A+Liequc0nnqXFN+ncSEASjf8AEMJ0/A+9UX4bH7c5hSpsIycjSq\nnloJcdhwtSMSkRN8Vj8Pj+3bXPqY5jtTULX699f8emhoKPR6Ay5duoisrEz075+I6uoyZGdnwsfH\nBzExsTCotL3GjdD0pN2UOPBnKH/vI5Ts2gPL2PEwHdyPoBmTEDD7LhiOHlY7HhFpVGJiIrKyMpCd\nrZT2gAEDkJWVgaws91oaAdxk0m5KHHQ7yjZtheHIYfi8sgam/Xth2r8X5vETUZ26EuJtg9SOSETN\nqFr9+xan4rbSr18isrIy8d//KssjHh4y/vnPf8HX1wfTpt3V7nluhltN2k2JQ4aibPMnKP1kFyzJ\nI+GxZzeCJo2F/8/vhSHzpNrxiEgjEhIGIC3tIPz9/SEIAgICAlBZWYHs7Cz075+gdrzr4talbWcd\nnoyyrTtQuuUzWIYlweOL/0PQhFHwn3c/9HU/gCCijismJhbl5WXo3z+x0ef8/Pzg7+9er33bLrv8\ntStZhvHAPvi8/AcYjx0BAJin3wWPXz+Hom69lRdn0Ait7jTGTM7RYi5mco5WMznjlpi0GxEEWEeP\nRemO3Sj9YAusPxsEj88+AYYMQeCEUfBc/xaEinK1UxIR3ZBbr
7TtBAHWcRNQuutLlG7aCsycCUNO\nNvxSn0BIQm/4Ll8CQ/pxXqhDRG7l1i1tO0GAdex4YMsWFJ88hapnV0EKCYHXu+8gaNJYBI4fCc+3\n/wWhvEztpERErbr1S7sBKSIS1U88heKjmSj9YAvM02bAcOpb+D29HCGJveH7xGIYThzj9E1EmtWh\nSttBp4N13ASUv/2uMn2v/DWk0DB4vbcBQZPHIWjcCHiue5PTNxFpTscs7QakiEhUP/4kio9koHTT\nVpinzYD++1Pwe2aFMn0//j8wHD/K6ZuINKHDl7aDTgfr2PHK9J2eg8rnfgMpNBxe/96IoCnjETQ2\nmdM3kZsqLPwJ8+bNUTuGS7C0myFFRKJm2QoUHzmpTN/T74L+9HfK9J3QC77LfgXDsSOcvonciKCh\nazRuBku7Jfbpe91GXEk/hcrnV0MKj4DX++8iaOoEZfp+6w0IZaVqJyWiVoiiiD/8YTXmz78fy5Yt\ng9lsVjvSDbn1roi8BpddASVJMB7YB6+N62Ha9RkEUYTs5QXzXXejZu5DEAcPcfqqS61elcVMztFi\nLq1nWr3aA9u3u3afujvvFLF6dcsFXFj4E2bPnoF//GMd+vdPwJ/+9CI6dYrGfff93KVZbkbHvSKy\nrel0sI4Zh/K3NuDKye9Q+fxvIYVHwPOD9xA0bSKCxiTB861/cvom0piIiEjH5lAzZsxAZmaGyolu\njFtuzaoVcng4apY+gZrFy2A8uB+eG9fDY+d2+D37FHx/92uYZ8xEzbwF1zV9E93KVq82tzoVt5Wm\na9ru+leSk7Yr6HSwjh6Lin+9o0zfq34HKSISnpv+XTd9D4fnv16HUFqidlKiDquw8Cd8+202AGDH\njh1ITByocqIbw9J2MTk8HDVLHkfxN+ko3fwpau+6G/q8XPitTEVIYm/4LXkMhiOHeeYJUTvr3r0H\ndu36DPPn34+ysjKkpNyjdqQbwh9EtgOhqAiem/4Nz41vw3D2vwAAsU88DAsfxpVREyH16KlKruZo\n/QdZWqLFXMzkHK1mcgYn7XYgh4WhZvEylBw6gdKPt6M25W7oz+QBTz2FkKEDETR6OLxf/gMMWRmc\nwImoRfxBZHvS6WAdORrWkaNReeUKQr/eA/Omj2A6sA8+a1+Gz9qXYYvuCvOUabBMvRPWIcMAN3qV\naCJqe2wElcghIcDChSifcS+EygoYv/wPPHZ+BtPuz+H9xj/g/cY/IAUHwzxpKixTpsMyeizg5aV2\nbCJSGUtbA2RfP1hmzIRlxkzAYoHx64NKgf/fDni9/y683n8Xsrc3LGMnwDx1OiwTJ0EODFI7NhGp\ngKWtNSYTrGPHKy/c8PJaGE4cg8euHTDt3A6PHZ/CY8enkA0GWJNGKgU+ZRqkTlFqpyaidsLS1jKd\nDuLgIRAHD0HV86uhP/09PHZ9BtPO7TAd2AvTgb3AMytg/dkgmKdMh2XqnbDF9VI7NRG1IZ494i4E\nAbbefVD9+JMo/WI/rqTnoOLFV2EZOQaGjJPw/cNvEZw8GEFJg+Dz+9XKHuCSpHZqItVVVlZi69bN\nbfb406dPQGVlJQDgypXLGDnydmRlZTT4+kSUl7vuxcSdLm1JkjBz5kw89thjLvvmdOOkzl1Qu/BR\nlH38Ka7knEH5X/8J89Q7oS/Ih/f/voagKeMRPDAevqlPwLjvS8BiUTsykSoqKsqxdetHzX5NcsFg\n07dvArKzMwEA2dmZ6NWrD7KylI/PnfsRgYFB8Pf3v+nvY+d0aW/YsAExMTEu+8bkOnJQMMz33o/y\n9e/h8qmzKHvnfdTOeQCCuRZe699C4L0pCOkbA79fPgLT9m1A3VRA1BG8/vpfceFCAR5++EH8/e//\ni/T045g3bx5++9vnMX/+fVe9QML777+Lt99+EwBQUJCPFSuW4pFH5mHx4kU4d+7Hqx4/ISHRUdpZ\nWZmYM+dBfPttfYknJCS69
Pk4taZdWFiI/fv347HHHsPbb7/t0gDkYt7esEyZBsuUaYAowvhNGky7\nPoPHzs/g+fGH8Pz4Q8geHrCMGQfLlOkw3zEFcmio2qmpAwke1L/Zzxcfz3bJ8U398pdL8MMP/8W6\nde8BANLTjyMrKwsbNnyIyMhIFBb+dM0XSHjllTVITV2Jzp27ICcnG2vXvoQ///kfjY7p3z8R69e/\nBQA4depbPPLIY/joo38DUEo8IWGAUzmd5VRpr1mzBqmpqaio0NZln9QKgwHWEaNgHTEKVb9/GYas\nDOUslF074PH5Lnh8vgu+Oh2sQ4fDMnU6zFOmA2HN/wUhupUkJiYiMjKyxWNqamqQnZ2BVauehn23\nD1EUrzqub99+yM39HrW1tbDZbPD09ERUVGcUFOQjOzsD99/v2j27Wy3tffv2ITQ0FPHx8Th8+LDT\nD+zsdfTtqcNnGj9SeVv7CpCbC3zyCYStW2E6lAbToa/hu+pZICEBYWPGAGPGAKNGARqZwrX4ewdo\nM5fmMzWzxAAAYde68/Ue34TFUg69XufIEBjoDS8vL8fHklQNQajPaDQCOp0JwcHeCAgIwPbtn7by\nHfzQvXs37N//OQYMSEBYmB+GDBmMrKxjKC8vw6Br/E/hRrVa2idOnMCXX36J/fv3w2w2o6qqCqmp\nqXjllVdavJ8WN2NhpgYCI4H5jwLzH4Vw8SI8Pt8Jj53bYUr7CsjKAv7yFwCAGN8X1uHJsCSPhHVY\nMuQwZ/+quI4Wf+8AbeZipqvV1sqoqKh0ZCgtrQZQ31GSZMLly1dw5kwBPD09sXv3HgwbloSaGhkR\nEZ3w4YdbMXbsBABAXl4uYmPjrvoeffr0w7p1b2PhwkdRVFSBbt164YUXViE+vp/Tz93Zf2xbLe3l\ny5dj+fLlAIAjR45g3bp1rRY2uRc5IgK18xagdt4ChPmbUPrFPhi/Pghj2tcwHjsMw6kceK1TfjAj\n9u4D6/BkWJNHwjJ8BOTwcJXTE7XM3z8ACQkDMH/+fRg6NAnDhyc3+rrBYMCCBY9g0aL5iIrqjG7d\nuju+9utfv4A//vElvPPOOthsIsaPv6PZ0k5IGIDNmzehXz/llXF69+6DoqIizJgx0+XP57q2ZrWX\n9uuvv97qsfzXvnVukcligSH9BEyHvlKK/OhhCNXVji+Lcb1gHT4C1uQRsCaNgBTR8jqhSzJphBZz\nMZNztJrJGdxPW0VumclqheHkCRgPfQ3T1wdhOHIYuqr6UwjFmFhYk0Y43lxxib0Wf50AbeZiJudo\nNZMzeBk7XR+jEeLtQyHePhQ1S5crJZ55UllKSTsI4+Fv4LVxPbw2rgcA2Lr3UNbD65ZUpM5d1M1P\n5OZY2nRzjEaIg26HOOh21Cx5HBBFGLIylBI/9BWMh9Lg9d4GeL23AQBg69odluQR9SUe3VXlJ0Dk\nXlja5FoGA8TbBkG8bRBq/mcpYLPB8G0WjF9/VV/iddvNAoAtuiusSSNgsS+ndO3mvi+TTdQOWNrU\ntvR6iIkDISYORM0vFwM2G/Q538KUdtAxjXtu+jc8NylXkNk6d3Gsh1uSRkDq3kPlJ0CkLSxtal96\nPWwJiahJSETNo/8DSBL0p3Ial/hHH8Dzow8AALZOUcCY0fDqkwAxcQDEhETI/gEqPwki9bC0SV06\nHWz9+qOmX3/U/OKXSol//x2MaV/BlKYsqeD99+GL9x13EXv0VKb3hAF1RT5Aefk2og6ApU3aotPB\nFt8Xtvi+qF24CJBlhJUWonx/GgyZGcpb1kl4frIF+GSL4262LtH1JZ44AGLiwDY5Z5zcT2VlJXbv\n/j/MnHlPm32PNWt+i+TkkRg9elybfQ87ljZpmyAAvXrBHNQJ5pRZyudkGbr8844CN2RmwJhxEh67\nPoPHrs8cd7WFR9SXeMJAiIkDIHWJ5g86Oxj7ftpNS1uSJOh07vc6MCxtcj+CACm6KyzRXWG
Zdqfj\n07qLhTBknmwwkWfA4z9fwOM/XziOkYKCHAVuf7N17wm44V9edzVokE+znz9+vMolxzfVcD9tvV4P\nLy9vREVF4ttvc/Dqq39Gaurj2LBhEwBlL+3a2hosWPALFBTk47XXXkFZWSk8PT2Rmvocunbtds3v\nc/ToYXz44fsoKSnG4sVPIClphFP5rhdLm24ZUkQkLBMnwzJxsuNzwpUrMGTVl7gh82T962va7+fr\nBzEh0bE+LiYOhC02DjDwr8etoOF+2unpx5Ga+gTWrn0VRqPfTe+l3VBh4U/429/eRH7+eSxd+hg2\nbdoGo9Ho8ufDP5V0S5NDQmAdMw7WMfVrjUJ5GQzZWfVTeVYGjIcPwXTo6/r7eXlB7NvfsT4uJg6A\n2DseMJnUeBq3FGcn5Bs9vjV9+/ZDVFRUi5exO7uXdkPjxk0EAHTpEo2oqM748ccfmt1c6maxtKnD\nkf0DHOeCO1RVwZCT3WAiz4AhIx3G40fr72c0QozvpxR4/0Rg2CAIIZ2VnQ65Tu42PD09Hbf1ej1s\ntvrXibRYzAAAWZbg5+fveLUbZzSd2K81wd8sljYRAPj4OPZUcTCbYfgup9FZK4Zvs2HMPOk4JBSA\n5B8AW2wsbDFxsMX1glj33tajJ+Dh0f7PhRrx9vZGdd3OlE33xwsKCkZpaQnKy8vh6emJtLSvMGxY\nEry9fdCpUxT27v1Pq3tp2+3d+x9MnjwNFy4U4MKFghbXv28GS5voWjw8IA64DeKA2+o/Z7VCn3sa\nhqwM+F/4EeaMbOjP5MKQlQnjieON7i7rdJC6doMYG+codFtsHMTYXsqLSXA6bxcN99M2mTwQHBzs\n+Jor9tK2i47uhsWLF6GkpBhPPbWyTdazAW7Nqipmco4WMwFNcokidOd+hOFMLvS5udCfyVXKPS8X\nustFV93XMZ3H1he5LTbupqdzLf5aMZNzuDUrUXsyGCD1jIGlZwzQ4OwVABBKS6DPy4U+LxeGuvf6\nvNOtT+f2Iq9bcuF0TgBLm6jNyYFBEAcPgTh4CMwNvyCK0J/7oa7E86DPO11X7KeVc8sbnF8O1E3n\nccpSixjXS1lyccF0Ts7bsGEd9u79DwRBgCzLEAQBY8dOwNy5C9otA5dHVMRMztFiJqBtc101neee\nVpZczv4XgtXa6FjHdB7XCx59eqEyJBK26GhIXaJh69IVcmioqhO6Fn//tJrJGZy0iTTIqem8bu3c\nUFfoHrs/B3Z/Dt+mj+XlBVvnLkqJR3eFFN0VtrpCl6KjIUV2AvT6dnx2dDNY2kTuxGCArWcsbD1j\ngTumNPqSUFKM0MorKMv8Dvr8c9Dln4f+/Pm69z/CkJfb7EPKBgOkqM6wdbFP59GOYpeio2HrHM3l\nFw1haRPdIuSgYKBXN1iir3FaWmUl9PnnlUI/fx76/PPQ5Z9zFLvx0NcQrrFaaguPUAo8uiukLg0K\nvW5al32d+6893TyWNlFH4esLW5942PrEN/91sxm6CwV1ZX4e+vPn6m+fOwdDxkkYjx9r9q5SYKBS\n4F2i69bT64sdiX0A2YNLMC7C0iYihYcHpB49IfXo2fzXbTboLhbWTen1yy/224b/5kHIzmz2rqF6\nPaTQMEjhEZAiIhq/D49s9DG8vdvwSbo/ljYROUevhxTVGVJUZ4hDh139dVmGUFzcYPlFKXPv4iKI\n5wuUrXPP5ELIymjx20h+/pDCwyFFRCrvHcVu/1wEpIhIyMHBHXJLXZY2EbmGIEAOCYEYEgI0uPTf\nO8wPpQ1OrxMqK6C7dBG6ixfr3hdCd+lS3fv6z+v/e+aaa+xA3Q9Qw8KbTO0RjlJvWPJosEmUu2Np\nE1G7kn39YPP1U86AaYnVCt2Vy1eVeePCvwjD96cgZKS3+FBSQGD91B4RAXTtAm9PX0jBIZCCgyEH\nBUMKCoYcEgIpKFjTJc/SJiJtMhohRXZSziNviSxDqCh
vMq03md7r3gy5px13a/71cOoe0ttbKfSg\nukIPaVDswcH1X6u7LQcHQ/bxbZeLmFjaROTeBAGyfwBs/gHKKw61xGKB7nIRQsQqlJ45D11JMYSS\nYuiuXGl0Wygpga6kGIYzeRCqnXsRBtlobDSty0H1hS4FBSsTfXDj4pcDAq97XZ6lTUQdh8kEKaoz\nEOYHa9dezt3HbFYKvbgYuuIrSrHbbxcX15e9/eOfLsBwKseph5Z1OsiBgZCCQ4AG/wtoCUubiKgl\nHh7KEk1kJ9icvY8oQigtVQq9boq/ZvHX3XYWS5uIyNUMBsihobCFhgJOvkxkmJMP3fFOciQicmMs\nbSIiN8LSJiJyIyxtIiI30uoPIi0WCx588EFYrVbYbDZMmjQJixcvbo9sRETURKulbTKZsGHDBnh5\necFms+H+++/HqFGjkJiY2B75iIioAaeWR7y8vAAoU7coim0aiIiIrs2p0pYkCSkpKUhOTkZycjKn\nbCIilTh1cY1Op8O2bdtQWVmJX/3qV8jLy0NsbAs7dHXvjmDp6i0Vi49nN3t48KD+zX7epcfrhKsy\nqZoHuCqT6nmaZNJEngaZNJPH7tyPmsrD42+N41tzXVdE+vr6YsiQITh48GDLpQ1Ar7t6t6trvkR8\nM8e2xfFNM6mdp2kmLeRpmEkreeyZtJSnxfuolMd+/FX3UznPVffVQJ5GH2skj7MEWW5hl3EAxcXF\nMBqN8PPzQ21tLRYuXIhFixZh9OjRLT5wUYNNz7UgLMyPmZzATM7TYi5mco5WMzmj1Um7qKgIzzzz\nDCRJgiRJmDp1aquFTUREbaPV0u7duze2bt3aHlmIiKgVvCKSiMiNsLSJiNwIS5uIyI2wtImI3AhL\nm4jIjbC0iYjcCEubiMiNsLSJiNwIS5uIyI2wtImI3AhLm4jIjbC0iYjcCEubiMiNsLSJiNwIS5uI\nyI2wtImI3AhLm4jIjbC0iYjcCEubiMiNsLSJiNwIS5uIyI2wtImI3AhLm4jIjbC0iYjcCEubiMiN\nsLSJiNwIS5uIyI2wtImI3AhLm4jIjbC0iYjcCEubiMiNsLSJiNwIS5uIyI2wtImI3AhLm4jIjbC0\niYjciKG1AwoLC5GamorLly9Dr9dj9uzZmDdvXntkIyKiJlotbb1ej2effRbx8fGoqqrC3XffjeTk\nZMTExLRHPiIiaqDV5ZGwsDDEx8cDAHx8fBATE4NLly61eTAiIrrada1p5+fn47vvvkNiYmJb5SEi\noha0ujxiV1VVhaVLl2LlypXw8fFp8dju3QFJuvqY48ermj1+0KDmH8+Vx+t0V2dSMw+AqzKpnadp\nJi3kaZhJK3nszp1r9tOq5eHxt8bxrXGqtEVRxNKlS3HXXXdhwoQJTj2wTnf1EB8W5neNY5t/DFcf\n3zST2nmaZtJCnoaZtJLHnklLeVq6j1p57Mc3vZ/aeZre1kKehh9rJY+zBFmW5dYOSk1NRVBQEJ59\n9lmnH7ioqOKGArWVsDA/ZnICMzlPi7mYyTlazeSMVte0jx8/ju3bt+Obb75BSkoKZs6ciQMHDtx0\nQCIiun6tLo8MGjQIp06dao8sRETUCl4RSUTkRljaRERuhKVNRORGWNpERG6EpU1E5EacviKSiIiu\nnyQBZWVASYmA4uL6t5ISwfG5khIBn37q3OOxtImInGSxoFHRNizgpkWsfAyUlgqQJMFlGVjaRNTh\nyDJQWYmrCvdaU7D981VVzpWvXi8jKEhGaKiMuDgJQUEygoOVt6Ag1L2XHe+DgmQAvk49NkubiG4Z\nNTVAUZGAixcFXLqkw6VLyu2iIuVj5fMCLl8GLBbnLhv39lZKtUcPqVHR1pdw4/INCZHh5wcIrhuu\nG2FpE5GmSZIyEV+6JDhK2F7IDd8uXtShvLzlpvTwkBERIWPgQMDfX2yxfO23vbza6Yk6iaVNRKqo\nqUGjwm1cwvVTcVG
RAFFsuYxDQiR07izhtttkhIfLiIiQEB5uvy3X3Zbg769MwMqGUTXt9Exdi6VN\nRC4likBhoYD8fB3OnxdQUQGcPevRYEpWStn5qVhCeLjUqIAblnJYmAyjsZ2enAawtInoutTUABcu\nCDh/Xof8fB3y8+23laK+cEGAzda0kE2OW9eaiusnYuVzbbku7M5Y2kTUSFkZGpVw49sCLl9u/po8\nQZARGSnjZz+T0KWL/U1GfLwnPD2rEBGhnE3RkabitsDSJupAZFlZR25Ywsq0XH+7oqL58dZolNG5\ns4z4eBFdusjo0kVCdLTkuB0VJcNkuvp+YWGeKCqS2viZdRwsbaJbiNUKnDvXtJDrlzIKCgSYzc2X\nso+P3KiEu3SxfywhOlpZtmjppdeofbC0idyMLAM//SQgL0+H3FwdzpzRIS9PeV9QAEhS8xdphIZK\niI9X1pPrC7m+mAMDuYbsDljaRBpVXQ2cOaOUccNyzsvTobr66naNiJCQlARERFgbTczR0TI6d5bg\n7a3CkyCXY2kTqcg+Nefm1heyfWrOz796LcLTU0bPnhJiYxu/xcQoZ1so5x/XqvBMqL2wtInagX1q\nbljK9um5uak5MlLCyJEiYmIal3OXLlxX7uhY2kQuIsvK+csNJ2b7W0HBtafmuDjJUc72277O7R1E\nHRBLm+g6WSzA99/rcOkScOKEqdH03NzU3KmTMjU3XMqIi5PQuTOnZrp+LG2iFtTUADk5OmRm6pGV\npbw/dUoHq9Vezh4AAC+va681c2omV2JpE9WprASys/XIzKwv6dOndY0uyfbwkJGQIKF/fxsGDzYh\nIqIasbGcmqn9sLSpQyopAbKylIJW3utx5kzj1vX2ljF4sA2JiRISEpT3cXGS4zLssDATiopsKqSn\njoylTbe8S5cEx9KGvaTPnWtc0AEBMkaOFJGQICEx0YbERBt69uT0TNrD0qZbhv3sjYblnJmpQ2Fh\n4+YNDZUwbpyIxESbo6S7dpV5NSC5BZY2uSVZBn74QXAUs30N+sqVxgUdFSVh8mRrgwlaQmQkC5rc\nF0ubNM9mA06f1jUq56ws/VWb6HfrJiEpyepYg05IkBAWJquUmqhtsLRJc8xmID1dj6+/1iMtTY/j\nx4Hqah/H1wVBeYXrCRPqp+f+/W0IDFQxNFE7YWmT6mprgRMnlIJOS9Pj2DE9amvrp+h+/YCEBKtj\nDbpfPxvPfaYOi6VN7a6mBjh+vL6kjx/XO/Z4FgQZfftKSE62YfhwG4YPF9G7NzdBIrJjaVObq64G\njh2rL+lQSYIFAAANpklEQVQTJ/SwWOpLun9/CUlJNiQl2TBsmIigIJUDE2kYS5tcrqrq6pK2X/at\n09WXdHKyiKFDuRZNdD1Y2nTTKiuBo0ftJW1AeroOolhf0omJ9klaKemAAJUDE7kxljZdt8pK4MgR\n+9kdBmRk1Je0Xi9jwAAJw4crk/SQITb4+6scmOgW0mppr1y5Evv27UNISAi2b9/eHplIYyoqgMOH\n6yfpjIz6TZT0ehkDB0pIShKRnGzDkCE8s4OoLbVa2nfffTfmzp2L1NTU9shDGlBeDnzzjVLQaWnK\nFYeSpJS0wSDjttskJCeLGD6cJU3U3lot7cGDB6OgoKA9spBKZBk4eVKHXbsMOHgQSE/3dZS00ajs\ndGc/u+P2223w8WnlAYmozXBNu4OyWoFDh/TYtcuAXbsMuHBB2bPDaARuv92G5GSlpAcPtvFVvIk0\npM1KOyzMr60e+oZ19EzV1cAXXwBbtwLbtyt7SgNAYCAwdy6QkgJMmgT4+BigtX/Ptfh7B2gzFzM5\nR4uZnNFmfzOLiira6qFvSFiYX4fMVFICfPGFATt3GrBvnwE1NcqyR2SkhAULREydKiIpyebY2N/H\np2P+Ot0ILeZiJudoNZMznCptWeZOae7kwgUBu3YpRZ2Wpnec6REba8PUqUpRDxwoc
YN/IjfUammv\nWLEChw8fRmlpKcaMGYMlS5Zg1qxZ7ZGNrkNurg47dypFnZ6ud3z+ttuUop4yRUSvXpKKCYnIFVot\n7bVr17ZHDrpOkqSc8WEv6rw8paj1euVls+xFHRXF/yUR3Uq09dMmapHVCqSl1Z/x8dNPyvqGl5eM\nKVOsmDpVxB13cMMlolsZS1vjqquBvXuVaXr3bgNKS5X16cBAGffeqxT1mDEiT8sj6iBY2hpUUgJ8\n/rlS1Pv315/xERUlYdYspaiHDas/44OIOg6WtkYUFAiOZY+GZ3z06lX/g8SBAyW+IC1RB8fSVtGp\nU8C775qwc6cBJ0/Wn/Hxs5/ZT82zIjaWP0gkonos7XZWWgp89JER775rxKlTAOABg0HGqFH1Z3x0\n6sSiJqLmsbTbgSwDR4/qsGGDCZ9+akBtrQCjUcbMmcCECTWYOFHkq7cQkVNY2m2orEyZqjduNOLU\nKWX5o0cPCXPnmjFnjoi+fX1RVCSqnJKI3AlL28VkGTh2TIeNG0345BPlzA+jUcZdd1kxd64VI0bY\nePk4Ed0wlraLlJUBmzcbsWFD/VTdrZuEuXMtuP9+K8LCuE5NRDePpX0TZBk4cUJZq962TZmqDQYZ\nM2YoU/XIkZyqici1WNo3oLy8fqrOyWk8Vd93nxXh4ZyqiahtsLSdJMtAeroOGzYYsW2bEdXVylQ9\nfboV8+ZZMWoUp2oianss7VZUVChT9caNRmRnK1N11671U3VEBKdqImo/LO1m2F/oduNGI7ZsUaZq\nvV7GtGnKVD16NKdqIlIHS7uBysr6qTorq36q/vnPlTNAOFUTkdpY2gAyMpS16o8/rp+qp05Vpuox\nYzhVE5F2dNjSrqwEtmxRzgDJzFSm6i5dJCxdasEDD1gRGcmpmoi0p8OVdmamDu+8o6xVV1UpU/Xk\nyVbMn69M1Xp9649BRKSWDlHalZXAtm3A3//u7dgCtXNnCYsXK1M1d9UjIndxS5d2eTnwxhsmvP66\nCeXlgE6nw+TJylr12LGcqonI/dySpV1ZCbz1lgl/+5sJpaUCQkIkrF4tICWliq9OTkRu7ZYq7epq\nYN06I/72NxOuXNEhMFDGc8+ZsXChBT16+KGoiIVNRO7tlijt2lpgwwYj/vxnE4qKdPD3l5Gaasai\nRRb4+6udjojIddy6tM1m4N13lbIuLNTBx0fG8uVmPPaYha8EQ0S3JLcsbasVeP99I/70JxMKCnTw\n9paxZIkZv/qVFSEhXAIholuXW5W2KAIffWTA2rUeOHdOB09PGY89ZsGSJRa+yAARdQhuUdo2G7Bl\niwF//KMHzp7VwWSS8cgjFixbZuF+IETUoWi6tCUJ+PRTA1591YTcXD2MRhkPPWTB449beOoeEXVI\nmixtSQJ27lTK+tQpPfR6GT//uVLWXbuyrImo49JUacsy8MUXerz8sgeys/XQ6WTMmWPF8uVm9OjB\nsiYi0kRpyzKwd69S1unpegiCjLvvtuLJJ82IjWVZExHZqVrasgwcPKiU9dGjykYgM2ZY8eSTFvTp\nI6kZjYhIk1Qr7UOH9HjpJRMOHVIiTJlixVNPWdC/P8uaiOha2r20jx7V4aWXPHDwoPKtJ04UkZpq\nxoABLGsiotY49UJaBw4cwOTJkzFp0iS88cYbN/SNTpzQ4b77vDBtmg8OHjRgzBgRu3ZV4b33aljY\nREROanXSliQJL7zwAtavX4/w8HDcc889GD9+PGJiYpz6BllZOrzyigc+/1z5ViNGiEhNtWDYMNvN\nJSci6oBaLe3MzEx069YNnTt3BgBMmzYNe/bsabW0c3J0ePVVE3bsMAIAhg4V8fTTFowYwbImIrpR\nrZb2xYsX0alTJ8fHERERyMrKavE+990HfPihN2RZwKBBNjz9tBmjR9sgCDcfmIioI2u1tGX5+s+T\n3rQJGDBAwtNPmzF+PMuaiMhVWi3tyMhIXLhww
fHxxYsXER4e3uJ9lJ7XA/C+yXiuFRbmp3aEqzCT\nc7SYCdBmLmZyjhYzOaPVs0cSEhJw7tw5FBQUwGKxYMeOHRg/fnx7ZCMioiZanbT1ej1WrVqFhx9+\nGLIs45577nH6zBEiInItQb6RRWsiIlKFUxfXEBGRNrC0iYjcCEubiMiNuHTDqAMHDmDNmjWQZRmz\nZs3CokWLXPnwN2TlypXYt28fQkJCsH37drXjAAAKCwuRmpqKy5cvQ6/XY/bs2Zg3b56qmSwWCx58\n8EFYrVbYbDZMmjQJixcvVjWTnSRJmDVrFiIiIvD666+rHQfjxo2Dr68vdDodDAYDNm/erHYkVFRU\n4LnnnkNubi50Oh3WrFmDAQMGqJrp7NmzeOKJJyAIAmRZxvnz57Fs2TLV/6yvX78emzdvhiAI6NWr\nF1588UWYTCZVM73zzjuOP0et9oHsIjabTZ4wYYKcn58vWywWecaMGXJeXp6rHv6GHT16VM7JyZGn\nT5+udhSHS5cuyTk5ObIsy3JlZaV8xx13aOLXqrq6WpZlWRZFUZ49e7ackZGhciLF22+/La9YsUJ+\n9NFH1Y4iy7Isjxs3Ti4tLVU7RiNPP/20vHnzZlmWZdlqtcoVFRUqJ2rMZrPJycnJ8oULF1TNUVhY\nKI8bN042m82yLMvysmXL5K1bt6qa6fTp0/L06dNls9ksi6IoP/TQQ/KPP/54zeNdtjzScI8So9Ho\n2KNEbYMHD4a/v7/aMRoJCwtDfHw8AMDHxwcxMTG4dOmSyqkALy8vAMrULYqiymkUhYWF2L9/P2bP\nnq12FAdZliFJ2tmZsrKyEseOHcOsWbMAAAaDAb6+viqnaiwtLQ1du3ZttCWGWiRJQk1NDURRRG1t\nbasXC7a1M2fOYODAgTCZTNDr9bj99tuxe/fuax7vstJubo8SLRSR1uXn5+O7775DYmKi2lEgSRJS\nUlKQnJyM5ORkTWRas2YNUlNTIWhoLwRBELBw4ULMmjULH374odpxkJ+fj6CgIDz77LOYOXMmVq1a\nhdraWrVjNbJz505MmzZN7RiIiIjAggULMGbMGIwaNQp+fn5ISkpSNVNcXByOHj2KsrIy1NTU4MCB\nA/jpp5+uebzLSlvm6d7XraqqCkuXLsXKlSvh4+OjdhzodDps27YNBw4cQEZGBvLy8lTNs2/fPoSG\nhiI+Pl5Tf74++OADbNmyBW+++Sbee+89HDt2TNU8oigiJycHDzzwALZu3QpPT88b3ve+LVitVnz5\n5ZeYMmWK2lFQXl6OPXv2YO/evTh48CCqq6tV/1lXTEwMfvGLX2DBggVYtGgR+vTpA4Ph2j9udFlp\n38geJR2ZKIpYunQp7rrrLkyYMEHtOI34+vpiyJAhOHjwoKo5Tpw4gS+//BLjx4/HihUrcPjwYaSm\npqqaCVCWtwAgODgYEydObHXXy7YWGRmJyMhIJCQkAAAmTZqEnJwcVTM1dODAAfTr1w/BwcFqR0Fa\nWhqio6MRGBgIvV6PiRMnIj09Xe1YmDVrFrZs2YKNGzciICAA3bp1u+axLittLe9RoqUpzW7lypWI\njY3F/Pnz1Y4CACguLkZFRQUAoLa2FocOHULPnj1VzbR8+XLs27cPe/bswWuvvYahQ4filVdeUTVT\nTU0NqqqqAADV1dX46quvEBcXp2qm0NBQdOrUCWfPngUAfPPNN5raamLHjh2YPn262jEAAFFRUcjI\nyIDZbIYsy5r5tSouLgYAXLhwAbt3727x18tlp/xpdY8S+4RWWlqKMWPGYMmSJY4f2Kjl+PHj2L59\nO3r16oWUlBQIgoAnnngCo0aNUi1TUVERnnnmGUiSBEmSMHXqVIwePVq1PFp1+fJlLF68GIIgwGaz\n4c4778SIESPUjoXnn38eTz75JERRRHR0NF588UW1IwFQBoC0tDT87ne/UzsKACAxMRGTJk1CSkoK\nDAYD+vbti
3vvvVftWFiyZAnKyspgMBjwm9/8Bn5+196BkHuPEBG5EV4RSUTkRljaRERuhKVNRORG\nWNpERG6EpU1E5EZY2kREboSlTUTkRljaRERu5P8D+7Wym3BFpegAAAAASUVORK5CYII=\n", + "text/plain": [ + "\u003cmatplotlib.figure.Figure at 0x7f5be4b8ec50\u003e" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "model = Model()\n", + "\n", + "# Collect the history of W-values and b-values to plot later\n", + "Ws, bs = [], []\n", + "epochs = range(10)\n", + "for epoch in epochs:\n", + " Ws.append(model.W.numpy())\n", + " bs.append(model.b.numpy())\n", + " current_loss = loss(model(inputs), outputs)\n", + "\n", + " train(model, inputs, outputs, learning_rate=0.1)\n", + " print('Epoch %2d: W=%1.2f b=%1.2f, loss=%2.5f' %\n", + " (epoch, Ws[-1], bs[-1], current_loss))\n", + "\n", + "# Let's plot it all\n", + "plt.plot(epochs, Ws, 'r',\n", + " epochs, bs, 'b')\n", + "plt.plot([TRUE_W] * len(epochs), 'r--',\n", + " [TRUE_b] * len(epochs), 'b--')\n", + "plt.legend(['W', 'b', 'true W', 'true_b'])\n", + "plt.show()\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vPnIVuaSJwWz" + }, + "source": [ + "## Next Steps\n", + "\n", + "In this tutorial we covered `Variable`s and built and trained a simple linear model using the TensorFlow primitives discussed so far.\n", + "\n", + "In theory, this is pretty much all you need to use TensorFlow for your machine learning research.\n", + "In practice, particularly for neural networks, the higher level APIs like `tf.keras` will be much more convenient since it provides higher level building blocks (called \"layers\"), utilities to save and restore state, a suite of loss functions, a suite of optimization strategies etc. \n", + "\n", + "The [next tutorial](TODO) will cover these higher level APIs." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "default_view": {}, + "name": "Training Models", + "provenance": [], + "version": "0.3.2", + "views": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} -- GitLab From 3ef890dea820553b890980ab097189f96bce3140 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 25 May 2018 06:56:38 -0700 Subject: [PATCH 147/902] Minor clarification to model_to_estimator() doc string PiperOrigin-RevId: 198044106 --- tensorflow/python/estimator/keras.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 7bcf3d84bb..c80af08fba 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -458,10 +458,14 @@ def model_to_estimator(keras_model=None, @{$programmers_guide/estimators$creating_estimators_from_keras_models}. Args: - keras_model: Keras model in memory. - keras_model_path: Directory to a keras model on disk. + keras_model: A compiled Keras model object. This argument is mutually + exclusive with `keras_model_path`. + keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 + format, which can be generated with the `save()` method of a Keras model. + This argument is mutually exclusive with `keras_model`. custom_objects: Dictionary for custom objects. - model_dir: Directory to save Estimator model parameters, graph and etc. + model_dir: Directory to save Estimator model parameters, graph, summary + files for TensorBoard, etc. config: Configuration object. Returns: @@ -473,7 +477,7 @@ def model_to_estimator(keras_model=None, ValueError: if the keras_model_path is a GCS URI. ValueError: if keras_model has not been compiled. 
""" - if (not keras_model) and (not keras_model_path): + if not (keras_model or keras_model_path): raise ValueError( 'Either `keras_model` or `keras_model_path` needs to be provided.') if keras_model and keras_model_path: @@ -495,8 +499,9 @@ def model_to_estimator(keras_model=None, if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer: raise ValueError( - 'The given keras model has not been compiled yet. Please compile first ' - 'before calling `model_to_estimator`.') + 'The given keras model has not been compiled yet. ' + 'Please compile the model with `model.compile()` ' + 'before calling `model_to_estimator()`.') if isinstance(config, dict): config = run_config_lib.RunConfig(**config) -- GitLab From ff0d1e4592c5029b35808d895e8e857de751005f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 08:55:24 -0700 Subject: [PATCH 148/902] Code simplification in dump_graphviz.cc: Just output all arrays, before writing edges, so we don't need to keep track of which arrays we've already output. PiperOrigin-RevId: 198055327 --- tensorflow/contrib/lite/toco/dump_graphviz.cc | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc index 6e5927295f..3aeebb14f1 100644 --- a/tensorflow/contrib/lite/toco/dump_graphviz.cc +++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc @@ -16,8 +16,6 @@ limitations under the License. #include #include -#include -#include #include #include "absl/strings/str_replace.h" @@ -304,7 +302,15 @@ void DumpGraphviz(const Model& model, string* output_file_contents) { constexpr char kRNNBackEdgeFormat[] = "\t \"%s\" -> \"%s\" [color=\"#0F9D58\"];\n"; - std::set already_added_arrays; + for (const auto& array_kv : model.GetArrayMap()) { + // Add node for array. 
+ const string& array_name = array_kv.first; + const auto& array_properties = GetPropertiesForArray(model, array_name); + AppendF(output_file_contents, kNodeFormat, array_name, + array_properties.label, "octagon", + array_properties.color.FillColorString().c_str(), + array_properties.color.TextColorString().c_str()); + } for (int op_index = 0; op_index < model.operators.size(); op_index++) { const Operator& op = *model.operators[op_index]; // Add node for operator. @@ -313,20 +319,13 @@ void DumpGraphviz(const Model& model, string* output_file_contents) { AppendF(output_file_contents, kNodeFormat, operator_id, op_properties.label, "box", op_properties.color.FillColorString().c_str(), op_properties.color.TextColorString().c_str()); - // Add nodes and edges for all inputs of the operator. + // Add edges for all inputs of the operator. for (const auto& input : op.inputs) { if (!model.HasArray(input)) { // Arrays should _always_ exist. Except, perhaps, during development. continue; } auto array_properties = GetPropertiesForArray(model, input); - if (!already_added_arrays.count(input)) { - AppendF(output_file_contents, kNodeFormat, input, - array_properties.label, "octagon", - array_properties.color.FillColorString().c_str(), - array_properties.color.TextColorString().c_str()); - } - // Draw lines that transport more data thicker (Otherwise, where would the // data fit? right?). float line_width = @@ -342,22 +341,14 @@ void DumpGraphviz(const Model& model, string* output_file_contents) { } AppendF(output_file_contents, kEdgeFormat, input, operator_id, line_width, weight); - already_added_arrays.insert(input); } - // Add nodes and edges for all outputs of the operator. + // Add edges for all outputs of the operator. for (const auto& output : op.outputs) { if (!model.HasArray(output)) { // Arrays should _always_ exist. Except, perhaps, during development. 
continue; } auto array_properties = GetPropertiesForArray(model, output); - if (!already_added_arrays.count(output)) { - AppendF(output_file_contents, kNodeFormat, output, - array_properties.label, "octagon", - array_properties.color.FillColorString().c_str(), - array_properties.color.TextColorString().c_str()); - } - // See comments above regarding weight and line_width calculations. float line_width = std::max(0.5f, array_properties.log2_buffer_size / 3.0f); @@ -367,7 +358,6 @@ void DumpGraphviz(const Model& model, string* output_file_contents) { } AppendF(output_file_contents, kEdgeFormat, operator_id, output, line_width, weight); - already_added_arrays.insert(output); } } -- GitLab From f5121d0b7be5b638148e72c67a28543c35b3501c Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 25 May 2018 10:45:27 -0700 Subject: [PATCH 149/902] Link to tf.estimator docs for premade estimators. PiperOrigin-RevId: 198070157 --- .../docs_src/programmers_guide/premade_estimators.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/premade_estimators.md b/tensorflow/docs_src/programmers_guide/premade_estimators.md index e5eca44297..f6dd75eaca 100644 --- a/tensorflow/docs_src/programmers_guide/premade_estimators.md +++ b/tensorflow/docs_src/programmers_guide/premade_estimators.md @@ -177,13 +177,11 @@ other features so you can concentrate on your model. For more details see An Estimator is any class derived from @{tf.estimator.Estimator}. TensorFlow provides a collection of -[pre-made Estimators](https://developers.google.com/machine-learning/glossary/#pre-made_Estimator) +@{tf.estimator$pre-made Estimators} (for example, `LinearRegressor`) to implement common ML algorithms. Beyond those, you may write your own -[custom Estimators](https://developers.google.com/machine-learning/glossary/#custom_Estimator). -We recommend using pre-made Estimators when just getting started with -TensorFlow. 
After gaining expertise with the pre-made Estimators, we recommend -optimizing your model by creating your own custom Estimators. +@{$custom_estimators$custom Estimators}. +We recommend using pre-made Estimators when just getting started. To write a TensorFlow program based on pre-made Estimators, you must perform the following tasks: -- GitLab From 9044538f0a4f473bbbaa2370f0581f25a1b70783 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 10:54:38 -0700 Subject: [PATCH 150/902] DepthwiseConv optimizations. PiperOrigin-RevId: 198071709 --- .../depthwiseconv_uint8_3x3_filter.h | 6839 ++++++----------- 1 file changed, 2395 insertions(+), 4444 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 9b1a45ebdf..51fbd54906 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -25,3846 +25,2187 @@ namespace optimized_ops { #ifdef __aarch64__ -inline void preload_l1_keep(const uint8* ptr) { -#ifdef GEMMLOWP_ARM_64 - asm volatile("prfm pldl1keep, [%[ptr]]\n" ::[ptr] "r"(ptr) :); -#else - gemmlowp::Prefetch(ptr); -#endif -} - -// Implementation of quantized DepthwiseConv for 3x3 filters. - -// Below are helper structs to remove the use of arrays. -// There is an llvm bug that causes significant slowdown when using arrays for -// NEON intrinsics vector data types. -// See: https://bugs.llvm.org/show_bug.cgi?id=34945 - -struct Int32x8 { - int32x4_t low, high; -}; - -struct Filter3x3x8 { - int16x8_t f0, f1, f2, f3, f4, f5, f6, f7, f8; -}; - -// Loads 3x3 filter of depth 8 and adds filter offsets. 
-inline Filter3x3x8 Load3x3Filter(const uint8* filter_ptr, int32 filter_offset, - int output_depth) { - Filter3x3x8 filter; - - uint8x8_t temp_u8_0, temp_u8_1, temp_u8_2, temp_u8_3, temp_u8_4, temp_u8_5, - temp_u8_6, temp_u8_7, temp_u8_8; - int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); - - temp_u8_0 = vld1_u8(filter_ptr + 0 * output_depth); - temp_u8_1 = vld1_u8(filter_ptr + 1 * output_depth); - temp_u8_2 = vld1_u8(filter_ptr + 2 * output_depth); - temp_u8_3 = vld1_u8(filter_ptr + 3 * output_depth); - temp_u8_4 = vld1_u8(filter_ptr + 4 * output_depth); - temp_u8_5 = vld1_u8(filter_ptr + 5 * output_depth); - temp_u8_6 = vld1_u8(filter_ptr + 6 * output_depth); - temp_u8_7 = vld1_u8(filter_ptr + 7 * output_depth); - temp_u8_8 = vld1_u8(filter_ptr + 8 * output_depth); - - filter.f0 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_0)); - filter.f1 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_1)); - filter.f2 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_2)); - filter.f3 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_3)); - filter.f4 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_4)); - filter.f5 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_5)); - filter.f6 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_6)); - filter.f7 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_7)); - filter.f8 = vreinterpretq_s16_u16(vmovl_u8(temp_u8_8)); - - filter.f0 = vaddq_s16(filter.f0, filter_offset_vec); - filter.f1 = vaddq_s16(filter.f1, filter_offset_vec); - filter.f2 = vaddq_s16(filter.f2, filter_offset_vec); - filter.f3 = vaddq_s16(filter.f3, filter_offset_vec); - filter.f4 = vaddq_s16(filter.f4, filter_offset_vec); - filter.f5 = vaddq_s16(filter.f5, filter_offset_vec); - filter.f6 = vaddq_s16(filter.f6, filter_offset_vec); - filter.f7 = vaddq_s16(filter.f7, filter_offset_vec); - filter.f8 = vaddq_s16(filter.f8, filter_offset_vec); - - return filter; -} - -// Applies activation, offset and downquantize on a set of accumulator -// registers that correspond to a 2x2 output of depth 8. -// Stores results to output. 
-inline void DownquantizeAndStore2x2Output( - Int32x8 acc_0, Int32x8 acc_1, Int32x8 acc_2, Int32x8 acc_3, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - using gemmlowp::RoundingDivideByPOT; - const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - const int32x4_t output_activation_min_vec = - vdupq_n_s32(output_activation_min); - const int32x4_t output_activation_max_vec = - vdupq_n_s32(output_activation_max); - - // Fixed-point multiplication. - acc_0.low = vqrdmulhq_n_s32(acc_0.low, output_multiplier); - acc_0.high = vqrdmulhq_n_s32(acc_0.high, output_multiplier); - acc_1.low = vqrdmulhq_n_s32(acc_1.low, output_multiplier); - acc_1.high = vqrdmulhq_n_s32(acc_1.high, output_multiplier); - acc_2.low = vqrdmulhq_n_s32(acc_2.low, output_multiplier); - acc_2.high = vqrdmulhq_n_s32(acc_2.high, output_multiplier); - acc_3.low = vqrdmulhq_n_s32(acc_3.low, output_multiplier); - acc_3.high = vqrdmulhq_n_s32(acc_3.high, output_multiplier); - - acc_0.low = RoundingDivideByPOT(acc_0.low, output_shift); - acc_0.high = RoundingDivideByPOT(acc_0.high, output_shift); - acc_1.low = RoundingDivideByPOT(acc_1.low, output_shift); - acc_1.high = RoundingDivideByPOT(acc_1.high, output_shift); - acc_2.low = RoundingDivideByPOT(acc_2.low, output_shift); - acc_2.high = RoundingDivideByPOT(acc_2.high, output_shift); - acc_3.low = RoundingDivideByPOT(acc_3.low, output_shift); - acc_3.high = RoundingDivideByPOT(acc_3.high, output_shift); - - // Add the output offset. 
- acc_0.low = vaddq_s32(acc_0.low, output_offset_vec); - acc_0.high = vaddq_s32(acc_0.high, output_offset_vec); - acc_1.low = vaddq_s32(acc_1.low, output_offset_vec); - acc_1.high = vaddq_s32(acc_1.high, output_offset_vec); - acc_2.low = vaddq_s32(acc_2.low, output_offset_vec); - acc_2.high = vaddq_s32(acc_2.high, output_offset_vec); - acc_3.low = vaddq_s32(acc_3.low, output_offset_vec); - acc_3.high = vaddq_s32(acc_3.high, output_offset_vec); - - // Apply the activation function. - acc_0.low = vmaxq_s32(acc_0.low, output_activation_min_vec); - acc_0.high = vmaxq_s32(acc_0.high, output_activation_min_vec); - acc_1.low = vmaxq_s32(acc_1.low, output_activation_min_vec); - acc_1.high = vmaxq_s32(acc_1.high, output_activation_min_vec); - acc_2.low = vmaxq_s32(acc_2.low, output_activation_min_vec); - acc_2.high = vmaxq_s32(acc_2.high, output_activation_min_vec); - acc_3.low = vmaxq_s32(acc_3.low, output_activation_min_vec); - acc_3.high = vmaxq_s32(acc_3.high, output_activation_min_vec); - - acc_0.low = vminq_s32(acc_0.low, output_activation_max_vec); - acc_0.high = vminq_s32(acc_0.high, output_activation_max_vec); - acc_1.low = vminq_s32(acc_1.low, output_activation_max_vec); - acc_1.high = vminq_s32(acc_1.high, output_activation_max_vec); - acc_2.low = vminq_s32(acc_2.low, output_activation_max_vec); - acc_2.high = vminq_s32(acc_2.high, output_activation_max_vec); - acc_3.low = vminq_s32(acc_3.low, output_activation_max_vec); - acc_3.high = vminq_s32(acc_3.high, output_activation_max_vec); - - // Saturating cast to uint8 and store to destination. 
- int16x4_t acc_0_low_s16 = vqmovn_s32(acc_0.low); - int16x4_t acc_0_high_s16 = vqmovn_s32(acc_0.high); - int16x4_t acc_1_low_s16 = vqmovn_s32(acc_1.low); - int16x4_t acc_1_high_s16 = vqmovn_s32(acc_1.high); - int16x4_t acc_2_low_s16 = vqmovn_s32(acc_2.low); - int16x4_t acc_2_high_s16 = vqmovn_s32(acc_2.high); - int16x4_t acc_3_low_s16 = vqmovn_s32(acc_3.low); - int16x4_t acc_3_high_s16 = vqmovn_s32(acc_3.high); - - int16x8_t res_0_s16 = vcombine_s16(acc_0_low_s16, acc_0_high_s16); - int16x8_t res_1_s16 = vcombine_s16(acc_1_low_s16, acc_1_high_s16); - int16x8_t res_2_s16 = vcombine_s16(acc_2_low_s16, acc_2_high_s16); - int16x8_t res_3_s16 = vcombine_s16(acc_3_low_s16, acc_3_high_s16); - - uint8x8_t res_0_u8 = vqmovun_s16(res_0_s16); - uint8x8_t res_1_u8 = vqmovun_s16(res_1_s16); - uint8x8_t res_2_u8 = vqmovun_s16(res_2_s16); - uint8x8_t res_3_u8 = vqmovun_s16(res_3_s16); - - vst1_u8(output_ptr, res_0_u8); - vst1_u8(output_ptr + output_depth, res_1_u8); - vst1_u8(output_ptr + output_depth * output_width, res_2_u8); - vst1_u8(output_ptr + output_depth * output_width + output_depth, res_3_u8); -} - -inline void DownquantizeAndStore(Int32x8 acc, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, - uint8* output_ptr) { - using gemmlowp::RoundingDivideByPOT; - const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - const int32x4_t output_activation_min_vec = - vdupq_n_s32(output_activation_min); - const int32x4_t output_activation_max_vec = - vdupq_n_s32(output_activation_max); - - acc.low = vqrdmulhq_n_s32(acc.low, output_multiplier); - acc.high = vqrdmulhq_n_s32(acc.high, output_multiplier); - - acc.low = RoundingDivideByPOT(acc.low, output_shift); - acc.high = RoundingDivideByPOT(acc.high, output_shift); - - acc.low = vaddq_s32(acc.low, output_offset_vec); - acc.high = vaddq_s32(acc.high, output_offset_vec); - - acc.low = vmaxq_s32(acc.low, output_activation_min_vec); - acc.high = 
vmaxq_s32(acc.high, output_activation_min_vec); - - acc.low = vminq_s32(acc.low, output_activation_max_vec); - acc.high = vminq_s32(acc.high, output_activation_max_vec); - - int16x4_t acc_low_s16 = vqmovn_s32(acc.low); - int16x4_t acc_high_s16 = vqmovn_s32(acc.high); - - int16x8_t res_s16 = vcombine_s16(acc_low_s16, acc_high_s16); - uint8x8_t res_u8 = vqmovun_s16(res_s16); - vst1_u8(output_ptr, res_u8); -} - -inline void DownquantizeAndStore2Output( - Int32x8 acc_0, Int32x8 acc_1, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_ptr_offset) { - { - using gemmlowp::RoundingDivideByPOT; - const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); - const int32x4_t output_activation_min_vec = - vdupq_n_s32(output_activation_min); - const int32x4_t output_activation_max_vec = - vdupq_n_s32(output_activation_max); - - // Fixed-point multiplication. - acc_0.low = vqrdmulhq_n_s32(acc_0.low, output_multiplier); - acc_0.high = vqrdmulhq_n_s32(acc_0.high, output_multiplier); - acc_1.low = vqrdmulhq_n_s32(acc_1.low, output_multiplier); - acc_1.high = vqrdmulhq_n_s32(acc_1.high, output_multiplier); - - acc_0.low = RoundingDivideByPOT(acc_0.low, output_shift); - acc_0.high = RoundingDivideByPOT(acc_0.high, output_shift); - acc_1.low = RoundingDivideByPOT(acc_1.low, output_shift); - acc_1.high = RoundingDivideByPOT(acc_1.high, output_shift); - - // Add the output offset. - acc_0.low = vaddq_s32(acc_0.low, output_offset_vec); - acc_0.high = vaddq_s32(acc_0.high, output_offset_vec); - acc_1.low = vaddq_s32(acc_1.low, output_offset_vec); - acc_1.high = vaddq_s32(acc_1.high, output_offset_vec); - - // Apply the activation function. 
- acc_0.low = vmaxq_s32(acc_0.low, output_activation_min_vec); - acc_0.high = vmaxq_s32(acc_0.high, output_activation_min_vec); - acc_1.low = vmaxq_s32(acc_1.low, output_activation_min_vec); - acc_1.high = vmaxq_s32(acc_1.high, output_activation_min_vec); - - acc_0.low = vminq_s32(acc_0.low, output_activation_max_vec); - acc_0.high = vminq_s32(acc_0.high, output_activation_max_vec); - acc_1.low = vminq_s32(acc_1.low, output_activation_max_vec); - acc_1.high = vminq_s32(acc_1.high, output_activation_max_vec); - } - - // Saturating cast to uint8 and store to destination. - int16x8_t res_0_s16; - { - int16x4_t acc_0_low_s16 = vqmovn_s32(acc_0.low); - int16x4_t acc_0_high_s16 = vqmovn_s32(acc_0.high); - res_0_s16 = vcombine_s16(acc_0_low_s16, acc_0_high_s16); - } - - int16x8_t res_1_s16; - { - int16x4_t acc_1_low_s16 = vqmovn_s32(acc_1.low); - int16x4_t acc_1_high_s16 = vqmovn_s32(acc_1.high); - res_1_s16 = vcombine_s16(acc_1_low_s16, acc_1_high_s16); - } - - uint8x8_t res_0_u8 = vqmovun_s16(res_0_s16); - uint8x8_t res_1_u8 = vqmovun_s16(res_1_s16); - vst1_u8(output_ptr, res_0_u8); - vst1_u8(output_ptr + output_ptr_offset, res_1_u8); -} - -// Performs multiply accumulate on 3 inputs of depth 8. -inline Int32x8 MultiplyAccumulateRow(Int32x8 accum, int16x8_t f0, int16x8_t f1, - int16x8_t f2, int16x8_t i0, int16x8_t i1, - int16x8_t i2) { - accum.low = vmlal_s16(accum.low, vget_low_s16(f0), vget_low_s16(i0)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f0), vget_high_s16(i0)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f1), vget_low_s16(i1)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f1), vget_high_s16(i1)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f2), vget_low_s16(i2)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f2), vget_high_s16(i2)); - return accum; -} - -// Performs multiply accumulate on 3 inputs of depth 8. 
-inline Int32x8 MultiplyAccumulate3x3Filter(const Filter3x3x8& f, int16x8_t i0, - int16x8_t i1, int16x8_t i2, - int16x8_t i3, int16x8_t i4, - int16x8_t i5, int16x8_t i6, - int16x8_t i7, int16x8_t i8, - Int32x8 accum) { - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f0), vget_low_s16(i0)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f0), vget_high_s16(i0)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f1), vget_low_s16(i1)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f1), vget_high_s16(i1)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f2), vget_low_s16(i2)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f2), vget_high_s16(i2)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f3), vget_low_s16(i3)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f3), vget_high_s16(i3)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f4), vget_low_s16(i4)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f4), vget_high_s16(i4)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f5), vget_low_s16(i5)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f5), vget_high_s16(i5)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f6), vget_low_s16(i6)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f6), vget_high_s16(i6)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f7), vget_low_s16(i7)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f7), vget_high_s16(i7)); - accum.low = vmlal_s16(accum.low, vget_low_s16(f.f8), vget_low_s16(i8)); - accum.high = vmlal_s16(accum.high, vget_high_s16(f.f8), vget_high_s16(i8)); - return accum; -} - -inline void DotProductAndStore(const Filter3x3x8& filter, int16x8_t i0, - int16x8_t i1, int16x8_t i2, int16x8_t i3, - int16x8_t i4, int16x8_t i5, int16x8_t i6, - int16x8_t i7, int16x8_t i8, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr) { - Int32x8 acc; - acc.low = 
vld1q_s32(bias_ptr); - acc.high = vld1q_s32(bias_ptr + 4); - - acc = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i3, i4, i5, i6, i7, i8, - acc); - - DownquantizeAndStore(acc, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, - output_ptr); -} - -// Performs multiply-accumulate on a 3x4 input for 2 horizontal outputs. -inline void DotProductAndStore2xStride1( - const Filter3x3x8& filter, int16x8_t i0, int16x8_t i1, int16x8_t i2, - int16x8_t i3, int16x8_t i4, int16x8_t i5, int16x8_t i6, int16x8_t i7, - int16x8_t i8, int16x8_t i9, int16x8_t i10, int16x8_t i11, - const int32* bias_ptr, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_ptr_offset) { - Int32x8 acc_0, acc_1; - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - acc_0 = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i4, i5, i6, i8, i9, - i10, acc_0); - acc_1 = MultiplyAccumulate3x3Filter(filter, i1, i2, i3, i5, i6, i7, i9, i10, - i11, acc_1); - DownquantizeAndStore2Output(acc_0, acc_1, output_offset, output_multiplier, - output_shift, output_activation_min, - output_activation_max, output_ptr, - output_ptr_offset); -} - -// Performs multiply-accumulate on a 4x3 input for 2 vertical outputs. 
-inline void DotProductAndStore2yStride1( - const Filter3x3x8& filter, int16x8_t i0, int16x8_t i1, int16x8_t i2, - int16x8_t i3, int16x8_t i4, int16x8_t i5, int16x8_t i6, int16x8_t i7, - int16x8_t i8, int16x8_t i9, int16x8_t i10, int16x8_t i11, - const int32* bias_ptr, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - uint8* output_ptr, int output_ptr_offset) { - Int32x8 acc_0, acc_1; - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - acc_0 = MultiplyAccumulate3x3Filter(filter, i0, i1, i2, i3, i4, i5, i6, i7, - i8, acc_0); - acc_1 = MultiplyAccumulate3x3Filter(filter, i3, i4, i5, i6, i7, i8, i9, i10, - i11, acc_1); - DownquantizeAndStore2Output(acc_0, acc_1, output_offset, output_multiplier, - output_shift, output_activation_min, - output_activation_max, output_ptr, - output_ptr_offset); -} - -// A kernel that is optimized on the number of output cells in the x and y -// direction, and the stride. Assumes 3x3 filters of 8 depth. -template -struct ConvKernel3x3FilterDepth8 {}; - -template <> -struct ConvKernel3x3FilterDepth8<8, 8, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - const int output_row_size = output_depth * output_width; - - // To process 8x8 outputs using a 3x3 filter, we require 10x10 inputs. - // Load inputs for the first 2 filters on the top left, then slide to - // the right, down, left, down, right, etc. 
in a snake-like path. This - // minimizes the total number of loads. - // - // INPUT OUTPUT - // |\----------------\ |\------------\ - // | \ \ | \ \ - // | \----------------\ | \------------\ - // | | 0 ... 9 | | | 0 ... 7 | - // | | 10 ... 19 | ---> | | 8 ... 15 | - // | | 20 ... 29 | \ | .. ... .. | - // \ | .. ... .. | \| 56 ... 63 | - // \| 90 ... 109 | |------------| - // |----------------| - // - // The first set of loads corresponds to: - // - // INPUT OUTPUT - // |\----------------- |\----------- - // | \ | \ - // | \----------------- | \---------- - // | | 0 1 2 3 ... | | 0 1 ... - // | | 10 11 12 13 ... ---> | | .. ... - // | | 20 21 22 23 ... | .. ... - // | | .. ... ... - // - // The next set of loads correspond to a sliding window to the right. - // It loads inputs 4, 5, 14, 15, 23, 24 and keeps 2, 3, 12, 13, and 22: - // - // INPUT OUTPUT - // |\------------------- |\------------- - // | \ | \ - // | \------------------- | \------------ - // | | .. 2 3 4 5 ... | | .. 2 3 ... - // | | .. 12 13 14 15 ... ---> | | .. ... - // | | .. 21 22 23 24 ... | .. ... - // | | .. ... ... - // - // And so on... - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 1x2 outputs starting from the top left. Referring to the - // indexes in the diagram above, this corresponds to outputs (0) and (1). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, 
bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - // Slide to the right for outputs x = [2, 3], y = 0. Referring to the - // indexes in the diagram above, this corresponds to outputs (2) and (3). - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth, output_depth); - - // Slide to the right again for outputs x = [4, 5], y = 0. Referring to the - // indexes in the diagram above, this corresponds to outputs (4) and (5). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 6 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 4 * output_depth, output_depth); - - // Slide to the right one last time for outputs x = [6, 7], y = 0. - // Referring to the indexes in the diagram above, this corresponds to - // outputs (6) and (7). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 8 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 6 * output_depth, output_depth); - - // Slide to down for outputs x = [6, 7], y = 1. Referring to the indexes in - // the diagram above, this corresponds to outputs (14) and (15). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 6 * input_depth + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 6 * output_depth + output_row_size, - output_depth); - - // Slide left for outputs x = [4, 5], y = 1. Referring to the indexes in - // the diagram above, this corresponds to outputs (12) and (13). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 4 * output_depth + output_row_size, - output_depth); - - // Slide left again for outputs x = [2, 3], y = 1. Referring to the indexes - // in the diagram above, this corresponds to outputs (10) and (11). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 2 * input_depth + input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth + output_row_size, - output_depth); - - // Slide left one more time for outputs x = [0, 1], y = 1. Referring to the - // indexes in the diagram above, this corresponds to outputs (8) and (9). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + output_row_size, output_depth); - - // Slide down for outputs x = [0, 1], y = 2. Referring to the - // indexes in the diagram above, this corresponds to outputs (16) and (17). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_row_size, output_depth); - - // Slide right for outputs x = [2, 3], y = 2. Referring to the - // indexes in the diagram above, this corresponds to outputs (18) and (19). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 2 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, - input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 2 * output_row_size, output_depth); - - // Slide right for outputs x = [4, 5], y = 2. Referring to the - // indexes in the diagram above, this corresponds to outputs (20) and (21). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 6 * input_depth + 2 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 2 * output_row_size, output_depth); - - // Slide right one more time for outputs x = [6, 7], y = 2. Referring to the - // indexes in the diagram above, this corresponds to outputs (22) and (23). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 8 * input_depth + 2 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, - input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 2 * output_row_size, output_depth); - - // Slide down for outputs x = [6, 7], y = 3. Referring to the indexes in - // the diagram above, this corresponds to outputs (30) and (31). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 6 * input_depth + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 3 * output_row_size, output_depth); - - // Slide left for outputs x = [4, 5], y = 3. Referring to the indexes in - // the diagram above, this corresponds to outputs (28) and (29). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 3 * output_row_size, output_depth); - - // Slide left for outputs x = [2, 3], y = 3. Referring to the indexes in - // the diagram above, this corresponds to outputs (26) and (27). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 3 * output_row_size, output_depth); - - // Slide left one more time for outputs x = [0, 1], y = 3. Referring to the - // indexes in the diagram above, this corresponds to outputs (24) and (25). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 3 * output_row_size, output_depth); - - // Slide down for outputs x = [0, 1], y = 4. Referring to the indexes in - // the diagram above, this corresponds to outputs (32) and (33). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 6 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 4 * output_row_size, output_depth); - - // Slide right for outputs x = [2, 3], y = 4. Referring to the indexes in - // the diagram above, this corresponds to outputs (34) and (35). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } +// clang-format gets confused with this file and ends up formatting lines to +// be larger than 80 characters. Turn off here and back on at the end of the +// file. - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 4 * output_row_size, output_depth); - - // Slide right for outputs x = [4, 5], y = 4. Referring to the indexes in - // the diagram above, this corresponds to outputs (36) and (37). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 6 * input_depth + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 4 * output_row_size, output_depth); - - // Slide right one more time for outputs x = [6, 7], y = 4. Referring to the - // indexes in the diagram above, this corresponds to outputs (38) and (39). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 8 * input_depth + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 4 * output_row_size, output_depth); - - // Slide down for outputs x = [6, 7], y = 5. Referring to the indexes in - // the diagram above, this corresponds to outputs (46) and (47). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 6 * input_depth + 7 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, - input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 5 * output_row_size, output_depth); - - // Slide left for outputs x = [4, 5], y = 5. Referring to the indexes in - // the diagram above, this corresponds to outputs (44) and (45). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 5 * output_row_size, output_depth); - - // Slide left for outputs x = [2, 3], y = 5. Referring to the indexes in - // the diagram above, this corresponds to outputs (42) and (43). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 2 * input_depth + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, - input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 5 * output_row_size, output_depth); - - // Slide left one more time for outputs x = [0, 1], y = 5. Referring to the - // indexes in the diagram above, this corresponds to outputs (40) and (41). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 5 * output_row_size, output_depth); - - // Slide down for outputs x = [0, 1], y = 6. Referring to the indexes in - // the diagram above, this corresponds to outputs (48) and (49). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 8 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 6 * output_row_size, output_depth); - - // Slide right for outputs x = [2, 3], y = 6. Referring to the indexes in - // the diagram above, this corresponds to outputs (50) and (51). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 6 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 6 * output_row_size, output_depth); - - // Slide right for outputs x = [4, 5], y = 6. Referring to the indexes in - // the diagram above, this corresponds to outputs (52) and (53). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 6 * input_depth + 6 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 6 * output_row_size, output_depth); - - // Slide right one more time for outputs x = [6, 7], y = 6. Referring to the - // indexes in the diagram above, this corresponds to outputs (54) and (55). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 8 * input_depth + 6 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 6 * output_row_size, output_depth); - - // Slide down for outputs x = [6, 7], y = 7. Referring to the indexes in the - // diagram above, this corresponds to outputs (62) and (63). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 6 * input_depth + 9 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 6 * output_depth + 7 * output_row_size, output_depth); - - // Slide left for outputs x = [4, 5], y = 7. Referring to the indexes in the - // diagram above, this corresponds to outputs (60) and (61). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 7 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 4 * output_depth + 7 * output_row_size, output_depth); - - // Slide left for outputs x = [2, 3], y = 7. Referring to the indexes in the - // diagram above, this corresponds to outputs (58) and (59). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 2 * input_depth + 7 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 7 * output_row_size, output_depth); - - // Slide left one more time for outputs x = [0, 1], y = 7. Referring to the - // indexes in the diagram above, this corresponds to outputs (56) and (57). 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 7 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 7 * output_row_size, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 4, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - const int output_row_size = output_depth * output_width; - - // To process 4x4 outputs using a 3x3 filter, we 
require 6x6 inputs. - // Load inputs for the first 2 filters on the top left, then slide to - // the right, down, left, down, right, etc. in a snake-like path. This - // minimizes the total number of loads. - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 1x2 outputs starting from the top left. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = 
vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - // Now load 1x2 inputs on the top right. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth, output_depth); - - // Now load next inputs when sliding window down. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth + output_row_size, - output_depth); - - // Now load next inputs when sliding window left. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + output_row_size, output_depth); - - // Now load next inputs when sliding window down. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - } +// clang-format off - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_row_size, output_depth); - - // Now load next inputs when sliding window right. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth + 2 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_10, input_11, input_8, input_9, input_2, input_3, input_0, - input_1, input_6, input_7, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 2 * output_row_size, output_depth); - - // Now load next inputs when sliding window down. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 2 * input_depth + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, - output_ptr + 2 * output_depth + 3 * output_row_size, output_depth); - - // Now load next inputs when sliding window left. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 3 * output_row_size, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 2, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - const int output_row_size = output_depth * output_width; - - int16x8_t input_0, input_1, input_2, input_3, 
input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 1x2 outputs starting from the top. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - 
DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Now load next inputs one row down. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Now load next row. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_8, input_9, input_10, input_11, input_0, input_1, input_2, - input_3, input_4, input_5, input_6, input_7, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Now load last row. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 5 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 1, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - const int output_row_size = output_depth * output_width; - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 2x1 outputs starting from the top. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2yStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, 
input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_row_size); - - // Load inputs for bottom 2 rows. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - } - - DotProductAndStore2yStride1( - filter, input_6, input_7, input_8, input_9, input_10, input_11, input_0, - input_1, input_2, input_3, input_4, input_5, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_row_size, - output_row_size); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<2, 2, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, 
output_depth); - - Int32x8 acc_0, acc_1, acc_2, acc_3; - - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_2.low = vld1q_s32(bias_ptr); - acc_3.low = vld1q_s32(bias_ptr); - - bias_ptr += 4; - acc_0.high = vld1q_s32(bias_ptr); - acc_1.high = vld1q_s32(bias_ptr); - acc_2.high = vld1q_s32(bias_ptr); - acc_3.high = vld1q_s32(bias_ptr); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - - // Add scope for input registers to help the compiler know that it is - // not needed. - { - // To process 2x2 outputs using a 3x3 filter, we require 4x4 inputs. - // Load inputs for the top two filters first. - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - const uint8* ptr = input_ptr; - - // Load top 3 rows. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = 
vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - // Multiply-accum for top-left output. - acc_0 = MultiplyAccumulate3x3Filter(filter, input_0, input_1, input_2, - input_4, input_5, input_6, input_8, - input_9, input_10, acc_0); - - // Multiply-accum for top-right output. - acc_1 = MultiplyAccumulate3x3Filter(filter, input_1, input_2, input_3, - input_5, input_6, input_7, input_9, - input_10, input_11, acc_1); - - // Now load the bottom row. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - } - - // Multiply-accum for bottom-left output. - acc_2 = MultiplyAccumulate3x3Filter(filter, input_4, input_5, input_6, - input_8, input_9, input_10, input_0, - input_1, input_2, acc_2); - - // Multiply-accum for bottom-right output. 
- acc_3 = MultiplyAccumulate3x3Filter(filter, input_5, input_6, input_7, - input_9, input_10, input_11, input_1, - input_2, input_3, acc_3); - } - - DownquantizeAndStore2x2Output(acc_0, acc_1, acc_2, acc_3, output_offset, - output_multiplier, output_shift, - output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<2, 4, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - const int output_row_size = output_depth * output_width; - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 1x2 outputs starting from the top left. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, 
bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - // Now load 1x2 inputs on the top right. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + 4 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth, output_depth); - - // Now load next inputs when sliding window down. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr + 2 * input_depth + 3 * input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_6, input_7, input_4, input_5, input_10, input_11, input_8, - input_9, input_2, input_3, input_0, input_1, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth + output_row_size, - output_depth); - - // Now load next inputs when sliding window left. 
- { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_4, input_5, input_6, input_7, input_8, input_9, input_10, - input_11, input_0, input_1, input_2, input_3, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + output_row_size, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<1, 4, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, 
input_11; - - // Load inputs for 1x2 outputs starting from the left. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_0, input_1, input_2, input_3, 
input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth); - - // Now load 1x2 inputs on the right. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr + input_depth * 4; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_2 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - DotProductAndStore2xStride1( - filter, input_2, input_3, input_0, input_1, input_6, input_7, input_4, - input_5, input_10, input_11, input_8, input_9, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr + 2 * output_depth, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<2, 1, 1, 1> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - 
Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - // To process 2x1 outputs using a 3x3 filter, we require 4x3 inputs. - // Load all inputs at the beginning. - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11; - - // Load inputs for 1x2 outputs starting from the top left. - { - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5; - - const uint8* ptr = input_ptr; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_10 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_11 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_6 
= vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - input_10 = vaddq_s16(input_10, input_offset_vec); - input_11 = vaddq_s16(input_11, input_offset_vec); - } - - DotProductAndStore2yStride1( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9, input_10, input_11, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth * output_width); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 2, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - const int output_row_size = output_depth * output_width; - - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - Int32x8 acc_0, acc_1; - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9; - - const uint8* ptr = input_ptr; - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; - - // Load first 2 rows. 
- temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load next 2 rows. 
- ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - - DownquantizeAndStore2Output( - acc_0, acc_1, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Moving onto the next row of outputs. 
- acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load next 2 rows. - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, 
input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - - DownquantizeAndStore2Output( - acc_0, acc_1, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Moving onto the next row of outputs. - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load next 2 rows. 
- ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - - DownquantizeAndStore2Output( - acc_0, acc_1, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth); - - output_ptr += output_row_size; - - // Moving onto the next row of outputs. 
- acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_0.high = vld1q_s32(bias_ptr + 4); - acc_1.high = vld1q_s32(bias_ptr + 4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load last row. - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - - DownquantizeAndStore2Output( - acc_0, acc_1, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 4, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 
output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - // Reuse 4x2 kernel twice. - ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth, - output_width); - - ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( - input_ptr + 4 * input_depth, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr + 2 * output_depth, output_depth, output_width); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<4, 1, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - const int output_row_size = output_depth * output_width; - - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8; - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, - temp_8; - - const uint8* ptr = input_ptr; - - // Load all inputs for top output. 
- temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Second output. 
- output_ptr += output_row_size; - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - - DotProductAndStore( - filter, input_6, input_7, input_8, input_0, input_1, input_2, input_3, - input_4, input_5, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Third output. 
- output_ptr += output_row_size; - - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - - DotProductAndStore( - filter, input_3, input_4, input_5, input_6, input_7, input_8, input_0, - input_1, input_2, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Fourth output. 
- output_ptr += output_row_size; - - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - } -}; - -template <> -struct ConvKernel3x3FilterDepth8<2, 2, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - Int32x8 acc_0, acc_1, acc_2, acc_3; - acc_0.low = vld1q_s32(bias_ptr); - acc_1.low = vld1q_s32(bias_ptr); - acc_2.low = vld1q_s32(bias_ptr); - acc_3.low = vld1q_s32(bias_ptr); - - bias_ptr += 4; - acc_0.high = vld1q_s32(bias_ptr); - acc_1.high = vld1q_s32(bias_ptr); - acc_2.high = vld1q_s32(bias_ptr); - acc_3.high = 
vld1q_s32(bias_ptr); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - - // Add scope for input registers to help the compiler know that it is - // not needed. - { - // To process 2x2 outputs using a 3x3 filter at stride 2, we require - // 5x5 inputs. We load the first 5x2 inputs at a time. - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, input_9; - - const uint8* ptr = input_ptr; - - // Load inputs. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; - - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - acc_0 = 
MultiplyAccumulateRow(acc_0, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_0 = MultiplyAccumulateRow(acc_0, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load next inputs. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_9 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_9 = vaddq_s16(input_9, input_offset_vec); - } - - acc_0 = 
MultiplyAccumulateRow(acc_0, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_1 = MultiplyAccumulateRow(acc_1, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - - // Moving onto the two bottom outputs. - acc_2 = MultiplyAccumulateRow(acc_2, filter.f0, filter.f1, filter.f2, - input_0, input_1, input_2); - - acc_3 = MultiplyAccumulateRow(acc_3, filter.f0, filter.f1, filter.f2, - input_2, input_3, input_4); - - acc_2 = MultiplyAccumulateRow(acc_2, filter.f3, filter.f4, filter.f5, - input_5, input_6, input_7); - - acc_3 = MultiplyAccumulateRow(acc_3, filter.f3, filter.f4, filter.f5, - input_7, input_8, input_9); - - // Load last input row. - { - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4; - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - temp_3 = vld1_u8(ptr + 3 * input_depth); - temp_4 = vld1_u8(ptr + 4 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - } - - acc_2 = MultiplyAccumulateRow(acc_2, filter.f6, filter.f7, filter.f8, - input_0, input_1, input_2); - - acc_3 = MultiplyAccumulateRow(acc_3, filter.f6, filter.f7, filter.f8, - input_2, input_3, input_4); - } +#define DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE 10 * 10 * 64 - DownquantizeAndStore2x2Output(acc_0, acc_1, acc_2, acc_3, output_offset, - output_multiplier, output_shift, - output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - } +// Encapsulates constant parameters used 
in DepthwiseConv. +// 64-bit is used for types that will be added to 64-bit addresses in asm. +struct DepthwiseConvParams { + int64_t input_depth; + int64_t input_row_size; + int64_t output_depth; + int64_t output_row_size; + int32 input_offset; + int32 output_offset; + int32 filter_offset; + int32 output_multiplier; + int32 output_activation_min; + int32 output_activation_max; + int32 output_shift; + int32 input_width; + int32 input_height; + int32 output_width; + int32 output_height; }; -template <> -struct ConvKernel3x3FilterDepth8<2, 4, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - // Reuse 2x2 kernel twice. - ConvKernel3x3FilterDepth8<2, 2, 2, 2>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, output_depth, - output_width); - - ConvKernel3x3FilterDepth8<2, 2, 2, 2>::Run( - input_ptr + 4 * input_depth, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr + 2 * output_depth, output_depth, output_width); - } -}; +#define STR(s) STR_UNEXPANDED(s) +#define STR_UNEXPANDED(s) #s + +// Represents the number of bytes offset from the start of the +// DepthwiseConvParams struct. This is used in the asm to load parameters. +// Keep these values in sync with the static_asserts below. 
+#define OFFSET_INPUT_DEPTH 0 +#define OFFSET_INPUT_ROW_SIZE 8 +#define OFFSET_OUTPUT_DEPTH 16 +#define OFFSET_OUTPUT_ROW_SIZE 24 +#define OFFSET_INPUT_OFFSET 32 +#define OFFSET_OUTPUT_OFFSET 36 +#define OFFSET_FILTER_OFFSET 40 +#define OFFSET_OUTPUT_MULTIPLIER 44 +#define OFFSET_OUTPUT_ACTIVATION_MIN 48 +#define OFFSET_OUTPUT_ACTIVATION_MAX 52 +#define OFFSET_OUTPUT_SHIFT 56 +#define OFFSET_INPUT_WIDTH 60 +#define OFFSET_INPUT_HEIGHT 64 +#define OFFSET_OUTPUT_WIDTH 68 +#define OFFSET_OUTPUT_HEIGHT 72 + +static_assert(offsetof(DepthwiseConvParams, input_depth) == + OFFSET_INPUT_DEPTH, ""); +static_assert(offsetof(DepthwiseConvParams, input_row_size) == + OFFSET_INPUT_ROW_SIZE, ""); +static_assert(offsetof(DepthwiseConvParams, output_depth) == + OFFSET_OUTPUT_DEPTH, ""); +static_assert(offsetof(DepthwiseConvParams, output_row_size) == + OFFSET_OUTPUT_ROW_SIZE, ""); +static_assert(offsetof(DepthwiseConvParams, input_offset) == + OFFSET_INPUT_OFFSET, ""); +static_assert(offsetof(DepthwiseConvParams, output_offset) == + OFFSET_OUTPUT_OFFSET, ""); +static_assert(offsetof(DepthwiseConvParams, filter_offset) == + OFFSET_FILTER_OFFSET, ""); +static_assert(offsetof(DepthwiseConvParams, output_multiplier) == + OFFSET_OUTPUT_MULTIPLIER, ""); +static_assert(offsetof(DepthwiseConvParams, output_activation_min) == + OFFSET_OUTPUT_ACTIVATION_MIN, ""); +static_assert(offsetof(DepthwiseConvParams, output_activation_max) == + OFFSET_OUTPUT_ACTIVATION_MAX, ""); +static_assert(offsetof(DepthwiseConvParams, output_shift) == + OFFSET_OUTPUT_SHIFT, ""); +static_assert(offsetof(DepthwiseConvParams, input_width) == + OFFSET_INPUT_WIDTH, ""); +static_assert(offsetof(DepthwiseConvParams, input_height) == + OFFSET_INPUT_HEIGHT, ""); +static_assert(offsetof(DepthwiseConvParams, output_width) == + OFFSET_OUTPUT_WIDTH, ""); +static_assert(offsetof(DepthwiseConvParams, output_height) == + OFFSET_OUTPUT_HEIGHT, ""); + +template +struct DepthwiseConvWindow {}; template <> -struct 
ConvKernel3x3FilterDepth8<2, 1, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - const int output_row_size = output_depth * output_width; - - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8; - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, - temp_8; - - const uint8* ptr = input_ptr; - - // Load all inputs for top output. - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = 
vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Second output. - output_ptr += output_row_size; - - ptr += input_row_size; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - - DotProductAndStore( - filter, input_6, input_7, input_8, input_0, input_1, input_2, input_3, - input_4, input_5, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); +struct DepthwiseConvWindow<8, 1, 1> { + public: + static void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, int64_t input_depth, + int64_t input_row_size, int32 output_window_height, + int32 output_window_width, + const DepthwiseConvParams* params_ptr) { + const int64_t input_width_increment = 2 * input_depth; + const int64_t 
input_height_increment = 2 * input_row_size; + const int64_t output_height_increment = 2 * params_ptr->output_row_size; + +#define DEPTHWISECONV_LABEL_HEIGHT_2_LOOP "1" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP "2" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "3" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER "4" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP "5" +#define DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP "6" +#define DEPTHWISECONV_LABEL_HEIGHT_1 "7" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP "8" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "9" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER "10" +#define DEPTHWISECONV_LABEL_HEIGHT_1_END "11" + + asm volatile( + // Performs depthwise convolutions for a window specified by + // |output_window_height| and |output_window_width|. The inner-most loop + // processes 2x2 outputs, and any leftovers at the end. + // + // Algorithm works as follows: + // + // 1. Load filters of 8 depth (8x3x3). Registers v0--v8 hold filter + // values. + // 2. For 2 output heights at a time: + // i. For 2 output widths at a time, load inputs for a 2x1 (2 + // height, 1 width) output window (4x3 input window). + // Registers v9--v20 hold input values. Mul-add with + // accumulators v21--v24. Then run activation, downquantize + // and store. Repeat for the next 2x1 output window, + // leveraging overlapping inputs. + // ii. Handle single leftover width if exists. + // 3. Handle single leftover height if exists. + // i. For 2 output widths at a time, load inputs for a 1x2 (1 + // height, 2 width) output window (3x4 input window). + // Registers v9--v20 hold input values. Mul-add with + // accumulators v21--v24. Then run activation, downquantize + // and store. Repeat for the next 1x2 output window, + // leveraging overlapping inputs. + // ii. Handle single leftover width if exists. 
+ // + // Loads are placed as soon as the register is no longer needed and + // interleaved with arithmetic operations to take advantage of + // dual-issue pipelines. We also add input offsets as far from the loads + // as possible to give loads enough cycles to fetch data from memory. + + // Set "constant" registers. These registers may be replaced with temp + // values from time to time when there are not enough NEON registers. + // We use x9--x15 general purpose registers as they are caller-saved + // temporary registers (see http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf). // NOLINT + "ldr w9, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "ldr x3, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + "cmp %w[output_window_height], #2\n" + "dup v26.8h, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "ldr w2, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "dup v29.4s, w2\n" + "ldr w4, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "dup v30.4s, w4\n" + "ldr w0, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v31.4s, w0\n" + "neg w9, w9\n" + "dup v28.4s, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + "add x10, %[bias_ptr], #16\n" + "ldr x1, [%[params_ptr], #" STR(OFFSET_OUTPUT_ROW_SIZE) "]\n" + "dup v9.8h, w9\n" + + // Load filters and add offsets. 
+ "ld1 {v0.8b}, [%[filter_ptr]], x3\n" + "ld1 {v1.8b}, [%[filter_ptr]], x3\n" + "uaddw v0.8h, v9.8h, v0.8b\n" + "ld1 {v2.8b}, [%[filter_ptr]], x3\n" + "uaddw v1.8h, v9.8h, v1.8b\n" + "ld1 {v3.8b}, [%[filter_ptr]], x3\n" + "uaddw v2.8h, v9.8h, v2.8b\n" + "ld1 {v4.8b}, [%[filter_ptr]], x3\n" + "uaddw v3.8h, v9.8h, v3.8b\n" + "ld1 {v5.8b}, [%[filter_ptr]], x3\n" + "uaddw v4.8h, v9.8h, v4.8b\n" + "ld1 {v6.8b}, [%[filter_ptr]], x3\n" + "uaddw v5.8h, v9.8h, v5.8b\n" + "ld1 {v7.8b}, [%[filter_ptr]], x3\n" + "uaddw v6.8h, v9.8h, v6.8b\n" + "ld1 {v8.8b}, [%[filter_ptr]], x3\n" + "uaddw v7.8h, v9.8h, v7.8b\n" + "uaddw v8.8h, v9.8h, v8.8b\n" + + "blt " DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_2_LOOP ":\n" + // This loop processes 2x2 outputs. To avoid register exhaustion, + // inputs for the left 2 outputs are loaded first, then the right + // two outputs. + "mov x11, %[input_ptr]\n" + "mov x12, x11\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "add x13, x11, %[input_row_size]\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "add x14, x13, %[input_row_size]\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "add x15, x14, %[input_row_size]\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "mov w5, %w[output_window_width]\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "mov x6, %[output_ptr]\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "add x7, %[output_ptr], x1\n" + "ld1 {v15.8b}, [x14], %[input_depth]\n" + // The height 2 / width 2 loop loads an extra 2x1 outputs (2 height, + // 1 width) in anticipation of the next iteration. Make sure + // |output_window_width| is large enough to handle the additional + // loads, otherwise jump to the appropriate label to handle + // smaller widths. 
+ "cmp w5, #2\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "ld1 {v16.8b}, [x14], %[input_depth]\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "ld1 {v17.8b}, [x14], %[input_depth]\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "ld1 {v18.8b}, [x15], %[input_depth]\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "ld1 {v19.8b}, [x15], %[input_depth]\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + "ld1 {v20.8b}, [x15], %[input_depth]\n" + "uaddw v14.8h, v26.8h, v14.8b\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "ld1 {v22.4s}, [x10]\n" + "uaddw v16.8h, v26.8h, v16.8b\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "uaddw v17.8h, v26.8h, v17.8b\n" + "ld1 {v24.4s}, [x10]\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "uaddw v19.8h, v26.8h, v19.8b\n" + "uaddw v20.8h, v26.8h, v20.8b\n" + + "beq " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER "f\n" + "cmp w5, #1\n" + "beq " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP ":\n" + // Mul-add left outputs. 
+ "smlal v21.4s, v0.4h, v9.4h\n" + "subs w5, w5, #2\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "cmp w5, #3\n" + "smlal v23.4s, v0.4h, v12.4h\n" + "ld1 {v9.8b}, [x12]\n" + "smlal2 v24.4s, v0.8h, v12.8h\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "smlal v23.4s, v1.4h, v13.4h\n" + "smlal2 v24.4s, v1.8h, v13.8h\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "smlal v23.4s, v2.4h, v14.4h\n" + "smlal2 v24.4s, v2.8h, v14.8h\n" + "smlal v21.4s, v3.4h, v12.4h\n" + "smlal2 v22.4s, v3.8h, v12.8h\n" + "ld1 {v12.8b}, [x13]\n" + "smlal v23.4s, v3.4h, v15.4h\n" + "smlal2 v24.4s, v3.8h, v15.8h\n" + "smlal v21.4s, v4.4h, v13.4h\n" + "smlal2 v22.4s, v4.8h, v13.8h\n" + "smlal v23.4s, v4.4h, v16.4h\n" + "smlal2 v24.4s, v4.8h, v16.8h\n" + "smlal v21.4s, v5.4h, v14.4h\n" + "smlal2 v22.4s, v5.8h, v14.8h\n" + "smlal v23.4s, v5.4h, v17.4h\n" + "smlal2 v24.4s, v5.8h, v17.8h\n" + "smlal v21.4s, v6.4h, v15.4h\n" + "smlal2 v22.4s, v6.8h, v15.8h\n" + "ld1 {v15.8b}, [x14]\n" + "smlal v23.4s, v6.4h, v18.4h\n" + "smlal2 v24.4s, v6.8h, v18.8h\n" + "ld1 {v18.8b}, [x15]\n" + "smlal v21.4s, v7.4h, v16.4h\n" + "smlal2 v22.4s, v7.8h, v16.8h\n" + "smlal v23.4s, v7.4h, v19.4h\n" + "smlal2 v24.4s, v7.8h, v19.8h\n" + "smlal v21.4s, v8.4h, v17.4h\n" + "smlal2 v22.4s, v8.8h, v17.8h\n" + "smlal v23.4s, v8.4h, v20.4h\n" + "smlal2 v24.4s, v8.8h, v20.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup 
v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "st1 {v21.8b}, [x6], x3\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "st1 {v23.8b}, [x7], x3\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + + // Mul-add right outputs. 
+ "smlal v21.4s, v0.4h, v10.4h\n" + "add x11, x11, %[input_width_increment]\n" + "smlal2 v22.4s, v0.8h, v10.8h\n" + "mov x12, x11\n" + "smlal v23.4s, v0.4h, v13.4h\n" + "add x13, x11, %[input_row_size]\n" + "smlal2 v24.4s, v0.8h, v13.8h\n" + "add x14, x13, %[input_row_size]\n" + "smlal v21.4s, v1.4h, v11.4h\n" + "add x15, x14, %[input_row_size]\n" + "smlal2 v22.4s, v1.8h, v11.8h\n" + "smlal v23.4s, v1.4h, v14.4h\n" + "smlal2 v24.4s, v1.8h, v14.8h\n" + "smlal v21.4s, v2.4h, v9.4h\n" + "smlal2 v22.4s, v2.8h, v9.8h\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v2.4h, v12.4h\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "smlal2 v24.4s, v2.8h, v12.8h\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "smlal v21.4s, v3.4h, v13.4h\n" + "smlal2 v22.4s, v3.8h, v13.8h\n" + "smlal v23.4s, v3.4h, v16.4h\n" + "smlal2 v24.4s, v3.8h, v16.8h\n" + "smlal v21.4s, v4.4h, v14.4h\n" + "smlal2 v22.4s, v4.8h, v14.8h\n" + "smlal v23.4s, v4.4h, v17.4h\n" + "smlal2 v24.4s, v4.8h, v17.8h\n" + "smlal v21.4s, v5.4h, v12.4h\n" + "smlal2 v22.4s, v5.8h, v12.8h\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v5.4h, v15.4h\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "smlal2 v24.4s, v5.8h, v15.8h\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "smlal v21.4s, v6.4h, v16.4h\n" + "smlal2 v22.4s, v6.8h, v16.8h\n" + "smlal v23.4s, v6.4h, v19.4h\n" + "smlal2 v24.4s, v6.8h, v19.8h\n" + "smlal v21.4s, v7.4h, v17.4h\n" + "smlal2 v22.4s, v7.8h, v17.8h\n" + "smlal v23.4s, v7.4h, v20.4h\n" + "smlal2 v24.4s, v7.8h, v20.8h\n" + "smlal v21.4s, v8.4h, v15.4h\n" + "smlal2 v22.4s, v8.8h, v15.8h\n" + "ld1 {v15.8b}, [x14], %[input_depth]\n" + "smlal v23.4s, v8.4h, v18.4h\n" + "ld1 {v16.8b}, [x14], %[input_depth]\n" + "smlal2 v24.4s, v8.8h, v18.8h\n" + "ld1 {v17.8b}, [x14], %[input_depth]\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "ld1 {v18.8b}, [x15], %[input_depth]\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "ld1 {v19.8b}, [x15], %[input_depth]\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + 
"ld1 {v20.8b}, [x15], %[input_depth]\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "st1 {v21.8b}, [x6], x3\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "st1 {v23.8b}, [x7], x3\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + "uaddw v14.8h, v26.8h, v14.8b\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "uaddw v16.8h, v26.8h, v16.8b\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "uaddw v17.8h, v26.8h, v17.8b\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "uaddw v19.8h, v26.8h, v19.8b\n" + "uaddw v20.8h, v26.8h, v20.8b\n" + + "bge " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP "b\n" + + // At this point, there will be one of 2 width or 1 width leftover, + // not 
both. + "cmp w5, #2\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "f\n" + + // Handle last 2 columns if exists. + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER ":\n" + // Mul-add left outputs. + "smlal v21.4s, v0.4h, v9.4h\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "smlal v23.4s, v0.4h, v12.4h\n" + "ld1 {v9.8b}, [x12]\n" + "smlal2 v24.4s, v0.8h, v12.8h\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "smlal v23.4s, v1.4h, v13.4h\n" + "smlal2 v24.4s, v1.8h, v13.8h\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "smlal v23.4s, v2.4h, v14.4h\n" + "smlal2 v24.4s, v2.8h, v14.8h\n" + "smlal v21.4s, v3.4h, v12.4h\n" + "smlal2 v22.4s, v3.8h, v12.8h\n" + "ld1 {v12.8b}, [x13]\n" + "smlal v23.4s, v3.4h, v15.4h\n" + "smlal2 v24.4s, v3.8h, v15.8h\n" + "smlal v21.4s, v4.4h, v13.4h\n" + "smlal2 v22.4s, v4.8h, v13.8h\n" + "smlal v23.4s, v4.4h, v16.4h\n" + "smlal2 v24.4s, v4.8h, v16.8h\n" + "smlal v21.4s, v5.4h, v14.4h\n" + "smlal2 v22.4s, v5.8h, v14.8h\n" + "smlal v23.4s, v5.4h, v17.4h\n" + "smlal2 v24.4s, v5.8h, v17.8h\n" + "smlal v21.4s, v6.4h, v15.4h\n" + "smlal2 v22.4s, v6.8h, v15.8h\n" + "ld1 {v15.8b}, [x14]\n" + "smlal v23.4s, v6.4h, v18.4h\n" + "smlal2 v24.4s, v6.8h, v18.8h\n" + "ld1 {v18.8b}, [x15]\n" + "smlal v21.4s, v7.4h, v16.4h\n" + "smlal2 v22.4s, v7.8h, v16.8h\n" + "smlal v23.4s, v7.4h, v19.4h\n" + "smlal2 v24.4s, v7.8h, v19.8h\n" + "smlal v21.4s, v8.4h, v17.4h\n" + "smlal2 v22.4s, v8.8h, v17.8h\n" + "smlal v23.4s, v8.4h, v20.4h\n" + "smlal2 v24.4s, v8.8h, v20.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, 
v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "st1 {v21.8b}, [x6], x3\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "st1 {v23.8b}, [x7], x3\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + + // Mul-add right outputs. 
+ "smlal v21.4s, v0.4h, v10.4h\n" + "smlal2 v22.4s, v0.8h, v10.8h\n" + "smlal v23.4s, v0.4h, v13.4h\n" + "smlal2 v24.4s, v0.8h, v13.8h\n" + "smlal v21.4s, v1.4h, v11.4h\n" + "smlal2 v22.4s, v1.8h, v11.8h\n" + "smlal v23.4s, v1.4h, v14.4h\n" + "smlal2 v24.4s, v1.8h, v14.8h\n" + "smlal v21.4s, v2.4h, v9.4h\n" + "smlal2 v22.4s, v2.8h, v9.8h\n" + "smlal v23.4s, v2.4h, v12.4h\n" + "smlal2 v24.4s, v2.8h, v12.8h\n" + "smlal v21.4s, v3.4h, v13.4h\n" + "smlal2 v22.4s, v3.8h, v13.8h\n" + "smlal v23.4s, v3.4h, v16.4h\n" + "smlal2 v24.4s, v3.8h, v16.8h\n" + "smlal v21.4s, v4.4h, v14.4h\n" + "smlal2 v22.4s, v4.8h, v14.8h\n" + "smlal v23.4s, v4.4h, v17.4h\n" + "smlal2 v24.4s, v4.8h, v17.8h\n" + "smlal v21.4s, v5.4h, v12.4h\n" + "smlal2 v22.4s, v5.8h, v12.8h\n" + "smlal v23.4s, v5.4h, v15.4h\n" + "smlal2 v24.4s, v5.8h, v15.8h\n" + "smlal v21.4s, v6.4h, v16.4h\n" + "smlal2 v22.4s, v6.8h, v16.8h\n" + "smlal v23.4s, v6.4h, v19.4h\n" + "smlal2 v24.4s, v6.8h, v19.8h\n" + "smlal v21.4s, v7.4h, v17.4h\n" + "smlal2 v22.4s, v7.8h, v17.8h\n" + "smlal v23.4s, v7.4h, v20.4h\n" + "smlal2 v24.4s, v7.8h, v20.8h\n" + "smlal v21.4s, v8.4h, v15.4h\n" + "smlal2 v22.4s, v8.8h, v15.8h\n" + "smlal v23.4s, v8.4h, v18.4h\n" + "smlal2 v24.4s, v8.8h, v18.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, 
v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "sqxtn2 v23.8h, v24.4s\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "st1 {v21.8b}, [x6], x3\n" + "st1 {v23.8b}, [x7], x3\n" + "b " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP "f\n" + + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER ":\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "smlal v23.4s, v0.4h, v12.4h\n" + "smlal2 v24.4s, v0.8h, v12.8h\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "smlal v23.4s, v1.4h, v13.4h\n" + "smlal2 v24.4s, v1.8h, v13.8h\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "smlal v23.4s, v2.4h, v14.4h\n" + "smlal2 v24.4s, v2.8h, v14.8h\n" + "smlal v21.4s, v3.4h, v12.4h\n" + "smlal2 v22.4s, v3.8h, v12.8h\n" + "smlal v23.4s, v3.4h, v15.4h\n" + "smlal2 v24.4s, v3.8h, v15.8h\n" + "smlal v21.4s, v4.4h, v13.4h\n" + "smlal2 v22.4s, v4.8h, v13.8h\n" + "smlal v23.4s, v4.4h, v16.4h\n" + "smlal2 v24.4s, v4.8h, v16.8h\n" + "smlal v21.4s, v5.4h, v14.4h\n" + "smlal2 v22.4s, v5.8h, v14.8h\n" + "smlal v23.4s, v5.4h, v17.4h\n" + "smlal2 v24.4s, v5.8h, v17.8h\n" + "smlal v21.4s, v6.4h, v15.4h\n" + "smlal2 v22.4s, v6.8h, v15.8h\n" + "smlal v23.4s, v6.4h, v18.4h\n" + "smlal2 v24.4s, v6.8h, v18.8h\n" + "smlal v21.4s, v7.4h, v16.4h\n" + "smlal2 v22.4s, v7.8h, v16.8h\n" + "smlal v23.4s, v7.4h, v19.4h\n" + "smlal2 v24.4s, v7.8h, v19.8h\n" + "smlal v21.4s, v8.4h, v17.4h\n" + "smlal2 v22.4s, v8.8h, v17.8h\n" + "smlal v23.4s, v8.4h, v20.4h\n" + "smlal2 v24.4s, v8.8h, v20.8h\n" + + 
"sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v9.16b, v21.16b, v28.16b\n" + "and v12.16b, v22.16b, v28.16b\n" + "and v15.16b, v23.16b, v28.16b\n" + "and v18.16b, v24.16b, v28.16b\n" + "sshr v9.4s, v9.4s, #31\n" + "sshr v12.4s, v12.4s, #31\n" + "sshr v15.4s, v15.4s, #31\n" + "sshr v18.4s, v18.4s, #31\n" + "sqadd v21.4s, v21.4s, v9.4s\n" + "sqadd v22.4s, v22.4s, v12.4s\n" + "sqadd v23.4s, v23.4s, v15.4s\n" + "sqadd v24.4s, v24.4s, v18.4s\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "sqxtn2 v23.8h, v24.4s\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "st1 {v21.8b}, [x6], x3\n" + "st1 {v23.8b}, [x7], x3\n" + + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP ":\n" + "subs %w[output_window_height], %w[output_window_height], #2\n" + "add %[input_ptr], %[input_ptr], %[input_height_increment]\n" + "cmp %w[output_window_height], #2\n" + "add %[output_ptr], %[output_ptr], %[output_height_increment]\n" + "bge " DEPTHWISECONV_LABEL_HEIGHT_2_LOOP "b\n" + + DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP ":\n" + "cmp %w[output_window_height], #1\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_1_END "f\n" + + DEPTHWISECONV_LABEL_HEIGHT_1 ":\n" + "mov x12, %[input_ptr]\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "add x13, %[input_ptr], %[input_row_size]\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" 
+ "add x14, x13, %[input_row_size]\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "add x15, x14, %[input_row_size]\n" + "mov w5, %w[output_window_width]\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "mov x6, %[output_ptr]\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "add x7, %[output_ptr], x1\n" + "ld1 {v15.8b}, [x13], %[input_depth]\n" + // The height 1 / width 2 loop loads an extra 1x1 output in anticipation + // for the next iteration. Make sure |output_window_width| is large + // enough to handle the additional load, otherwise jump to the + // appropriate label to handle smaller widths. + "cmp w5, #2\n" + "ld1 {v17.8b}, [x14], %[input_depth]\n" + "ld1 {v18.8b}, [x14], %[input_depth]\n" + "ld1 {v19.8b}, [x14], %[input_depth]\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "ld1 {v22.4s}, [x10]\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "ld1 {v24.4s}, [x10]\n" + + "uaddw v9.8h, v26.8h, v9.8b\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + "uaddw v14.8h, v26.8h, v14.8b\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "uaddw v17.8h, v26.8h, v17.8b\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "uaddw v19.8h, v26.8h, v19.8b\n" + + "beq " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER "f\n" + "cmp w5, #1\n" + "beq " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP ":\n" + // Load inputs for 3x4 input window which corresponds to a 1x2 output + // window. 
+ "smlal v21.4s, v0.4h, v9.4h\n" + "ld1 {v12.8b}, [x12]\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "ld1 {v16.8b}, [x13]\n" + "smlal v23.4s, v0.4h, v10.4h\n" + "ld1 {v20.8b}, [x14]\n" + "smlal2 v24.4s, v0.8h, v10.8h\n" + "subs w5, w5, #2\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "cmp w5, #3\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "add %[input_ptr], %[input_ptr], %[input_width_increment]\n" + "smlal v23.4s, v1.4h, v11.4h\n" + "mov x12, %[input_ptr]\n" + "smlal2 v24.4s, v1.8h, v11.8h\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "add x13, %[input_ptr], %[input_row_size]\n" + "smlal v23.4s, v2.4h, v12.4h\n" + "add x14, x13, %[input_row_size]\n" + "smlal2 v24.4s, v2.8h, v12.8h\n" + "smlal v21.4s, v3.4h, v13.4h\n" + "add x15, x14, %[input_row_size]\n" + "smlal2 v22.4s, v3.8h, v13.8h\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v3.4h, v14.4h\n" + "smlal2 v24.4s, v3.8h, v14.8h\n" + "smlal v21.4s, v4.4h, v14.4h\n" + "smlal2 v22.4s, v4.8h, v14.8h\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v4.4h, v15.4h\n" + "smlal2 v24.4s, v4.8h, v15.8h\n" + "smlal v21.4s, v5.4h, v15.4h\n" + "uaddw v16.8h, v26.8h, v16.8b\n" + "smlal2 v22.4s, v5.8h, v15.8h\n" + "ld1 {v15.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v5.4h, v16.4h\n" + "smlal2 v24.4s, v5.8h, v16.8h\n" + "smlal v21.4s, v6.4h, v17.4h\n" + "smlal2 v22.4s, v6.8h, v17.8h\n" + "ld1 {v17.8b}, [x14], %[input_depth]\n" + "smlal v23.4s, v6.4h, v18.4h\n" + "smlal2 v24.4s, v6.8h, v18.8h\n" + "smlal v21.4s, v7.4h, v18.4h\n" + "smlal2 v22.4s, v7.8h, v18.8h\n" + "ld1 {v18.8b}, [x14], %[input_depth]\n" + "smlal v23.4s, v7.4h, v19.4h\n" + "smlal2 v24.4s, v7.8h, v19.8h\n" + "smlal v21.4s, v8.4h, v19.4h\n" + "uaddw v20.8h, v26.8h, v20.8b\n" + "smlal2 v22.4s, v8.8h, v19.8h\n" + "ld1 {v19.8b}, [x14], %[input_depth]\n" + "smlal 
v23.4s, v8.4h, v20.4h\n" + "smlal2 v24.4s, v8.8h, v20.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "st1 {v21.8b}, [%[output_ptr]], x3\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "st1 {v23.8b}, [%[output_ptr]], x3\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + "uaddw v14.8h, v26.8h, v14.8b\n" + "uaddw v15.8h, v26.8h, v15.8b\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "uaddw v16.8h, v26.8h, v16.8b\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "uaddw v17.8h, v26.8h, v17.8b\n" + "uaddw v18.8h, v26.8h, v18.8b\n" + "uaddw v19.8h, v26.8h, v19.8b\n" + "uaddw v20.8h, 
v26.8h, v20.8b\n" + + "bge " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP "b\n" + + // At this point, there will be one of 2 width or 1 width leftover, + // not both. + "cmp w5, #2\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "f\n" + + // Handle last two horizontal outputs if exists. + DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER ":\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "ld1 {v12.8b}, [x12], %[input_depth]\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "ld1 {v16.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v0.4h, v10.4h\n" + "ld1 {v20.8b}, [x14], %[input_depth]\n" + "smlal2 v24.4s, v0.8h, v10.8h\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "smlal v23.4s, v1.4h, v11.4h\n" + "smlal2 v24.4s, v1.8h, v11.8h\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "smlal v23.4s, v2.4h, v12.4h\n" + "smlal2 v24.4s, v2.8h, v12.8h\n" + "smlal v21.4s, v3.4h, v13.4h\n" + "smlal2 v22.4s, v3.8h, v13.8h\n" + "smlal v23.4s, v3.4h, v14.4h\n" + "smlal2 v24.4s, v3.8h, v14.8h\n" + "smlal v21.4s, v4.4h, v14.4h\n" + "smlal2 v22.4s, v4.8h, v14.8h\n" + "smlal v23.4s, v4.4h, v15.4h\n" + "smlal2 v24.4s, v4.8h, v15.8h\n" + "smlal v21.4s, v5.4h, v15.4h\n" + "uaddw v16.8h, v26.8h, v16.8b\n" + "smlal2 v22.4s, v5.8h, v15.8h\n" + "smlal v23.4s, v5.4h, v16.4h\n" + "smlal2 v24.4s, v5.8h, v16.8h\n" + "smlal v21.4s, v6.4h, v17.4h\n" + "smlal2 v22.4s, v6.8h, v17.8h\n" + "smlal v23.4s, v6.4h, v18.4h\n" + "smlal2 v24.4s, v6.8h, v18.8h\n" + "smlal v21.4s, v7.4h, v18.4h\n" + "smlal2 v22.4s, v7.8h, v18.8h\n" + "smlal v23.4s, v7.4h, v19.4h\n" + "smlal2 v24.4s, v7.8h, v19.8h\n" + "smlal v21.4s, v8.4h, v19.4h\n" + "uaddw v20.8h, v26.8h, v20.8b\n" + "smlal2 v22.4s, v8.8h, v19.8h\n" + "smlal v23.4s, v8.4h, v20.4h\n" + "smlal2 v24.4s, v8.8h, v20.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v25.16b, v21.16b, 
v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v25.4s, v25.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v25.4s\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w4\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w0\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "sqxtn2 v23.8h, v24.4s\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "st1 {v21.8b}, [%[output_ptr]], x3\n" + "st1 {v23.8b}, [%[output_ptr]], x3\n" + "b " DEPTHWISECONV_LABEL_HEIGHT_1_END "f\n" + + // Handle bottom right output if exists. 
+ DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER ":\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "smlal v21.4s, v3.4h, v13.4h\n" + "smlal2 v22.4s, v3.8h, v13.8h\n" + "smlal v21.4s, v4.4h, v14.4h\n" + "smlal2 v22.4s, v4.8h, v14.8h\n" + "smlal v21.4s, v5.4h, v15.4h\n" + "smlal2 v22.4s, v5.8h, v15.8h\n" + "smlal v21.4s, v6.4h, v17.4h\n" + "smlal2 v22.4s, v6.8h, v17.8h\n" + "smlal v21.4s, v7.4h, v18.4h\n" + "smlal2 v22.4s, v7.8h, v18.8h\n" + "smlal v21.4s, v8.4h, v19.4h\n" + "smlal2 v22.4s, v8.8h, v19.8h\n" + + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "and v9.16b, v21.16b, v28.16b\n" + "and v12.16b, v22.16b, v28.16b\n" + "sshr v9.4s, v9.4s, #31\n" + "sshr v12.4s, v12.4s, #31\n" + "sqadd v21.4s, v21.4s, v9.4s\n" + "sqadd v22.4s, v22.4s, v12.4s\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "sqxtun v21.8b, v21.8h\n" + "st1 {v21.8b}, [%[output_ptr]]\n" + DEPTHWISECONV_LABEL_HEIGHT_1_END ":\n" + : + // Outputs. + [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), + [output_window_height] "+r"(output_window_height) + : + // Inputs. + [bias_ptr] "r"(bias_ptr), [input_row_size] "r"(input_row_size), + [input_depth] "r"(input_depth), + [output_window_width] "r"(output_window_width), + [input_width_increment] "r"(input_width_increment), + [input_height_increment] "r"(input_height_increment), + [output_height_increment] "r"(output_height_increment), + [params_ptr] "r"(params_ptr) + : + // Clobbers. + "cc", "memory", + // We use these NEON registers. 
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31", + // We use these general-purpose registers. + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x9", "x10", "x11", "x12", "x13", "x14", "x15"); +#undef DEPTHWISECONV_LABEL_HEIGHT_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_1 +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_1_END } }; template <> -struct ConvKernel3x3FilterDepth8<1, 2, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8; - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, - temp_8; - - const uint8* ptr = input_ptr; - - // Load all inputs for top output. 
- temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Second output. 
- output_ptr += output_depth; - - ptr = input_ptr + 3 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - DotProductAndStore( - filter, input_2, input_0, input_1, input_5, input_3, input_4, input_8, - input_6, input_7, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); +struct DepthwiseConvWindow<8, 2, 2> { + static void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, int64_t input_depth, + int64_t input_row_size, int32 output_window_height, + int32 output_window_width, + const DepthwiseConvParams* params_ptr) { + const int64_t input_width_increment = 4 * input_depth; + const int64_t input_height_increment = 4 * input_row_size; + const int64_t output_height_increment = 2 * params_ptr->output_row_size; + +#define DEPTHWISECONV_LABEL_HEIGHT_2_LOOP "1" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP "2" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "3" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER "4" +#define DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP "5" +#define DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP "6" 
+#define DEPTHWISECONV_LABEL_HEIGHT_1 "7" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP "8" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "9" +#define DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER "10" +#define DEPTHWISECONV_LABEL_HEIGHT_1_END "11" + + asm volatile( + // Performs depthwise convolutions for a window specified by + // |output_window_height| and |output_window_width|. The inner-most loop + // processes 2x2 outputs, and any leftovers at the end. + // + // Algorithm works as follows: + // + // 1. Load filters of 8 depth (8x3x3). Registers v0--v8 hold filter + // values. + // 2. For 2 output heights at a time: + // i. For 2 output widths at a time at stride 2, a 5x5 input + // window is required. To avoid register exhaustion, we load + // the first 2 rows of the 5x5 input window into registers + // v9--v18, and use the same registers to load the next 2 + // rows, and finally v9--v13 to load the last row. + // Accumulators for all 2x2 outputs are reserved by registers + // v21-v22 (top left output), v23-v24 (top right output), + // v19-v20 (bottom left output), v25-v26 (bottom right + // output). + // ii. Handle single leftover width if exists. + // 3. Handle single leftover height if exists. + // i. For 2 output widths at a time at stride 2, load inputs for + // a 1x2 (1 height, 2 width) output window (3x5 input + // window). Registers v9--v24 hold input values. Mul-add with + // accumulators v24--v27. + // ii. Handle single leftover width if exists. + // + // Loads are placed as soon as the register is no longer needed and + // interleaved with arithmetic operations to take advantage of + // dual-issue pipelines. We also add input offsets as far from the loads + // as possible to give loads enough cycles to fetch data from memory. + + // Set "constant" registers. These registers may be replaced with temp + // values from time to time when there are not enough NEON registers. 
+ // We use x9--x15 general purpose registers as they are caller-saved + // temporary registers (see http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf). // NOLINT + "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "ldr w0, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "cmp %w[output_window_height], #2\n" + "dup v28.8h, w0\n" + "neg w9, w9\n" + "ldr w1, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "dup v26.4s, w9\n" + "ldr w2, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w1\n" + "ldr w3, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "dup v29.4s, w2\n" + "ldr w4, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v30.4s, w3\n" + "ldr x5, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + "dup v31.4s, w4\n" + "ldr x19, [%[params_ptr], #" STR(OFFSET_OUTPUT_ROW_SIZE) "]\n" + "ldr w20, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + + // Load filters and add offsets. + "add x10, %[bias_ptr], #16\n" + "ld1 {v0.8b}, [%[filter_ptr]], x5\n" + "dup v9.8h, w20\n" + "ld1 {v1.8b}, [%[filter_ptr]], x5\n" + "uaddw v0.8h, v9.8h, v0.8b\n" + "ld1 {v2.8b}, [%[filter_ptr]], x5\n" + "uaddw v1.8h, v9.8h, v1.8b\n" + "ld1 {v3.8b}, [%[filter_ptr]], x5\n" + "uaddw v2.8h, v9.8h, v2.8b\n" + "ld1 {v4.8b}, [%[filter_ptr]], x5\n" + "uaddw v3.8h, v9.8h, v3.8b\n" + "ld1 {v5.8b}, [%[filter_ptr]], x5\n" + "uaddw v4.8h, v9.8h, v4.8b\n" + "ld1 {v6.8b}, [%[filter_ptr]], x5\n" + "uaddw v5.8h, v9.8h, v5.8b\n" + "ld1 {v7.8b}, [%[filter_ptr]], x5\n" + "uaddw v6.8h, v9.8h, v6.8b\n" + "ld1 {v8.8b}, [%[filter_ptr]]\n" + "uaddw v7.8h, v9.8h, v7.8b\n" + "uaddw v8.8h, v9.8h, v8.8b\n" + + "blt " DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_2_LOOP ":\n" + // Load the first two rows of the 5x5 input window, then reuse the + // same registers to load subsequent rows as they become available. 
+ "mov x11, %[input_ptr]\n" + "mov x12, x11\n" + "add x13, x12, %[input_row_size]\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "mov w14, %w[output_window_width]\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + // The height 2 / width 2 loop loads an extra 1 output horizontally in + // anticipation for the next iteration. Make sure + // |output_window_width| is large enough to handle the additional + // load, otherwise jump to the appropriate label to handle smaller + // widths. + "cmp w14, #2\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "add x15, x13, %[input_row_size]\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "mov x6, %[output_ptr]\n" + "ld1 {v15.8b}, [x13], %[input_depth]\n" + "add x7, %[output_ptr], x19\n" + "ld1 {v16.8b}, [x13], %[input_depth]\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "ld1 {v22.4s}, [x10]\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "ld1 {v24.4s}, [x10]\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "ld1 {v19.4s}, [%[bias_ptr]]\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "ld1 {v20.4s}, [x10]\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "ld1 {v25.4s}, [%[bias_ptr]]\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "ld1 {v26.4s}, [x10]\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + + "beq " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER "f\n" + "cmp w14, #1\n" + "beq " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP ":\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "ld1 {v12.8b}, [x12], %[input_depth]\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "ld1 {v13.8b}, [x12]\n" + "add x12, x15, %[input_row_size]\n" + "smlal v23.4s, v0.4h, v11.4h\n" + "ld1 {v17.8b}, [x13], %[input_depth]\n" + "smlal2 v24.4s, v0.8h, v11.8h\n" + "ld1 {v18.8b}, [x13]\n" + "add x13, x12, %[input_row_size]\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "ld1 {v9.8b}, [x15], %[input_depth]\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "ld1 {v10.8b}, [x15], %[input_depth]\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + 
"ld1 {v11.8b}, [x15], %[input_depth]\n" + "smlal v21.4s, v3.4h, v14.4h\n" + "smlal2 v22.4s, v3.8h, v14.8h\n" + "ld1 {v14.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v3.4h, v16.4h\n" + "subs w14, w14, #2\n" + "smlal2 v24.4s, v3.8h, v16.8h\n" + "cmp w14, #3\n" + "smlal v21.4s, v4.4h, v15.4h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal2 v22.4s, v4.8h, v15.8h\n" + "ld1 {v15.8b}, [x12], %[input_depth]\n" + "smlal v21.4s, v5.4h, v16.4h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal2 v22.4s, v5.8h, v16.8h\n" + "ld1 {v16.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v1.4h, v12.4h\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + "smlal2 v24.4s, v1.8h, v12.8h\n" + "ld1 {v12.8b}, [x15], %[input_depth]\n" + "smlal v23.4s, v2.4h, v13.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v24.4s, v2.8h, v13.8h\n" + "ld1 {v13.8b}, [x15]\n" + "smlal v23.4s, v4.4h, v17.4h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "smlal2 v24.4s, v4.8h, v17.8h\n" + "ld1 {v17.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v5.4h, v18.4h\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "smlal2 v24.4s, v5.8h, v18.8h\n" + "ld1 {v18.8b}, [x12]\n" + + "smlal v21.4s, v6.4h, v9.4h\n" + "smlal2 v22.4s, v6.8h, v9.8h\n" + "smlal v19.4s, v0.4h, v9.4h\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "smlal2 v20.4s, v0.8h, v9.8h\n" + "ld1 {v9.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v6.4h, v11.4h\n" + "smlal2 v24.4s, v6.8h, v11.8h\n" + "smlal v21.4s, v7.4h, v10.4h\n" + "smlal2 v22.4s, v7.8h, v10.8h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal v19.4s, v1.4h, v10.4h\n" + "smlal2 v20.4s, v1.8h, v10.8h\n" + "ld1 {v10.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v7.4h, v12.4h\n" + "smlal2 v24.4s, v7.8h, v12.8h\n" + "smlal v25.4s, v1.4h, v12.4h\n" + "smlal2 v26.4s, v1.8h, v12.8h\n" + "smlal v21.4s, v8.4h, v11.4h\n" + "smlal2 v22.4s, v8.8h, v11.8h\n" + "add x11, x11, %[input_width_increment]\n" + "smlal v19.4s, v2.4h, v11.4h\n" + "mov x12, x11\n" + "smlal2 v20.4s, v2.8h, v11.8h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal v25.4s, v0.4h, 
v11.4h\n" + "smlal2 v26.4s, v0.8h, v11.8h\n" + "ld1 {v11.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v8.4h, v13.4h\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "smlal2 v24.4s, v8.8h, v13.8h\n" + "smlal v25.4s, v2.4h, v13.4h\n" + "smlal2 v26.4s, v2.8h, v13.8h\n" + "ld1 {v13.8b}, [x13]\n" + "add x13, x12, %[input_row_size]\n" + "add x15, x13, %[input_row_size]\n" + + "dup v28.4s, w9\n" + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v27.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v27.4s, v27.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v27.4s\n" + "dup v27.4s, w1\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "dup v28.8h, w0\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "st1 {v21.8b}, [x6], x5\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "st1 {v23.8b}, [x6], x5\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + + "smlal v19.4s, 
v6.4h, v9.4h\n" + "smlal2 v20.4s, v6.8h, v9.8h\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "smlal v25.4s, v6.4h, v11.4h\n" + "smlal2 v26.4s, v6.8h, v11.8h\n" + "smlal v19.4s, v7.4h, v10.4h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal2 v20.4s, v7.8h, v10.8h\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "smlal v25.4s, v7.4h, v12.4h\n" + "smlal2 v26.4s, v7.8h, v12.8h\n" + "smlal v19.4s, v8.4h, v11.4h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal2 v20.4s, v8.8h, v11.8h\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "smlal v25.4s, v8.4h, v13.4h\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "smlal2 v26.4s, v8.8h, v13.8h\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + "smlal v19.4s, v3.4h, v14.4h\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "smlal2 v20.4s, v3.8h, v14.8h\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "smlal v25.4s, v3.4h, v16.4h\n" + "ld1 {v21.4s}, [%[bias_ptr]]\n" + "smlal2 v26.4s, v3.8h, v16.8h\n" + "ld1 {v23.4s}, [%[bias_ptr]]\n" + "smlal v19.4s, v4.4h, v15.4h\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + "smlal2 v20.4s, v4.8h, v15.8h\n" + "ld1 {v15.8b}, [x13], %[input_depth]\n" + "smlal v25.4s, v4.4h, v17.4h\n" + "smlal2 v26.4s, v4.8h, v17.8h\n" + "smlal v19.4s, v5.4h, v16.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v20.4s, v5.8h, v16.8h\n" + "ld1 {v16.8b}, [x13], %[input_depth]\n" + "smlal v25.4s, v5.4h, v18.4h\n" + "smlal2 v26.4s, v5.8h, v18.8h\n" + + "dup v28.4s, w9\n" + "sqrdmulh v19.4s, v19.4s, v27.4s\n" + "sqrdmulh v20.4s, v20.4s, v27.4s\n" + "sqrdmulh v25.4s, v25.4s, v27.4s\n" + "sqrdmulh v26.4s, v26.4s, v27.4s\n" + "and v27.16b, v19.16b, v28.16b\n" + "and v29.16b, v20.16b, v28.16b\n" + "and v30.16b, v25.16b, v28.16b\n" + "and v31.16b, v26.16b, v28.16b\n" + "sshr v27.4s, v27.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v19.4s, v19.4s, v27.4s\n" + "dup v27.4s, w1\n" + "sqadd v20.4s, v20.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v25.4s, v25.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd 
v26.4s, v26.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v19.4s, v19.4s, v28.4s\n" + "srshl v20.4s, v20.4s, v28.4s\n" + "srshl v25.4s, v25.4s, v28.4s\n" + "srshl v26.4s, v26.4s, v28.4s\n" + "dup v28.8h, w0\n" + "add v19.4s, v19.4s, v29.4s\n" + "add v20.4s, v20.4s, v29.4s\n" + "add v25.4s, v25.4s, v29.4s\n" + "add v26.4s, v26.4s, v29.4s\n" + "smax v19.4s, v19.4s, v30.4s\n" + "smax v20.4s, v20.4s, v30.4s\n" + "smax v25.4s, v25.4s, v30.4s\n" + "smax v26.4s, v26.4s, v30.4s\n" + "smin v19.4s, v19.4s, v31.4s\n" + "smin v20.4s, v20.4s, v31.4s\n" + "smin v25.4s, v25.4s, v31.4s\n" + "smin v26.4s, v26.4s, v31.4s\n" + "sqxtn v19.4h, v19.4s\n" + "sqxtn v25.4h, v25.4s\n" + "sqxtn2 v19.8h, v20.4s\n" + "ld1 {v20.4s}, [x10]\n" + "sqxtn2 v25.8h, v26.4s\n" + "ld1 {v26.4s}, [x10]\n" + "sqxtun v19.8b, v19.8h\n" + "sqxtun v25.8b, v25.8h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "st1 {v19.8b}, [x7], x5\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "st1 {v25.8b}, [x7], x5\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "ld1 {v19.4s}, [%[bias_ptr]]\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "ld1 {v25.4s}, [%[bias_ptr]]\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + + "bge " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP "b\n" + + // At this point, there will be one of 2 width or 1 width leftover, + // not both. + "cmp w14, #2\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER "f\n" + + // Handle last 2 columns if exists. 
+ DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER ":\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "ld1 {v12.8b}, [x12], %[input_depth]\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "ld1 {v13.8b}, [x12]\n" + "add x12, x15, %[input_row_size]\n" + "smlal v23.4s, v0.4h, v11.4h\n" + "ld1 {v17.8b}, [x13], %[input_depth]\n" + "smlal2 v24.4s, v0.8h, v11.8h\n" + "ld1 {v18.8b}, [x13]\n" + "add x13, x12, %[input_row_size]\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "ld1 {v9.8b}, [x15], %[input_depth]\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "ld1 {v10.8b}, [x15], %[input_depth]\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "ld1 {v11.8b}, [x15], %[input_depth]\n" + "smlal v21.4s, v3.4h, v14.4h\n" + "smlal2 v22.4s, v3.8h, v14.8h\n" + "ld1 {v14.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v3.4h, v16.4h\n" + "smlal2 v24.4s, v3.8h, v16.8h\n" + "smlal v21.4s, v4.4h, v15.4h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal2 v22.4s, v4.8h, v15.8h\n" + "ld1 {v15.8b}, [x12], %[input_depth]\n" + "smlal v21.4s, v5.4h, v16.4h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal2 v22.4s, v5.8h, v16.8h\n" + "ld1 {v16.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v1.4h, v12.4h\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + "smlal2 v24.4s, v1.8h, v12.8h\n" + "ld1 {v12.8b}, [x15], %[input_depth]\n" + "smlal v23.4s, v2.4h, v13.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v24.4s, v2.8h, v13.8h\n" + "ld1 {v13.8b}, [x15]\n" + "smlal v23.4s, v4.4h, v17.4h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "smlal2 v24.4s, v4.8h, v17.8h\n" + "ld1 {v17.8b}, [x12], %[input_depth]\n" + "smlal v23.4s, v5.4h, v18.4h\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "smlal2 v24.4s, v5.8h, v18.8h\n" + "ld1 {v18.8b}, [x12]\n" + + "smlal v21.4s, v6.4h, v9.4h\n" + "smlal2 v22.4s, v6.8h, v9.8h\n" + "smlal v19.4s, v0.4h, v9.4h\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "smlal2 v20.4s, v0.8h, v9.8h\n" + "ld1 {v9.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v6.4h, v11.4h\n" + "smlal2 v24.4s, v6.8h, v11.8h\n" + "smlal v21.4s, v7.4h, 
v10.4h\n" + "smlal2 v22.4s, v7.8h, v10.8h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal v19.4s, v1.4h, v10.4h\n" + "smlal2 v20.4s, v1.8h, v10.8h\n" + "ld1 {v10.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v7.4h, v12.4h\n" + "smlal2 v24.4s, v7.8h, v12.8h\n" + "smlal v25.4s, v1.4h, v12.4h\n" + "smlal2 v26.4s, v1.8h, v12.8h\n" + "smlal v21.4s, v8.4h, v11.4h\n" + "smlal2 v22.4s, v8.8h, v11.8h\n" + "smlal v19.4s, v2.4h, v11.4h\n" + "smlal2 v20.4s, v2.8h, v11.8h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal v25.4s, v0.4h, v11.4h\n" + "smlal2 v26.4s, v0.8h, v11.8h\n" + "ld1 {v11.8b}, [x13], %[input_depth]\n" + "smlal v23.4s, v8.4h, v13.4h\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "smlal2 v24.4s, v8.8h, v13.8h\n" + "smlal v25.4s, v2.4h, v13.4h\n" + "smlal2 v26.4s, v2.8h, v13.8h\n" + "ld1 {v13.8b}, [x13]\n" + + "dup v28.4s, w9\n" + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v27.16b, v21.16b, v28.16b\n" + "and v29.16b, v22.16b, v28.16b\n" + "and v30.16b, v23.16b, v28.16b\n" + "and v31.16b, v24.16b, v28.16b\n" + "sshr v27.4s, v27.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v21.4s, v21.4s, v27.4s\n" + "dup v27.4s, w1\n" + "sqadd v22.4s, v22.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v23.4s, v23.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd v24.4s, v24.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v21.4s, v21.4s, v28.4s\n" + "srshl v22.4s, v22.4s, v28.4s\n" + "srshl v23.4s, v23.4s, v28.4s\n" + "srshl v24.4s, v24.4s, v28.4s\n" + "dup v28.8h, w0\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "smin v23.4s, 
v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "ld1 {v22.4s}, [x10]\n" + "sqxtn2 v23.8h, v24.4s\n" + "ld1 {v24.4s}, [x10]\n" + "sqxtun v21.8b, v21.8h\n" + "sqxtun v23.8b, v23.8h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "st1 {v21.8b}, [x6], x5\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "st1 {v23.8b}, [x6]\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + + "smlal v19.4s, v6.4h, v9.4h\n" + "smlal2 v20.4s, v6.8h, v9.8h\n" + "smlal v25.4s, v6.4h, v11.4h\n" + "smlal2 v26.4s, v6.8h, v11.8h\n" + "smlal v19.4s, v7.4h, v10.4h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal2 v20.4s, v7.8h, v10.8h\n" + "smlal v25.4s, v7.4h, v12.4h\n" + "smlal2 v26.4s, v7.8h, v12.8h\n" + "smlal v19.4s, v8.4h, v11.4h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "smlal2 v20.4s, v8.8h, v11.8h\n" + "smlal v25.4s, v8.4h, v13.4h\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "smlal2 v26.4s, v8.8h, v13.8h\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + "smlal v19.4s, v3.4h, v14.4h\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "smlal2 v20.4s, v3.8h, v14.8h\n" + "smlal v25.4s, v3.4h, v16.4h\n" + "smlal2 v26.4s, v3.8h, v16.8h\n" + "smlal v19.4s, v4.4h, v15.4h\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + "smlal2 v20.4s, v4.8h, v15.8h\n" + "smlal v25.4s, v4.4h, v17.4h\n" + "smlal2 v26.4s, v4.8h, v17.8h\n" + "smlal v19.4s, v5.4h, v16.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v20.4s, v5.8h, v16.8h\n" + "smlal v25.4s, v5.4h, v18.4h\n" + "smlal2 v26.4s, v5.8h, v18.8h\n" + + "dup v28.4s, w9\n" + "sqrdmulh v19.4s, v19.4s, v27.4s\n" + "sqrdmulh v20.4s, v20.4s, v27.4s\n" + "sqrdmulh v25.4s, v25.4s, v27.4s\n" + "sqrdmulh v26.4s, v26.4s, v27.4s\n" + "and v27.16b, v19.16b, v28.16b\n" + "and v29.16b, v20.16b, v28.16b\n" + "and v30.16b, v25.16b, v28.16b\n" + "and v31.16b, v26.16b, v28.16b\n" + "sshr v27.4s, v27.4s, #31\n" + "sshr v29.4s, v29.4s, #31\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v19.4s, v19.4s, v27.4s\n" + "dup v27.4s, w1\n" 
+ "sqadd v20.4s, v20.4s, v29.4s\n" + "dup v29.4s, w2\n" + "sqadd v25.4s, v25.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd v26.4s, v26.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v19.4s, v19.4s, v28.4s\n" + "srshl v20.4s, v20.4s, v28.4s\n" + "srshl v25.4s, v25.4s, v28.4s\n" + "srshl v26.4s, v26.4s, v28.4s\n" + "dup v28.8h, w0\n" + "add v19.4s, v19.4s, v29.4s\n" + "add v20.4s, v20.4s, v29.4s\n" + "add v25.4s, v25.4s, v29.4s\n" + "add v26.4s, v26.4s, v29.4s\n" + "smax v19.4s, v19.4s, v30.4s\n" + "smax v20.4s, v20.4s, v30.4s\n" + "smax v25.4s, v25.4s, v30.4s\n" + "smax v26.4s, v26.4s, v30.4s\n" + "smin v19.4s, v19.4s, v31.4s\n" + "smin v20.4s, v20.4s, v31.4s\n" + "smin v25.4s, v25.4s, v31.4s\n" + "smin v26.4s, v26.4s, v31.4s\n" + "sqxtn v19.4h, v19.4s\n" + "sqxtn v25.4h, v25.4s\n" + "sqxtn2 v19.8h, v20.4s\n" + "sqxtn2 v25.8h, v26.4s\n" + "sqxtun v19.8b, v19.8h\n" + "sqxtun v25.8b, v25.8h\n" + "st1 {v19.8b}, [x7], x5\n" + "st1 {v25.8b}, [x7]\n" + "b " DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP "f\n" + + // Handle last column if exists. + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER ":\n" + // Registers v9, v10, v11, v14, v15, and v16 have already been loaded + // with the correct values at this point. This corresponds to the + // first two input rows of the top left output. Now load the last + // input row for this output. Once these inputs are no longer needed, + // load the input rows for the bottom left output. 
+ "add x12, x15, %[input_row_size]\n" + "add x13, x12, %[input_row_size]\n" + + "ld1 {v12.8b}, [x15], %[input_depth]\n" + "smlal v21.4s, v0.4h, v9.4h\n" + "ld1 {v13.8b}, [x15], %[input_depth]\n" + "smlal2 v22.4s, v0.8h, v9.8h\n" + "ld1 {v17.8b}, [x15]\n" + "smlal v21.4s, v1.4h, v10.4h\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "smlal2 v22.4s, v1.8h, v10.8h\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "smlal v21.4s, v2.4h, v11.4h\n" + "smlal2 v22.4s, v2.8h, v11.8h\n" + "ld1 {v11.8b}, [x12]\n" + "smlal v21.4s, v3.4h, v14.4h\n" + "smlal2 v22.4s, v3.8h, v14.8h\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "smlal v21.4s, v4.4h, v15.4h\n" + "smlal2 v22.4s, v4.8h, v15.8h\n" + "ld1 {v15.8b}, [x13], %[input_depth]\n" + "smlal v21.4s, v5.4h, v16.4h\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "smlal2 v22.4s, v5.8h, v16.8h\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "ld1 {v16.8b}, [x13]\n" + + "smlal v21.4s, v6.4h, v12.4h\n" + "smlal2 v22.4s, v6.8h, v12.8h\n" + "smlal v23.4s, v0.4h, v12.4h\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + "smlal2 v24.4s, v0.8h, v12.8h\n" + "smlal v21.4s, v7.4h, v13.4h\n" + "smlal2 v22.4s, v7.8h, v13.8h\n" + "smlal v23.4s, v1.4h, v13.4h\n" + "smlal2 v24.4s, v1.8h, v13.8h\n" + "smlal v21.4s, v8.4h, v17.4h\n" + "smlal2 v22.4s, v8.8h, v17.8h\n" + "smlal v23.4s, v2.4h, v17.4h\n" + "smlal2 v24.4s, v2.8h, v17.8h\n" + + "dup v26.4s, w9\n" + "sqrdmulh v21.4s, v21.4s, v27.4s\n" + "sqrdmulh v22.4s, v22.4s, v27.4s\n" + "and v18.16b, v21.16b, v26.16b\n" + "and v19.16b, v22.16b, v26.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v21.4s, v21.4s, v18.4s\n" + "sqadd v22.4s, v22.4s, v19.4s\n" + "srshl v21.4s, v21.4s, v26.4s\n" + "srshl v22.4s, v22.4s, v26.4s\n" + "add v21.4s, v21.4s, v29.4s\n" + "add v22.4s, v22.4s, v29.4s\n" + "smax v21.4s, v21.4s, v30.4s\n" + "smax v22.4s, v22.4s, v30.4s\n" + "smin v21.4s, v21.4s, v31.4s\n" + "smin v22.4s, v22.4s, v31.4s\n" + "sqxtn v21.4h, v21.4s\n" + "sqxtn2 v21.8h, v22.4s\n" + "sqxtun v21.8b, v21.8h\n" + 
"uaddw v9.8h, v28.8h, v9.8b\n" + "st1 {v21.8b}, [x6]\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + + "smlal v23.4s, v3.4h, v9.4h\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "smlal2 v24.4s, v3.8h, v9.8h\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "smlal v23.4s, v4.4h, v10.4h\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "smlal2 v24.4s, v4.8h, v10.8h\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + "smlal v23.4s, v5.4h, v11.4h\n" + "smlal2 v24.4s, v5.8h, v11.8h\n" + + "smlal v23.4s, v6.4h, v14.4h\n" + "smlal2 v24.4s, v6.8h, v14.8h\n" + "smlal v23.4s, v7.4h, v15.4h\n" + "smlal2 v24.4s, v7.8h, v15.8h\n" + "smlal v23.4s, v8.4h, v16.4h\n" + "smlal2 v24.4s, v8.8h, v16.8h\n" + + "sqrdmulh v23.4s, v23.4s, v27.4s\n" + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "and v18.16b, v23.16b, v26.16b\n" + "and v19.16b, v24.16b, v26.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v23.4s, v23.4s, v18.4s\n" + "sqadd v24.4s, v24.4s, v19.4s\n" + "srshl v23.4s, v23.4s, v26.4s\n" + "srshl v24.4s, v24.4s, v26.4s\n" + "add v23.4s, v23.4s, v29.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "smax v23.4s, v23.4s, v30.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smin v23.4s, v23.4s, v31.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "sqxtn v23.4h, v23.4s\n" + "sqxtn2 v23.8h, v24.4s\n" + "sqxtun v23.8b, v23.8h\n" + "st1 {v23.8b}, [x7]\n" + + DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP ":\n" + "subs %w[output_window_height], %w[output_window_height], #2\n" + "add %[input_ptr], %[input_ptr], %[input_height_increment]\n" + "cmp %w[output_window_height], #2\n" + "add %[output_ptr], %[output_ptr], %[output_height_increment]\n" + "bge " DEPTHWISECONV_LABEL_HEIGHT_2_LOOP "b\n" + + DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP ":\n" + "cmp %w[output_window_height], #1\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_1_END "f\n" + + DEPTHWISECONV_LABEL_HEIGHT_1 ":\n" + "mov x11, %[input_ptr]\n" + "mov x12, x11\n" + "add x13, x12, %[input_row_size]\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "add x15, x13, %[input_row_size]\n" + "ld1 
{v10.8b}, [x12], %[input_depth]\n" + "mov x6, %[output_ptr]\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "mov w14, %w[output_window_width]\n" + // The height 1 / width 2 loop loads an extra 1x1 output in anticipation + // for the next iteration. Make sure |output_window_width| is large + // enough to handle the additional load, otherwise jump to the + // appropriate label to handle smaller widths. + "cmp w14, #2\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "ld1 {v15.8b}, [x15], %[input_depth]\n" + "ld1 {v16.8b}, [x15], %[input_depth]\n" + "ld1 {v17.8b}, [x15], %[input_depth]\n" + + "uaddw v9.8h, v28.8h, v9.8b\n" + "ld1 {v24.4s}, [%[bias_ptr]]\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "ld1 {v25.4s}, [x10]\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "ld1 {v26.4s}, [%[bias_ptr]]\n" + "ld1 {v27.4s}, [x10]\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + + "beq " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER "f\n" + "cmp w14, #1\n" + "beq " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP ":\n" + "smlal v24.4s, v0.4h, v9.4h\n" + "ld1 {v18.8b}, [x12], %[input_depth]\n" + "smlal2 v25.4s, v0.8h, v9.8h\n" + "ld1 {v19.8b}, [x12]\n" + "smlal v26.4s, v0.4h, v11.4h\n" + "ld1 {v20.8b}, [x13], %[input_depth]\n" + "smlal2 v27.4s, v0.8h, v11.8h\n" + "ld1 {v21.8b}, [x13]\n" + "smlal v24.4s, v1.4h, v10.4h\n" + "ld1 {v22.8b}, [x15], %[input_depth]\n" + "smlal2 v25.4s, v1.8h, v10.8h\n" + "ld1 {v23.8b}, [x15]\n" + "smlal v24.4s, v2.4h, v11.4h\n" + "subs w14, w14, #2\n" + "smlal2 v25.4s, v2.8h, v11.8h\n" + "cmp w14, #3\n" + "smlal v24.4s, v3.4h, v12.4h\n" + "add x11, x11, %[input_width_increment]\n" + "smlal2 v25.4s, v3.8h, v12.8h\n" + "mov x12, x11\n" + "smlal v26.4s, v3.4h, 
v14.4h\n" + "add x13, x12, %[input_row_size]\n" + "smlal2 v27.4s, v3.8h, v14.8h\n" + "add x15, x13, %[input_row_size]\n" + "smlal v24.4s, v4.4h, v13.4h\n" + "ld1 {v9.8b}, [x12], %[input_depth]\n" + "smlal2 v25.4s, v4.8h, v13.8h\n" + "ld1 {v10.8b}, [x12], %[input_depth]\n" + "smlal v24.4s, v5.4h, v14.4h\n" + "ld1 {v11.8b}, [x12], %[input_depth]\n" + "smlal2 v25.4s, v5.8h, v14.8h\n" + "ld1 {v12.8b}, [x13], %[input_depth]\n" + "smlal v24.4s, v6.4h, v15.4h\n" + "ld1 {v13.8b}, [x13], %[input_depth]\n" + "smlal2 v25.4s, v6.8h, v15.8h\n" + "ld1 {v14.8b}, [x13], %[input_depth]\n" + "smlal v26.4s, v6.4h, v17.4h\n" + "ld1 {v15.8b}, [x15], %[input_depth]\n" + "smlal2 v27.4s, v6.8h, v17.8h\n" + "smlal v24.4s, v7.4h, v16.4h\n" + "smlal2 v25.4s, v7.8h, v16.8h\n" + "ld1 {v16.8b}, [x15], %[input_depth]\n" + "smlal v24.4s, v8.4h, v17.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v25.4s, v8.8h, v17.8h\n" + "ld1 {v17.8b}, [x15], %[input_depth]\n" + "uaddw v19.8h, v28.8h, v19.8b\n" + + "smlal v26.4s, v1.4h, v18.4h\n" + "uaddw v20.8h, v28.8h, v20.8b\n" + "smlal2 v27.4s, v1.8h, v18.8h\n" + "smlal v26.4s, v2.4h, v19.4h\n" + "uaddw v21.8h, v28.8h, v21.8b\n" + "smlal2 v27.4s, v2.8h, v19.8h\n" + "smlal v26.4s, v4.4h, v20.4h\n" + "smlal v26.4s, v5.4h, v21.4h\n" + "smlal2 v27.4s, v4.8h, v20.8h\n" + "uaddw v22.8h, v28.8h, v22.8b\n" + "smlal2 v27.4s, v5.8h, v21.8h\n" + "uaddw v23.8h, v28.8h, v23.8b\n" + "smlal v26.4s, v7.4h, v22.4h\n" + "smlal2 v27.4s, v7.8h, v22.8h\n" + "smlal v26.4s, v8.4h, v23.4h\n" + "smlal2 v27.4s, v8.8h, v23.8h\n" + + "dup v28.4s, w1\n" + "dup v29.4s, w9\n" + "sqrdmulh v24.4s, v24.4s, v28.4s\n" + "sqrdmulh v25.4s, v25.4s, v28.4s\n" + "sqrdmulh v26.4s, v26.4s, v28.4s\n" + "sqrdmulh v27.4s, v27.4s, v28.4s\n" + "dup v28.4s, w2\n" + "and v30.16b, v24.16b, v29.16b\n" + "and v31.16b, v25.16b, v29.16b\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v24.4s, v24.4s, v30.4s\n" + "sqadd v25.4s, v25.4s, v31.4s\n" + "and v30.16b, v26.16b, v29.16b\n" + 
"and v31.16b, v27.16b, v29.16b\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v26.4s, v26.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd v27.4s, v27.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v24.4s, v24.4s, v29.4s\n" + "srshl v25.4s, v25.4s, v29.4s\n" + "srshl v26.4s, v26.4s, v29.4s\n" + "srshl v27.4s, v27.4s, v29.4s\n" + "add v24.4s, v24.4s, v28.4s\n" + "add v25.4s, v25.4s, v28.4s\n" + "add v26.4s, v26.4s, v28.4s\n" + "add v27.4s, v27.4s, v28.4s\n" + "dup v28.8h, w0\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smax v25.4s, v25.4s, v30.4s\n" + "smax v26.4s, v26.4s, v30.4s\n" + "smax v27.4s, v27.4s, v30.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "smin v25.4s, v25.4s, v31.4s\n" + "smin v26.4s, v26.4s, v31.4s\n" + "smin v27.4s, v27.4s, v31.4s\n" + "sqxtn v24.4h, v24.4s\n" + "sqxtn v26.4h, v26.4s\n" + "sqxtn2 v24.8h, v25.4s\n" + "ld1 {v25.4s}, [x10]\n" + "sqxtn2 v26.8h, v27.4s\n" + "ld1 {v27.4s}, [x10]\n" + "sqxtun v24.8b, v24.8h\n" + "sqxtun v26.8b, v26.8h\n" + "uaddw v9.8h, v28.8h, v9.8b\n" + "st1 {v24.8b}, [x6], x5\n" + "uaddw v10.8h, v28.8h, v10.8b\n" + "st1 {v26.8b}, [x6], x5\n" + "uaddw v11.8h, v28.8h, v11.8b\n" + "uaddw v12.8h, v28.8h, v12.8b\n" + "uaddw v13.8h, v28.8h, v13.8b\n" + "uaddw v14.8h, v28.8h, v14.8b\n" + "ld1 {v24.4s}, [%[bias_ptr]]\n" + "uaddw v15.8h, v28.8h, v15.8b\n" + "ld1 {v26.4s}, [%[bias_ptr]]\n" + "uaddw v16.8h, v28.8h, v16.8b\n" + "uaddw v17.8h, v28.8h, v17.8b\n" + + "bge " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP "b\n" + + // At this point, there will be one of 2 width or 1 width leftover, + // not both. + "cmp w14, #2\n" + "blt " DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER "f\n" + + // Handle last two horizontal outputs if exists. 
+ DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER ":\n" + "smlal v24.4s, v0.4h, v9.4h\n" + "ld1 {v18.8b}, [x12], %[input_depth]\n" + "smlal2 v25.4s, v0.8h, v9.8h\n" + "ld1 {v19.8b}, [x12]\n" + "smlal v26.4s, v0.4h, v11.4h\n" + "ld1 {v20.8b}, [x13], %[input_depth]\n" + "smlal2 v27.4s, v0.8h, v11.8h\n" + "ld1 {v21.8b}, [x13]\n" + "smlal v24.4s, v1.4h, v10.4h\n" + "ld1 {v22.8b}, [x15], %[input_depth]\n" + "smlal2 v25.4s, v1.8h, v10.8h\n" + "ld1 {v23.8b}, [x15]\n" + "smlal v24.4s, v2.4h, v11.4h\n" + "smlal2 v25.4s, v2.8h, v11.8h\n" + "smlal v24.4s, v3.4h, v12.4h\n" + "smlal2 v25.4s, v3.8h, v12.8h\n" + "smlal v26.4s, v3.4h, v14.4h\n" + "smlal2 v27.4s, v3.8h, v14.8h\n" + "smlal v24.4s, v4.4h, v13.4h\n" + "smlal2 v25.4s, v4.8h, v13.8h\n" + "smlal v24.4s, v5.4h, v14.4h\n" + "smlal2 v25.4s, v5.8h, v14.8h\n" + "smlal v24.4s, v6.4h, v15.4h\n" + "smlal2 v25.4s, v6.8h, v15.8h\n" + "smlal v26.4s, v6.4h, v17.4h\n" + "smlal2 v27.4s, v6.8h, v17.8h\n" + "smlal v24.4s, v7.4h, v16.4h\n" + "smlal2 v25.4s, v7.8h, v16.8h\n" + "smlal v24.4s, v8.4h, v17.4h\n" + "uaddw v18.8h, v28.8h, v18.8b\n" + "smlal2 v25.4s, v8.8h, v17.8h\n" + "uaddw v19.8h, v28.8h, v19.8b\n" + + "smlal v26.4s, v1.4h, v18.4h\n" + "uaddw v20.8h, v28.8h, v20.8b\n" + "smlal2 v27.4s, v1.8h, v18.8h\n" + "smlal v26.4s, v2.4h, v19.4h\n" + "uaddw v21.8h, v28.8h, v21.8b\n" + "smlal2 v27.4s, v2.8h, v19.8h\n" + "smlal v26.4s, v4.4h, v20.4h\n" + "smlal v26.4s, v5.4h, v21.4h\n" + "smlal2 v27.4s, v4.8h, v20.8h\n" + "uaddw v22.8h, v28.8h, v22.8b\n" + "smlal2 v27.4s, v5.8h, v21.8h\n" + "uaddw v23.8h, v28.8h, v23.8b\n" + "smlal v26.4s, v7.4h, v22.4h\n" + "smlal2 v27.4s, v7.8h, v22.8h\n" + "smlal v26.4s, v8.4h, v23.4h\n" + "smlal2 v27.4s, v8.8h, v23.8h\n" + + "dup v28.4s, w1\n" + "dup v29.4s, w9\n" + "sqrdmulh v24.4s, v24.4s, v28.4s\n" + "sqrdmulh v25.4s, v25.4s, v28.4s\n" + "sqrdmulh v26.4s, v26.4s, v28.4s\n" + "sqrdmulh v27.4s, v27.4s, v28.4s\n" + "dup v28.4s, w2\n" + "and v30.16b, v24.16b, v29.16b\n" + "and v31.16b, v25.16b, v29.16b\n" 
+ "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v24.4s, v24.4s, v30.4s\n" + "sqadd v25.4s, v25.4s, v31.4s\n" + "and v30.16b, v26.16b, v29.16b\n" + "and v31.16b, v27.16b, v29.16b\n" + "sshr v30.4s, v30.4s, #31\n" + "sshr v31.4s, v31.4s, #31\n" + "sqadd v26.4s, v26.4s, v30.4s\n" + "dup v30.4s, w3\n" + "sqadd v27.4s, v27.4s, v31.4s\n" + "dup v31.4s, w4\n" + "srshl v24.4s, v24.4s, v29.4s\n" + "srshl v25.4s, v25.4s, v29.4s\n" + "srshl v26.4s, v26.4s, v29.4s\n" + "srshl v27.4s, v27.4s, v29.4s\n" + "add v24.4s, v24.4s, v28.4s\n" + "add v25.4s, v25.4s, v28.4s\n" + "add v26.4s, v26.4s, v28.4s\n" + "add v27.4s, v27.4s, v28.4s\n" + "dup v28.8h, w0\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smax v25.4s, v25.4s, v30.4s\n" + "smax v26.4s, v26.4s, v30.4s\n" + "smax v27.4s, v27.4s, v30.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "smin v25.4s, v25.4s, v31.4s\n" + "smin v26.4s, v26.4s, v31.4s\n" + "smin v27.4s, v27.4s, v31.4s\n" + "sqxtn v24.4h, v24.4s\n" + "sqxtn v26.4h, v26.4s\n" + "sqxtn2 v24.8h, v25.4s\n" + "sqxtn2 v26.8h, v27.4s\n" + "sqxtun v24.8b, v24.8h\n" + "sqxtun v26.8b, v26.8h\n" + "st1 {v24.8b}, [x6], x5\n" + "st1 {v26.8b}, [x6]\n" + "b " DEPTHWISECONV_LABEL_HEIGHT_1_END "f\n" + + // Handle bottom right output if exists. 
+ DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER ":\n" + "dup v26.4s, w9\n" + "dup v27.4s, w1\n" + "dup v29.4s, w2\n" + + "smlal v24.4s, v0.4h, v9.4h\n" + "smlal2 v25.4s, v0.8h, v9.8h\n" + "smlal v24.4s, v1.4h, v10.4h\n" + "smlal2 v25.4s, v1.8h, v10.8h\n" + "smlal v24.4s, v2.4h, v11.4h\n" + "smlal2 v25.4s, v2.8h, v11.8h\n" + "smlal v24.4s, v3.4h, v12.4h\n" + "smlal2 v25.4s, v3.8h, v12.8h\n" + "smlal v24.4s, v4.4h, v13.4h\n" + "smlal2 v25.4s, v4.8h, v13.8h\n" + "smlal v24.4s, v5.4h, v14.4h\n" + "smlal2 v25.4s, v5.8h, v14.8h\n" + "smlal v24.4s, v6.4h, v15.4h\n" + "smlal2 v25.4s, v6.8h, v15.8h\n" + "smlal v24.4s, v7.4h, v16.4h\n" + "smlal2 v25.4s, v7.8h, v16.8h\n" + "smlal v24.4s, v8.4h, v17.4h\n" + "smlal2 v25.4s, v8.8h, v17.8h\n" + + "sqrdmulh v24.4s, v24.4s, v27.4s\n" + "sqrdmulh v25.4s, v25.4s, v27.4s\n" + "and v18.16b, v24.16b, v26.16b\n" + "and v19.16b, v25.16b, v26.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v24.4s, v24.4s, v18.4s\n" + "sqadd v25.4s, v25.4s, v19.4s\n" + "srshl v24.4s, v24.4s, v26.4s\n" + "srshl v25.4s, v25.4s, v26.4s\n" + "add v24.4s, v24.4s, v29.4s\n" + "add v25.4s, v25.4s, v29.4s\n" + "smax v24.4s, v24.4s, v30.4s\n" + "smax v25.4s, v25.4s, v30.4s\n" + "smin v24.4s, v24.4s, v31.4s\n" + "smin v25.4s, v25.4s, v31.4s\n" + "sqxtn v24.4h, v24.4s\n" + "sqxtn2 v24.8h, v25.4s\n" + "sqxtun v24.8b, v24.8h\n" + "st1 {v24.8b}, [x6]\n" + + DEPTHWISECONV_LABEL_HEIGHT_1_END ":\n" + : + // Outputs. + [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), + [output_window_height] "+r"(output_window_height) + : + // Inputs. + [bias_ptr] "r"(bias_ptr), [input_row_size] "r"(input_row_size), + [input_depth] "r"(input_depth), + [output_window_width] "r"(output_window_width), + [input_width_increment] "r"(input_width_increment), + [input_height_increment] "r"(input_height_increment), + [output_height_increment] "r"(output_height_increment), + [params_ptr] "r"(params_ptr) + : + // Clobbers. 
+ "cc", "memory", + // We use these NEON registers. + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31", + // We use these general-purpose registers. + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x19", "x20"); +#undef DEPTHWISECONV_LABEL_HEIGHT_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_1_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_2_WIDTH_2_AFTER_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_2_AFTER_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_1 +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LOOP +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_1_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_1_WIDTH_2_LEFTOVER +#undef DEPTHWISECONV_LABEL_HEIGHT_1_END } }; -template <> -struct ConvKernel3x3FilterDepth8<1, 4, 2, 2> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8; - uint8x8_t temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, - temp_8; - +#undef OFFSET_INPUT_DEPTH +#undef OFFSET_INPUT_ROW_SIZE +#undef OFFSET_OUTPUT_DEPTH +#undef OFFSET_OUTPUT_ROW_SIZE +#undef OFFSET_INPUT_OFFSET +#undef OFFSET_OUTPUT_OFFSET +#undef OFFSET_FILTER_OFFSET +#undef OFFSET_OUTPUT_MULTIPLIER +#undef 
OFFSET_OUTPUT_ACTIVATION_MIN +#undef OFFSET_OUTPUT_ACTIVATION_MAX +#undef OFFSET_OUTPUT_SHIFT +#undef OFFSET_INPUT_WIDTH +#undef OFFSET_INPUT_HEIGHT +#undef OFFSET_OUTPUT_WIDTH +#undef OFFSET_OUTPUT_HEIGHT +#undef STR +#undef STR_UNEXPANDED + +// Copies a subset of the input designated by |input_ptr| into |output_ptr| +// with the specified output dimensions. Supports output depths of 64 only as +// this is the cache line size. +inline void ShuffleInput(const uint8* input_ptr, int64_t input_depth, + int32 input_width, int32 input_height, + int64_t output_depth, int32 output_width, + int32 output_height, uint8* output_ptr) { + const int64_t input_row_size = input_depth * input_width; + for (int32 y = 0; y < output_height; y++) { const uint8* ptr = input_ptr; - - // Load all inputs for top output. - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - temp_2 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - temp_5 = vld1_u8(ptr + 2 * input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - temp_8 = vld1_u8(ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, 
input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Second output. - output_ptr += output_depth; - - ptr = input_ptr + 3 * input_depth; - temp_0 = vld1_u8(ptr); - temp_1 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_3 = vld1_u8(ptr); - temp_4 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_6 = vld1_u8(ptr); - temp_7 = vld1_u8(ptr + input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - - DotProductAndStore( - filter, input_2, input_0, input_1, input_5, input_3, input_4, input_8, - input_6, input_7, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Third output. 
- output_ptr += output_depth; - - ptr = input_ptr + 5 * input_depth; - temp_2 = vld1_u8(ptr); - temp_0 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_5 = vld1_u8(ptr); - temp_3 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_8 = vld1_u8(ptr); - temp_6 = vld1_u8(ptr + input_depth); - - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - - input_2 = vaddq_s16(input_2, input_offset_vec); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - - DotProductAndStore( - filter, input_1, input_2, input_0, input_4, input_5, input_3, input_7, - input_8, input_6, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - - // Fourth output. 
- output_ptr += output_depth; - - ptr = input_ptr + 7 * input_depth; - temp_1 = vld1_u8(ptr); - temp_2 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_4 = vld1_u8(ptr); - temp_5 = vld1_u8(ptr + input_depth); - ptr += input_row_size; - temp_7 = vld1_u8(ptr); - temp_8 = vld1_u8(ptr + input_depth); - - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - } -}; - -template -struct ConvKernel3x3FilterDepth8<1, 1, kFixedStrideWidth, kFixedStrideHeight> { - static inline void Run(const uint8* input_ptr, int input_depth, - int32 input_offset, int input_row_size, - const uint8* filter_ptr, int32 filter_offset, - const int32* bias_ptr, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_ptr, - int output_depth, int output_width) { - Filter3x3x8 filter = Load3x3Filter(filter_ptr, filter_offset, output_depth); - - int16x8_t input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8; - - uint8x8_t temp_0 = vld1_u8(input_ptr); - uint8x8_t temp_1 = vld1_u8(input_ptr + input_depth); - uint8x8_t temp_2 = vld1_u8(input_ptr + 2 * input_depth); - - input_ptr += 
input_row_size; - uint8x8_t temp_3 = vld1_u8(input_ptr); - uint8x8_t temp_4 = vld1_u8(input_ptr + input_depth); - uint8x8_t temp_5 = vld1_u8(input_ptr + 2 * input_depth); - - input_ptr += input_row_size; - uint8x8_t temp_6 = vld1_u8(input_ptr); - uint8x8_t temp_7 = vld1_u8(input_ptr + input_depth); - uint8x8_t temp_8 = vld1_u8(input_ptr + 2 * input_depth); - - input_0 = vreinterpretq_s16_u16(vmovl_u8(temp_0)); - input_1 = vreinterpretq_s16_u16(vmovl_u8(temp_1)); - input_2 = vreinterpretq_s16_u16(vmovl_u8(temp_2)); - input_3 = vreinterpretq_s16_u16(vmovl_u8(temp_3)); - input_4 = vreinterpretq_s16_u16(vmovl_u8(temp_4)); - input_5 = vreinterpretq_s16_u16(vmovl_u8(temp_5)); - input_6 = vreinterpretq_s16_u16(vmovl_u8(temp_6)); - input_7 = vreinterpretq_s16_u16(vmovl_u8(temp_7)); - input_8 = vreinterpretq_s16_u16(vmovl_u8(temp_8)); - - const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); - input_0 = vaddq_s16(input_0, input_offset_vec); - input_1 = vaddq_s16(input_1, input_offset_vec); - input_2 = vaddq_s16(input_2, input_offset_vec); - input_3 = vaddq_s16(input_3, input_offset_vec); - input_4 = vaddq_s16(input_4, input_offset_vec); - input_5 = vaddq_s16(input_5, input_offset_vec); - input_6 = vaddq_s16(input_6, input_offset_vec); - input_7 = vaddq_s16(input_7, input_offset_vec); - input_8 = vaddq_s16(input_8, input_offset_vec); - - DotProductAndStore( - filter, input_0, input_1, input_2, input_3, input_4, input_5, input_6, - input_7, input_8, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, output_ptr); - } -}; - -inline void ShuffleInput(const uint8* input_ptr, int input_depth, - int input_width, int input_height, int output_depth, - int output_width, int output_height, - uint8* output_ptr) { - const int input_row_size = input_depth * input_width; - - for (int y = 0; y < output_height; y++) { - const uint8* ptr = input_ptr; - for (int x = 0; x < output_width; x++) { + for (int32 x = 0; x < output_width; 
x++) { memcpy(output_ptr, ptr, output_depth); output_ptr += output_depth; ptr += input_depth; @@ -3873,552 +2214,155 @@ inline void ShuffleInput(const uint8* input_ptr, int input_depth, } } -template -struct ConvRow3x3FilterDepth8 {}; - -template -struct ConvRow3x3FilterDepth8<1, kFixedStrideWidth, kFixedStrideHeight> { - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, - uint8* shuffle_workspace) { - int out_x = start_x; - - // 1x4 at a time. - for (; out_x <= output_width - 4; out_x += 4) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<1, 4, kFixedStrideWidth, kFixedStrideHeight>:: - Run(input_ptr, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 4 * kFixedStrideWidth * input_depth; - output_data += 4 * output_depth; - } - - // 1x1 at a time. 
- for (; out_x < output_width; out_x++) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<1, 1, kFixedStrideWidth, kFixedStrideHeight>:: - Run(input_ptr, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } +// Calculates the input size depending on stride and output. +inline int32 get_shuffle_input_size(int32 stride, int32 output) { + return stride * (output - 1) + 3; +} - input_data += kFixedStrideWidth * input_depth; - output_data += output_depth; - } +// Indicates the input and output dimensions used when shuffling input +// activations. +struct ShuffleParams { + int32 output_width; + int32 output_height; + int32 input_width; + int32 input_height; + + ShuffleParams() = default; + ShuffleParams(int32 output_width, int32 output_height, int32 stride_width, + int32 stride_height) + : output_width(output_width) + , output_height(output_height) + , input_width(get_shuffle_input_size(stride_width, output_width)) + , input_height(get_shuffle_input_size(stride_height, output_height)) { } }; -template -struct ConvRow3x3FilterDepth8<2, kFixedStrideWidth, kFixedStrideHeight> { - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, - uint8* shuffle_workspace) { - 
int out_x = start_x; - - // 2x4 at a time. - for (; out_x <= output_width - 4; out_x += 4) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<2, 4, kFixedStrideWidth, kFixedStrideHeight>:: - Run(input_ptr, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 4 * kFixedStrideWidth * input_depth; - output_data += 4 * output_depth; - } - - // 2x2 at a time. - for (; out_x <= output_width - 2; out_x += 2) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<2, 2, kFixedStrideWidth, kFixedStrideHeight>:: - Run(input_ptr, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 2 * kFixedStrideWidth * input_depth; - output_data += 2 * output_depth; - } - - // 2x1 at a time. 
- for (; out_x < output_width; out_x++) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<2, 1, kFixedStrideWidth, kFixedStrideHeight>:: - Run(input_ptr, input_depth, input_offset, input_row_size, - filter_ptr, filter_offset, bias_ptr, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += kFixedStrideWidth * input_depth; - output_data += output_depth; +template +struct DepthwiseConvThroughDepth { + // Runs the DepthwiseConvWindow kernels through the depth dimension from + // |start_depth| to |end_depth|. Keep this not inlined to maintain a small + // binary size. We use a DepthwiseConvParams struct for read only params + // to minimize call overhead. 
+ static __attribute__((noinline)) void Run(const uint8* input_ptr, + const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, + int64_t start_depth, int64_t end_depth, int64_t input_depth, + int64_t input_row_size, int32 output_window_height, + int32 output_window_width, const DepthwiseConvParams& params) { + for (; start_depth <= end_depth - 8; start_depth += 8) { + DepthwiseConvWindow<8, kStrideWidth, kStrideHeight>::Run( + input_ptr, filter_ptr, bias_ptr, output_ptr, input_depth, + input_row_size, output_window_height, output_window_width, ¶ms); + input_ptr += 8; + output_ptr += 8; + filter_ptr += 8; + bias_ptr += 8; } } }; -template <> -struct ConvRow3x3FilterDepth8<4, 1, 1> { - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, - uint8* shuffle_workspace) { - int out_x = start_x; - - // 4x4 at a time. - for (; out_x <= output_width - 4; out_x += 4) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 4, 1, 1>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 4 * input_depth; - output_data += 4 * output_depth; - } - - // Handle the rest of the right side. - // 4x2 at a time. 
- for (; out_x <= output_width - 2; out_x += 2) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 2, 1, 1>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 2 * input_depth; - output_data += 2 * output_depth; - } - - // 4x1 at a time. - for (; out_x < output_width; out_x++) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 1, 1, 1>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += input_depth; - output_data += output_depth; - } - } -}; +template +struct DepthwiseConvMultiRow { + using ConvKernel = DepthwiseConvThroughDepth; -template <> -struct ConvRow3x3FilterDepth8<4, 2, 2> { - // The buffer size of the shuffled input. 
- static inline constexpr int ShuffleWorkspaceSize() { return 64 * 9 * 9; } - - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, + static inline void Run(const uint8* input_data, int32 start_x, int32 start_y, + const uint8* filter_data, const int32* bias_data, + uint8* output_data, const DepthwiseConvParams& params, + const ShuffleParams& shuffle_params, uint8* shuffle_workspace) { - // Branch and cache misses increase substantially with stride 2 kernels. - // Adding prefetching reduces latency by as much as 2x. - const int i0 = 0; - const int i1 = input_depth; - const int i2 = 2 * input_depth; - const int i3 = 3 * input_depth; - const int i4 = 4 * input_depth; - const int i5 = 5 * input_depth; - const int i6 = 6 * input_depth; - const int i7 = 7 * input_depth; - const int i8 = 8 * input_depth; - -#define DEPTHWISECONV_PRELOAD_ROW(input_ptr, i) \ - preload_l1_keep(input_ptr + i * input_row_size + i0); \ - preload_l1_keep(input_ptr + i * input_row_size + i1); \ - preload_l1_keep(input_ptr + i * input_row_size + i2); \ - preload_l1_keep(input_ptr + i * input_row_size + i3); \ - preload_l1_keep(input_ptr + i * input_row_size + i4); \ - preload_l1_keep(input_ptr + i * input_row_size + i5); \ - preload_l1_keep(input_ptr + i * input_row_size + i6); \ - preload_l1_keep(input_ptr + i * input_row_size + i7); \ - preload_l1_keep(input_ptr + i * input_row_size + i8); - - int out_x = start_x; - // 4x4 at a time. 
- for (; out_x <= output_width - 4; out_x += 4) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - int depth = 0; - for (; depth <= output_depth - 64; depth += 64) { - // Preload 9x9 input. - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 0); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 1); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 2); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 3); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 4); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 5); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 6); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 7); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 8); - - // For a large input window (64x9x9) that is small enough to fit in L1 - // cache, copy the input into a separate buffer and run the kernel on - // this new buffer. This reduces the likelihood of cache misses when - // the kernel is loading input data. If this size is ever changed, - // update the ShuffleWorkspaceSize() function to return the new size. 
- ShuffleInput(input_ptr, input_depth, input_width, input_height, 64, 9, - 9, shuffle_workspace); - const uint8* shuffled_ptr = &shuffle_workspace[0]; - - for (int micro_depth = 0; micro_depth <= 64 - 8; micro_depth += 8) { - ConvKernel3x3FilterDepth8<4, 4, 2, 2>::Run( - shuffled_ptr, 64, input_offset, 64 * 9, filter_ptr, filter_offset, - bias_ptr, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, - output_depth, output_width); - - shuffled_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; + TFLITE_DCHECK(shuffle_params.input_height == + get_shuffle_input_size(kStrideHeight, shuffle_params.output_height)); + TFLITE_DCHECK(shuffle_params.input_width == + get_shuffle_input_size(kStrideWidth, shuffle_params.output_width)); + TFLITE_DCHECK(64 * shuffle_params.input_width * shuffle_params.input_height + <= DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE); + + int32 out_x = start_x; + + // Run shuffling on inputs with sufficiently large depth and width. When + // these parameters are large enough, more time is taken to load inputs + // from memory. At this point, it becomes useful to prefetch and + // preshuffle the input data to maximize locality. + if (params.output_depth > 64 || + (params.output_depth <= 64 && params.input_width > 150)) { + for (; out_x <= (params.output_width - shuffle_params.output_width); + out_x += shuffle_params.output_width) { + const uint8* input_ptr = input_data; + const int32* bias_ptr = bias_data; + const uint8* filter_ptr = filter_data; + uint8* output_ptr = output_data; + int64_t depth = 0; + const int64_t shuffle_row_size = 64 * shuffle_params.input_width; + + for (; depth <= params.output_depth - 64; depth += 64) { + // Preload. 
+ const uint8* h_ptr = input_ptr; + for (int32 i = 0; i < shuffle_params.input_height; i++) { + const uint8* ptr = h_ptr; + for (int32 j = 0; j < shuffle_params.input_width; j++) { + asm volatile("prfm pldl1keep, [%[ptr]]\n" ::[ptr] "r"(ptr) :); + ptr += params.input_depth; + } + h_ptr += params.input_row_size; + } + + // For a large enough input, shuffle into buckets. + ShuffleInput(input_ptr, params.input_depth, params.input_width, + params.input_height, 64, shuffle_params.input_width, + shuffle_params.input_height, shuffle_workspace); + ConvKernel::Run(shuffle_workspace, filter_ptr, bias_ptr, output_ptr, + 0, 64, 64, shuffle_row_size, + shuffle_params.output_height, + shuffle_params.output_width, params); + input_ptr += 64; + output_ptr += 64; + filter_ptr += 64; + bias_ptr += 64; } - input_ptr += 64; - } - // Preload 9x9 input one more time for the rest of the depth. - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 0); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 1); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 2); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 3); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 4); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 5); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 6); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 7); - DEPTHWISECONV_PRELOAD_ROW(input_ptr, 8); - - for (; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 4, 2, 2>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - - input_data += 4 * 2 * input_depth; - output_data += 4 * output_depth; - } - -#undef DEPTHWISECONV_PRELOAD_ROW - - // Handle the rest of the right side. - // 4x2 at a time. 
- for (; out_x <= output_width - 2; out_x += 2) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 2, 2, 2>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } + // Preload. + const uint8* h_ptr = input_ptr; + for (int32 i = 0; i < shuffle_params.input_height; i++) { + const uint8* ptr = h_ptr; + for (int32 j = 0; j < shuffle_params.input_width; j++) { + asm volatile("prfm pldl1keep, [%[ptr]]\n" ::[ptr] "r"(ptr) :); + ptr += params.input_depth; + } + h_ptr += params.input_row_size; + } - input_data += 2 * 2 * input_depth; - output_data += 2 * output_depth; - } + // Handle leftover depth. + ConvKernel::Run(input_ptr, filter_ptr, bias_ptr, output_ptr, + depth, params.output_depth, params.input_depth, + params.input_row_size, shuffle_params.output_height, + shuffle_params.output_width, params); - // 4x1 at a time. 
- for (; out_x < output_width; out_x++) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - for (int depth = 0; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<4, 1, 2, 2>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; + input_data += + shuffle_params.output_width * kStrideWidth * params.input_depth; + output_data += shuffle_params.output_width * params.output_depth; } - - input_data += 2 * input_depth; - output_data += output_depth; } - } -}; - -template <> -struct ConvRow3x3FilterDepth8<8, 2, 2> { - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, - uint8* shuffle_workspace) { - // Reuse 4 row kernels twice. 
- ConvRow3x3FilterDepth8<4, 2, 2>::Run( - input_data, start_x, start_y, input_depth, input_width, input_height, - input_row_size, input_offset, filter_data, filter_offset, bias_data, - output_offset, output_multiplier, output_shift, output_activation_min, - output_activation_max, output_data, output_depth, output_width, - shuffle_workspace); - - ConvRow3x3FilterDepth8<4, 2, 2>::Run( - input_data + 2 * 4 * input_row_size, start_x, start_y + 4, input_depth, - input_width, input_height, input_row_size, input_offset, filter_data, - filter_offset, bias_data, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data + 4 * output_depth * output_width, output_depth, - output_width, shuffle_workspace); - } -}; - -template <> -struct ConvRow3x3FilterDepth8<8, 1, 1> { - // The buffer size of the shuffled input. - static inline constexpr int ShuffleWorkspaceSize() { return 64 * 10 * 10; } - - static inline void Run(const uint8* input_data, int start_x, int start_y, - int input_depth, int input_width, int input_height, - int input_row_size, int32 input_offset, - const uint8* filter_data, int32 filter_offset, - const int32* bias_data, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - int output_depth, int output_width, - uint8* shuffle_workspace) { - int out_x = start_x; - // 8x8 at a time. - for (; out_x <= output_width - 8; out_x += 8) { - const int32* bias_ptr = bias_data; - const uint8* filter_ptr = filter_data; - - const uint8* input_ptr = input_data; - uint8* output_ptr = output_data; - - int depth = 0; - for (; depth <= output_depth - 64; depth += 64) { - // For a large input window (64x10x10) that is small enough to fit in L1 - // cache, copy the input into a separate buffer and run the kernel on - // this new buffer. This reduces the likelihood of cache misses when - // the kernel is loading input data. 
If the size of the input window - // changes, update the function ShuffleWorkspaceSize() with the new - // size. - ShuffleInput(input_ptr, input_depth, input_width, input_height, 64, 10, - 10, shuffle_workspace); - const uint8* shuffled_ptr = shuffle_workspace; - - for (int micro_depth = 0; micro_depth <= 64 - 8; micro_depth += 8) { - ConvKernel3x3FilterDepth8<8, 8, 1, 1>::Run( - shuffled_ptr, 64, input_offset, 64 * 10, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - shuffled_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - input_ptr += 64; - } - - for (; depth <= output_depth - 8; depth += 8) { - ConvKernel3x3FilterDepth8<8, 8, 1, 1>::Run( - input_ptr, input_depth, input_offset, input_row_size, filter_ptr, - filter_offset, bias_ptr, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_ptr, output_depth, output_width); - - input_ptr += 8; - output_ptr += 8; - filter_ptr += 8; - bias_ptr += 8; - } - input_data += 8 * input_depth; - output_data += 8 * output_depth; + const int32 output_leftover_width = params.output_width - out_x; + if (output_leftover_width > 0) { + ConvKernel::Run(input_data, filter_data, bias_data, output_data, 0, + params.output_depth, params.input_depth, + params.input_row_size, shuffle_params.output_height, + output_leftover_width, params); } - - // Handle the rest of the right side by re-using 4 row kernels twice. 
- ConvRow3x3FilterDepth8<4, 1, 1>::Run( - input_data, out_x, start_y, input_depth, input_width, input_height, - input_row_size, input_offset, filter_data, filter_offset, bias_data, - output_offset, output_multiplier, output_shift, output_activation_min, - output_activation_max, output_data, output_depth, output_width, - shuffle_workspace); - - ConvRow3x3FilterDepth8<4, 1, 1>::Run( - input_data + 4 * input_row_size, out_x, start_y + 4, input_depth, - input_width, input_height, input_row_size, input_offset, filter_data, - filter_offset, bias_data, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data + 4 * output_depth * output_width, output_depth, - output_width, shuffle_workspace); } }; inline bool Fast3x3FilterKernelSupported( - const Dims<4>& input_dims, const Dims<4>& filter_dims, int stride_width, - int stride_height, int pad_width, int pad_height, int depth_multiplier, - const Dims<4>& output_dims, int output_shift) { - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); + const Dims<4>& input_dims, const Dims<4>& filter_dims, int32 stride_width, + int32 stride_height, int32 pad_width, int32 pad_height, + int32 depth_multiplier, const Dims<4>& output_dims, int32 output_shift) { + const int32 input_height = ArraySize(input_dims, 2); + const int32 input_width = ArraySize(input_dims, 1); + const int32 input_depth = ArraySize(input_dims, 0); + const int32 filter_height = ArraySize(filter_dims, 2); + const int32 filter_width = ArraySize(filter_dims, 1); + const int32 output_height = ArraySize(output_dims, 2); + const int32 output_width = ArraySize(output_dims, 1); bool supported = filter_width 
== 3 && filter_height == 3 && depth_multiplier == 1 && @@ -4434,14 +2378,14 @@ inline bool Fast3x3FilterKernelSupported( // Handle case where padding is zero but padding type is not kValid. // This would require special boundary case handling that is not supported. - const int out_x = output_width - 1; - const int out_y = output_height - 1; + const int32 out_x = output_width - 1; + const int32 out_y = output_height - 1; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; + const int32 in_x_origin = (out_x * stride_width) - pad_width; + const int32 in_y_origin = (out_y * stride_height) - pad_height; - const int in_x_end = in_x_origin + filter_width; - const int in_y_end = in_y_origin + filter_height; + const int32 in_x_end = in_x_origin + filter_width; + const int32 in_y_end = in_y_origin + filter_height; // Supported only if filter on the right and bottom boundary lies completely // within the input. @@ -4451,128 +2395,135 @@ inline bool Fast3x3FilterKernelSupported( inline void DepthwiseConv3x3Filter( const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int stride_width, - int stride_height, int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int 
output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); - - // Algorithm assumes below constraints. It is optimized for depth multiplier - // of 1, 3x3 filter, no padding and strides 1 and 2. - TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + const int32* bias_data, const Dims<4>& bias_dims, int32 stride_width, + int32 stride_height, int32 pad_width, int32 pad_height, + int32 depth_multiplier, int32 output_offset, int32 output_multiplier, + int32 output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConvParams params; + params.input_depth = ArraySize(input_dims, 0); + params.input_width = ArraySize(input_dims, 1); + params.input_height = ArraySize(input_dims, 2); + params.input_row_size = params.input_depth * params.input_width; + params.input_offset = input_offset; + params.output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + params.output_width = ArraySize(output_dims, 1); + params.output_height = ArraySize(output_dims, 2); + params.output_row_size = params.output_depth * params.output_width; + params.output_offset = output_offset; + params.filter_offset = filter_offset; + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.output_activation_min = output_activation_min; + params.output_activation_max = output_activation_max; + + const int32 filter_height = ArraySize(filter_dims, 2); + const int32 filter_width = ArraySize(filter_dims, 1); + + // Algorithm assumes below constraints. It is optimized for depth + // multiplier of 1, 3x3 filter, no padding and strides 1 and 2. 
+ TFLITE_DCHECK(params.output_depth == params.input_depth * depth_multiplier); TFLITE_DCHECK(depth_multiplier == 1); TFLITE_DCHECK(filter_height == 3); TFLITE_DCHECK(filter_width == 3); - TFLITE_DCHECK(pad_height == 0); - TFLITE_DCHECK(pad_width == 0); TFLITE_DCHECK(stride_height == 1 || stride_height == 2); TFLITE_DCHECK(stride_width == 1 || stride_width == 2); TFLITE_DCHECK(stride_width == stride_height); + TFLITE_DCHECK(pad_height == 0); + TFLITE_DCHECK(pad_width == 0); - const int input_row_size = input_depth * (input_width + 2 * pad_width); - const int output_row_size = output_depth * output_width; - const int input_batch_size = input_row_size * (input_height + 2 * pad_height); - const int output_batch_size = output_depth * output_width * output_height; - - using conv_row_func_t = decltype(&ConvRow3x3FilterDepth8<1, 1, 1>::Run); - conv_row_func_t conv_1_output_row = ConvRow3x3FilterDepth8<1, 1, 1>::Run; - conv_row_func_t conv_2_output_rows = ConvRow3x3FilterDepth8<2, 1, 1>::Run; - conv_row_func_t conv_4_output_rows = ConvRow3x3FilterDepth8<4, 1, 1>::Run; - conv_row_func_t conv_8_output_rows = ConvRow3x3FilterDepth8<8, 1, 1>::Run; + const int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int64_t input_batch_size = params.input_row_size * params.input_height; + const int64_t output_batch_size = + params.output_row_size * params.output_height; + + ShuffleParams one_row_shuffle_params, two_row_shuffle_params, + four_row_shuffle_params, eight_row_shuffle_params; + if (stride_width == 1) { + one_row_shuffle_params = ShuffleParams(30, 1, 1, 1); + two_row_shuffle_params = ShuffleParams(22, 2, 1, 1); + four_row_shuffle_params = ShuffleParams(14, 4, 1, 1); + eight_row_shuffle_params = ShuffleParams(8, 8, 1, 1); + } else { + one_row_shuffle_params = ShuffleParams(14, 1, 2, 2); + two_row_shuffle_params = ShuffleParams(8, 2, 2, 2); + four_row_shuffle_params = ShuffleParams(4, 4, 2, 2); + eight_row_shuffle_params = ShuffleParams(2, 8, 2, 2); + } + 
using conv_multirow_func_t = decltype(&DepthwiseConvMultiRow<1, 1>::Run); + conv_multirow_func_t conv_multirow_func = DepthwiseConvMultiRow<1, 1>::Run; if (stride_width == 2) { - conv_1_output_row = ConvRow3x3FilterDepth8<1, 2, 2>::Run; - conv_2_output_rows = ConvRow3x3FilterDepth8<2, 2, 2>::Run; - conv_4_output_rows = ConvRow3x3FilterDepth8<4, 2, 2>::Run; - conv_8_output_rows = ConvRow3x3FilterDepth8<8, 2, 2>::Run; + conv_multirow_func = DepthwiseConvMultiRow<2, 2>::Run; } // Allocate maximum memory needed for shuffled input. // TODO(mariewhite): The size of this workspace is small enough to be // allocated on the stack. Eventually we will want to move it to the heap - // and have it allocated outside of this function, like the im2col_array used - // in gemmlowp. -#define DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE 10 * 10 * 64 + // and have it allocated outside of this function, like the im2col_array + // used in gemmlowp. uint8 shuffle_workspace[DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE]; - // Make sure the kernels using this buffer will not run out of bounds. - static_assert(ConvRow3x3FilterDepth8<8, 1, 1>::ShuffleWorkspaceSize() <= - DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE, - "Shuffle workspace size is too small."); - static_assert(ConvRow3x3FilterDepth8<4, 2, 2>::ShuffleWorkspaceSize() <= - DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE, - "Shuffle workspace size is too small."); - -#undef DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE - - for (int b = 0; b < batches; ++b) { + for (int32 b = 0; b < batches; ++b) { const uint8* input_ptr = input_data + b * input_batch_size; uint8* output_ptr = output_data + b * output_batch_size; - int out_y = 0; + int32 out_y = 0; + + // Shuffling shapes that maximize width over the shuffle workspace size + // perform better since the inputs are closer together, minimizing + // shuffling time. + // + // If the input shape has width large enough for the 2 row kernels, + // we prefer to use this. 
The innermost loop of the kernels handle + // 2 height x 2 width so this is the fastest path. + // + // If the input shape has smaller width but larger height, shuffling is + // still useful and can benefit from kernels 4 row and 8 row kernels. // Handle 8 rows at a time. - for (; out_y <= output_height - 8; out_y += 8) { - conv_8_output_rows(input_ptr, 0, out_y, input_depth, input_width, - input_height, input_row_size, input_offset, - filter_data, filter_offset, bias_data, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, - output_width, shuffle_workspace); - - input_ptr += 8 * stride_height * input_row_size; - output_ptr += 8 * output_row_size; + if (params.input_width < four_row_shuffle_params.input_width) { + for (; out_y <= params.output_height - 8; out_y += 8) { + conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + output_ptr, params, eight_row_shuffle_params, + shuffle_workspace); + input_ptr += 8 * stride_height * params.input_row_size; + output_ptr += 8 * params.output_row_size; + } } // Handle 4 rows at a time. - for (; out_y <= output_height - 4; out_y += 4) { - conv_4_output_rows(input_ptr, 0, out_y, input_depth, input_width, - input_height, input_row_size, input_offset, - filter_data, filter_offset, bias_data, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, - output_width, shuffle_workspace); - - input_ptr += 4 * stride_height * input_row_size; - output_ptr += 4 * output_row_size; + if (params.input_width < two_row_shuffle_params.input_width) { + for (; out_y <= params.output_height - 4; out_y += 4) { + conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + output_ptr, params, four_row_shuffle_params, + shuffle_workspace); + input_ptr += 4 * stride_height * params.input_row_size; + output_ptr += 4 * params.output_row_size; + } } // Handle 2 rows at a time. 
- for (; out_y <= output_height - 2; out_y += 2) { - conv_2_output_rows(input_ptr, 0, out_y, input_depth, input_width, - input_height, input_row_size, input_offset, - filter_data, filter_offset, bias_data, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, - output_width, shuffle_workspace); - - input_ptr += 2 * stride_height * input_row_size; - output_ptr += 2 * output_row_size; + for (; out_y <= params.output_height - 2; out_y += 2) { + conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + output_ptr, params, two_row_shuffle_params, + shuffle_workspace); + input_ptr += 2 * stride_height * params.input_row_size; + output_ptr += 2 * params.output_row_size; } // Handle one row at a time. - for (; out_y < output_height; out_y++) { - conv_1_output_row(input_ptr, 0, out_y, input_depth, input_width, - input_height, input_row_size, input_offset, filter_data, - filter_offset, bias_data, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_ptr, output_depth, - output_width, shuffle_workspace); - - input_ptr += stride_height * input_row_size; - output_ptr += output_row_size; + for (; out_y < params.output_height; out_y++) { + conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + output_ptr, params, one_row_shuffle_params, + shuffle_workspace); + input_ptr += stride_height * params.input_row_size; + output_ptr += params.output_row_size; } } } +// clang-format on #endif // __aarch64__ -- GitLab From 599808af617fbc754eebe9fe2873c52a63d25a38 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 25 May 2018 11:02:42 -0700 Subject: [PATCH 151/902] Release C++ lock before calling back into python PiperOrigin-RevId: 198073059 --- tensorflow/python/util/util.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 
0f465eda4f..8e839b523e 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -172,17 +172,20 @@ int IsSequenceHelper(PyObject* o) { // Try not to return to Python - see if the type has already been seen // before. - // NOTE: It's not clear whether the lock is required (we should be holding the - // python GIL in this code already). - mutex_lock l(g_type_to_sequence_map); auto* type_to_sequence_map = IsTypeSequenceMap(); auto* type = Py_TYPE(o); - auto it = type_to_sequence_map->find(type); - if (it != type_to_sequence_map->end()) { - return it->second; + { + mutex_lock l(g_type_to_sequence_map); + auto it = type_to_sequence_map->find(type); + if (it != type_to_sequence_map->end()) { + return it->second; + } } + // NOTE: We explicitly release the g_type_to_sequence_map mutex, + // because PyObject_IsInstance() may release the GIL, allowing another thread + // concurrent entry to this function. int is_instance = PyObject_IsInstance(o, CollectionsSequenceType); // Don't cache a failed is_instance check. @@ -195,7 +198,10 @@ int IsSequenceHelper(PyObject* o) { // leak, as there should only be a relatively small number of types in the // map, and an even smaller number that are eligible for decref. 
Py_INCREF(type); - type_to_sequence_map->insert({type, is_sequence}); + { + mutex_lock l(g_type_to_sequence_map); + type_to_sequence_map->insert({type, is_sequence}); + } return is_sequence; } -- GitLab From 7badca2edfdd22af290e5d52e10072ef8fd82020 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Fri, 25 May 2018 11:27:39 -0700 Subject: [PATCH 152/902] Bump TPU batch size and wrap apply_grads in defun PiperOrigin-RevId: 198077643 --- .../python/examples/resnet50/resnet50_test.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 2d51cfdeee..b14ef1df8f 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -49,15 +49,17 @@ def random_batch(batch_size, data_format): return images, one_hot -def train_one_step(model, images, labels, optimizer): - +def compute_gradients(model, images, labels): with tf.GradientTape() as tape: logits = model(images, training=True) loss = tf.losses.softmax_cross_entropy( logits=logits, onehot_labels=labels) tf.contrib.summary.scalar(name='loss', tensor=loss) - grads = tape.gradient(loss, model.variables) - optimizer.apply_gradients(zip(grads, model.variables)) + return tape.gradient(loss, model.variables) + + +def apply_gradients(model, optimizer, gradients): + optimizer.apply_gradients(zip(gradients, model.variables)) class ResNet50Test(tf.test.TestCase): @@ -114,7 +116,8 @@ class ResNet50Test(tf.test.TestCase): with tf.device(device), tfe.execution_mode(execution_mode): optimizer = tf.train.GradientDescentOptimizer(0.1) images, labels = random_batch(2, data_format) - train_one_step(model, images, labels, optimizer) + apply_gradients(model, optimizer, + compute_gradients(model, images, labels)) self.assertEqual(320, len(model.variables)) tfe.async_wait() events 
= summary_test_util.events_from_logdir(logdir) @@ -138,14 +141,16 @@ class ResNet50Test(tf.test.TestCase): # garbage to be collected. The hope is that this is a build-only effect, # and a subsequent training loop will create nothing which needs to be # collected. - train_one_step(model, images, labels, optimizer) + apply_gradients(model, optimizer, + compute_gradients(model, images, labels)) gc.collect() previous_gc_debug_flags = gc.get_debug() gc.set_debug(gc.DEBUG_SAVEALL) for _ in range(2): # Run twice to ensure that garbage that is created on the first # iteration is no longer accessible. - train_one_step(model, images, labels, optimizer) + apply_gradients(model, optimizer, + compute_gradients(model, images, labels)) gc.collect() # There should be no garbage requiring collection. self.assertEqual(0, len(gc.garbage)) @@ -180,9 +185,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): return (16, 32, 64) if tf.DeviceSpec.from_string(device.name).device_type == 'TPU': - # TODO(iga): Training fails with batch size of 16, probably because of - # no layout optimizations with op-by-op mode. Investigate more. 
- return (8,) + return (32,) return (16, 32) def _report(self, label, start, num_iters, device, batch_size, data_format): @@ -248,18 +251,21 @@ class ResNet50Benchmarks(tf.test.Benchmark): device, data_format = device_and_format for batch_size in self._train_batch_sizes(): (images, labels) = random_batch(batch_size, data_format) - num_burn = 3 - num_iters = 10 model = resnet50.ResNet50(data_format) + optimizer = tf.train.GradientDescentOptimizer(0.1) + apply_grads = apply_gradients if defun: model.call = tfe.defun(model.call, compiled=compiled) - optimizer = tf.train.GradientDescentOptimizer(0.1) + apply_grads = tfe.defun(apply_gradients, compiled=compiled) + num_burn = 3 + num_iters = 10 with tf.device(device): iterator = make_iterator((images, labels)) for _ in xrange(num_burn): (images, labels) = iterator.next() - train_one_step(model, images, labels, optimizer) + apply_grads(model, optimizer, + compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() @@ -268,7 +274,8 @@ class ResNet50Benchmarks(tf.test.Benchmark): start = time.time() for _ in xrange(num_iters): (images, labels) = iterator.next() - train_one_step(model, images, labels, optimizer) + apply_grads(model, optimizer, + compute_gradients(model, images, labels)) if execution_mode: tfe.async_wait() self._force_device_sync() -- GitLab From 3890dba889fce1d49a199c72892863f28de02179 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 11:34:30 -0700 Subject: [PATCH 153/902] Extracts the 'simplify strided slice' optimization into its own method. 
PiperOrigin-RevId: 198078724 --- .../grappler/optimizers/constant_folding.cc | 198 ++++++++++-------- .../grappler/optimizers/constant_folding.h | 5 + 2 files changed, 113 insertions(+), 90 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a64e9a38fd..90c52b35e5 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1793,96 +1793,14 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsStridedSlice(*node) && - properties->GetInputProperties(node->name()).size() == 4) { - if (node->attr().at("new_axis_mask").i() != 0 || - node->attr().at("shrink_axis_mask").i() != 0) { - // Skip nodes with new/shrink axis mask, since they involve dimension - // changes. - return Status::OK(); - } - const auto& input = properties->GetInputProperties(node->name())[0]; - for (int j = 0; j < input.shape().dim_size(); ++j) { - // Skip if input shape is not fully determined. 
- if (input.shape().dim(j).size() < 0) { - return Status::OK(); - } - } - const auto& b = properties->GetInputProperties(node->name())[1]; - const auto& e = properties->GetInputProperties(node->name())[2]; - const auto& s = properties->GetInputProperties(node->name())[3]; - if (TensorShape::IsValid(b.shape()) && b.has_value() && - TensorShape::IsValid(e.shape()) && e.has_value() && - TensorShape::IsValid(s.shape()) && s.has_value()) { - Tensor begin(b.dtype(), b.shape()); - if (!begin.FromProto(b.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - b.value().DebugString()); - } - Tensor end(e.dtype(), e.shape()); - if (!end.FromProto(e.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - e.value().DebugString()); - } - Tensor strides(s.dtype(), s.shape()); - if (!strides.FromProto(s.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - s.value().DebugString()); - } - int begin_mask = node->attr().at("begin_mask").i(); - int end_mask = node->attr().at("end_mask").i(); - std::set expanded_ellipsis_indices; - int ellipsis_index = -1; - for (int j = 0; j < input.shape().dim_size(); ++j) { - // find the ellipsis_mask. If not found, insert one in the end if - // necessary. - if (node->attr().at("ellipsis_mask").i() & 1 << j || - (ellipsis_index == -1 && j >= strides.NumElements())) { - ellipsis_index = j; - } - // insert the indices that are immediately after ellipsis_index if - // necessary. - if (ellipsis_index != -1 && - input.shape().dim_size() > - strides.NumElements() + j - ellipsis_index) { - expanded_ellipsis_indices.insert(j); - } - } - - // The node is replaceable iff unknown_rank == false && - // ((begin_mask is set || begin == 0) && (end_mask is set || end == dim) - // && strides == 1) for all dimensions. 
- bool replaceable = !input.shape().unknown_rank(); - for (int j = 0; replaceable && j < input.shape().dim_size(); ++j) { - if (expanded_ellipsis_indices.find(j) != - expanded_ellipsis_indices.end()) { - // ellipsis_mask is effective on current dimension. - continue; - } - // when we have ellipsis_mask in between, input.shape().dim_size() will - // be greater than strides.NumElements(), since we will insert - // as many as expanded_ellipsis_indices.size() axes during computation. - // We need to subtract this number from j. - int i = j; - if (ellipsis_index != -1 && - j >= ellipsis_index + expanded_ellipsis_indices.size()) { - i = j - expanded_ellipsis_indices.size(); - } - int b = begin.dtype() == DT_INT32 ? begin.vec()(i) - : begin.vec()(i); - int e = - end.dtype() == DT_INT32 ? end.vec()(i) : end.vec()(i); - int s = strides.dtype() == DT_INT32 ? strides.vec()(i) - : strides.vec()(i); - replaceable &= - (begin_mask & 1 << i || b == 0) && - (end_mask & 1 << i || e == input.shape().dim(j).size()) && s == 1; - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } - } + bool simplify_strided_slice_successful = false; + Status simplify_strided_slice_status = + SimplifyStridedSlice(*properties, use_shape_info, optimized_graph, node, + &simplify_strided_slice_successful); + if (!simplify_strided_slice_status.ok()) { + return simplify_strided_slice_status; + } else if (simplify_strided_slice_successful) { + return Status::OK(); } bool simplify_tile_successful = false; @@ -1978,6 +1896,106 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +Status ConstantFolding::SimplifyStridedSlice(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, + NodeDef* node, bool* success) { + if (use_shape_info && IsStridedSlice(*node) && + properties.GetInputProperties(node->name()).size() == 4) { + if (node->attr().at("new_axis_mask").i() != 0 
|| + node->attr().at("shrink_axis_mask").i() != 0) { + // Skip nodes with new/shrink axis mask, since they involve dimension + // changes. + return Status::OK(); + } + const auto& input = properties.GetInputProperties(node->name())[0]; + for (int j = 0; j < input.shape().dim_size(); ++j) { + // Skip if input shape is not fully determined. + if (input.shape().dim(j).size() < 0) { + return Status::OK(); + } + } + const auto& b = properties.GetInputProperties(node->name())[1]; + const auto& e = properties.GetInputProperties(node->name())[2]; + const auto& s = properties.GetInputProperties(node->name())[3]; + if (TensorShape::IsValid(b.shape()) && b.has_value() && + TensorShape::IsValid(e.shape()) && e.has_value() && + TensorShape::IsValid(s.shape()) && s.has_value()) { + Tensor begin(b.dtype(), b.shape()); + if (!begin.FromProto(b.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + b.value().DebugString()); + } + Tensor end(e.dtype(), e.shape()); + if (!end.FromProto(e.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + e.value().DebugString()); + } + Tensor strides(s.dtype(), s.shape()); + if (!strides.FromProto(s.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + s.value().DebugString()); + } + int begin_mask = node->attr().at("begin_mask").i(); + int end_mask = node->attr().at("end_mask").i(); + std::set expanded_ellipsis_indices; + int ellipsis_index = -1; + for (int j = 0; j < input.shape().dim_size(); ++j) { + // find the ellipsis_mask. If not found, insert one in the end if + // necessary. + if (node->attr().at("ellipsis_mask").i() & 1 << j || + (ellipsis_index == -1 && j >= strides.NumElements())) { + ellipsis_index = j; + } + // insert the indices that are immediately after ellipsis_index if + // necessary. 
+ if (ellipsis_index != -1 && + input.shape().dim_size() > + strides.NumElements() + j - ellipsis_index) { + expanded_ellipsis_indices.insert(j); + } + } + + // The node is replaceable iff unknown_rank == false && + // ((begin_mask is set || begin == 0) && (end_mask is set || end == dim) + // && strides == 1) for all dimensions. + bool replaceable = !input.shape().unknown_rank(); + for (int j = 0; replaceable && j < input.shape().dim_size(); ++j) { + if (expanded_ellipsis_indices.find(j) != + expanded_ellipsis_indices.end()) { + // ellipsis_mask is effective on current dimension. + continue; + } + // when we have ellipsis_mask in between, input.shape().dim_size() will + // be greater than strides.NumElements(), since we will insert + // as many as expanded_ellipsis_indices.size() axes during computation. + // We need to subtract this number from j. + int i = j; + if (ellipsis_index != -1 && + j >= ellipsis_index + expanded_ellipsis_indices.size()) { + i = j - expanded_ellipsis_indices.size(); + } + int b = begin.dtype() == DT_INT32 ? begin.vec()(i) + : begin.vec()(i); + int e = + end.dtype() == DT_INT32 ? end.vec()(i) : end.vec()(i); + int s = strides.dtype() == DT_INT32 ? 
strides.vec()(i) + : strides.vec()(i); + replaceable &= + (begin_mask & 1 << i || b == 0) && + (end_mask & 1 << i || e == input.shape().dim(j).size()) && s == 1; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + *success = true; + return Status::OK(); + } + } + } + *success = false; + return Status::OK(); +} + Status ConstantFolding::SimplifyTile(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 30e63544a7..6c42b8f1a8 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -181,6 +181,11 @@ class ConstantFolding : public GraphOptimizer { // Simplifies a Tile operation to an Identity operation if applicable. Status SimplifyTile(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, bool* success); + + // Simplifies a StridedSlice operation to an Identity operation if applicable. 
+ Status SimplifyStridedSlice(const GraphProperties& properties, + bool use_shape_info, GraphDef* optimized_graph, + NodeDef* node, bool* success); // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 752943f2004b6a63fa7c82a227ea3075cc6b5b84 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 25 May 2018 11:42:33 -0700 Subject: [PATCH 154/902] Automated g4 rollback of changelist 192848921 PiperOrigin-RevId: 198079927 --- tensorflow/core/framework/op_kernel.cc | 48 +++++++++++--------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index d240c853eb..b05a9df7c1 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -1273,59 +1273,51 @@ const Eigen::SyclDevice& OpKernelContext::eigen_device() const { } #endif -namespace { -template -void CtxFailureInternal(OpKernelT* op_kernel, const char* file, int line, - const Status& s) { - const string logging_prefix = - file == nullptr ? "CtxFailure: " - : strings::StrCat("CtxFailure at ", io::Basename(file), - ":", line, ": "); - - if (errors::IsOutOfRange(s)) { - // VLOG OutOfRange errors. Dataset ops create OutOfRange errors when they - // reach end-of-sequence. 
- VLOG(1) << logging_prefix << s; - } else { - LOG(WARNING) << logging_prefix << s; - } - op_kernel->SetStatus(s); -} -} // anonymous namespace - void OpKernelConstruction::CtxFailure(const Status& s) { - CtxFailureInternal(this, nullptr, 0, s); + VLOG(1) << s; + SetStatus(s); } void OpKernelConstruction::CtxFailureWithWarning(const Status& s) { - CtxFailureInternal(this, nullptr, 0, s); + LOG(WARNING) << s; + SetStatus(s); } void OpKernelConstruction::CtxFailure(const char* file, int line, const Status& s) { - CtxFailureInternal(this, file, line, s); + VLOG(1) << "OP_REQUIRES failed at " << io::Basename(file) << ":" << line + << " : " << s; + SetStatus(s); } void OpKernelConstruction::CtxFailureWithWarning(const char* file, int line, const Status& s) { - CtxFailureInternal(this, file, line, s); + LOG(WARNING) << "OP_REQUIRES failed at " << io::Basename(file) << ":" << line + << " : " << s; + SetStatus(s); } void OpKernelContext::CtxFailure(const Status& s) { - CtxFailureInternal(this, nullptr, 0, s); + VLOG(1) << s; + SetStatus(s); } void OpKernelContext::CtxFailureWithWarning(const Status& s) { - CtxFailureInternal(this, nullptr, 0, s); + LOG(WARNING) << s; + SetStatus(s); } void OpKernelContext::CtxFailure(const char* file, int line, const Status& s) { - CtxFailureInternal(this, file, line, s); + VLOG(1) << "OP_REQUIRES failed at " << io::Basename(file) << ":" << line + << " : " << s; + SetStatus(s); } void OpKernelContext::CtxFailureWithWarning(const char* file, int line, const Status& s) { - CtxFailureInternal(this, file, line, s); + LOG(WARNING) << "OP_REQUIRES failed at " << io::Basename(file) << ":" << line + << " : " << s; + SetStatus(s); } } // namespace tensorflow -- GitLab From 2e9bc8364d84cebc289318ebed12b308264d4e1c Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 25 May 2018 12:04:49 -0700 Subject: [PATCH 155/902] [TF:XLA] Register Switch and Merge ops on XLA devices. 
PiperOrigin-RevId: 198083156 --- tensorflow/compiler/jit/xla_device_ops.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 33029b7cbe..536325774b 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -79,7 +79,11 @@ class XlaDeviceDummyOp : public OpKernel { Name("ReadVariableOp").Device(DEVICE).HostMemory("resource"), \ ReadVariableOp); \ REGISTER_KERNEL_BUILDER(Name("ControlTrigger").Device(DEVICE), \ - ControlTriggerOp); + ControlTriggerOp); \ + REGISTER_KERNEL_BUILDER(Name("Switch").Device(DEVICE).HostMemory("pred"), \ + SwitchOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Merge").Device(DEVICE).HostMemory("value_index"), MergeOp); } // namespace tensorflow -- GitLab From e7a9508fe3447bf51cebad48a86e1846a5a48d70 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 12:22:45 -0700 Subject: [PATCH 156/902] Extracts the 'simplify slice' optimization into its own method. 
PiperOrigin-RevId: 198085532 --- .../grappler/optimizers/constant_folding.cc | 96 +++++++++++-------- .../grappler/optimizers/constant_folding.h | 4 + 2 files changed, 61 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 90c52b35e5..d38f5a9e81 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1752,45 +1752,14 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsSlice(*node) && - properties->GetInputProperties(node->name()).size() == 3) { - const auto& input = properties->GetInputProperties(node->name())[0]; - const auto& b = properties->GetInputProperties(node->name())[1]; - const auto& s = properties->GetInputProperties(node->name())[2]; - if (TensorShape::IsValid(b.shape()) && b.has_value() && - TensorShape::IsValid(s.shape()) && s.has_value()) { - Tensor begin(b.dtype(), b.shape()); - if (!begin.FromProto(b.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - b.value().DebugString()); - } - Tensor size(s.dtype(), s.shape()); - if (!size.FromProto(s.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - s.value().DebugString()); - } - // The node is replaceable iff unknown_rank == false && - // begin == 0 && (size == -1 || size == input_shape) for all dimensions - bool replaceable = !input.shape().unknown_rank(); - for (int j = 0; replaceable && j < input.shape().dim_size(); ++j) { - if (begin.dtype() == DT_INT32) { - replaceable &= begin.vec()(j) == 0; - } else { - replaceable &= begin.vec()(j) == 0; - } - if (size.dtype() == DT_INT32) { - replaceable &= (size.vec()(j) == -1 || - size.vec()(j) == input.shape().dim(j).size()); - } else { - replaceable &= (size.vec()(j) == -1 || - size.vec()(j) == input.shape().dim(j).size()); - } - } - if (replaceable) { - 
ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } - } + bool simplify_slice_successful = false; + Status simplify_slice_status = + SimplifySlice(*properties, use_shape_info, optimized_graph, node, + &simplify_slice_successful); + if (!simplify_slice_status.ok()) { + return simplify_slice_status; + } else if (simplify_slice_successful) { + return Status::OK(); } bool simplify_strided_slice_successful = false; @@ -1896,6 +1865,55 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +Status ConstantFolding::SimplifySlice(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, + bool* success) { + if (use_shape_info && IsSlice(*node) && + properties.GetInputProperties(node->name()).size() == 3) { + const auto& input = properties.GetInputProperties(node->name())[0]; + const auto& b = properties.GetInputProperties(node->name())[1]; + const auto& s = properties.GetInputProperties(node->name())[2]; + if (TensorShape::IsValid(b.shape()) && b.has_value() && + TensorShape::IsValid(s.shape()) && s.has_value()) { + Tensor begin(b.dtype(), b.shape()); + if (!begin.FromProto(b.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + b.value().DebugString()); + } + Tensor size(s.dtype(), s.shape()); + if (!size.FromProto(s.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + s.value().DebugString()); + } + // The node is replaceable iff unknown_rank == false && + // begin == 0 && (size == -1 || size == input_shape) for all dimensions + bool replaceable = !input.shape().unknown_rank(); + for (int j = 0; replaceable && j < input.shape().dim_size(); ++j) { + if (begin.dtype() == DT_INT32) { + replaceable &= begin.vec()(j) == 0; + } else { + replaceable &= begin.vec()(j) == 0; + } + if (size.dtype() == DT_INT32) { + replaceable &= (size.vec()(j) == -1 || + size.vec()(j) == 
input.shape().dim(j).size()); + } else { + replaceable &= (size.vec()(j) == -1 || + size.vec()(j) == input.shape().dim(j).size()); + } + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + *success = true; + return Status::OK(); + } + } + } + *success = false; + return Status::OK(); +} + Status ConstantFolding::SimplifyStridedSlice(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 6c42b8f1a8..2da63950d6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -186,6 +186,10 @@ class ConstantFolding : public GraphOptimizer { Status SimplifyStridedSlice(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, bool* success); + + // Simplifies a Slice operation to an Identity operation if applicable. + Status SimplifySlice(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success); // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From ae0eb1b7f81f6d98e0503b9568c72feaa805e655 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 25 May 2018 12:35:50 -0700 Subject: [PATCH 157/902] enhance Tensorflow GBDT and GBRT model by exposing a new two dimensional output in prediction ops (example id, tree leaf node index id) for input as other model features PiperOrigin-RevId: 198087342 --- .../estimator_batch/estimator.py | 38 ++++- .../estimator_batch/estimator_test.py | 33 +++- .../boosted_trees/estimator_batch/model.py | 8 +- .../boosted_trees/kernels/prediction_ops.cc | 47 +++++- .../lib/models/multiple_additive_trees.cc | 11 +- .../lib/models/multiple_additive_trees.h | 7 +- .../models/multiple_additive_trees_test.cc | 47 ++++-- .../boosted_trees/ops/prediction_ops.cc | 70 ++++++++ .../python/ops/prediction_ops.py | 1 + .../python/training/functions/gbdt_batch.py | 149 +++++++++++------- .../training/functions/gbdt_batch_test.py | 80 ++++++++-- 11 files changed, 384 insertions(+), 107 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index 89d0d611d2..c8d401bfa6 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -41,7 +41,8 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False): + use_core_libs=False, + output_leaf_index=False): """Initializes a GradientBoostedDecisionTreeClassifier estimator instance. Args: @@ -66,6 +67,14 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. + output_leaf_index: whether to output leaf indices along with predictions + during inference. The leaf node indexes are available in predictions + dict by the key 'leaf_index'. For example, + result_dict = classifier.predict(...) 
+ for example_prediction_result in result_dict: + # access leaf index list by example_prediction_result["leaf_index"] + # which contains one leaf index per tree + Raises: ValueError: If learner_config is not valid. """ @@ -74,7 +83,9 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): # supports second order derivative. def loss_fn(labels, logits, weights=None): result = losses.per_example_maxent_loss( - labels=labels, logits=logits, weights=weights, + labels=labels, + logits=logits, + weights=weights, num_classes=n_classes) return math_ops.reduce_mean(result[0]) else: @@ -102,6 +113,7 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): 'center_bias': center_bias, 'logits_modifier_function': logits_modifier_function, 'use_core_libs': use_core_libs, + 'output_leaf_index': output_leaf_index, }, model_dir=model_dir, config=config, @@ -124,7 +136,8 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False): + use_core_libs=False, + output_leaf_index=False): """Initializes a GradientBoostedDecisionTreeRegressor estimator instance. Args: @@ -151,6 +164,13 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. + output_leaf_index: whether to output leaf indices along with predictions + during inference. The leaf node indexes are available in predictions + dict by the key 'leaf_index'. For example, + result_dict = classifier.predict(...) 
+ for example_prediction_result in result_dict: + # access leaf index list by example_prediction_result["leaf_index"] + # which contains one leaf index per tree """ head = head_lib.regression_head( label_name=label_name, @@ -173,6 +193,7 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, 'use_core_libs': use_core_libs, + 'output_leaf_index': False, }, model_dir=model_dir, config=config, @@ -197,7 +218,8 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False): + use_core_libs=False, + output_leaf_index=False): """Initializes a GradientBoostedDecisionTreeEstimator estimator instance. Args: @@ -220,6 +242,13 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. + output_leaf_index: whether to output leaf indices along with predictions + during inference. The leaf node indexes are available in predictions + dict by the key 'leaf_index'. For example, + result_dict = classifier.predict(...) 
+ for example_prediction_result in result_dict: + # access leaf index list by example_prediction_result["leaf_index"] + # which contains one leaf index per tree """ super(GradientBoostedDecisionTreeEstimator, self).__init__( model_fn=model.model_builder, @@ -233,6 +262,7 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, 'use_core_libs': use_core_libs, + 'output_leaf_index': False, }, model_dir=model_dir, config=config, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py index 0d58317bd5..fe91e5293f 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py @@ -62,12 +62,34 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): examples_per_layer=3, model_dir=model_dir, config=config, - feature_columns=[contrib_feature_column.real_valued_column("x")]) + feature_columns=[contrib_feature_column.real_valued_column("x")], + output_leaf_index=False) classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) classifier.export(self._export_dir_base) + def testThatLeafIndexIsInPredictions(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + classifier = estimator.GradientBoostedDecisionTreeClassifier( + learner_config=learner_config, + num_trees=1, + examples_per_layer=3, + model_dir=model_dir, + config=config, + feature_columns=[contrib_feature_column.real_valued_column("x")], + output_leaf_index=True) + + classifier.fit(input_fn=_train_input_fn, steps=15) + result_dict = classifier.predict(input_fn=_eval_input_fn) + for prediction_item in result_dict: + self.assertTrue("leaf_index" 
in prediction_item) + def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 @@ -87,7 +109,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True) + use_core_libs=True, + output_leaf_index=False) model.fit(input_fn=_train_input_fn, steps=15) model.evaluate(input_fn=_eval_input_fn, steps=1) @@ -107,7 +130,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True) + use_core_libs=True, + output_leaf_index=False) classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) @@ -127,7 +151,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True) + use_core_libs=True, + output_leaf_index=False) regressor.fit(input_fn=_train_input_fn, steps=15) regressor.evaluate(input_fn=_eval_input_fn, steps=1) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index 15ab6d8145..1ee8911989 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -63,6 +63,8 @@ def model_builder(features, labels, mode, params, config): num_trees = params["num_trees"] use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] + output_leaf_index = params["output_leaf_index"] + if features is None: raise ValueError("At least one feature must be specified.") @@ -96,7 +98,8 @@ def model_builder(features, labels, mode, params, config): feature_columns=feature_columns, 
logits_dimension=head.logits_dimension, features=training_features, - use_core_columns=use_core_libs) + use_core_columns=use_core_libs, + output_leaf_index=output_leaf_index) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] @@ -127,6 +130,9 @@ def model_builder(features, labels, mode, params, config): labels=labels, train_op_fn=_train_op_fn, logits=logits) + if output_leaf_index and gbdt_batch.LEAF_INDEX in predictions_dict: + model_fn_ops.predictions[gbdt_batch.LEAF_INDEX] = predictions_dict[ + gbdt_batch.LEAF_INDEX] if num_trees: if center_bias: num_trees += 1 diff --git a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc index b3fe38614e..dcce8bc650 100644 --- a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc @@ -59,6 +59,7 @@ const char* kApplyDropoutAttributeName = "apply_dropout"; const char* kApplyAveragingAttributeName = "apply_averaging"; const char* kDropoutInfoOutputTensorName = "drop_out_tree_indices_weights"; const char* kPredictionsTensorName = "predictions"; +const char* kLeafIndexTensorName = "leaf_index"; void CalculateTreesToInclude( const boosted_trees::trees::DecisionTreeEnsembleConfig& config, @@ -170,15 +171,16 @@ class GradientTreesPredictionOp : public OpKernel { core::ScopedUnref unref_me(ensemble_resource); if (use_locking_) { tf_shared_lock l(*ensemble_resource->get_mutex()); - DoCompute(context, ensemble_resource); + DoCompute(context, ensemble_resource, false); } else { - DoCompute(context, ensemble_resource); + DoCompute(context, ensemble_resource, false); } } - private: - void DoCompute(OpKernelContext* context, - DecisionTreeEnsembleResource* ensemble_resource) { + protected: + virtual void DoCompute(OpKernelContext* context, + DecisionTreeEnsembleResource* ensemble_resource, + const bool 
is_output_leaf_index) { // Read dense float features list; OpInputList dense_float_features_list; OP_REQUIRES_OK(context, TensorUtils::ReadDenseFloatFeatures( @@ -267,6 +269,14 @@ class GradientTreesPredictionOp : public OpKernel { &output_predictions_t)); auto output_predictions = output_predictions_t->matrix(); + // Allocate output leaf index matrix. + Tensor* output_leaf_index_t = nullptr; + if (is_output_leaf_index) { + OP_REQUIRES_OK(context, context->allocate_output( + kLeafIndexTensorName, + {batch_size, ensemble_resource->num_trees()}, + &output_leaf_index_t)); + } // Run predictor. thread::ThreadPool* const worker_threads = context->device()->tensorflow_cpu_worker_threads()->workers; @@ -288,11 +298,13 @@ class GradientTreesPredictionOp : public OpKernel { i, weight * (num_ensembles - i + start_averaging) / num_ensembles); } MultipleAdditiveTrees::Predict(adjusted, trees_to_include, batch_features, - worker_threads, output_predictions); + worker_threads, output_predictions, + output_leaf_index_t); } else { MultipleAdditiveTrees::Predict( ensemble_resource->decision_tree_ensemble(), trees_to_include, - batch_features, worker_threads, output_predictions); + batch_features, worker_threads, output_predictions, + output_leaf_index_t); } // Output dropped trees and original weights. 
@@ -302,7 +314,6 @@ class GradientTreesPredictionOp : public OpKernel { {2, static_cast(dropped_trees.size())}, &output_dropout_info_t)); auto output_dropout_info = output_dropout_info_t->matrix(); - for (int32 i = 0; i < dropped_trees.size(); ++i) { output_dropout_info(0, i) = dropped_trees[i]; output_dropout_info(1, i) = original_weights[i]; @@ -326,6 +337,26 @@ class GradientTreesPredictionOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("GradientTreesPrediction").Device(DEVICE_CPU), GradientTreesPredictionOp); +// GradientTreesPredictionVerboseOp is derived from GradientTreesPredictionOp +// and have an additional output of tensor of rank 2 containing leaf ids for +// each tree where an instance ended up with. +class GradientTreesPredictionVerboseOp : public GradientTreesPredictionOp { + public: + explicit GradientTreesPredictionVerboseOp(OpKernelConstruction* const context) + : GradientTreesPredictionOp(context) {} + + protected: + void DoCompute(OpKernelContext* context, + DecisionTreeEnsembleResource* ensemble_resource, + bool is_output_leaf_index) override { + GradientTreesPredictionOp::DoCompute(context, ensemble_resource, true); + } +}; + +REGISTER_KERNEL_BUILDER( + Name("GradientTreesPredictionVerbose").Device(DEVICE_CPU), + GradientTreesPredictionVerboseOp); + class GradientTreesPartitionExamplesOp : public OpKernel { public: explicit GradientTreesPartitionExamplesOp(OpKernelConstruction* const context) diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc index 43b00d4c6d..ee664f1ba6 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc @@ -26,7 +26,8 @@ void MultipleAdditiveTrees::Predict( const std::vector& trees_to_include, const boosted_trees::utils::BatchFeatures& features, tensorflow::thread::ThreadPool* const worker_threads, - 
tensorflow::TTypes::Matrix output_predictions) { + tensorflow::TTypes::Matrix output_predictions, + Tensor* output_leaf_indices) { // Zero out predictions as the model is additive. output_predictions.setZero(); @@ -38,7 +39,8 @@ void MultipleAdditiveTrees::Predict( // Lambda for doing a block of work. auto update_predictions = [&config, &features, &trees_to_include, - &output_predictions](int64 start, int64 end) { + &output_predictions, + &output_leaf_indices](int64 start, int64 end) { auto examples_iterable = features.examples_iterable(start, end); for (const auto& example : examples_iterable) { for (const int32 tree_idx : trees_to_include) { @@ -47,6 +49,11 @@ void MultipleAdditiveTrees::Predict( const float tree_weight = config.tree_weights(tree_idx); const int leaf_idx = trees::DecisionTree::Traverse(tree, 0, example); QCHECK(leaf_idx >= 0) << "Invalid tree: " << tree.DebugString(); + // Checks if output leaf tree index is required. + if (output_leaf_indices != nullptr) { + output_leaf_indices->matrix()(example.example_idx, tree_idx) = + leaf_idx; + } const auto& leaf_node = tree.nodes(leaf_idx); QCHECK(leaf_node.has_leaf()) << "Invalid leaf node: " << leaf_node.DebugString(); diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h index cc3dc226cd..be7c1555c0 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h @@ -33,12 +33,17 @@ class MultipleAdditiveTrees { public: // Predict runs tree ensemble on the given batch and updates // output predictions accordingly, for the given list of trees. + // output_leaf_indices is a pointer to a 2 dimensional tensor. If it is not + // null, this method fills output_leaf_indices with a per-tree leaf id where + // each of the instances from 'features' ended up in. Its shape is num + // examples X num of trees. 
When nullptr, leaf ids are not output of trees. static void Predict( const boosted_trees::trees::DecisionTreeEnsembleConfig& config, const std::vector& trees_to_include, const boosted_trees::utils::BatchFeatures& features, tensorflow::thread::ThreadPool* const worker_threads, - tensorflow::TTypes::Matrix output_predictions); + tensorflow::TTypes::Matrix output_predictions, + Tensor* output_leaf_indices); }; } // namespace models diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc index 4ca18bedb1..caad023ca6 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc @@ -62,7 +62,7 @@ TEST_F(MultipleAdditiveTreesTest, Empty) { tensorflow::thread::ThreadPool threads(tensorflow::Env::Default(), "test", kNumThreadsSingleThreaded); MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_EQ(0, output_matrix(0, 0)); EXPECT_EQ(0, output_matrix(1, 0)); } @@ -99,17 +99,38 @@ TEST_F(MultipleAdditiveTreesTest, SingleClass) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix); + batch_features_, &threads, output_matrix, + nullptr); EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (bias) + 0.2 (leaf 2). EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1). } + // Normal case with leaf node. + { + // Initialize output leaf index tensor, since leaf index is positive in this + // case, initialize with the value of -1. Since there are 2 examples and + // there are 2 trees, initialize leaf output index by 2 * 2.
+ auto output_leaf_index_tensor = AsTensor({-1, -1, -1, -1}, {2, 2}); + MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, + batch_features_, &threads, output_matrix, + &output_leaf_index_tensor); + EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (bias) + 0.2 (leaf 2). + EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1). + EXPECT_FLOAT_EQ(0, output_leaf_index_tensor.matrix()( + 0, 0)); // 1st leaf for the first example + EXPECT_FLOAT_EQ(0, output_leaf_index_tensor.matrix()( + 1, 0)); // 1st leaf for the second example + EXPECT_FLOAT_EQ(2, output_leaf_index_tensor.matrix()( + 0, 1)); // 2nd leaf for the first example + EXPECT_FLOAT_EQ(1, output_leaf_index_tensor.matrix()( + 1, 1)); // 2nd leaf for the second example + } // Weighted case { DecisionTreeEnsembleConfig weighted = tree_ensemble_config; weighted.set_tree_weights(0, 6.0); weighted.set_tree_weights(1, 3.2); MultipleAdditiveTrees::Predict(weighted, {0, 1}, batch_features_, &threads, - output_matrix); + output_matrix, nullptr); // -0.4 (bias) + 0.2 (leaf 2). EXPECT_FLOAT_EQ(-0.4f * 6 + 0.2 * 3.2, output_matrix(0, 0)); // -0.4 (bias) + 0.9 (leaf 1). @@ -118,21 +139,21 @@ TEST_F(MultipleAdditiveTreesTest, SingleClass) { // Drop first tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {1}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(0.2f, output_matrix(0, 0)); // 0.2 (leaf 2). EXPECT_FLOAT_EQ(0.9f, output_matrix(1, 0)); // 0.9 (leaf 1). } // Drop second tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias). EXPECT_FLOAT_EQ(-0.4f, output_matrix(1, 0)); // -0.4 (bias). } // Drop all trees. 
{ MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(0.0, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.0, output_matrix(1, 0)); } @@ -172,7 +193,8 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix); + batch_features_, &threads, output_matrix, + nullptr); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias) EXPECT_FLOAT_EQ(-0.5f, output_matrix(0, 1)); // -0.7 (bias) + 0.2 (leaf 2) EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1) @@ -184,7 +206,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { weighted.set_tree_weights(0, 6.0); weighted.set_tree_weights(1, 3.2); MultipleAdditiveTrees::Predict(weighted, {0, 1}, batch_features_, &threads, - output_matrix); + output_matrix, nullptr); // bias EXPECT_FLOAT_EQ(-0.4f * 6, output_matrix(0, 0)); // bias + leaf 2 @@ -197,7 +219,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Dropout first tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {1}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(0.0, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.2f, output_matrix(0, 1)); // 0.2 (leaf 2) EXPECT_FLOAT_EQ(0.9f, output_matrix(1, 0)); // 0.9 (leaf 2) @@ -206,7 +228,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Dropout second tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias) EXPECT_FLOAT_EQ(-0.7f, output_matrix(0, 1)); // -0.7 (bias) EXPECT_FLOAT_EQ(-0.4f, output_matrix(1, 0)); // -0.4 (bias) @@ -215,7 +237,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Drop both trees. 
{ MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix); + &threads, output_matrix, nullptr); EXPECT_FLOAT_EQ(0.0f, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.0f, output_matrix(0, 1)); EXPECT_FLOAT_EQ(0.0f, output_matrix(1, 0)); @@ -258,7 +280,8 @@ TEST_F(MultipleAdditiveTreesTest, DenseLeaves) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix); + batch_features_, &threads, output_matrix, + nullptr); EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (tree1) + 0.2 (leaf 2) EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 1)); // -0.7 (tree1) + 0.3 (leaf 2) EXPECT_FLOAT_EQ(3.4f, output_matrix(0, 2)); // 3.0 -(tree1) + 0.4 (leaf 2) diff --git a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc index d66f645f62..6491d58794 100644 --- a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc @@ -40,6 +40,24 @@ static Status ApplyGradientTreesPredictionShapeFn(InferenceContext* c) { return Status::OK(); } +static Status ApplyGradientTreesPredictionVerboseShapeFn(InferenceContext* c) { + string learner_config_str; + c->GetAttr("learner_config", &learner_config_str).IgnoreError(); + LearnerConfig learner_config; + ParseProtoUnlimited(&learner_config, learner_config_str); + + bool reduce_dim; + c->GetAttr("reduce_dim", &reduce_dim).IgnoreError(); + // Sets the shape of the output as a matrix. + c->set_output(0, {c->Matrix(InferenceContext::kUnknownDim, + reduce_dim ? 
learner_config.num_classes() - 1 + : learner_config.num_classes())}); + c->set_output(1, {c->UnknownShape()}); + c->set_output(2, {c->Matrix(InferenceContext::kUnknownDim, + InferenceContext::kUnknownDim)}); + return Status::OK(); +} + REGISTER_OP("GradientTreesPrediction") .Attr("learner_config: string") .Attr("num_dense_float_features: int >= 0") @@ -90,6 +108,58 @@ drop_out_tree_indices_weights: Tensor of Rank 2 containing dropped trees indices and original weights of those trees during prediction. )doc"); +REGISTER_OP("GradientTreesPredictionVerbose") + .Attr("learner_config: string") + .Attr("num_dense_float_features: int >= 0") + .Attr("num_sparse_float_features: int >= 0") + .Attr("num_sparse_int_features: int >= 0") + .Attr("use_locking: bool = false") + .Attr("apply_dropout: bool") + .Attr("apply_averaging: bool") + .Attr("center_bias: bool") + .Attr("reduce_dim: bool") + .Input("tree_ensemble_handle: resource") + .Input("seed: int64") + .Input("dense_float_features: num_dense_float_features * float") + .Input("sparse_float_feature_indices: num_sparse_float_features * int64") + .Input("sparse_float_feature_values: num_sparse_float_features * float") + .Input("sparse_float_feature_shapes: num_sparse_float_features * int64") + .Input("sparse_int_feature_indices: num_sparse_int_features * int64") + .Input("sparse_int_feature_values: num_sparse_int_features * int64") + .Input("sparse_int_feature_shapes: num_sparse_int_features * int64") + .Output("predictions: float") + .Output("drop_out_tree_indices_weights: float") + .Output("leaf_index: int32") + .SetShapeFn(ApplyGradientTreesPredictionVerboseShapeFn) + .Doc(R"doc( +Runs multiple additive regression forests predictors on input instances +and computes the final prediction for each class, and outputs a matrix of +leaf ids per each tree in an ensemble. + +learner_config: Config for the learner of type LearnerConfig proto. 
Prediction +ops for now use only LearningRateDropoutDrivenConfig config from the learner. +num_dense_float_features: Number of dense float features. +num_sparse_float_features: Number of sparse float features. +num_sparse_int_features: Number of sparse int features. +use_locking: Whether to use locking. +seed: random seed to be used for dropout. +reduce_dim: whether to reduce the dimension (legacy impl) or not. +apply_dropout: whether to apply dropout during prediction. +apply_averaging: whether averaging of tree ensembles should take place. If set +to true, will be based on AveragingConfig from learner_config. +tree_ensemble_handle: The handle to the tree ensemble. +dense_float_features: Rank 2 Tensors containing dense float feature values. +sparse_float_feature_indices: Rank 2 Tensors containing sparse float indices. +sparse_float_feature_values: Rank 1 Tensors containing sparse float values. +sparse_float_feature_shapes: Rank 1 Tensors containing sparse float shapes. +sparse_int_feature_indices: Rank 2 Tensors containing sparse int indices. +sparse_int_feature_values: Rank 1 Tensors containing sparse int values. +sparse_int_feature_shapes: Rank 1 Tensors containing sparse int shapes. +predictions: Rank 2 Tensor containing predictions per example per class. +drop_out_tree_indices_weights: Tensor of Rank 2 containing dropped trees indices and original weights of those trees during prediction. +leaf_index: tensor of rank 2 containing leaf ids for each tree where an instance ended up.
+)doc"); + REGISTER_OP("GradientTreesPartitionExamples") .Attr("num_dense_float_features: int >= 0") .Attr("num_sparse_float_features: int >= 0") diff --git a/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py b/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py index 58f0d36b0f..7f6e55ae58 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py @@ -21,4 +21,5 @@ from __future__ import print_function from tensorflow.contrib.boosted_trees.python.ops import boosted_trees_ops_loader from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_partition_examples from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_prediction +from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_prediction_verbose # pylint: enable=unused-import diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 5dd2e0c7f2..35ccb45f5a 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -58,6 +58,7 @@ NUM_LAYERS_ATTEMPTED = "num_layers" NUM_TREES_ATTEMPTED = "num_trees" NUM_USED_HANDLERS = "num_used_handlers" USED_HANDLERS_MASK = "used_handlers_mask" +LEAF_INDEX = "leaf_index" _FEATURE_NAME_TEMPLATE = "%s_%d" @@ -71,18 +72,25 @@ def _get_column_by_index(tensor, indices): return array_ops.reshape(array_ops.gather(p_flat, i_flat), [shape[0], -1]) -def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats, - used_handlers): +def _make_predictions_dict(stamp, + logits, + partition_ids, + ensemble_stats, + used_handlers, + output_leaf_index=False, + leaf_index=None): """Returns predictions for the given logits and n_classes. Args: stamp: The ensemble stamp. 
- logits: A rank 2 `Tensor` with shape [batch_size, n_classes - 1]. - that contains predictions when no dropout was applied. + logits: A rank 2 `Tensor` with shape [batch_size, n_classes - 1] that + contains predictions when no dropout was applied. partition_ids: A rank 1 `Tensor` with shape [batch_size]. ensemble_stats: A TreeEnsembleStatsOp result tuple. used_handlers: A TreeEnsembleUsedHandlerOp result tuple of an int and a - boolean mask.. + boolean mask. + output_leaf_index: Whether to store `leaf_index` in the predictions dict. + leaf_index: The leaf indices to store under the LEAF_INDEX key. Returns: A dict of predictions. @@ -95,6 +103,8 @@ def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats, result[NUM_TREES_ATTEMPTED] = ensemble_stats.attempted_trees result[NUM_USED_HANDLERS] = used_handlers.num_used_handlers result[USED_HANDLERS_MASK] = used_handlers.used_handlers_mask + if output_leaf_index: + result[LEAF_INDEX] = leaf_index return result @@ -268,7 +278,8 @@ class GradientBoostedDecisionTreeModel(object): features, logits_dimension, feature_columns=None, - use_core_columns=False): + use_core_columns=False, + output_leaf_index=False): """Construct a new GradientBoostedDecisionTreeModel function. Args: @@ -276,13 +287,15 @@ class GradientBoostedDecisionTreeModel(object): num_ps_replicas: Number of parameter server replicas, can be 0. ensemble_handle: A handle to the ensemble variable. center_bias: Whether to center the bias before growing trees. - examples_per_layer: Number of examples to accumulate before growing - a tree layer. It can also be a function that computes the number of - examples based on the depth of the layer that's being built. + examples_per_layer: Number of examples to accumulate before growing a tree + layer. It can also be a function that computes the number of examples + based on the depth of the layer that's being built. learner_config: A learner config. features: `dict` of `Tensor` objects. logits_dimension: An int, the dimension of logits.
feature_columns: A list of feature columns. + output_leaf_index: A boolean variable indicating whether to output leaf + index into predictions dictionary. Raises: ValueError: if inputs are not valid. @@ -359,6 +372,7 @@ class GradientBoostedDecisionTreeModel(object): self._learner_config.multi_class_strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS and learner_config.num_classes == 2) + self._output_leaf_index = output_leaf_index def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode): """Runs prediction and returns a dictionary of the prediction results. @@ -388,22 +402,44 @@ class GradientBoostedDecisionTreeModel(object): # Make sure ensemble stats run. This will check that the ensemble has # the right stamp. with ops.control_dependencies(ensemble_stats): - predictions, _ = prediction_ops.gradient_trees_prediction( - ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=mode != learn.ModeKeys.TRAIN, - use_locking=True, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim) + leaf_matrix = [] + # Only used in infer (predict), not used in train and eval. 
+ if self._output_leaf_index and mode == learn.ModeKeys.INFER: + predictions, _, leaf_matrix = ( + prediction_ops).gradient_trees_prediction_verbose( + ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=mode != learn.ModeKeys.TRAIN, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim) + + else: + predictions, _ = prediction_ops.gradient_trees_prediction( + ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=mode != learn.ModeKeys.TRAIN, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim) partition_ids = prediction_ops.gradient_trees_partition_examples( ensemble_handle, self._dense_floats, @@ -416,7 +452,8 @@ class GradientBoostedDecisionTreeModel(object): use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, partition_ids, - ensemble_stats, used_handlers) + ensemble_stats, used_handlers, + self._output_leaf_index, leaf_matrix) def predict(self, mode): """Returns predictions given the features and mode. @@ -521,7 +558,7 @@ class GradientBoostedDecisionTreeModel(object): aggregation_method=None)[0] strategy = self._learner_config.multi_class_strategy - class_id = constant_op.constant(-1, dtype=dtypes.int32) + class_id = -1 # Handle different multiclass strategies. if strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS: # We build one vs rest trees. 
@@ -575,39 +612,31 @@ class GradientBoostedDecisionTreeModel(object): # Get the weights for each example for quantiles calculation, weights = self._get_weights(hessian_shape, squeezed_hessians) + regularization_config = self._learner_config.regularization + min_node_weight = self._learner_config.constraints.min_node_weight # Create all handlers ensuring resources are evenly allocated across PS. fc_name_idx = 0 handlers = [] init_stamp_token = constant_op.constant(0, dtype=dtypes.int64) - l1_regularization = constant_op.constant( - self._learner_config.regularization.l1, dtypes.float32) - l2_regularization = constant_op.constant( - self._learner_config.regularization.l2, dtypes.float32) - tree_complexity_regularization = constant_op.constant( - self._learner_config.regularization.tree_complexity, dtypes.float32) - min_node_weight = constant_op.constant( - self._learner_config.constraints.min_node_weight, dtypes.float32) - epsilon = 0.01 - num_quantiles = 100 - strategy_tensor = constant_op.constant(strategy) with ops.device(self._get_replica_device_setter(worker_device)): # Create handlers for dense float columns for dense_float_column_idx in range(len(self._dense_floats)): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.DenseSplitHandler( - l1_regularization=l1_regularization, - l2_regularization=l2_regularization, - tree_complexity_regularization=tree_complexity_regularization, + l1_regularization=regularization_config.l1, + l2_regularization=regularization_config.l2, + tree_complexity_regularization=( + regularization_config.tree_complexity), min_node_weight=min_node_weight, feature_column_group_id=dense_float_column_idx, - epsilon=epsilon, - num_quantiles=num_quantiles, + epsilon=0.01, + num_quantiles=100, dense_float_column=self._dense_floats[dense_float_column_idx], name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy_tensor, + multiclass_strategy=strategy, 
init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -616,13 +645,14 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.SparseSplitHandler( - l1_regularization=l1_regularization, - l2_regularization=l2_regularization, - tree_complexity_regularization=tree_complexity_regularization, + l1_regularization=regularization_config.l1, + l2_regularization=regularization_config.l2, + tree_complexity_regularization=( + regularization_config.tree_complexity), min_node_weight=min_node_weight, feature_column_group_id=sparse_float_column_idx, - epsilon=epsilon, - num_quantiles=num_quantiles, + epsilon=0.01, + num_quantiles=100, sparse_float_column=sparse_tensor.SparseTensor( self._sparse_float_indices[sparse_float_column_idx], self._sparse_float_values[sparse_float_column_idx], @@ -630,7 +660,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy_tensor, + multiclass_strategy=strategy, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -639,9 +669,10 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( categorical_split_handler.EqualitySplitHandler( - l1_regularization=l1_regularization, - l2_regularization=l2_regularization, - tree_complexity_regularization=tree_complexity_regularization, + l1_regularization=regularization_config.l1, + l2_regularization=regularization_config.l2, + tree_complexity_regularization=( + regularization_config.tree_complexity), min_node_weight=min_node_weight, feature_column_group_id=sparse_int_column_idx, sparse_int_column=sparse_tensor.SparseTensor( @@ -651,7 +682,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy_tensor, + multiclass_strategy=strategy, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ 
-773,7 +804,6 @@ class GradientBoostedDecisionTreeModel(object): empty_hessians = constant_op.constant( [], dtype=dtypes.float32, shape=empty_hess_shape) - active_handlers = array_ops.unstack(active_handlers, axis=0) for handler_idx in range(len(handlers)): handler = handlers[handler_idx] is_active = active_handlers[handler_idx] @@ -984,7 +1014,7 @@ class GradientBoostedDecisionTreeModel(object): # This is a workaround for the slowness of graph building in tf.cond. # See (b/36554864). split_sizes = array_ops.reshape( - array_ops.shape_n(partition_ids_list), [len(partition_ids_list)]) + array_ops.shape_n(partition_ids_list), [-1]) partition_ids = array_ops.concat(partition_ids_list, axis=0) gains = array_ops.concat(gains_list, axis=0) split_infos = array_ops.concat(split_info_list, axis=0) @@ -1049,11 +1079,8 @@ class GradientBoostedDecisionTreeModel(object): # Update ensemble. update_ops = [are_all_splits_ready] - if self._center_bias: - update_model = control_flow_ops.cond(continue_centering, - _center_bias_fn, _grow_ensemble_fn) - else: - update_model = _grow_ensemble_fn() + update_model = control_flow_ops.cond(continue_centering, _center_bias_fn, + _grow_ensemble_fn) update_ops.append(update_model) # Update ensemble stats. 
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 289fb195db..0665c6c63e 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -19,18 +19,15 @@ from __future__ import division from __future__ import print_function from google.protobuf import text_format - from tensorflow.contrib import layers from tensorflow.contrib.boosted_trees.proto import learner_pb2 from tensorflow.contrib.boosted_trees.proto import tree_config_pb2 from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.boosted_trees.python.utils import losses - -from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib from tensorflow.contrib.learn.python.learn.estimators import model_fn - +from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -728,8 +725,8 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertEquals(len(output.tree_weights), 0) self.assertEquals(stamp_token.eval(), 0) - def testPredictFn(self): - """Tests the predict function.""" + def testPredictFnWithLeafIndexAdvancedLeft(self): + """Tests the predict function with output leaf ids.""" with self.test_session() as sess: # Create ensemble with one bias node. 
ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() @@ -737,12 +734,61 @@ class GbdtTest(test_util.TensorFlowTestCase): """ trees { nodes { - leaf { - vector { - value: 0.25 + dense_float_binary_split { + threshold: 1.0 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0 + } + } + nodes { + leaf { + vector { + value: 0.25 + } + } + } + nodes { + leaf { + vector { + value: 0.0 + } + } + } + } + tree_weights: 1.0 + tree_metadata { + num_tree_weight_updates: 1 + num_layers_grown: 1 + is_finalized: true + } + trees { + nodes { + dense_float_binary_split { + threshold: 0.99 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0 + } + } + nodes { + leaf { + vector { + value: 0.25 + } + } + } + nodes { + leaf { + vector { + value: 0.0 + } } } - } } tree_weights: 1.0 tree_metadata { @@ -763,7 +809,8 @@ class GbdtTest(test_util.TensorFlowTestCase): learner_config.constraints.max_tree_depth = 1 learner_config.constraints.min_node_weight = 0 features = {} - features["dense_float"] = array_ops.ones([4, 1], dtypes.float32) + features["dense_float"] = array_ops.constant( + [[0.0], [1.0], [1.1], [2.0]], dtype=dtypes.float32) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=False, num_ps_replicas=0, @@ -772,15 +819,20 @@ class GbdtTest(test_util.TensorFlowTestCase): examples_per_layer=1, learner_config=learner_config, logits_dimension=1, - features=features) + features=features, + output_leaf_index=True) # Create predict op. 
- mode = model_fn.ModeKeys.EVAL + mode = model_fn.ModeKeys.INFER predictions_dict = sess.run(gbdt_model.predict(mode)) self.assertEquals(predictions_dict["ensemble_stamp"], 3) + # here are how the first two numbers in expected results are calculated, + # 0.5 = 0.25 + 0.25, and 0.25 = 0.25 + 0 self.assertAllClose(predictions_dict["predictions"], - [[0.25], [0.25], [0.25], [0.25]]) + [[0.5], [0.25], [0], [0]]) self.assertAllClose(predictions_dict["partition_ids"], [0, 0, 0, 0]) + self.assertAllClose(predictions_dict["leaf_index"], + [[1, 1], [1, 2], [2, 2], [2, 2]]) def testTrainFnMulticlassFullHessian(self): """Tests the GBDT train for multiclass full hessian.""" -- GitLab From 0b522fd22b986704d1056254961cc7988ae182eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 12:54:49 -0700 Subject: [PATCH 158/902] Add ScopedAllocatorOptimizer in support of CollectiveReduce. The efficiency of CollectiveReduce is greatly improved by merging multiple parallel reductions over smaller tensors into a single reduction over a larger tensor that is the concatenation of the smaller tensors. Because CollectiveReduce is essentially an element-wise array operation which operates on a 1-D reshape of the input tensor it is eligible for a ScopedAllocation optimization. The optimization works by looking for serially independent instances of CollectiveReduce that lie within the same name-scope tier and have the same control-flow (e.g. loop) embedding structure. Where two or more such nodes are found the upstream nodes that generate their inputs are modified to write their outputs into consecutive regions of a single tensor buffer maintained by a ScopedAllocator. The multiple CollectiveReduce nodes are then replaced by a single CollectiveReduce that operates in-place on the backing buffer.
The effectiveness of the optimization depends on there being candidate CollectiveReduce nodes with these characteristics that become eligible for execution at close to the same time. If the name scope is too large, and includes nodes that become execution eligible at very different times, this graph rewrite could result in a slowdown. Note that this optimization is experimental: it is not guaranteed to work, especially for ops other than CollectiveReduce. PiperOrigin-RevId: 198089642 --- .../common_runtime/scoped_allocator_mgr.cc | 12 +- .../common_runtime/scoped_allocator_mgr.h | 11 +- tensorflow/core/grappler/op_types.cc | 6 + tensorflow/core/grappler/op_types.h | 1 + tensorflow/core/grappler/optimizers/BUILD | 45 + .../grappler/optimizers/meta_optimizer.cc | 28 + .../optimizers/scoped_allocator_optimizer.cc | 929 ++++++++++++++++++ .../optimizers/scoped_allocator_optimizer.h | 107 ++ .../scoped_allocator_optimizer_test.cc | 243 +++++ .../core/kernels/scoped_allocator_ops_test.cc | 41 +- tensorflow/core/ops/scoped_allocator_ops.cc | 37 +- .../core/protobuf/rewriter_config.proto | 10 + 12 files changed, 1443 insertions(+), 27 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc create mode 100644 tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc index c045596a69..8ac6adc2e4 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc @@ -160,13 +160,18 @@ Status ScopedAllocatorMgr::AddScopedAllocator( expected_call_count); } -void ScopedAllocatorMgr::PopulateFields( +/*static*/ +size_t ScopedAllocatorMgr::PopulateFields( int32 scope_id, const gtl::ArraySlice& shapes, const DataType dtype, std::vector* fields) { const int32 
num_fields = static_cast(shapes.size()); fields->resize(num_fields); size_t offset = 0; for (int32 i = 0; i < num_fields; ++i) { + size_t overshoot = offset % Allocator::kAllocatorAlignment; + if (overshoot > 0) { + offset += (Allocator::kAllocatorAlignment - overshoot); + } size_t bytes = shapes[i].num_elements() * DataTypeSize(dtype); (*fields)[i].scope_id = scope_id + 1 + i; (*fields)[i].bytes = bytes; @@ -175,11 +180,8 @@ void ScopedAllocatorMgr::PopulateFields( << " bytes=" << (*fields)[i].bytes << " offset=" << (*fields)[i].offset; offset += bytes; - size_t overshoot = offset % Allocator::kAllocatorAlignment; - if (overshoot > 0) { - offset += (Allocator::kAllocatorAlignment - overshoot); - } } + return offset; } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.h b/tensorflow/core/common_runtime/scoped_allocator_mgr.h index effc5f2d77..8c5e853472 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.h +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.h @@ -89,10 +89,13 @@ class ScopedAllocatorMgr { // Populate the bytes and offset members of Field. Instance allocaters get // consecutive scope_id values following that of the base ScopedAllocator. - static void PopulateFields(int32 scope_id, - const gtl::ArraySlice& shapes, - const DataType dtype, - std::vector* fields); + // Returns the total number of bytes required to be allocated in the + // backing tensor, for convenience. (The same value can be obtained + // by summing offset and bytes in the last field.) 
+ static size_t PopulateFields(int32 scope_id, + const gtl::ArraySlice& shapes, + const DataType dtype, + std::vector* fields); const string& device_name() const { return device_name_; } diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index fe0fad9148..2a47a4c495 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -78,6 +78,12 @@ bool IsCheckNumerics(const NodeDef& node) { return node.op() == "CheckNumerics"; } +bool IsCollective(const NodeDef& node) { + return node.op() == "CollectiveReduce" || + node.op() == "CollectiveBcastSend" || + node.op() == "CollectiveBcastRecv"; +} + bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; } bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 915da21fad..e7f39981c0 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -38,6 +38,7 @@ bool IsBiasAddGrad(const NodeDef& node); bool IsBitcast(const NodeDef& node); bool IsCast(const NodeDef& node); bool IsCheckNumerics(const NodeDef& node); +bool IsCollective(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConj(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index f6860695ec..c90667abad 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -517,6 +517,7 @@ cc_library( ":memory_optimizer", ":model_pruner", ":remapper", + ":scoped_allocator_optimizer", ":shape_optimizer", "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", @@ -762,3 +763,47 @@ tf_cuda_cc_test( "//tensorflow/core/grappler/utils:grappler_test", ], ) + +cc_library( + name = "scoped_allocator_optimizer", + srcs = ["scoped_allocator_optimizer.cc"], + hdrs = [ + 
"scoped_allocator_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_optimizer", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:scoped_allocator_ops_op_lib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:frame", + ], +) + +tf_cc_test( + name = "scoped_allocator_optimizer_test", + size = "small", + srcs = ["scoped_allocator_optimizer_test.cc"], + deps = [ + ":scoped_allocator_optimizer", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:all_kernels", + "//tensorflow/core:core_cpu", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + ], +) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a92727535d..e6622486eb 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/memory_optimizer.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/optimizers/remapper.h" +#include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h" #include "tensorflow/core/grappler/optimizers/shape_optimizer.h" #include "tensorflow/core/grappler/utils/colocation.h" #include "tensorflow/core/grappler/utils/functions.h" @@ -88,6 +89,8 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("loop", new LoopOptimizer(cfg_.loop_optimization())); MK_OPT("dependency", new DependencyOptimizer(cfg_.dependency_optimization())); MK_OPT("debug_stripper", new DebugStripper()); + MK_OPT("scoped_allocator", + new ScopedAllocatorOptimizer(cfg_.scoped_allocator_opts())); return std::unique_ptr(); } @@ -145,6 +148,10 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->emplace_back( new AutoParallel(cfg_.auto_parallel().num_replicas())); } + if (cfg_.scoped_allocator_optimization()) { + optimizers->emplace_back( + new ScopedAllocatorOptimizer(cfg_.scoped_allocator_opts())); + } return Status::OK(); } @@ -211,12 +218,32 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, bool is_optimized = false; GraphOptimizationResult optimization_result(item.id); + // ScopedAllocatorOptimizer must run last, so move it to the + // end of optimizers and run only on the last iteration. 
+ { + int sa_index = 0; + for (; sa_index < optimizers.size(); ++sa_index) { + if (optimizers[sa_index]->name() == "scoped_allocator_optimizer") { + break; + } + } + const int last_index = optimizers.size() - 1; + if (sa_index < last_index) { + optimizers[last_index].swap(optimizers[sa_index]); + } + } + + const int last_iteration = NumIterations(cfg_) - 1; for (int iteration = 0; iteration < NumIterations(cfg_); ++iteration) { VLOG(4) << "Starting optimization iteration " << iteration + 1; for (const auto& optimizer : optimizers) { // Some optimizers can run only once. if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; + // Some must run only on the last iteration. + if (optimizer->name() == "scoped_allocator_optimizer" && + iteration != last_iteration) + continue; uint64 start_us = Env::Default()->NowMicros(); // This swaps the current optimized_graph into optimized item and @@ -361,6 +388,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || cfg.debug_stripper() == RewriterConfig::ON || + cfg.scoped_allocator_optimization() == RewriterConfig::ON || !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc new file mode 100644 index 0000000000..cceef4098d --- /dev/null +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -0,0 +1,929 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h" + +#include "tensorflow/core/common_runtime/scoped_allocator.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils/frame.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" + +// Like TF_RETURN_IF_ERROR, but also logs a WARNING. +#define LOG_WARNING_AND_RETURN_IF_ERROR(...) \ + do { \ + const ::tensorflow::Status _status = (__VA_ARGS__); \ + if (TF_PREDICT_FALSE(!_status.ok())) { \ + LOG(WARNING) << "error: " << _status; \ + return _status; \ + } \ + } while (0) + +namespace tensorflow { +namespace grappler { + +namespace { +// Node names often have some kind of name_scope prefix, with slashes, +// and a _nn numeric suffix. Returns true if the main part of the node_name +// matches op_name, i.e. it looks from the name like this node is +// of that op type. 
+bool HasOpName(const string& node_name, const string& op_name) { + size_t begin = node_name.rfind("/"); + if (begin == string::npos) { + begin = 0; + } else { + ++begin; + } + size_t end = node_name.rfind("_"); + if (end != string::npos) { + size_t p = end + 1; + while (p < node_name.size()) { + if (!isdigit(node_name[p])) { + end = node_name.size(); + break; + } + ++p; + } + } else { + end = node_name.size(); + } + return node_name.substr(begin, end - begin) == op_name; +} + +// After shape inference has been done each op should be annotated +// with its output shape(s). This function iterates over a collection +// of ops that are a potential application of a ScopedAllocator. It +// verifies whether they all have the same output type and if so +// gathers a vector of their output shapes. It returns an error if +// any of the ops doesn't have type or shape data, or if it has more +// than one output, of if the output type of all ops is not the same. +// If it returns OK then *type and *shapes should be correctly populated. 
+Status CheckTypesAndGetShapes(const GraphProperties& graph_properties, + const std::vector& ops, DataType* type, + std::vector* shapes) { + VLOG(1) << "CheckTypesAndGetShapes"; + *type = DT_INVALID; + for (NodeDef* n : ops) { + AttrSlice n_attrs = AttrSlice(*n); + DataType dtype; + LOG_WARNING_AND_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype)); + VLOG(2) << "op " << n->name() << " has type " << dtype << " shapes.size() " + << shapes->size(); + if (!graph_properties.HasOutputProperties(n->name())) { + LOG(ERROR) << "Node " << n->DebugString() << " lacks output shape."; + return errors::Internal("Node ", n->name(), " lacks output shape."); + } + const std::vector& prop_list = + graph_properties.GetOutputProperties(n->name()); + if (prop_list.size() != 1) { + return errors::Internal("Node ", n->name(), + " does not have exactly one output as expected " + "by ScopedAllocatorOptimizer"); + } + const OpInfo::TensorProperties& props = prop_list[0]; + if (shapes->empty()) { + *type = props.dtype(); + } else if (*type != props.dtype()) { + return errors::Internal("Group ops don't all have same type"); + } else if (!TensorShape::IsValid(props.shape())) { + return errors::Internal("Complete shape not known for ", n->name()); + } + VLOG(2) << "Adding shape " << props.shape().DebugString(); + shapes->push_back(TensorShape(props.shape())); + } + return Status::OK(); +} + +// Describes an existing input edge in the graph. +struct InputDesc { + NodeDef* from_node_def; + int output_slot; + NodeDef* to_node_def; + InputDesc(NodeDef* f, int os, NodeDef* t) + : from_node_def(f), output_slot(os), to_node_def(t) {} +}; + +// Populates *inputs with all of the non-control inputs of ops. +// Returns error if it fails to find exactly one input for each op, +// or if some input is not of type dtype. 
+Status GetInputs(NodeMap* node_map, const std::vector& ops, + DataType dtype, std::vector* inputs) { + VLOG(1) << "Getinputs"; + for (NodeDef* n : ops) { + NodeDef* inode = nullptr; + int position = 0; + VLOG(2) << "for node " << n->name(); + for (const auto& input_name : n->input()) { + if (!IsControlInput(input_name)) { + if (inode) { + return errors::Internal("Found more than one input for node ", + n->name()); + } + ParseNodeName(input_name, &position); + inode = node_map->GetNode(input_name); + CHECK(inode) << input_name; + VLOG(2) << "inode " << inode->DebugString(); + } + } + AttrSlice inode_attrs = AttrSlice(*inode); + DataType inode_dtype; + LOG_WARNING_AND_RETURN_IF_ERROR( + GetNodeAttr(inode_attrs, "T", &inode_dtype)); + if (inode_dtype != dtype) { + return errors::Internal("ScopedAllocatorOptimizer expected input type ", + dtype, " but found ", inode_dtype); + } + // inputs->push_back(InputDesc(inode, position, n)); + inputs->emplace_back(inode, position, n); + } + return Status::OK(); +} + +// Remove the NodeDef nd from node_map and graph. It must be the case +// that nd no longer has any input or output edges, though that is not +// checked. +void RemoveNode(NodeDef* nd, GraphDef* graph, NodeMap* node_map) { + node_map->RemoveNode(nd->name()); + // TODO(tucker): The efficiency of this routine is poor. + // Change to accumulate and do a bulk removal, maybe refactoring + // some code from dependency_optimizer. + protobuf::RepeatedPtrField* nodes = graph->mutable_node(); + for (int i = 0; i < nodes->size(); ++i) { + if (nd->name() == (*nodes)[i].name()) { + nodes->SwapElements(i, nodes->size() - 1); + nodes->RemoveLast(); + return; + } + } + LOG(FATAL) << "Failed to find node " << nd->name() << " in graph"; +} + +// Removes a named edge from between two nodes. 
+Status RemoveEdge(const string& input_edge_name, const string& from_node_name, + NodeDef* to_node, NodeMap* node_map) { + if (node_map) { + node_map->RemoveOutput(from_node_name, to_node->name()); + } + protobuf::RepeatedPtrField* inputs = to_node->mutable_input(); + int edge_index = -1; + for (edge_index = 0; edge_index < inputs->size(); ++edge_index) { + VLOG(2) << " consider edge " << (*inputs)[edge_index]; + if ((*inputs)[edge_index] == input_edge_name) { + break; + } + } + if (edge_index >= inputs->size()) { + return errors::Internal("Could not find input name ", input_edge_name, + " at node ", to_node->name()); + } + inputs->DeleteSubrange(edge_index, 1); + return Status::OK(); +} +} // namespace + +void ScopedAllocatorOptimizer::ExtendNodeAttr(StringPiece name, + const std::vector& values, + NodeDef* node_def) { + if (HasNodeAttr(*node_def, name)) { + VLOG(2) << "extending"; + AttrValue* existing = &(*node_def->mutable_attr())[name.ToString()]; + for (int32 i : values) { + existing->mutable_list()->add_i(i); + } + } else { + VLOG(2) << "setting new attr value"; + AddNodeAttr(name, values, node_def); + } +} + +class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { + public: + ~UnaryElementwiseRewriter() override {} + + // Return non-OK if any input is already committed to a ScopedAllocator. 
+ Status CheckExistingScopedAllocator(const std::vector& inputs) { + for (const InputDesc& nd : inputs) { + VLOG(2) << "get attrs for " << nd.from_node_def->name(); + AttrSlice n_attrs = AttrSlice(*nd.from_node_def); + int sa_id; + Status ss = GetNodeAttr(n_attrs, "sa_id", &sa_id); + if (ss.ok()) { + LOG(INFO) << "Abandoning PARewriter because input " + << nd.from_node_def->name() << " is already assigned " + << "to ScopedAllocator " << sa_id; + return errors::Internal( + "Abandoning PARewriter because input ", nd.from_node_def->name(), + " is already assigned to ScopedAllocator ", sa_id); + } + } + return Status::OK(); + } + + // Return non-OK if any input is a member of op_set. + Status CheckInternalDataDependency(const std::set& op_set, + const std::vector& inputs) { + for (const InputDesc& nd : inputs) { + if (op_set.find(nd.from_node_def->name()) != op_set.end()) { + if (nd.output_slot != tensorflow::Graph::kControlSlot) { + return errors::Internal("Data edge exists bewtween ", + nd.from_node_def->name(), + " and another " + "node in the set"); + } + } + } + return Status::OK(); + } + + // Remove all control edges between members of ops. + void ClearInternalControlInputs(const std::set& op_set, + const std::vector& ops, + NodeMap* node_map) { + for (NodeDef* n : ops) { + for (const auto& input_name : n->input()) { + if (IsControlInput(input_name)) { + int position = 0; + string input_node_name = ParseNodeName(input_name, &position); + CHECK_EQ(position, -1); + if (op_set.find(input_node_name) != op_set.end()) { + // This is an internal control edge. Remove it. 
+ VLOG(1) << "Remove control output from " << input_node_name + << " via edge " << input_name << " to " << n->name(); + TF_CHECK_OK(RemoveEdge(input_name, input_node_name, n, node_map)); + } + } + } + } + } + + // Examine the input set of an op set, gathering their shapes and types + // and checking whether there are any considerations that prevent use + // of a single ScopedAllocator for all of those inputs. + Status AnalyzeInputs(ScopedAllocatorOptimizer* sa_opti, NodeMap* node_map, + const std::vector& ops, + const std::set& op_instance_names, + string* device_name, DataType* dtype, + std::vector* input_shapes, + std::vector* inputs, TensorShape* sa_shape) { + CHECK(graph_properties_); + LOG_WARNING_AND_RETURN_IF_ERROR( + CheckTypesAndGetShapes(*graph_properties_, ops, dtype, input_shapes)); + LOG_WARNING_AND_RETURN_IF_ERROR( + GetInputs(sa_opti->node_map(), ops, *dtype, inputs)); + LOG_WARNING_AND_RETURN_IF_ERROR(CheckExistingScopedAllocator(*inputs)); + LOG_WARNING_AND_RETURN_IF_ERROR( + CheckInternalDataDependency(op_instance_names, *inputs)); + ClearInternalControlInputs(op_instance_names, ops, node_map); + *device_name = ops[0]->device(); + CHECK(!device_name->empty()); + CHECK(!input_shapes->empty()); + CHECK_EQ(0, Allocator::kAllocatorAlignment % DataTypeSize(*dtype)) + << "ScopedAllocatorOptimizer only applies to types that evenly " + << "divide kAllocatorAlignment"; + std::vector sa_fields; + // Calculate the field embedding boundaries and thereby the + // required size of the backing tensor. + int64 num_bytes = ScopedAllocatorMgr::PopulateFields( + 0 /*scope_id*/, *input_shapes, *dtype, &sa_fields); + int64 num_elts = num_bytes / DataTypeSize(*dtype); + VLOG(2) << "num_bytes " << num_bytes << " num_elts=" << num_elts; + *sa_shape = TensorShape({num_elts}); + return Status::OK(); + } + + // Build the ScopedAllocator node that will be assigned to allocate + // the output tensors of the input node set. 
+ Status ConstructScopedAllocatorNode( + ScopedAllocatorOptimizer* sa_opti, GraphDef* graph, NodeMap* node_map, + const std::vector& ops, const string& device_name, + DataType dtype, int sa_id, const string& sa_name, + const std::vector& input_shapes, + const std::vector& inputs, const TensorShape& sa_shape) { + VLOG(2) << "ConstructScopedAllocatorNode " << sa_name; + NodeDefBuilder sa_builder(sa_name, "_ScopedAllocator"); + sa_builder.Device(device_name); + sa_builder.Attr("sa_name", sa_name); + sa_builder.Attr("T", dtype); + sa_builder.Attr("id", sa_id); + sa_builder.Attr("shapes", input_shapes); + sa_builder.Attr("shape", sa_shape); + sa_builder.Attr("expected_call_count", static_cast(ops.size())); + NodeDef* sa_node = graph->add_node(); + LOG_WARNING_AND_RETURN_IF_ERROR(sa_builder.Finalize(sa_node)); + node_map->AddNode(sa_name, sa_node); + + // Add control edges from the ScopedAllocatorOp to all of the + // input nodes and mark them for allocation from backing tensor. + for (int i = 0; i < inputs.size(); ++i) { + auto& nd = inputs[i]; + VLOG(2) << "To input " << i << ": " << nd.from_node_def->name() + << " add control input " + << "^" << sa_name; + nd.from_node_def->add_input(strings::StrCat("^", sa_name)); + // This attribute says: allocate output_slot from + // ScopedAllocator instance sa_id + 1 + i. 
+ ScopedAllocatorOptimizer::ExtendNodeAttr("_scoped_allocator", + {nd.output_slot, sa_id + 1 + i}, + nd.from_node_def); + node_map->AddOutput(sa_name, nd.from_node_def->name()); + } + return Status::OK(); + } + + Status BuildSAConcatNode(GraphDef* graph, NodeMap* node_map, + const std::vector& ops, + const std::set& op_instance_names, + const string& device_name, DataType dtype, int sa_id, + const string& sa_name, const string& sac_name, + const TensorShape& sa_shape, + std::vector* sac_inputs) { + VLOG(2) << "BuildSAConcatNode " << sac_name; + std::set sac_ctl_inputs; + for (int i = 0; i < ops.size(); ++i) { + NodeDef* old_op = ops[i]; + for (const string& old_op_input : old_op->input()) { + int position = 0; + string input_name = ParseNodeName(old_op_input, &position); + if (position == -1) { + // A control input: drop if from another member of the op set. + if (op_instance_names.find(old_op_input) == op_instance_names.end()) { + sac_ctl_inputs.insert(old_op_input); + } + } else { + // TODO(tucker): remove redundant check. + // A data input: illegal if from another member of the op set. 
+ if (op_instance_names.find(old_op_input) != op_instance_names.end()) { + LOG(ERROR) << "Data edge between " << old_op_input << " and " + << old_op->name() << " cannot build ScopedAllocator."; + return errors::Internal("Data edge between ", old_op_input, " and ", + old_op->name(), + " cannot build ScopedAllocator."); + } + sac_inputs->push_back( + NodeDefBuilder::NodeOut(old_op_input, 0, dtype)); + } + VLOG(3) << "from op " << i << ": " << old_op->name() + << " sac_inputs append " << old_op_input; + } + } + NodeDefBuilder sac_builder(sac_name, "_ScopedAllocatorConcat"); + VLOG(2) << "New sac_name " << sac_name << " shape " + << sa_shape.DebugString(); + sac_builder.Device(device_name); + sac_builder.Attr("sa_name", sa_name); + sac_builder.Attr("id", sa_id); + sac_builder.Attr("T", dtype); + sac_builder.Attr("shape", sa_shape); + sac_builder.Attr("N", static_cast(sac_inputs->size())); + sac_builder.Input(NodeDefBuilder::NodeOut(sa_name, 0, dtype)); + sac_builder.Input(*sac_inputs); + NodeDef* sac_node = graph->add_node(); + LOG_WARNING_AND_RETURN_IF_ERROR(sac_builder.Finalize(sac_node)); + node_map->AddNode(sac_name, sac_node); + node_map->AddOutput(sa_name, sac_name); + + // Attach the old control inputs to the new sac node. + for (const string& ctl_input : sac_ctl_inputs) { + sac_node->add_input(ctl_input); + } + return Status::OK(); + } + + Status BuildReplacementOp(GraphDef* graph, NodeMap* node_map, + const std::vector& ops, + const string& device_name, DataType dtype, + const string& op_name, const string& sac_name, + const string& sa_op_name) { + VLOG(2) << "BuildReplacementOp " << sa_op_name; + NodeDefBuilder op_builder(sa_op_name, op_name); + op_builder.Device(device_name); + + // Transfer the Node Attr from the first replaced Node to the new + // Node. TODO(tucker): In principle we should verify that + // the Attr are consistent and compatible across all op instances. 
+ // Unfortunately that will probably require op-specific tests, so + // punt on that for the time being. + AttrSlice first_slice(*ops[0]); + for (auto& it : first_slice) { + op_builder.Attr(it.first, it.second); + } + op_builder.Attr("_forward_input", {0, 0}); + op_builder.Input(sac_name, 0, dtype); + NodeDef* sa_op_node = graph->add_node(); + LOG_WARNING_AND_RETURN_IF_ERROR(op_builder.Finalize(sa_op_node)); + node_map->AddNode(sa_op_name, sa_op_node); + node_map->AddOutput(sac_name, sa_op_name); + return Status::OK(); + } + + Status BuildSplitNode(GraphDef* graph, NodeMap* node_map, + const std::vector& ops, + const std::vector& input_shapes, + const std::vector& sac_inputs, + const string& device_name, DataType dtype, + const string& op_name, int sa_id, + const string& sas_name, const string& sa_name, + const string& sa_op_name) { + VLOG(2) << "new ScopedAllocatorSplit " << sas_name; + NodeDefBuilder sas_builder(sas_name, "_ScopedAllocatorSplit"); + sas_builder.Device(device_name); + sas_builder.Attr("sa_name", sa_name); + sas_builder.Attr("id", sa_id); + sas_builder.Attr("T", dtype); + sas_builder.Attr("shapes", input_shapes); + std::vector sas_inputs = sac_inputs; + sas_builder.Attr("N", static_cast(sas_inputs.size())); + sas_builder.Input(NodeDefBuilder::NodeOut(sa_op_name, 0, dtype)); + sas_builder.Input(sas_inputs); + NodeDef* sas_node = graph->add_node(); + LOG_WARNING_AND_RETURN_IF_ERROR(sas_builder.Finalize(sas_node)); + node_map->AddNode(sas_name, sas_node); + node_map->AddOutput(sa_op_name, sas_name); + return Status::OK(); + } + + // After the new ScopedAllocator and its corresponding Concat and + // Split nodes have been built, and a new single Op instance + // constructed, rewire the graph: Remove input edges to the old Op + // nodes and replace the old Op node outputs with the corresponding + // ScopedAllocatorSplit node outputs. 
After this the old Op nodes + // should no longer have any input or output edges and they can be + // removed from the graph. + Status RewireSubgraph(GraphDef* graph, NodeMap* node_map, + const std::vector& ops, + const std::set& op_instance_names, + const string& op_name, const string& sas_name) { + VLOG(2) << "RewireSubgraph"; + for (int op_idx = 0; op_idx < ops.size(); ++op_idx) { + NodeDef* old_op = ops[op_idx]; + // Copy the output node set since we'll be modifying the version + // maintained by NodeMap in the loop. + std::set output_nodes = node_map->GetOutputs(old_op->name()); + VLOG(3) << "old_op " << old_op->name() << " had " << output_nodes.size() + << " outputs. Moving them to the PASplit node."; + if (VLOG_IS_ON(2)) { + for (NodeDef* n : output_nodes) { + VLOG(3) << " output: " << n->name(); + } + } + for (NodeDef* n : output_nodes) { + VLOG(3) << "really checking old output " << n->name() + << " for corresponding input."; + if (op_instance_names.find(n->name()) != op_instance_names.end()) { + // If this output node is a member of the ops set, it must have + // been an internal control edge so drop it. + VLOG(3) << "Dropping control output from " << old_op->name() << " to " + << n->name(); + // However, we may already have dropped it at the clear() below, + // so if we fail to find it, that's okay. 
+ Status ignore = RemoveEdge(strings::StrCat("^", old_op->name()), + old_op->name(), n, node_map); + continue; + } + bool found = false; + VLOG(3) << "about to iterate over " << n->input_size() << " inputs"; + for (int i = 0; i < n->input_size(); ++i) { + VLOG(3) << "input " << n->input(i); + int position = 0; + string input_node = ParseNodeName(n->input(i), &position); + if (input_node == old_op->name()) { + found = true; + VLOG(3) << "match pos=" << position; + if (position == -1) { + // It was a control edge + *n->mutable_input(i) = strings::StrCat("^", sas_name); + } else { + CHECK_EQ(0, position) + << "name " << n->input(i) << " pos " << position; + *n->mutable_input(i) = strings::StrCat(sas_name, ":", op_idx); + } + node_map->RemoveOutput(old_op->name(), n->name()); + node_map->AddOutput(sas_name, n->name()); + VLOG(3) << "breaking on success"; + break; + } else { + VLOG(3) << "other input " << n->input(i); + } + } + // In general it's required that we found the output node's old + // input and replaced it, but one exception is if the output node + // is of the same type being coalesced and the edge is a control + // input. In that case it probably got eliminated in an earlier + // pass. + VLOG(3) << "before HasOp"; + if (!HasOpName(n->name(), op_name)) { + CHECK(found) << "old_op " << old_op->name() << " node " + << " could not find input edge on " << n->DebugString() + << " to replace." + << " " << op_name << " not in " << n->name(); + } + VLOG(3) << "bottom of for output_nodes"; + } + VLOG(3) << "Clearing all inputs of " << old_op->name(); + node_map->RemoveInputs(old_op->name()); + old_op->clear_input(); + node_map->RemoveOutputs(old_op->name()); + VLOG(3) << "after clear: " << old_op->DebugString(); + // old_op should be dead, with no further inputs or outputs. + // It needs to be removed altogether before the graph is generated, + // but we need to leave it around until this Optimizer is done, + // because there may be some + // Remove. 
+ RemoveNode(old_op, graph, node_map); + } + return Status::OK(); + } + + // Given a collection of instances of op_name, presumed to be + // logically parallel and operating on tensors of the same type, + // replace them by a single instance. First find the upstream Ops + // generating their inputs. Create a new ScopedAllocatorOp that + // outputs a single backing_tensor pre-arranged for sub-allocation + // of all of those input tensors. Then insert a new + // ScopedAllocatorConcatOp below the upstream Ops to make explicit + // the materialization of a concatenation of their outputs. Put the + // new op_name instance below the new concat op and follow with a + // ScopedAllocatorSplitOp that restores the correct shape outputs + // for the consumers of the old op_name instances. + // + // There must be no non-control edges between Nodes in 'ops'. + // Control edges among these nodes will be dropped. + Status Rewrite(ScopedAllocatorOptimizer* sa_opti, GraphDef* graph, + const string& op_name, const std::vector& ops, + bool* applied) override { + if (VLOG_IS_ON(1)) { + VLOG(1) << "Rewrite"; + string op_names; + for (auto& nd : ops) { + strings::StrAppend(&op_names, nd->name(), ", "); + } + VLOG(1) << "UnaryElementwiseRewriter::Rewrite " << op_name + << " to: " << op_names; + } + NodeMap* node_map = sa_opti->node_map(); + + // Make a set of the node names for faster membership testing. 
+ std::set op_instance_names; + for (auto& nd : ops) { + op_instance_names.insert(nd->name()); + VLOG(2) << "op_instance_name " << nd->name(); + } + DataType dtype; + std::vector input_shapes; + std::vector inputs; + TensorShape sa_shape; + string device_name; + + TF_RETURN_IF_ERROR(AnalyzeInputs(sa_opti, node_map, ops, op_instance_names, + &device_name, &dtype, &input_shapes, + &inputs, &sa_shape)); + + int sa_id = sa_opti->NewScopedAllocatorId(input_shapes.size()); + string sa_name = strings::StrCat("scoped_allocator_", sa_id); + TF_RETURN_IF_ERROR(ConstructScopedAllocatorNode( + sa_opti, graph, node_map, ops, device_name, dtype, sa_id, sa_name, + input_shapes, inputs, sa_shape)); + + // TODO(tucker): Maybe add control edges to delay execution of the + // ScopedAllocatorOp until just before first use in order to + // conserve memory. What would be correct? Let I0...In be the + // input nodes that are all going to alloc from SA. If we make + // SA wait until all of these are ready, that might be too slow. + // It should probably wait until at least one is ready, but which + // one? Maybe just pick the first. + // { + // auto& nd = inputs[0]; + // std::vector inputs_to_first; + // LOG_WARNING_AND_RETURN_IF_ERROR(GetInputs(sa_opti->node_map(), + // {nd.from_node_def}, + // dtype, &inputs_to_first)); + // for (int i = 0; i < inputs_to_first.size(); ++i) { + // sa_node->add_input( + // strings::StrCat("^", inputs_to_first[i].from_node_def->name())); + // } + // } + + // Build a ScopedAllocatorConcat below all of the input nodes. + std::vector sac_inputs; + string sac_name = strings::StrCat("scoped_allocator_concat_", sa_id); + TF_RETURN_IF_ERROR(BuildSAConcatNode( + graph, node_map, ops, op_instance_names, device_name, dtype, sa_id, + sa_name, sac_name, sa_shape, &sac_inputs)); + + // Construct a new instance of the parallel op and insert it + // immediately below the new ScopedAllocatorConcat. 
+ string sa_op_name = strings::StrCat(sa_name, "_", op_name); + TF_RETURN_IF_ERROR(BuildReplacementOp(graph, node_map, ops, device_name, + dtype, op_name, sac_name, + sa_op_name)); + + // Build a ScopedAllocatorSplit split below the new Op. + string sas_name = strings::StrCat("scoped_allocator_split_", sa_id); + TF_RETURN_IF_ERROR(BuildSplitNode(graph, node_map, ops, input_shapes, + sac_inputs, device_name, dtype, op_name, + sa_id, sas_name, sa_name, sa_op_name)); + + // Rewire the graph. + TF_RETURN_IF_ERROR(RewireSubgraph(graph, node_map, ops, op_instance_names, + op_name, sas_name)); + + *applied = true; + return Status::OK(); + } +}; + +ScopedAllocatorOptimizer::ScopedAllocatorOptimizer( + const ScopedAllocatorOptions& opts) { + VLOG(1) << "ScopedAllocatorOptimizer::ScopedAllocatorOptimizer"; + Rewriter* r = new UnaryElementwiseRewriter(); + to_delete_.push_back(r); + if (opts.enable_op_size() == 0) { + // Opts handled by default: + for (const auto& op_name : {"CollectiveReduce"}) { + op_name_set_.insert(op_name); + rewriters_[op_name] = r; + } + } else { + for (const auto& op_name : opts.enable_op()) { + op_name_set_.insert(op_name); + rewriters_[op_name] = r; + } + } +} + +Status ScopedAllocatorOptimizer::Optimize(Cluster* /*cluster*/, + const GrapplerItem& item, + GraphDef* optimized_graph) { + *optimized_graph = item.graph; + // Nodes that cannot be removed from the graph without damaging correctness, + // typically fetch nodes. 
+ nodes_to_preserve_ = item.NodesToPreserve(); + + GraphProperties graph_properties(item); + const bool assume_valid_feeds = opt_level_ == RewriterConfig::AGGRESSIVE; + LOG_WARNING_AND_RETURN_IF_ERROR( + graph_properties.InferStatically(assume_valid_feeds)); + node_map_.reset(new NodeMap(optimized_graph)); + + LOG_WARNING_AND_RETURN_IF_ERROR(ScopedAllocatorOptimizer::ProcessGraphDef( + optimized_graph, graph_properties)); + + VLOG(1) << "ScopedAllocatorOptimizer::Optimize() done"; + return Status::OK(); +} + +ScopedAllocatorOptimizer::Rewriter* ScopedAllocatorOptimizer::GetRewriter( + const string& op_name) { + auto it = rewriters_.find(op_name); + if (it != rewriters_.end()) { + return it->second; + } + return nullptr; +} + +int ScopedAllocatorOptimizer::NewScopedAllocatorId(int num_fields) { + CHECK_GT(num_fields, 0); + int id = next_sa_id_; + next_sa_id_ += (num_fields + 1); + CHECK_GT(next_sa_id_, 0); + return id; +} + +ScopedAllocatorOptimizer::~ScopedAllocatorOptimizer() { + for (auto ptr : to_delete_) { + delete ptr; + } +} + +void ScopedAllocatorOptimizer::FindOpOccurrences(GraphDef* graph, + const OpNameSet& op_names, + GraphOpOccurrences* occs) { + VLOG(1) << "FindOpOccurrences "; + for (const auto& it : op_names) { + VLOG(1) << "search target " << it; + } + for (int ni = 0; ni < graph->node_size(); ++ni) { + NodeDef* node = graph->mutable_node(ni); + const string& op_name = node->op(); + if (op_names.find(op_name) != op_names.end()) { + VLOG(1) << "found " << op_name << " on dev " << node->device(); + (*occs)[node->device()][op_name].push_back(node); + } + } +} + +namespace { +struct OpNameOrder { + bool operator()(const NodeDef* a, const NodeDef* b) { + return a->name() <= b->name(); + } +}; + +class Tree { + public: + Tree(const string& edge, int depth) : edge_(edge), depth_(depth) {} + ~Tree() { + for (auto it : subtrees_) delete it.second; + } + + Tree* GetSubTree(const string& edge) { + auto it = subtrees_.find(edge); + if (it != subtrees_.end()) { 
+ return it->second; + } + Tree* t = new Tree(edge, depth_ + 1); + subtrees_[edge] = t; + return t; + } + + void InsertNode(NodeDef* n) { nodes_.push_back(n); } + + string edge_; + int depth_; + std::vector nodes_; + std::unordered_map subtrees_; +}; + +// Applies a function to every Tree in DFS order. Terminates early +// on any non-OK Status. +Status ApplyToAll(Tree* tree, const std::function& func) { + Status s; + for (auto it : tree->subtrees_) { + s = ApplyToAll(it.second, func); + if (!s.ok()) return s; + } + s = func(tree); + return s; +} + +Tree* ComputeScopeTree(const string& op_name, + const std::vector& node_vec) { + Tree* root = new Tree("", 0); + for (NodeDef* n : node_vec) { + std::vector pieces = str_util::Split(n->name(), "/"); + // last piece is node name proper. + int depth = pieces.size() - 1; + Tree* subtree = root; + for (int i = 0; i < depth; ++i) { + subtree = subtree->GetSubTree(pieces[i]); + } + subtree->InsertNode(n); + } + return root; +} + +void PartitionByLoopStructure(const FrameMap& frame_map, + std::vector nodes, + std::vector>* loop_groups) { + // It is assumed that two nodes with identical loop containment have + // identical integer vectors. Represent those by 64 bit hashes. 
+ std::unordered_map> loop_sets; + for (NodeDef* nd : nodes) { + uint64 hash = 0; + const auto& it = frame_map.find(nd); + if (it != frame_map.end()) { + const std::vector& loop_ids = it->second; + for (int id : loop_ids) { + hash = Hash64Combine(hash, static_cast(id)); + } + } + loop_sets[hash].push_back(nd); + } + for (auto it : loop_sets) { + loop_groups->push_back(std::move(it.second)); + } +} + +} // namespace + +Status ScopedAllocatorOptimizer::ProcessGraphDef( + GraphDef* graph, const GraphProperties& graph_properties) { + VLOG(1) << "ProcessGraphDef"; + Status status; + GraphOpOccurrences occ; + FindOpOccurrences(graph, op_name_set_, &occ); + if (!occ.empty()) { + FrameMap frame_map; + int num_frames; + LOG_WARNING_AND_RETURN_IF_ERROR( + IdentifyFramesWithNodeMap(*graph, *node_map_, &frame_map, &num_frames)); + for (auto& dt : occ) { + VLOG(2) << "Processing device " << dt.first; + const DevOpOccurrences& dev_occ = dt.second; + for (auto& it : dev_occ) { + string op_name = it.first; + VLOG(1) << "Processing " << op_name << " set size " << it.second.size(); + Rewriter* rewriter = GetRewriter(op_name); + if (!rewriter) { + LOG(ERROR) << "Failed to find PARewriter for op_name " << op_name; + continue; + } + rewriter->SetGraphProperties(graph_properties); + std::unique_ptr root(ComputeScopeTree(it.first, it.second)); + // Nodes with a common depth and root path are now grouped + // in the same Tree struct. Split those groups into subgroups that + // share identical loop nesting. 
+ status = ApplyToAll( + root.get(), [this, rewriter, graph, &frame_map, &op_name](Tree* t) { + VLOG(2) << "applied to tree node " << t->edge_ << " at depth " + << t->depth_ << " of size " << t->nodes_.size(); + if (t->nodes_.size() > 1) { + std::vector> loop_groups; + PartitionByLoopStructure(frame_map, t->nodes_, &loop_groups); + for (auto& lg : loop_groups) { + if (lg.size() > 1) { + bool applied = false; + Status s = OrderNodeSet(&lg); + TF_RETURN_IF_ERROR(s); + VLOG(1) << "Applying Rewriter for " << op_name; + s = rewriter->Rewrite(this, graph, op_name, lg, &applied); + LOG_WARNING_AND_RETURN_IF_ERROR(s); + } + } + } + return Status::OK(); + }); + if (!status.ok()) { + break; + } + } + if (!status.ok()) { + break; + } + } + } + VLOG(1) << "ScopedAllocatorOptimizer returning " << status; + if (!status.ok()) { + LOG(ERROR) << "ScopedAllocatorOptimizer: " << status; + } + return status; +} + +namespace { +struct InstanceKeyLess { + bool operator()(const NodeDef* a, const NodeDef* b) const { + AttrSlice a_attrs = AttrSlice(*a); + AttrSlice b_attrs = AttrSlice(*b); + int32 a_key = -1; + int32 b_key = -1; + Status s = GetNodeAttr(a_attrs, "instance_key", &a_key); + CHECK(s.ok()); + s = GetNodeAttr(b_attrs, "instance_key", &b_key); + CHECK(s.ok()); + return a_key < b_key; + } +}; + +struct NameLess { + bool operator()(const NodeDef* a, const NodeDef* b) const { + return a->name() < b->name(); + } +}; + +bool IsCollectiveNode(const NodeDef& n) { + AttrSlice attrs = AttrSlice(n); + int key = -1; + if (!IsCollective(n)) return false; + Status s = GetNodeAttr(attrs, "instance_key", &key); + if (s.ok() && key >= 0) { + return true; + } + return false; +} +} // namespace + +Status ScopedAllocatorOptimizer::OrderNodeSet( + std::vector* nodes) const { + // Nodes should be identical type. Default order is by name but for + // collectives we order by increasing instance_key so each group gets + // the same instance_key. 
+ if (nodes->size() <= 1) return Status::OK(); + if (IsCollectiveNode(*nodes->at(0))) { + sort(nodes->begin(), nodes->end(), InstanceKeyLess()); + } else { + sort(nodes->begin(), nodes->end(), NameLess()); + } + return Status::OK(); +} + +} // namespace grappler +} // namespace tensorflow + +#undef LOG_WARNING_AND_RETURN_IF_ERROR diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h new file mode 100644 index 0000000000..ab4d444595 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h @@ -0,0 +1,107 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SCOPED_ALLOCATOR_OPTIMIZER_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SCOPED_ALLOCATOR_OPTIMIZER_H_ + +#include +#include +#include +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" + +namespace tensorflow { +namespace grappler { +class Graph; +class GraphProperties; +class NodeMap; +class ScopedAllocatorOptimizer; + +// An Optimizer that introduces ScopedAllocators in order to reduce data +// movement and consolidate some kinds of Ops. 
+class ScopedAllocatorOptimizer : public GraphOptimizer { + public: + explicit ScopedAllocatorOptimizer(const ScopedAllocatorOptions& opts); + ~ScopedAllocatorOptimizer() override; + + string name() const override { return "scoped_allocator_optimizer"; } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override {} + + // Map from an Op name to a vector of Nodes with that Op. + typedef std::unordered_map> DevOpOccurrences; + // Map from a device name to a DevOpOccurrences map. + typedef std::unordered_map GraphOpOccurrences; + typedef std::unordered_set OpNameSet; + + Status ProcessGraphDef(GraphDef* graph, + const GraphProperties& graph_properties); + + // Populates *occs by grouping Nodes with common Ops, according to + // their assigned devices. + void FindOpOccurrences(GraphDef* graph, const OpNameSet& op_names, + GraphOpOccurrences* occs); + + // Returns a new, unused scope_id to be assigned to a ScopedAllocator that + // will allocate num_fields (> 0) separate tensors. + int NewScopedAllocatorId(int num_fields); + + NodeMap* node_map() { return node_map_.get(); } + + // Appends values to the attr value under name in node_def, if present. + // If not present does an assignment. + static void ExtendNodeAttr(StringPiece name, const std::vector& values, + NodeDef* node_def); + + // Class that knows how to do graph rewriting for a particular kind of Op in + // order to take advantage of a ScopedAllocator. 
+ class Rewriter { + public: + virtual ~Rewriter() {} + + virtual Status Rewrite(ScopedAllocatorOptimizer* paopti, GraphDef* graph, + const string& op_name, + const std::vector& nodes, + bool* applied) = 0; + + void SetGraphProperties(const GraphProperties& graph_properties) { + graph_properties_ = &graph_properties; + CHECK(graph_properties_); + } + + protected: + const GraphProperties* graph_properties_; + }; + + private: + Rewriter* GetRewriter(const string& op_name); + + Status OrderNodeSet(std::vector* nodes) const; + + RewriterConfig::Toggle opt_level_; + std::unordered_set nodes_to_preserve_; + OpNameSet op_name_set_; + std::unordered_map rewriters_; + std::vector to_delete_; + int next_sa_id_ = 1; + std::unique_ptr node_map_; +}; + +} // namespace grappler +} // namespace tensorflow +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SCOPED_ALLOCATOR_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc new file mode 100644 index 0000000000..3a2859dc5f --- /dev/null +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc @@ -0,0 +1,243 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h" + +#include + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/graph/testlib.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class ScopedAllocatorOptimizerTest : public ::testing::Test { + public: + std::unique_ptr CreateSession(const GraphDef& graph, + const ConfigProto& config) { + SessionOptions options; + options.config = config; + (*options.config.mutable_device_count())["CPU"] = 2; + Session* session = NewSession(options); + TF_CHECK_OK(session->Create(graph)); + return std::unique_ptr(session); + } + + std::vector EvaluateNodes(const GraphDef& graph, + const std::vector& fetch) { + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(graph)); + RunOptions run_options; + std::vector output_tensors; + TF_CHECK_OK( + session->Run(run_options, {}, fetch, fetch, &output_tensors, nullptr)); + TF_CHECK_OK(session->Close()); + return output_tensors; + } + + // Constructs the following graph. + // (Flow is top to bottom, like nature intends.) + // + // The intended optimization is to have s1 and s2 allocate from + // an new ScopedAllocator, then replace a1 and a2 with a3 that + // reads from the backing buffer. 
+ /* + a b c + \ / \ / + s1 s2 + | | + a1 a2 + | | + r1 r2 + */ + void BuildAbsGraph(GraphDef* graph_def) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + s = s.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); + + Output a = + ops::Const(s.WithOpName("a"), {1.0, 0.0, 0.0, -1.0}, {2, 2}); + Output b = + ops::Const(s.WithOpName("b"), {1.0, -2.0, 3.0, 4.0}, {2, 2}); + Output c = + ops::Const(s.WithOpName("c"), {-5.0, -2.0, 0.0, -2.0}, {2, 2}); + Output s1 = ops::Add(s.WithOpName("s1"), a, b); + Output s2 = ops::Add(s.WithOpName("s2"), b, c); + Output a1 = ops::Abs(s.WithOpName("a1"), s1); + Output a2 = ops::Abs(s.WithOpName("a2"), s2); + Output r1 = ops::Reshape(s.WithOpName("r1"), a1, {1, 4}); + Output r2 = ops::Reshape(s.WithOpName("r2"), a2, {4, 1}); + TF_CHECK_OK(s.ToGraphDef(graph_def)); + } + + void SetShapes(GraphDef* graph_def) { + TensorShapeProto shape_proto; + shape_proto.add_dim()->set_size(2); + shape_proto.add_dim()->set_size(2); + + for (NodeDef& n : *graph_def->mutable_node()) { + if (n.op() == "Add" || n.op() == "Abs") { + AddNodeAttr("_output_shapes", {shape_proto}, &n); + } + } + } +}; + +TEST_F(ScopedAllocatorOptimizerTest, UnaryRewriteOnly) { + // Tests that Rewrite of program with parallel unary Ops is done as + // anticipated. + GrapplerItem item; + BuildAbsGraph(&item.graph); + SetShapes(&item.graph); + + ScopedAllocatorOptions opts; + opts.add_enable_op("Abs"); + ScopedAllocatorOptimizer sao(opts); + ScopedAllocatorOptimizer::OpNameSet ons; + ons.insert("Abs"); + + GraphDef optimized_graph; + TF_ASSERT_OK(sao.Optimize(nullptr /*cluster*/, item, &optimized_graph)); + + // Examine the resulting graph def. 
+ NodeMap node_map(&optimized_graph); + NodeDef* nd = node_map.GetNode("scoped_allocator_1"); + ASSERT_TRUE(nd); + { + auto& nd_set = node_map.GetOutputs(nd->name()); + ASSERT_EQ(3, nd_set.size()); + std::unordered_set expected = {"scoped_allocator_concat_1", "s1", + "s2"}; + for (auto it : nd_set) { + ASSERT_NE(expected.find(it->name()), expected.end()) + << "Failed to find " << it->name(); + } + } + { + auto& nd_set = node_map.GetOutputs("scoped_allocator_concat_1"); + ASSERT_EQ(1, nd_set.size()); + for (auto it : nd_set) { + ASSERT_EQ("scoped_allocator_1_Abs", it->name()); + } + } + { + auto& nd_set = node_map.GetOutputs("scoped_allocator_1_Abs"); + ASSERT_EQ(1, nd_set.size()); + for (auto it : nd_set) { + ASSERT_EQ("scoped_allocator_split_1", it->name()); + } + } + { + auto& nd_set = node_map.GetOutputs("scoped_allocator_split_1"); + ASSERT_EQ(2, nd_set.size()); + std::unordered_set name_set; + for (auto it : nd_set) { + name_set.insert(it->name()); + } + ASSERT_TRUE(name_set.find("r1") != name_set.end()); + ASSERT_TRUE(name_set.find("r2") != name_set.end()); + } +} + +TEST_F(ScopedAllocatorOptimizerTest, UnaryExecute) { + // Constructs the same graph as UnaryRewriteOnly, but actually executes it. + GrapplerItem item; + BuildAbsGraph(&item.graph); + + // Turn off all optimization except the ScopedAllocatorOptimizer + // to avoid anything that would alter the expected graph input/output, + // e.g. by constant folding away all calculations. 
+ ConfigProto config; + GraphOptions* gopt = config.mutable_graph_options(); + OptimizerOptions* opts = gopt->mutable_optimizer_options(); + opts->set_do_common_subexpression_elimination(false); + opts->set_do_constant_folding(false); + opts->set_do_function_inlining(false); + opts->set_opt_level(OptimizerOptions::L0); + RewriterConfig* rwcfg = gopt->mutable_rewrite_options(); + rwcfg->clear_optimizers(); + (*rwcfg->add_optimizers()) = "scoped_allocator"; + rwcfg->mutable_scoped_allocator_opts()->add_enable_op("Abs"); + std::unique_ptr session(CreateSession(item.graph, config)); + + std::vector> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector output_names = {"r1:0", "r2:0", + "scoped_allocator_1_Abs:0"}; + std::vector target_nodes = {}; + std::vector outputs; + Status s = session->Run(inputs, output_names, target_nodes, &outputs); + TF_ASSERT_OK(s); + ASSERT_EQ(outputs.size(), 3); + std::vector expected_r1({2, 2, 3, 3}); + std::vector expected_r2({4, 4, 3, 2}); + // a + b == 2, -2, 3, 3 + // b + c == -4, -4, 3, 2 + for (int oi = 0; oi < outputs.size(); ++oi) { + for (int i = 0; i < outputs[oi].NumElements(); ++i) { + VLOG(0) << "output vec " << oi << " index " << i << " = " + << outputs[oi].flat()(i); + } + if (oi == 0) { + ASSERT_EQ(expected_r1.size(), outputs[oi].NumElements()); + for (int i = 0; i < expected_r1.size(); ++i) { + EXPECT_EQ(expected_r1[i], outputs[oi].flat()(i)); + } + } else if (oi == 1) { + ASSERT_EQ(expected_r2.size(), outputs[oi].NumElements()); + for (int i = 0; i < expected_r2.size(); ++i) { + EXPECT_EQ(expected_r2[i], outputs[oi].flat()(i)); + } + } + } +} + +// Tests static ScopedAllocatorOptimizer::ExtendNodeAttr. +// Maybe this should be moved elsewhere? 
+TEST_F(ScopedAllocatorOptimizerTest, Extend) { + NodeDef nd; + ScopedAllocatorOptimizer::ExtendNodeAttr("_scoped_allocator", {0, 2}, &nd); + ScopedAllocatorOptimizer::ExtendNodeAttr("_scoped_allocator", {6, 7}, &nd); + ScopedAllocatorOptimizer::ExtendNodeAttr("_scoped_allocator", {2, 3}, &nd); + VLOG(0) << "nd: " << nd.DebugString(); + std::vector scoped_allocator_attrs; + AttrSlice slice(nd); + Status sa_status = + GetNodeAttr(slice, "_scoped_allocator", &scoped_allocator_attrs); + for (int i : scoped_allocator_attrs) { + VLOG(0) << "extracted: " << i; + } + NodeDef nd2; + AddNodeAttr("_scoped_allocator", {0, 2}, &nd2); + AddNodeAttr("_scoped_allocator", {6, 7}, &nd2); + AddNodeAttr("_scoped_allocator", {2, 3}, &nd2); + VLOG(0) << "nd2: " << nd2.DebugString(); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc index 019c6619ee..bb0129fa6f 100644 --- a/tensorflow/core/kernels/scoped_allocator_ops_test.cc +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -37,10 +37,12 @@ namespace tensorflow { class ScopedAllocatorOpTest : public OpsTestBase { protected: - void MakeOp(const gtl::ArraySlice& shapes, DataType dtype, + void MakeOp(const TensorShape& shape, + const gtl::ArraySlice& shapes, DataType dtype, const string& name, int32 id, int32 expected_call_count) { TF_EXPECT_OK(NodeDefBuilder("scoped_allocator_op", "_ScopedAllocator") .Attr("T", dtype) + .Attr("shape", shape) .Attr("shapes", shapes) .Attr("sa_name", name) .Attr("id", id) @@ -61,12 +63,14 @@ class ScopedAllocatorOpTest : public OpsTestBase { }; TEST_F(ScopedAllocatorOpTest, Simple) { - MakeOp({TensorShape({8})}, DT_FLOAT, "test", 120, 1); - MakeOp({TensorShape({32, 32})}, DT_DOUBLE, "test1", 130, 1); - MakeOp({TensorShape({64}), TensorShape({3, 3}), TensorShape({5, 5, 5})}, + MakeOp(TensorShape({8}), {TensorShape({8})}, DT_FLOAT, "test", 120, 1); + 
MakeOp(TensorShape({1024}), {TensorShape({32, 32})}, DT_DOUBLE, "test1", 130, + 1); + MakeOp(TensorShape({204}), + {TensorShape({64}), TensorShape({3, 3}), TensorShape({5, 5, 5})}, DT_HALF, "test2", 140, 3); - MakeOp({TensorShape({512}), TensorShape({64, 8})}, DT_UINT32, "test3", 150, - 2); + MakeOp(TensorShape({1024}), {TensorShape({512}), TensorShape({64, 8})}, + DT_UINT32, "test3", 150, 2); } // PrepOp is common to ConcatOp tests and SplitOpTests. @@ -249,23 +253,26 @@ TEST_F(ScopedAllocatorConcatOpTest, FailBounds) { class ScopedAllocatorSplitOpTest : public OpsTestBase { protected: - void BuildNodeDef(const TensorShape& shape, DataType dtype, - const string& name, int32 id, int32 num_tensors) { + void BuildNodeDef(const TensorShape& in_shape, DataType dtype, + const string& name, int32 id, int32 num_tensors, + const std::vector& out_shapes) { TF_EXPECT_OK( NodeDefBuilder("scoped_allocator_split_op", "_ScopedAllocatorSplit") .Attr("T", dtype) .Attr("N", num_tensors) .Attr("sa_name", name) .Attr("id", id) + .Attr("shapes", out_shapes) .Input(FakeInput(dtype)) // backing tensor and input .Input( FakeInput(num_tensors, dtype)) // list of subtensors to forward .Finalize(node_def())); } - void MakeOp(const TensorShape& shape, DataType dtype, const string& name, - int32 id, int32 num_tensors) { - BuildNodeDef(shape, dtype, name, id, num_tensors); + void MakeOp(const TensorShape& in_shape, DataType dtype, const string& name, + int32 id, int32 num_tensors, + const std::vector& out_shapes) { + BuildNodeDef(in_shape, dtype, name, id, num_tensors, out_shapes); TF_EXPECT_OK(InitOp()); } @@ -305,33 +312,33 @@ class ScopedAllocatorSplitOpTest : public OpsTestBase { }; TEST_F(ScopedAllocatorSplitOpTest, Success1) { - MakeOp({32}, DT_FLOAT, "test", 120, 2); + MakeOp({32}, DT_FLOAT, "test", 120, 2, {{16}, {16}}); ExecOp(DT_FLOAT, 120, {{16}, {16}}); } TEST_F(ScopedAllocatorSplitOpTest, Success2) { - MakeOp({2, 2, 2}, DT_DOUBLE, "test", 120, 2); + MakeOp({2, 2, 2}, DT_DOUBLE, 
"test", 120, 2, {{2, 2}, {2, 2}}); ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); } TEST_F(ScopedAllocatorSplitOpTest, Success3) { - MakeOp({3, 3, 3}, DT_HALF, "test", 120, 3); + MakeOp({3, 3, 3}, DT_HALF, "test", 120, 3, {{3, 3}, {3, 3}, {3, 3}}); ExecOp(DT_HALF, 120, {{3, 3}, {3, 3}, {3, 3}}); } TEST_F(ScopedAllocatorSplitOpTest, FailNLessThan2) { - BuildNodeDef({4, 4}, DT_FLOAT, "test", 120, 1); + BuildNodeDef({4, 4}, DT_FLOAT, "test", 120, 1, {{4, 4}}); Status s = InitOp(); EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); } TEST_F(ScopedAllocatorSplitOpTest, FailDtypeCheck) { - MakeOp({8}, DT_FLOAT, "test", 120, 2); + MakeOp({8}, DT_FLOAT, "test", 120, 2, {{4}, {4}}); EXPECT_DEATH(ExecOp(DT_HALF, 120, {{4}, {4}}), ""); } TEST_F(ScopedAllocatorSplitOpTest, FailBounds) { - MakeOp({8}, DT_DOUBLE, "test", 120, 2); + MakeOp({8}, DT_DOUBLE, "test", 120, 2, {{4}, {4}}); AddInputFromArray({8}, {0, 1, 2, 3, 4, 5, 6, 7}); AddInputFromArray({4}, {0, 1, 2, 3}); AddInputFromArray({4}, {4, 5, 6, 7}); diff --git a/tensorflow/core/ops/scoped_allocator_ops.cc b/tensorflow/core/ops/scoped_allocator_ops.cc index 1e0dcdac96..359b4d8756 100644 --- a/tensorflow/core/ops/scoped_allocator_ops.cc +++ b/tensorflow/core/ops/scoped_allocator_ops.cc @@ -21,6 +21,7 @@ namespace tensorflow { REGISTER_OP("_ScopedAllocator") .Output("output: T") .Attr("shapes: list(shape)") + .Attr("shape: shape") .Attr("T: type") .Attr("sa_name: string") .Attr("id: int") @@ -35,6 +36,16 @@ Returns a reference to this value. This is an experimental op for internal use only. It is possible to use this op in unsafe ways. + +'shapes' is a list of the shapes of the tensors that are to be allocated +by this ScopedAllocator. +'shape' is the shape of the output of this Op, i.e. the 1D backing tensor +from which the individual allocated tensors are aliased. +'sa_name' is the name assigned to the Node, for connectivity specification +and debugging. +'id' is a non-negative integer 'scope_id' handled by the ScopedAllocatorMgr. 
+'expected_call_count' is the number of individual tensors expected to +be allocated from the backing tensor. )doc"); REGISTER_OP("_ScopedAllocatorConcat") @@ -57,6 +68,18 @@ reference to that ScopedAllocator's backing tensor. This is an experimental op for internal use only. It is possible to use this op in unsafe ways. + +'backing' is the backing tensor, i.e. the output of an upstream ScopedAllocator. +'inputs' is a list of nominal input tensors, all of which must be aliases +to regions of the backing tensor. These will be outputs of upstream nodes +that allocate their outputs from the same ScopedAllocator. +'shape' is the shape of the output, which will usually be the same shape as +the input backing tensor. +'reshape' is true iff the output shape is to be different from that of +the input backing tensor. +'sa_name' is the Node name of the upstream ScopedAllocator. +'id' is the scope_id identifying the upstream ScopedAllocator. +'N' is the number of nominal inputs to be concatenated. )doc"); REGISTER_OP("_ScopedAllocatorSplit") @@ -67,8 +90,9 @@ REGISTER_OP("_ScopedAllocatorSplit") .Attr("sa_name: string") .Attr("id: int") .Attr("N: int >= 2") + .Attr("shapes: list(shape)") .SetIsStateful() - .SetShapeFn(shape_inference::ExplicitShape) + .SetShapeFn(shape_inference::ExplicitShapes) .Doc(R"doc( Acts roughly like a SplitV Op that splits one tensor into multiple tensors but must only be used in conjunction with corresponding ScopedAllocator @@ -79,6 +103,17 @@ second list. This is an experimental op for internal use only. It is possible to use this op in unsafe ways. + +'concat' is the single output produced by an upstream ScopedAllocatorConcat +node. This is actually the backing tensor from a ScopedAllocator node +upstream of the ScopedAllocatorConcat. +'split' is a list of tensors aliased from the backing tensor. It will +become the output of this ScopedAllocatorSplit node. +'type' is the common DataType of all of the input and output tensors. 
+'sa_name' is the Node name of the upstream ScopedAllocator. +'id' is the scope_id identifying the upstream ScopedAllocator. +'N' is the number of split tensors. +'shapes' is a list of the split tensor shapes. )doc"); } // end namespace tensorflow diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 45e57594e4..bbb25d6f3f 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -14,6 +14,11 @@ message AutoParallelOptions { int32 num_replicas = 2; } +message ScopedAllocatorOptions { + // If present, only perform optimization for these ops. + repeated string enable_op = 1; +} + message RewriterConfig { // Graph rewriting is experimental and subject to change, not covered by any // API stability guarantees. @@ -67,6 +72,9 @@ message RewriterConfig { Toggle debug_stripper = 11; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; + // Try to allocate some independent Op outputs contiguously in order to + // merge or eliminate downstream Ops (off by default). + Toggle scoped_allocator_optimization = 15; // Controls how many times we run the optimizers in meta optimizer (default // is once). @@ -115,6 +123,8 @@ message RewriterConfig { // meta-optimizer or when manually specified through the optimizers field. AutoParallelOptions auto_parallel = 5; + ScopedAllocatorOptions scoped_allocator_opts = 16; + // If non-empty, will use this as an alternative way to specify a list of // optimizations to turn on and the order of the optimizations (replacing the // meta-optimizer). -- GitLab From bef91e4d84c9618226ce2a0c570b7d89ccf3b69d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 12:56:40 -0700 Subject: [PATCH 159/902] [tpu:profiler] Minor change in the description of tool name proto. 
PiperOrigin-RevId: 198089875 --- tensorflow/contrib/tpu/profiler/tpu_profiler.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index 7be694e866..f0fca63db0 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -68,7 +68,8 @@ message ProfileRequest { } message ProfileToolData { - // The tool's name which this data is associated. (e.g. "input_pipeline".) + // The file name which this data is associated (e.g. "input_pipeline.json", + // "cluster_xxx.memory_viewer.json"). string name = 1; // The data payload (likely json) for the specific tool. -- GitLab From b6ae98b4ac1ec3051d81f3133b827d6bb305aa2b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 12:58:55 -0700 Subject: [PATCH 160/902] Use functions to build dense splits. Tensorflow Function invocations share the same graph so using them reduces the graph construction overhead. 
PiperOrigin-RevId: 198090110 --- .../learner/batch/ordinal_split_handler.py | 230 ++++++++++-------- .../batch/ordinal_split_handler_test.py | 34 +-- 2 files changed, 150 insertions(+), 114 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py index 8225318b70..409a2d8f46 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler.py @@ -243,45 +243,74 @@ class DenseSplitHandler(InequalitySplitHandler): def make_splits(self, stamp_token, next_stamp_token, class_id): """Create the best split using the accumulated stats and flush the state.""" - # Get the bucket boundaries - are_splits_ready, buckets = ( - self._quantile_accumulator.get_buckets(stamp_token)) - # After we receive the boundaries from previous iteration we can flush - # the quantile accumulator. - with ops.control_dependencies([buckets]): - flush_quantiles = self._quantile_accumulator.flush( - stamp_token=stamp_token, next_stamp_token=next_stamp_token) - - # Get the aggregated gradients and hessians per - # pair. - # In order to distribute the computation on all the PSs we use the PS that - # had the stats accumulator on. - with ops.device(None): - with ops.device(self._stats_accumulator.resource().device): - num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( - self._stats_accumulator.flush(stamp_token, next_stamp_token)) - - # Put quantile and stats accumulator flushing in the dependency path. 
- are_splits_ready = control_flow_ops.with_dependencies( - [flush_quantiles, partition_ids], are_splits_ready) - - partition_ids, gains, split_infos = ( - split_handler_ops.build_dense_inequality_splits( - num_minibatches=num_minibatches, - bucket_boundaries=buckets, - partition_ids=partition_ids, - bucket_ids=bucket_ids, - gradients=gradients, - hessians=hessians, - class_id=class_id, - feature_column_group_id=self._feature_column_group_id, - l1_regularization=self._l1_regularization, - l2_regularization=self._l2_regularization, - tree_complexity_regularization=self. - _tree_complexity_regularization, - min_node_weight=self._min_node_weight, - multiclass_strategy=self._multiclass_strategy)) - return (are_splits_ready, partition_ids, gains, split_infos) + if (self._gradient_shape == tensor_shape.scalar() and + self._hessian_shape == tensor_shape.scalar()): + handler = make_dense_split_scalar + else: + handler = make_dense_split_tensor + + are_splits_ready, partition_ids, gains, split_infos = ( + handler(self._quantile_accumulator.resource(), + self._stats_accumulator.resource(), stamp_token, + next_stamp_token, self._multiclass_strategy, class_id, + self._feature_column_group_id, self._l1_regularization, + self._l2_regularization, self._tree_complexity_regularization, + self._min_node_weight)) + return are_splits_ready, partition_ids, gains, split_infos + + +def _make_dense_split(quantile_accumulator_handle, stats_accumulator_handle, + stamp_token, next_stamp_token, multiclass_strategy, + class_id, feature_column_id, l1_regularization, + l2_regularization, tree_complexity_regularization, + min_node_weight, is_multi_dimentional): + """Function that builds splits for a dense feature column.""" + # Get the bucket boundaries + are_splits_ready, buckets = ( + gen_quantile_ops.quantile_accumulator_get_buckets( + quantile_accumulator_handles=[quantile_accumulator_handle], + stamp_token=stamp_token)) + # quantile_accumulator_get_buckets returns a list of results per handle 
that + # we pass to it. In this case we're getting results just for one resource. + are_splits_ready = are_splits_ready[0] + buckets = buckets[0] + + # After we receive the boundaries from previous iteration we can flush + # the quantile accumulator. + with ops.control_dependencies([buckets]): + flush_quantiles = gen_quantile_ops.quantile_accumulator_flush( + quantile_accumulator_handle=quantile_accumulator_handle, + stamp_token=stamp_token, + next_stamp_token=next_stamp_token) + + if is_multi_dimentional: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_tensor_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + else: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_scalar_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + + # Put quantile and stats accumulator flushing in the dependency path. + with ops.control_dependencies([flush_quantiles, partition_ids]): + are_splits_ready = array_ops.identity(are_splits_ready) + partition_ids, gains, split_infos = ( + split_handler_ops.build_dense_inequality_splits( + num_minibatches=num_minibatches, + bucket_boundaries=buckets, + partition_ids=partition_ids, + bucket_ids=bucket_ids, + gradients=gradients, + hessians=hessians, + class_id=class_id, + feature_column_group_id=feature_column_id, + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, + min_node_weight=min_node_weight, + multiclass_strategy=multiclass_strategy)) + return are_splits_ready, partition_ids, gains, split_infos class SparseSplitHandler(InequalitySplitHandler): @@ -399,63 +428,64 @@ class SparseSplitHandler(InequalitySplitHandler): return are_splits_ready, partition_ids, gains, split_infos -def _specialize_sparse_split(is_multi_dimentional): +def _make_sparse_split(quantile_accumulator_handle, 
stats_accumulator_handle, + stamp_token, next_stamp_token, multiclass_strategy, + class_id, feature_column_id, l1_regularization, + l2_regularization, tree_complexity_regularization, + min_node_weight, is_multi_dimentional): + """Function that builds splits for a sparse feature column.""" + # Get the bucket boundaries + are_splits_ready, buckets = ( + gen_quantile_ops.quantile_accumulator_get_buckets( + quantile_accumulator_handles=[quantile_accumulator_handle], + stamp_token=stamp_token)) + # quantile_accumulator_get_buckets returns a list of results per handle that + # we pass to it. In this case we're getting results just for one resource. + are_splits_ready = are_splits_ready[0] + buckets = buckets[0] + + # After we receive the boundaries from previous iteration we can flush + # the quantile accumulator. + with ops.control_dependencies([buckets]): + flush_quantiles = gen_quantile_ops.quantile_accumulator_flush( + quantile_accumulator_handle=quantile_accumulator_handle, + stamp_token=stamp_token, + next_stamp_token=next_stamp_token) + + if is_multi_dimentional: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_tensor_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + else: + num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( + gen_stats_accumulator_ops.stats_accumulator_scalar_flush( + stats_accumulator_handle, stamp_token, next_stamp_token)) + + # Put quantile and stats accumulator flushing in the dependency path. 
+ with ops.control_dependencies([flush_quantiles, partition_ids]): + are_splits_ready = array_ops.identity(are_splits_ready) + partition_ids, gains, split_infos = ( + split_handler_ops.build_sparse_inequality_splits( + num_minibatches=num_minibatches, + bucket_boundaries=buckets, + partition_ids=partition_ids, + bucket_ids=bucket_ids, + gradients=gradients, + hessians=hessians, + class_id=class_id, + feature_column_group_id=feature_column_id, + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, + min_node_weight=min_node_weight, + bias_feature_id=_BIAS_FEATURE_ID, + multiclass_strategy=multiclass_strategy)) + return are_splits_ready, partition_ids, gains, split_infos + + +def _specialize_make_split(func, is_multi_dimentional): """Builds a specialized version of the function.""" - def _make_sparse_split(quantile_accumulator_handle, stats_accumulator_handle, - stamp_token, next_stamp_token, multiclass_strategy, - class_id, feature_column_id, l1_regularization, - l2_regularization, tree_complexity_regularization, - min_node_weight, is_multi_dimentional): - """Function that builds splits for a sparse feature column.""" - # Get the bucket boundaries - are_splits_ready, buckets = ( - gen_quantile_ops.quantile_accumulator_get_buckets( - quantile_accumulator_handles=[quantile_accumulator_handle], - stamp_token=stamp_token)) - # quantile_accumulator_get_buckets returns a list of results per handle that - # we pass to it. In this case we're getting results just for one resource. - are_splits_ready = are_splits_ready[0] - buckets = buckets[0] - - # After we receive the boundaries from previous iteration we can flush - # the quantile accumulator. 
- with ops.control_dependencies([buckets]): - flush_quantiles = gen_quantile_ops.quantile_accumulator_flush( - quantile_accumulator_handle=quantile_accumulator_handle, - stamp_token=stamp_token, - next_stamp_token=next_stamp_token) - - if is_multi_dimentional: - num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( - gen_stats_accumulator_ops.stats_accumulator_tensor_flush( - stats_accumulator_handle, stamp_token, next_stamp_token)) - else: - num_minibatches, partition_ids, bucket_ids, gradients, hessians = ( - gen_stats_accumulator_ops.stats_accumulator_scalar_flush( - stats_accumulator_handle, stamp_token, next_stamp_token)) - - # Put quantile and stats accumulator flushing in the dependency path. - with ops.control_dependencies([flush_quantiles, partition_ids]): - are_splits_ready = array_ops.identity(are_splits_ready) - partition_ids, gains, split_infos = ( - split_handler_ops.build_sparse_inequality_splits( - num_minibatches=num_minibatches, - bucket_boundaries=buckets, - partition_ids=partition_ids, - bucket_ids=bucket_ids, - gradients=gradients, - hessians=hessians, - class_id=class_id, - feature_column_group_id=feature_column_id, - l1_regularization=l1_regularization, - l2_regularization=l2_regularization, - tree_complexity_regularization=tree_complexity_regularization, - min_node_weight=min_node_weight, - bias_feature_id=_BIAS_FEATURE_ID, - multiclass_strategy=multiclass_strategy)) - return are_splits_ready, partition_ids, gains, split_infos - @function.Defun( dtypes.resource, dtypes.resource, @@ -474,7 +504,7 @@ def _specialize_sparse_split(is_multi_dimentional): l1_regularization, l2_regularization, tree_complexity_regularization, min_node_weight): """Function that builds splits for a sparse feature column.""" - return _make_sparse_split( + return func( quantile_accumulator_handle, stats_accumulator_handle, stamp_token, next_stamp_token, multiclass_strategy, class_id, feature_column_id, l1_regularization, l2_regularization, 
tree_complexity_regularization, @@ -482,9 +512,15 @@ def _specialize_sparse_split(is_multi_dimentional): return f +make_dense_split_scalar = _specialize_make_split(_make_dense_split, + is_multi_dimentional=False) +make_dense_split_tensor = _specialize_make_split(_make_dense_split, + is_multi_dimentional=True) -make_sparse_split_scalar = _specialize_sparse_split(is_multi_dimentional=False) -make_sparse_split_tensor = _specialize_sparse_split(is_multi_dimentional=True) +make_sparse_split_scalar = _specialize_make_split(_make_sparse_split, + is_multi_dimentional=False) +make_sparse_split_tensor = _specialize_make_split(_make_sparse_split, + is_multi_dimentional=True) @function.Defun( diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py index c081a3f2c4..2f2c230211 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py @@ -67,9 +67,9 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): hessian_shape = tensor_shape.scalar() split_handler = ordinal_split_handler.DenseSplitHandler( l1_regularization=0.1, - l2_regularization=1, - tree_complexity_regularization=0, - min_node_weight=0, + l2_regularization=1., + tree_complexity_regularization=0., + min_node_weight=0., epsilon=0.001, num_quantiles=10, feature_column_group_id=0, @@ -203,10 +203,10 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): hessian_shape = tensor_shape.TensorShape([2, 2]) split_handler = ordinal_split_handler.DenseSplitHandler( - l1_regularization=0, - l2_regularization=1, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0., + l2_regularization=1., + tree_complexity_regularization=0., + min_node_weight=0., epsilon=0.001, num_quantiles=3, feature_column_group_id=0, @@ -291,10 +291,10 @@ class 
DenseSplitHandlerTest(test_util.TensorFlowTestCase): hessian_shape = tensor_shape.TensorShape([2]) split_handler = ordinal_split_handler.DenseSplitHandler( - l1_regularization=0, - l2_regularization=1, - tree_complexity_regularization=0, - min_node_weight=0, + l1_regularization=0., + l2_regularization=1., + tree_complexity_regularization=0., + min_node_weight=0., epsilon=0.001, num_quantiles=3, feature_column_group_id=0, @@ -376,9 +376,9 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): split_handler = ordinal_split_handler.DenseSplitHandler( l1_regularization=0.1, - l2_regularization=1, - tree_complexity_regularization=0, - min_node_weight=0, + l2_regularization=1., + tree_complexity_regularization=0., + min_node_weight=0., epsilon=0.001, num_quantiles=10, feature_column_group_id=0, @@ -451,9 +451,9 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): split_handler = ordinal_split_handler.DenseSplitHandler( l1_regularization=0.1, - l2_regularization=1, + l2_regularization=1., tree_complexity_regularization=0.5, - min_node_weight=0, + min_node_weight=0., epsilon=0.001, num_quantiles=10, feature_column_group_id=0, @@ -585,7 +585,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): split_handler = ordinal_split_handler.DenseSplitHandler( l1_regularization=0.1, - l2_regularization=1, + l2_regularization=1., tree_complexity_regularization=0.5, min_node_weight=1.5, epsilon=0.001, -- GitLab From 68430112b2ca5c160db6dd412d43f572ec69e72f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 25 May 2018 13:20:13 -0700 Subject: [PATCH 161/902] Public API to switch between eager execution and graph building. Now, after tf.enable_eager_execution() has been executed, entering the context manager of a tf.Graph will enable graph mode. 
So, for example ``` tf.enable_eager_execution() with tf.Graph().as_default(): c = tf.constant(1.0) # this is a graph tensor c2 = tf.constant(1.0) # this is an eager tensor ``` The main use-case of this is allowing documentation writers to make a single notebook which starts with eager execution and seamlessly transitions to building graphs. This also makes many explicit enablings of graph mode in the code redundant (a cleanup cl will follow). PiperOrigin-RevId: 198092991 --- .../distribute/python/mirrored_strategy.py | 13 +++++- .../contrib/distribute/python/monitor_test.py | 3 +- tensorflow/contrib/eager/python/saver_test.py | 45 ++++++++----------- .../opt/python/training/adamax_test.py | 8 ++-- .../contrib/optimizer_v2/momentum_test.py | 11 +---- tensorflow/python/framework/ops.py | 32 +++---------- tensorflow/python/framework/ops_test.py | 26 +++++++++-- tensorflow/python/framework/test_util.py | 20 ++++++--- tensorflow/python/framework/test_util_test.py | 5 ++- .../kernel_tests/accumulate_n_eager_test.py | 7 ++- .../python/kernel_tests/py_func_test.py | 5 +-- tensorflow/python/ops/variables.py | 2 +- tensorflow/python/training/adam_test.py | 7 ++- tensorflow/python/training/momentum_test.py | 11 +---- tensorflow/python/training/training_util.py | 20 ++++----- 15 files changed, 105 insertions(+), 110 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 89f2c431fe..14dbbd6e27 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import threading import six @@ -39,6 +40,16 @@ from tensorflow.python.training import distribute as distribute_lib # TODO(josh11b): Replace asserts in this file with if ...: raise ... 
+@contextlib.contextmanager +def _enter_graph(g): + if context.executing_eagerly(): + with g.as_default(), context.eager_mode(): + yield + else: + with g.as_default(): + yield + + def _cpu_device(device): cpu_device = tf_device.DeviceSpec.from_string(device) cpu_device.merge_from(tf_device.DeviceSpec(device_type="CPU", device_index=0)) @@ -458,7 +469,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): with self.coord.stop_on_exception(), \ context.context()._mode(self.context_mode), \ context.context().device_policy(self.context_device_policy), \ - self.graph.as_default(), \ + _enter_graph(self.graph), \ MirroredTowerContext(self.distribution, self.tower_id), \ ops.device(self.device), \ ops.name_scope(self._captured_name_scope), \ diff --git a/tensorflow/contrib/distribute/python/monitor_test.py b/tensorflow/contrib/distribute/python/monitor_test.py index 8277e1e791..4fdb9bf69b 100644 --- a/tensorflow/contrib/distribute/python/monitor_test.py +++ b/tensorflow/contrib/distribute/python/monitor_test.py @@ -25,6 +25,7 @@ from tensorflow.contrib.distribute.python import combinations from tensorflow.contrib.distribute.python import monitor as monitor_lib from tensorflow.contrib.distribute.python import one_device_strategy from tensorflow.contrib.distribute.python.single_loss_example import single_loss_example +from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import ops @@ -65,7 +66,7 @@ class MonitorTest(test.TestCase, parameterized.TestCase): step_function, _ = single_loss_example( lambda: gradient_descent.GradientDescentOptimizer(0.2), distribution) - with self.test_session() as sess: + with session.Session() as sess, context.eager_mode(): with self.assertRaisesRegexp(ValueError, "Should not provide"): _ = monitor_lib.Monitor(step_function, sess) diff --git a/tensorflow/contrib/eager/python/saver_test.py 
b/tensorflow/contrib/eager/python/saver_test.py index 4032e755f6..90a3711475 100644 --- a/tensorflow/contrib/eager/python/saver_test.py +++ b/tensorflow/contrib/eager/python/saver_test.py @@ -60,15 +60,9 @@ class SaverTest(test.TestCase): def testSameNameNoClobbering(self): with ops.device(self._dev()): - # Note that this test purposefully uses Graphs rather than - # IsolateTest. Users are more likely to accidentally create the same - # variable name this way. - first_graph = ops.Graph() - with first_graph.as_default(): - v1_first_graph = resource_variable_ops.ResourceVariable(1.0, name='v1') - with ops.Graph().as_default(): - v1_second_graph = resource_variable_ops.ResourceVariable(2.0, name='v1') - saver = _saver.Saver([v1_first_graph, v1_second_graph]) + v1 = resource_variable_ops.ResourceVariable(1.0, name='v1') + v2 = resource_variable_ops.ResourceVariable(2.0, name='v1') + saver = _saver.Saver([v1, v2]) ckpt_prefix = os.path.join(test.get_temp_dir(), 'ckpt') with self.assertRaisesRegexp(ValueError, 'v1'): saver.save(ckpt_prefix) @@ -126,12 +120,11 @@ class SaverTest(test.TestCase): saver = _saver.Saver([v1]) saver.save(ckpt_prefix) - with ops.Graph().as_default(): - saver = _saver.Saver([v1]) - with _saver.restore_variables_on_create(ckpt_prefix): - # Value is from checkpoint, but not from argument. - ret, _ = model(2.0) - self.assertEqual(ret.numpy(), 1.0) + saver = _saver.Saver([v1]) + with _saver.restore_variables_on_create(ckpt_prefix): + # Value is from checkpoint, but not from argument. 
+ ret, _ = model(2.0) + self.assertEqual(ret.numpy(), 1.0) def testRestoreNotFound(self): with ops.device(self._dev()): @@ -184,17 +177,17 @@ class SaverTest(test.TestCase): 4, model(array_ops.constant(2, dtype=dtypes.float32)).numpy()) # reset the graph and reload on create, so that 1 + 2 = 3 - with ops.Graph().as_default(): - with _saver.restore_variables_on_create(ckpt_prefix): - @graph_callable.graph_callable( - [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) - def model2(x): - v = variable_scope.get_variable( - 'v', initializer=init_ops.zeros_initializer(), shape=()) - return v + x - - self.assertEqual( - 3, model2(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + ops.reset_default_graph() + with _saver.restore_variables_on_create(ckpt_prefix): + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) + def model2(x): + v = variable_scope.get_variable( + 'v', initializer=init_ops.zeros_initializer(), shape=()) + return v + x + + self.assertEqual( + 3, model2(array_ops.constant(2, dtype=dtypes.float32)).numpy()) class GetOptimizerTests(test.TestCase): diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index bc92a7006f..21bf3f5313 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -198,11 +198,11 @@ class AdaMaxOptimizerTest(test.TestCase): self.assertTrue(beta1_power is not None) self.assertIn(beta1_power, opt_variables) - with ops.Graph().as_default(): - # Shouldn't return non-slot variables from other graphs. - self.assertEqual(0, len(opt.variables())) - if not context.executing_eagerly(): + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. 
+ self.assertEqual(0, len(opt.variables())) + self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) diff --git a/tensorflow/contrib/optimizer_v2/momentum_test.py b/tensorflow/contrib/optimizer_v2/momentum_test.py index 26724f66c2..24cdab4626 100644 --- a/tensorflow/contrib/optimizer_v2/momentum_test.py +++ b/tensorflow/contrib/optimizer_v2/momentum_test.py @@ -134,7 +134,6 @@ class MomentumOptimizerTest(test.TestCase): with context.eager_mode(): self.doTestBasic(use_resource=True, use_callable_params=True) - @test_util.run_in_graph_and_eager_modes(reset_test=True) def testVariablesAcrossGraphs(self): optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5) with ops.Graph().as_default(): @@ -142,10 +141,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var0") var1 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var1") - if context.executing_eagerly(): - loss = lambda: math_ops.reduce_sum(var0 + var1) - else: - loss = math_ops.reduce_sum(var0 + var1) + loss = math_ops.reduce_sum(var0 + var1) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var0") @@ -157,10 +153,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var2") var3 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var3") - if context.executing_eagerly(): - loss = lambda: math_ops.reduce_sum(var2 + var3) - else: - loss = math_ops.reduce_sum(var2 + var3) + loss = math_ops.reduce_sum(var2 + var3) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var2") diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 9fc8136348..3af0cc44a8 100644 --- a/tensorflow/python/framework/ops.py +++ 
b/tensorflow/python/framework/ops.py @@ -20,7 +20,6 @@ from __future__ import print_function import collections import copy -import functools import linecache import os import re @@ -3861,6 +3860,9 @@ class Graph(object): assert c.graph is g ``` + If eager execution is enabled ops created under this context manager will be + added to the graph instead of executed eagerly. + Returns: A context manager for using this graph as the default graph. """ @@ -5270,35 +5272,15 @@ class _DefaultGraphStack(_DefaultStack): # pylint: disable=protected-access @tf_contextlib.contextmanager def get_controller(self, default): try: - if context.executing_eagerly(): - # A Graph alone on the context stack would keep init_scope-wrapped - # operations graph building when entered (assuming init_scope is called - # in a graph building context). Instead, we push a context which first - # enables eager execution and then re-enters the Graph. - context.context().context_switches.push( - default.building_function, - functools.partial( - _enter_context_and_graph, - context.eager_mode, - default.as_default)) - else: - # This Graph is being used from a graph building context. A lack of - # context switch implies that the context is graph building. 
- context.context().context_switches.push(default.building_function, - default.as_default) - with super(_DefaultGraphStack, self).get_controller(default) as g: + context.context().context_switches.push( + default.building_function, default.as_default) + with super(_DefaultGraphStack, self).get_controller( + default) as g, context.graph_mode(): yield g finally: context.context().context_switches.pop() -@tf_contextlib.contextmanager -def _enter_context_and_graph(context_fn, graph_fn): - """Combines two context managers.""" - with context_fn(), graph_fn(): - yield - - _default_graph_stack = _DefaultGraphStack() diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 87317db015..e7732632f2 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2215,12 +2215,25 @@ class InitScopeTest(test_util.TensorFlowTestCase): self.assertEqual(ops.get_name_scope(), "inner") self.assertEqual(ops.get_name_scope(), "") - def testEagerGraphContextsExecuteEagerly(self): + def testEnteringGraphFromEagerIsSticky(self): with context.eager_mode(): + g = ops.Graph() + with g.as_default(): + with ops.init_scope(): + self.assertFalse(context.executing_eagerly()) + self.assertEqual(g, ops.get_default_graph()) + + def testMixGraphEager(self): + with context.eager_mode(): + c = constant_op.constant(1.0) with ops.Graph().as_default(): - with context.graph_mode(): - with ops.init_scope(): - self.assertTrue(context.executing_eagerly()) + with self.assertRaisesRegexp( + RuntimeError, "Attempting to capture an EagerTensor"): + math_ops.add(c, c) + c2 = constant_op.constant(2.0) + with self.assertRaisesRegexp( + TypeError, "contains objects other than 'EagerTensor'"): + math_ops.add(c2, c2) def testPreservesNameScopeInEagerExecution(self): with context.eager_mode(): @@ -2254,6 +2267,11 @@ class GraphTest(test_util.TensorFlowTestCase): with g0.as_default(): ops.reset_default_graph() + def 
testGraphContextManagerCancelsEager(self): + with context.eager_mode(): + with ops.Graph().as_default(): + self.assertFalse(context.executing_eagerly()) + def testGraphContextManager(self): g0 = ops.Graph() with g0.as_default() as g1: diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 5b01df48fe..b56483f373 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -556,12 +556,16 @@ def assert_no_new_tensors(f): tensors_before = set( id(obj) for obj in gc.get_objects() if _is_tensorflow_object(obj)) - outside_graph_key = ops.get_default_graph()._graph_key - with ops.Graph().as_default(): + if context.executing_eagerly(): + f(self, **kwargs) + ops.reset_default_graph() + else: # Run the test in a new graph so that collections get cleared when it's # done, but inherit the graph key so optimizers behave. - ops.get_default_graph()._graph_key = outside_graph_key - f(self, **kwargs) + outside_graph_key = ops.get_default_graph()._graph_key + with ops.Graph().as_default(): + ops.get_default_graph()._graph_key = outside_graph_key + f(self, **kwargs) # Make an effort to clear caches, which would otherwise look like leaked # Tensors. 
backprop._zeros_cache.flush() @@ -727,12 +731,12 @@ def run_in_graph_and_eager_modes(__unused__=None, f(self, **kwargs) if assert_no_eager_garbage: + ops.reset_default_graph() run_eagerly = assert_no_new_tensors( assert_no_garbage_created(run_eagerly)) with context.eager_mode(): - with ops.Graph().as_default(): - run_eagerly(self, **kwargs) + run_eagerly(self, **kwargs) return decorated @@ -1027,7 +1031,9 @@ class TensorFlowTestCase(googletest.TestCase): rewriter_config_pb2.RewriterConfig.OFF) return config - if graph is None: + if context.executing_eagerly(): + yield None + elif graph is None: if self._cached_session is None: self._cached_session = session.Session( graph=None, config=prepare_config(config)) diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index 0f53762f6f..0178908bcc 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -619,6 +619,7 @@ class GarbageCollectionTest(test_util.TensorFlowTestCase): ReferenceCycleTest().test_has_no_cycle() + @test_util.run_in_graph_and_eager_modes() def test_no_leaked_tensor_decorator(self): class LeakedTensorTest(object): @@ -628,11 +629,11 @@ class GarbageCollectionTest(test_util.TensorFlowTestCase): @test_util.assert_no_new_tensors def test_has_leak(self): - self.a = constant_op.constant([3.]) + self.a = constant_op.constant([3.], name="leak") @test_util.assert_no_new_tensors def test_has_no_leak(self): - constant_op.constant([3.]) + constant_op.constant([3.], name="no-leak") with self.assertRaisesRegexp(AssertionError, "Tensors not deallocated"): LeakedTensorTest().test_has_leak() diff --git a/tensorflow/python/kernel_tests/accumulate_n_eager_test.py b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py index dc11b7dece..5f516f2c7e 100644 --- a/tensorflow/python/kernel_tests/accumulate_n_eager_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py @@ -43,10 +43,9 @@ class 
AccumulateNV2EagerTest(test_util.TensorFlowTestCase): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) - with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).numpy()) - self.assertAllClose(x[0] * 5, - math_ops.accumulate_n([tf_x[0]] * 5).numpy()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x)) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5)) def testGrad(self): np.random.seed(42) diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index b9f44d728a..dc7399f040 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -446,9 +446,8 @@ class PyFuncTest(test.TestCase): a = array_ops.ones((3, 3), dtype=dtypes.int32) x = array_ops.ones((3, 1), dtype=dtypes.int32) output = script_ops.eager_py_func(matmul, inp=[a, x], Tout=dtypes.int32) - with self.test_session(): - ret = self.evaluate(output) - self.assertAllEqual(ret, [[3], [3], [3]]) + ret = self.evaluate(output) + self.assertAllEqual(ret, [[3], [3], [3]]) @test_util.run_in_graph_and_eager_modes() def testEagerSingleOutputFloat32(self): diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 959ae08ee4..d88fd836f5 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -259,7 +259,7 @@ class Variable(checkpointable.CheckpointableBase): constraint=constraint) def __repr__(self): - if context.executing_eagerly(): + if context.executing_eagerly() and not self._in_graph_mode: return "" % ( self.name, self.get_shape(), self.dtype.name, ops.numpy_text(self.read_value(), is_repr=True)) diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py index 9be8b6aafe..bc68f24c6f 100644 --- a/tensorflow/python/training/adam_test.py +++ 
b/tensorflow/python/training/adam_test.py @@ -180,11 +180,10 @@ class AdamOptimizerTest(test.TestCase): self.assertIn(beta1_power, opt_variables) self.assertIn(beta2_power, opt_variables) - with ops.Graph().as_default(): - # Shouldn't return non-slot variables from other graphs. - self.assertEqual(0, len(opt.variables())) - if not context.executing_eagerly(): + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. + self.assertEqual(0, len(opt.variables())) self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index 7bd57ad3d8..f7e78071d8 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -134,7 +134,6 @@ class MomentumOptimizerTest(test.TestCase): with context.eager_mode(): self.doTestBasic(use_resource=True, use_callable_params=True) - @test_util.run_in_graph_and_eager_modes(reset_test=True) def testVariablesAcrossGraphs(self): optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5) with ops.Graph().as_default(): @@ -142,10 +141,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var0") var1 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var1") - if context.executing_eagerly(): - loss = lambda: math_ops.reduce_sum(var0 + var1) - else: - loss = math_ops.reduce_sum(var0 + var1) + loss = math_ops.reduce_sum(var0 + var1) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var0") @@ -157,10 +153,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var2") var3 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var3") - if context.executing_eagerly(): - loss = lambda: 
math_ops.reduce_sum(var2 + var3) - else: - loss = math_ops.reduce_sum(var2 + var3) + loss = math_ops.reduce_sum(var2 + var3) optimizer.minimize(loss) optimizer_variables = optimizer.variables() self.assertStartsWith(optimizer_variables[0].name, "var2") diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py index d05e1d2c83..0877b2a8a2 100644 --- a/tensorflow/python/training/training_util.py +++ b/tensorflow/python/training/training_util.py @@ -119,18 +119,18 @@ def create_global_step(graph=None): graph = graph or ops.get_default_graph() if get_global_step(graph) is not None: raise ValueError('"global_step" already exists.') + if context.executing_eagerly(): + with ops.device('cpu:0'): + return variable_scope.get_variable( + ops.GraphKeys.GLOBAL_STEP, + shape=[], + dtype=dtypes.int64, + initializer=init_ops.zeros_initializer(), + trainable=False, + collections=[ops.GraphKeys.GLOBAL_VARIABLES, + ops.GraphKeys.GLOBAL_STEP]) # Create in proper graph and base name_scope. 
with graph.as_default() as g, g.name_scope(None): - if context.executing_eagerly(): - with ops.device('cpu:0'): - return variable_scope.get_variable( - ops.GraphKeys.GLOBAL_STEP, - shape=[], - dtype=dtypes.int64, - initializer=init_ops.zeros_initializer(), - trainable=False, - collections=[ops.GraphKeys.GLOBAL_VARIABLES, - ops.GraphKeys.GLOBAL_STEP]) return variable_scope.get_variable( ops.GraphKeys.GLOBAL_STEP, shape=[], -- GitLab From 7b3d634372feb93c989f8c71f0bc8a87caff766c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 25 May 2018 13:38:24 -0700 Subject: [PATCH 162/902] [TF:XLA] Bump open source llvm revision to r333273 PiperOrigin-RevId: 198095416 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 90de15e18e..fb7ff3e8ab 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -453,11 +453,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/387dd2c562ef25850ef9229de82b9bcd0959b02c.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/387dd2c562ef25850ef9229de82b9bcd0959b02c.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/10826be2a677d7babbc0c0640e94bf75fc808893.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/10826be2a677d7babbc0c0640e94bf75fc808893.tar.gz", ], - sha256 = "c94107d335f54a2d9c22dd38c754688f63618c42598bfc2df5bcce9de8363367", - strip_prefix = "llvm-387dd2c562ef25850ef9229de82b9bcd0959b02c", + sha256 = "eabe882cae486692abe6c62465f5899e558682739264d2f715d3ffe4ddc138ad", + strip_prefix = "llvm-10826be2a677d7babbc0c0640e94bf75fc808893", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From b2b13549d763b75e160744bc00ee1c526b419c03 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 25 May 2018 13:39:25 -0700 Subject: [PATCH 163/902] [tpu:profiler] Capture the data for generating a memory viewer of the profiling results. PiperOrigin-RevId: 198095564 --- tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index e51f64521f..99485322c6 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -81,6 +81,7 @@ ProfileRequest PopulateProfileRequest(int duration_ms, } request.add_tools("op_profile"); request.add_tools("input_pipeline"); + request.add_tools("memory_viewer"); request.add_tools("overview_page"); *request.mutable_opts() = opts; std::cout << "Limiting the number of trace events to " << kMaxEvents -- GitLab From a6369634688dafa4c061f499a6adbb4e78666832 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 13:44:36 -0700 Subject: [PATCH 164/902] Disable //tensorflow/contrib/lite/python:lite_test on Windows PiperOrigin-RevId: 198096344 --- tensorflow/contrib/lite/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 17c11ab0f9..a40e512045 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -58,6 +58,7 @@ py_test( srcs = ["lite_test.py"], data = [":interpreter_test_data"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":lite", ], -- GitLab From ea72dd6c0c8a5572d2ca34be4ca0ab43efc4570d Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Fri, 25 May 2018 13:58:51 -0700 Subject: [PATCH 165/902] Add EagerTensor profiler and device shape utilities This change includes the following steps to make EagerTensor profiler work: - Add a PaddedShapeFn to XlaDevice::Metadata. 
We need a backend-independent way to get a fully-padded shape and its layout on the device. This function is set during device construction. CPU and GPU devices effectively get an identity function since they neither change the layout nor pad. TPU gets the appropriate function. - Add TFE_TensorDebugInfo struct and C API methods for it. These methods are necessary to fetch the shape and layout from under the C API to the Python level. This can be a home for more debug information later. - Make EagerTensor weak referencable. This involves adding a pointer to the list of current weak references. This addition should have negligible overhead when profiler is not used. The only operations on this field are setting it to null on construction and checking if it is null on destruction. - Adding C++ functions callable from Python to register an instance of EagerTensorProfiler and retrieve debug information for a given EagerTensor. These functions are used in the new "inspect" module. - Finally, writing the actual profiler. 
PiperOrigin-RevId: 198098380 --- tensorflow/c/eager/BUILD | 27 ++- tensorflow/c/eager/c_api.cc | 4 - tensorflow/c/eager/c_api.h | 39 ++++ tensorflow/c/eager/c_api_debug.cc | 167 ++++++++++++++++ tensorflow/c/eager/c_api_debug_test.cc | 50 +++++ tensorflow/c/eager/c_api_internal.h | 8 + tensorflow/c/eager/c_api_test.cc | 125 +----------- tensorflow/c/eager/c_api_test_util.cc | 163 ++++++++++++++++ tensorflow/c/eager/c_api_test_util.h | 53 +++++ tensorflow/compiler/jit/xla_cpu_device.cc | 11 +- tensorflow/compiler/jit/xla_device.cc | 36 +++- tensorflow/compiler/jit/xla_device.h | 21 +- tensorflow/compiler/jit/xla_gpu_device.cc | 3 +- .../compiler/jit/xla_interpreter_device.cc | 11 +- tensorflow/contrib/cmake/tf_c.cmake | 1 + tensorflow/python/eager/pywrap_tensor.cc | 181 ++++++++++++++---- tensorflow/python/eager/pywrap_tfe.h | 13 ++ tensorflow/python/pywrap_tfe.i | 2 + 18 files changed, 733 insertions(+), 182 deletions(-) create mode 100644 tensorflow/c/eager/c_api_debug.cc create mode 100644 tensorflow/c/eager/c_api_debug_test.cc create mode 100644 tensorflow/c/eager/c_api_test_util.cc create mode 100644 tensorflow/c/eager/c_api_test_util.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 9ce781fab0..f265da2c2c 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -14,6 +14,7 @@ tf_cuda_library( name = "c_api", srcs = [ "c_api.cc", + "c_api_debug.cc", "c_api_internal.h", ], hdrs = ["c_api.h"], @@ -45,6 +46,7 @@ tf_cuda_library( "//tensorflow:with_xla_support": [ "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/jit", + "//tensorflow/compiler/jit:xla_device", ], "//conditions:default": [], }) + [ @@ -99,9 +101,31 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "c_api_test_util", + testonly = 1, + srcs = ["c_api_test_util.cc"], + hdrs = ["c_api_test_util.h"], + visibility = [ + "//learning/brain:__subpackages__", + "//tensorflow:__subpackages__", + ], + deps = [ + ":c_api", + "//tensorflow/c:c_test_util", 
+ "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + ], +) + tf_cuda_cc_test( name = "c_api_test", - srcs = ["c_api_test.cc"], + srcs = [ + "c_api_debug_test.cc", + "c_api_test.cc", + ], extra_copts = tfe_xla_copts(), tags = [ "guitar", @@ -109,6 +133,7 @@ tf_cuda_cc_test( ], deps = [ ":c_api", + ":c_api_test_util", "//tensorflow/c:c_test_util", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 216210c88c..81221c4078 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -73,10 +73,6 @@ string DeviceName(const tensorflow::Device* d) { return (d == nullptr) ? "cpu:0" : d->name(); } -#ifdef TENSORFLOW_EAGER_USE_XLA -std::atomic_int_fast64_t func_id_generator(0); -#endif // TENSORFLOW_EAGER_USE_XLA - tensorflow::Status GetAllRemoteDevices( const std::vector& remote_workers, tensorflow::WorkerCacheInterface* worker_cache, diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 574a097e0d..1862af3ce2 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -191,6 +191,45 @@ TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopyToDevice( TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status); +// Debugging/Profiling information for TFE_TensorHandle +// +// TFE_TensorDebugInfo contains information useful for debugging and +// profiling tensors. +typedef struct TFE_TensorDebugInfo TFE_TensorDebugInfo; + +// Retrieves TFE_TensorDebugInfo for `handle`. +// If TFE_TensorHandleTensorDebugInfo succeeds, `status` is set to OK and caller +// is responsible for deleting returned TFE_TensorDebugInfo. +// If TFE_TensorHandleTensorDebugInfo fails, `status` is set to appropriate +// error and nullptr is returned. This function can block till the operation +// that produces `handle` has completed. 
+TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( + TFE_TensorHandle* handle, TF_Status* status); + +// Deletes `debug_info`. +TF_CAPI_EXPORT extern void TFE_DeleteTensorDebugInfo( + TFE_TensorDebugInfo* debug_info); + +// Returns the number of dimensions used to represent the tensor on its device. +// The number of dimensions used to reprensent the tensor on device can be +// different from the number returned by TFE_TensorHandleNumDims. +// The return value was current at the time of TFE_TensorDebugInfo creation. +TF_CAPI_EXPORT extern int TFE_TensorDebugInfoOnDeviceNumDims( + TFE_TensorDebugInfo* debug_info); + +// Returns the number of elements in dimension `dim_index`. +// Tensor representation on device can be transposed from its representation +// on host. The data contained in dimension `dim_index` on device +// can correspond to the data contained in another dimension in on-host +// representation. The dimensions are indexed using the standard TensorFlow +// major-to-minor order (slowest varying dimension first), +// not the XLA's minor-to-major order. +// On-device dimensions can be padded. TFE_TensorDebugInfoOnDeviceDim returns +// the number of elements in a dimension after padding. +// The return value was current at the time of TFE_TensorDebugInfo creation. +TF_CAPI_EXPORT extern int64_t TFE_TensorDebugInfoOnDeviceDim( + TFE_TensorDebugInfo* debug_info, int dim_index); + // Description of the TensorFlow op to execute. // // Assumes that the provided 'ctx' outlives the returned TFE_Op, i.e., diff --git a/tensorflow/c/eager/c_api_debug.cc b/tensorflow/c/eager/c_api_debug.cc new file mode 100644 index 0000000000..5006b76f19 --- /dev/null +++ b/tensorflow/c/eager/c_api_debug.cc @@ -0,0 +1,167 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/eager/c_api.h" + +#include + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/eager/c_api_internal.h" +#ifdef TENSORFLOW_EAGER_USE_XLA +#include "tensorflow/compiler/jit/xla_device.h" +#endif // TENSORFLOW_EAGER_USE_XLA + +using tensorflow::int64; +using tensorflow::string; + +namespace { + +std::vector TensorShapeAsVector(TFE_TensorHandle* handle, + TF_Status* status) { + std::vector shape; + int rank = TFE_TensorHandleNumDims(handle, status); + if (!status->status.ok()) { + return shape; + } + shape.reserve(rank); + for (int i = 0; i < rank; ++i) { + shape.push_back(TFE_TensorHandleDim(handle, i, status)); + if (!status->status.ok()) { + return shape; + } + } + return shape; +} + +} // namespace + +extern "C" { + +TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( + TFE_TensorHandle* handle, TF_Status* status) { + const tensorflow::Tensor* tensor; + status->status = handle->handle->Tensor(&tensor); + if (!status->status.ok()) { + return nullptr; + } + + tensorflow::Device* device; + status->status = handle->handle->Device(&device); + if (!status->status.ok()) { + return nullptr; + } + +#ifdef TENSORFLOW_EAGER_USE_XLA + // If tensor resides on an XLA device, use XLA device's PaddedShapeFn. 
+ tensorflow::XlaDevice* xla_device = + dynamic_cast(device); + if (xla_device != nullptr) { + tensorflow::XlaDevice::PaddedShapeFn shape_fn = + xla_device->metadata().padded_shape_fn(); + xla::Shape padded_shape; + status->status = shape_fn(*tensor, &padded_shape); + if (!status->status.ok()) { + return nullptr; + } + if (VLOG_IS_ON(3)) { + std::vector shape_to_log = TensorShapeAsVector(handle, status); + if (!status->status.ok()) { + // Ignore the status here as we are simply logging. + status->status = tensorflow::Status::OK(); + } else { + VLOG(3) << "Fully padded shape of [" + << tensorflow::str_util::Join(shape_to_log, ", ") << "] is " + << padded_shape.DebugString(); + } + } + + if (xla::ShapeUtil::IsTuple(padded_shape)) { + if (xla::ShapeUtil::TupleElementCount(padded_shape) != 2) { + // Currently, the only case of XlaTensor containing a tuple shape is to + // represent 64 bit ints, doubles, and complex numbers (we don't support + // 64bit complex numbers). + status->status = tensorflow::errors::InvalidArgument( + "XlaTensors should only contain tuples of size 2. Shape: ", + padded_shape.DebugString()); + return nullptr; + } + + // shape0 is not a const& because we will assign it to padded_shape below. + // It is illegal to assign a part of a message to itself. + xla::Shape shape0 = xla::ShapeUtil::GetTupleElementShape(padded_shape, 0); + const xla::Shape& shape1 = + xla::ShapeUtil::GetTupleElementShape(padded_shape, 1); + if (xla::ShapeUtil::IsTuple(shape0) || xla::ShapeUtil::IsTuple(shape1)) { + status->status = tensorflow::errors::InvalidArgument( + "XlaTensors should not contain nested tuples. Shape: ", + padded_shape.DebugString()); + return nullptr; + } + if (!xla::ShapeUtil::Equal(shape0, shape1)) { + status->status = tensorflow::errors::InvalidArgument( + "Subshapes of XlaTensors should be the same. 
Shape: ", + padded_shape.DebugString()); + return nullptr; + } + + // Since the only case we handle here are two equal subshapes, we + // simply return one of them. The caller will interpret it as this + // shape directly storing the 64bit types. This approximation is good + // enough for this API's debugging use case. + padded_shape = shape0; + } + + int rank = padded_shape.dimensions_size(); + std::vector dev_dims; + dev_dims.reserve(rank); + if (rank == 1) { + // Rank 1 tensors might not have padded_shape.layout.minor_to_major set, + dev_dims.push_back(padded_shape.dimensions(0)); + } else { + for (int i = rank - 1; i >= 0; --i) { + int64 dim_index = padded_shape.layout().minor_to_major(i); + dev_dims.push_back(padded_shape.dimensions(dim_index)); + } + } + status->status = tensorflow::Status::OK(); + return new TFE_TensorDebugInfo(dev_dims); + } +#endif // TENSORFLOW_EAGER_USE_XLA + + // If the tensor is not an XLA tensor, the device shape is + // the same as regular tensor shape. + std::vector dev_dims = TensorShapeAsVector(handle, status); + if (!status->status.ok()) { + return nullptr; + } + return new TFE_TensorDebugInfo(dev_dims); +} + +TF_CAPI_EXPORT extern void TFE_DeleteTensorDebugInfo( + TFE_TensorDebugInfo* debug_info) { + delete debug_info; +} + +TF_CAPI_EXPORT extern int TFE_TensorDebugInfoOnDeviceNumDims( + TFE_TensorDebugInfo* debug_info) { + return debug_info->dev_dims.size(); +} + +TF_CAPI_EXPORT extern int64_t TFE_TensorDebugInfoOnDeviceDim( + TFE_TensorDebugInfo* debug_info, int dim_index) { + return debug_info->dev_dims[dim_index]; +} + +} // extern "C" diff --git a/tensorflow/c/eager/c_api_debug_test.cc b/tensorflow/c/eager/c_api_debug_test.cc new file mode 100644 index 0000000000..cddb9f6e00 --- /dev/null +++ b/tensorflow/c/eager/c_api_debug_test.cc @@ -0,0 +1,50 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/eager/c_api.h" + +#include +#include "tensorflow/c/eager/c_api_test_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +TEST(CApiDebug, ScalarCPU) { + TFE_TensorHandle* h = TestScalarTensorHandle(); + TF_Status* status = TF_NewStatus(); + TFE_TensorDebugInfo* debug_info = TFE_TensorHandleTensorDebugInfo(h, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + ASSERT_EQ(0, TFE_TensorDebugInfoOnDeviceNumDims(debug_info)); + + TFE_DeleteTensorDebugInfo(debug_info); + TFE_DeleteTensorHandle(h); + TF_DeleteStatus(status); +} + +TEST(CApiDebug, 2DCPU) { + TFE_TensorHandle* h = TestMatrixTensorHandle3X2(); + TF_Status* status = TF_NewStatus(); + TFE_TensorDebugInfo* debug_info = TFE_TensorHandleTensorDebugInfo(h, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + ASSERT_EQ(2, TFE_TensorDebugInfoOnDeviceNumDims(debug_info)); + // Shape is the same for CPU tensors. 
+ EXPECT_EQ(3, TFE_TensorDebugInfoOnDeviceDim(debug_info, 0)); + EXPECT_EQ(2, TFE_TensorDebugInfoOnDeviceDim(debug_info, 1)); + + TFE_DeleteTensorDebugInfo(debug_info); + TFE_DeleteTensorHandle(h); + TF_DeleteStatus(status); +} diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 2b8384d720..04a6efc47c 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -107,6 +107,14 @@ struct TFE_TensorHandle { tensorflow::TensorHandle* handle; }; +struct TFE_TensorDebugInfo { + TFE_TensorDebugInfo(const std::vector& dims) + : dev_dims(dims) {} + + // Fully-padded, minor-to-major. + std::vector dev_dims; +}; + struct TFE_Op { // t is NULL iff the TFE_Op corresponds to a TensorFlow function instead of a // primitive operation. diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 49646bb735..27ff5f7211 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/c/eager/c_api.h" #include +#include "tensorflow/c/eager/c_api_test_util.h" #include "tensorflow/core/distributed_runtime/rpc/eager/eager_grpc_server_lib.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -32,122 +33,6 @@ using tensorflow::string; namespace { -TFE_TensorHandle* DoubleTestMatrixTensorHandle() { - int64_t dims[] = {2, 2}; - double data[] = {1.0, 2.0, 3.0, 4.0}; - TF_Tensor* t = TF_AllocateTensor( - TF_DOUBLE, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); - memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_DeleteTensor(t); - TF_DeleteStatus(status); - return th; -} - -TFE_TensorHandle* TestMatrixTensorHandle() { - int64_t dims[] = {2, 2}; - float data[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TF_Tensor* t = TF_AllocateTensor( - TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); - memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_DeleteTensor(t); - TF_DeleteStatus(status); - return th; -} - -TFE_TensorHandle* TestMatrixTensorHandle3X2() { - int64_t dims[] = {3, 2}; - double data[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - TF_Tensor* t = TF_AllocateTensor( - TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); - memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_DeleteTensor(t); - TF_DeleteStatus(status); - return th; -} - -TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { - TF_Status* status = TF_NewStatus(); - - TFE_Op* op = 
TFE_NewOp(ctx, "MatMul", status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_OpAddInput(op, a, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_OpAddInput(op, b, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_DeleteStatus(status); - TFE_OpSetAttrBool(op, "transpose_a", 0); - TFE_OpSetAttrBool(op, "transpose_b", 0); - TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a)); - - return op; -} - -TFE_TensorHandle* TestAxisTensorHandle() { - int64_t dims[] = {1}; - int data[] = {1}; - TF_Tensor* t = TF_AllocateTensor( - TF_INT32, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); - memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_DeleteTensor(t); - TF_DeleteStatus(status); - return th; -} - -TFE_Op* MinOp(TFE_Context* ctx, TFE_TensorHandle* input, - TFE_TensorHandle* axis) { - TF_Status* status = TF_NewStatus(); - - TFE_Op* op = TFE_NewOp(ctx, "Min", status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_OpAddInput(op, input, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_OpAddInput(op, axis, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_OpSetAttrBool(op, "keep_dims", 1); - TFE_OpSetAttrType(op, "Tidx", TF_INT32); - TF_DeleteStatus(status); - TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(input)); - - return op; -} - -// If there is a GPU device, returns true and sets 'gpu_device_name' -// accordingly. 
-bool GetGPUDeviceName(TFE_Context* ctx, string* gpu_device_name) { - std::unique_ptr status( - TF_NewStatus(), TF_DeleteStatus); - TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get()); - CHECK_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - - const int num_devices = TF_DeviceListCount(devices); - for (int i = 0; i < num_devices; ++i) { - const string device_type(TF_DeviceListType(devices, i, status.get())); - CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); - const string device_name(TF_DeviceListName(devices, i, status.get())); - CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); - if (device_type == "GPU") { - *gpu_device_name = device_name; - LOG(INFO) << "Found GPU device " << device_name; - TF_DeleteDeviceList(devices); - return true; - } - } - TF_DeleteDeviceList(devices); - return false; -} - void BM_InitOp(int iters) { tensorflow::testing::StopTiming(); TF_Status* status = TF_NewStatus(); @@ -536,7 +421,7 @@ void TensorHandleSilentCopy(bool async) { // Disable the test if no GPU is present. string gpu_device_name; - if (GetGPUDeviceName(ctx, &gpu_device_name)) { + if (GetDeviceName(ctx, &gpu_device_name, "GPU")) { TFE_TensorHandle* hgpu = TFE_TensorHandleCopyToDevice( hcpu, ctx, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); @@ -583,7 +468,7 @@ void TensorHandleSilentCopyLocal(bool async) { // Disable the test if no GPU is present. string gpu_device_name; - if (GetGPUDeviceName(ctx, &gpu_device_name)) { + if (GetDeviceName(ctx, &gpu_device_name, "GPU")) { TFE_TensorHandle* hgpu = TFE_TensorHandleCopyToDevice( hcpu, ctx, gpu_device_name.c_str(), status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); @@ -624,7 +509,7 @@ void SetAndGetOpDevices(bool async) { // Disable the test if no GPU is present. 
string gpu_device_name; - if (GetGPUDeviceName(ctx, &gpu_device_name)) { + if (GetDeviceName(ctx, &gpu_device_name, "GPU")) { TFE_OpSetDevice(matmul, "GPU:0", status); ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); const char* device_name = TFE_OpGetDevice(matmul, status); @@ -688,7 +573,7 @@ void Execute_MatMul_CPU_Runtime_Error(bool async) { TFE_DeleteContextOptions(opts); TFE_TensorHandle* m1 = TestMatrixTensorHandle(); - TFE_TensorHandle* m2 = TestMatrixTensorHandle3X2(); + TFE_TensorHandle* m2 = DoubleTestMatrixTensorHandle3X2(); TFE_Op* matmul = MatMulOp(ctx, m1, m2); TFE_OpSetDevice(matmul, "/job:localhost/replica:0/task:0/device:CPU:0", status); diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc new file mode 100644 index 0000000000..5607c9dcb0 --- /dev/null +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -0,0 +1,163 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/c/eager/c_api_test_util.h" + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +using tensorflow::string; + +TFE_TensorHandle* TestScalarTensorHandle() { + float data[] = {1.0f}; + TF_Tensor* t = TF_AllocateTensor(TF_FLOAT, nullptr, 0, sizeof(float)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_TensorHandle* DoubleTestMatrixTensorHandle() { + int64_t dims[] = {2, 2}; + double data[] = {1.0, 2.0, 3.0, 4.0}; + TF_Tensor* t = TF_AllocateTensor( + TF_DOUBLE, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_TensorHandle* TestMatrixTensorHandle() { + int64_t dims[] = {2, 2}; + float data[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TF_Tensor* t = TF_AllocateTensor( + TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2() { + int64_t dims[] = {3, 2}; + double data[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TF_Tensor* t = TF_AllocateTensor( + TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), 
&data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_TensorHandle* TestMatrixTensorHandle3X2() { + int64_t dims[] = {3, 2}; + float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TF_Tensor* t = TF_AllocateTensor( + TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { + TF_Status* status = TF_NewStatus(); + + TFE_Op* op = TFE_NewOp(ctx, "MatMul", status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, a, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, b, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteStatus(status); + TFE_OpSetAttrBool(op, "transpose_a", 0); + TFE_OpSetAttrBool(op, "transpose_b", 0); + TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a)); + + return op; +} + +TFE_TensorHandle* TestAxisTensorHandle() { + int64_t dims[] = {1}; + int data[] = {1}; + TF_Tensor* t = TF_AllocateTensor( + TF_INT32, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + +TFE_Op* MinOp(TFE_Context* ctx, TFE_TensorHandle* input, + TFE_TensorHandle* axis) { + TF_Status* status = TF_NewStatus(); + + TFE_Op* op 
= TFE_NewOp(ctx, "Min", status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, input, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, axis, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpSetAttrBool(op, "keep_dims", 1); + TFE_OpSetAttrType(op, "Tidx", TF_INT32); + TF_DeleteStatus(status); + TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(input)); + + return op; +} + +bool GetDeviceName(TFE_Context* ctx, string* device_name, + const char* device_type) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_DeviceList* devices = TFE_ContextListDevices(ctx, status.get()); + CHECK_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + const int num_devices = TF_DeviceListCount(devices); + for (int i = 0; i < num_devices; ++i) { + const string dev_type(TF_DeviceListType(devices, i, status.get())); + CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); + const string dev_name(TF_DeviceListName(devices, i, status.get())); + CHECK_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); + if (dev_type == device_type) { + *device_name = dev_name; + LOG(INFO) << "Found " << device_type << " device " << *device_name; + TF_DeleteDeviceList(devices); + return true; + } + } + TF_DeleteDeviceList(devices); + return false; +} diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h new file mode 100644 index 0000000000..474cae67c8 --- /dev/null +++ b/tensorflow/c/eager/c_api_test_util.h @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_EAGER_C_API_TEST_UTIL_H_ +#define TENSORFLOW_C_EAGER_C_API_TEST_UTIL_H_ + +#include "tensorflow/c/eager/c_api.h" + +#include "tensorflow/core/platform/types.h" + +// Return a tensor handle containing a float scalar +TFE_TensorHandle* TestScalarTensorHandle(); + +// Return a tensor handle containing a 2x2 matrix of doubles +TFE_TensorHandle* DoubleTestMatrixTensorHandle(); + +// Return a tensor handle containing a 2x2 matrix of floats +TFE_TensorHandle* TestMatrixTensorHandle(); + +// Return a tensor handle containing a 3x2 matrix of doubles +TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2(); + +// Return a tensor handle containing a 3x2 matrix of floats +TFE_TensorHandle* TestMatrixTensorHandle3X2(); + +// Return a matmul op multiplying `a` by `b`. +TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b); + +// Return an 1-D INT32 tensor containing a single value 1. +TFE_TensorHandle* TestAxisTensorHandle(); + +// Return an op taking minimum of `input` long `axis` dimension. +TFE_Op* MinOp(TFE_Context* ctx, TFE_TensorHandle* input, + TFE_TensorHandle* axis); + +// If there is a device of type `device_type`, returns true +// and sets 'device_name' accordingly. +// `device_type` must be either "GPU" or "TPU". 
+bool GetDeviceName(TFE_Context* ctx, tensorflow::string* device_name, + const char* device_type); + +#endif // TENSORFLOW_C_EAGER_C_API_TEST_UTIL_H_ diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index ea9e036604..43648402f6 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -50,11 +50,12 @@ Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options, (void)registrations; std::unique_ptr device; - TF_RETURN_IF_ERROR( - XlaDevice::Create("Host", DEVICE_XLA_CPU, 0, DEVICE_CPU_XLA_JIT, options, - name_prefix, registration, - /*transfer_as_literal=*/false, - /*shape_representation_fn=*/{}, &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create("Host", DEVICE_XLA_CPU, 0, + DEVICE_CPU_XLA_JIT, options, name_prefix, + registration, + /*transfer_as_literal=*/false, + /*shape_representation_fn=*/{}, + /*padded_shape_fn=*/{}, &device)); devices->push_back(device.release()); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index f13b46c532..ed007d603e 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_device_context.h" #include "tensorflow/compiler/jit/xla_device_ops.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/core/common_runtime/device.h" @@ -105,6 +106,25 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( return alloc_ptr; } +namespace { + +// Default PaddedShapeFn implementation that simply returns the unpadded +// on-device shape. This is accurate for CPU and GPU devices that neither +// transpose nor pad tensors. 
+Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { + const tensorflow::XlaTensor* xla_tensor = + tensorflow::XlaTensor::FromTensor(&tensor); + if (xla_tensor == nullptr) { + return TensorShapeToXLAShape(tensor.dtype(), tensor.shape(), shape); + } + + const xla::ShapedBuffer& shaped_buffer = xla_tensor->shaped_buffer(); + *shape = shaped_buffer.on_device_shape(); + return Status::OK(); +} + +} // namespace + /* static */ Status XlaDevice::Create( const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, @@ -112,7 +132,7 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( const XlaOpRegistry::DeviceRegistration& registration, bool transfer_as_literal, const XlaCompiler::ShapeRepresentationFn& shape_representation_fn, - std::unique_ptr* device) { + const PaddedShapeFn& padded_shape_fn, std::unique_ptr* device) { VLOG(1) << "XlaDevice::Create " << platform_name << " " << device_name << ":" << device_ordinal; @@ -133,17 +153,20 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( device->reset(new XlaDevice( options, attrs, device_ordinal, DeviceType(jit_device_name), - platform.ValueOrDie(), transfer_as_literal, shape_representation_fn)); + platform.ValueOrDie(), transfer_as_literal, shape_representation_fn, + padded_shape_fn ? 
padded_shape_fn : DefaultPaddedShapeFn)); return Status::OK(); } XlaDevice::Metadata::Metadata( int device_ordinal, se::Platform* platform, const DeviceType& device_type, - XlaCompiler::ShapeRepresentationFn shape_representation_fn) + XlaCompiler::ShapeRepresentationFn shape_representation_fn, + PaddedShapeFn padded_shape_fn) : device_ordinal_(device_ordinal), device_type_(device_type), platform_(platform), - shape_representation_fn_(std::move(shape_representation_fn)) {} + shape_representation_fn_(std::move(shape_representation_fn)), + padded_shape_fn_(std::move(padded_shape_fn)) {} int XlaDevice::Metadata::device_ordinal() const { return device_ordinal_; } @@ -178,10 +201,11 @@ XlaDevice::XlaDevice( const SessionOptions& options, const DeviceAttributes& attrs, int device_ordinal, const DeviceType& jit_device_name, se::Platform* platform, bool transfer_as_literal, - const XlaCompiler::ShapeRepresentationFn& shape_representation_fn) + const XlaCompiler::ShapeRepresentationFn& shape_representation_fn, + const PaddedShapeFn& padded_shape_fn) : LocalDevice(options, attrs), xla_metadata_(device_ordinal, platform, jit_device_name, - shape_representation_fn), + shape_representation_fn, padded_shape_fn), device_ordinal_(device_ordinal), jit_device_name_(jit_device_name), xla_allocator_(nullptr), diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index d5d345d43b..02e88ee679 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -45,13 +45,19 @@ namespace tensorflow { class XlaDevice : public LocalDevice { public: + // Given a tensor, sets `xla::Shape*` the shape of tensor's representation + // on device, fully padded. On error, the contents of `xla::Shape*` + // are undefined. + typedef std::function PaddedShapeFn; + // Wrapper class to store metadata about the XlaDevice, where it can be // retrieved e.g., when lazily creating the XlaCompilationCache device. 
class Metadata { public: Metadata(int device_ordinal, se::Platform* platform, const DeviceType& device_type, - XlaCompiler::ShapeRepresentationFn shape_representation_fn); + XlaCompiler::ShapeRepresentationFn shape_representation_fn, + PaddedShapeFn padded_shape_fn); // The index of the device on this host. int device_ordinal() const; @@ -62,12 +68,14 @@ class XlaDevice : public LocalDevice { const XlaCompiler::ShapeRepresentationFn& shape_representation_fn() const { return shape_representation_fn_; } + const PaddedShapeFn& padded_shape_fn() const { return padded_shape_fn_; } private: const int device_ordinal_; const DeviceType device_type_; se::Platform* platform_; // Not owned. XlaCompiler::ShapeRepresentationFn shape_representation_fn_; + PaddedShapeFn padded_shape_fn_; TF_DISALLOW_COPY_AND_ASSIGN(Metadata); }; @@ -81,6 +89,8 @@ class XlaDevice : public LocalDevice { // 'transfer_as_literal' is true if device<->host transfers must be done using // XLA's TransferLiteral{To,From}Device interface. If false, we can use // ThenMemcpy instead. + // If padded_shape_fn is empty, a default implementation that returns + // the on-host shape is used. static Status Create( const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, @@ -88,12 +98,16 @@ class XlaDevice : public LocalDevice { const XlaOpRegistry::DeviceRegistration& registration, bool transfer_as_literal, const XlaCompiler::ShapeRepresentationFn& shape_representation_fn, - std::unique_ptr* device); + const PaddedShapeFn& padded_shape_fn, std::unique_ptr* device); + // Creates a new XLA Device. + // If padded_shape_fn is empty, a default implementation that returns + // the logical on-device shape without padding is used. 
XlaDevice(const SessionOptions& options, const DeviceAttributes& attrs, int device_ordinal, const DeviceType& jit_device_name, se::Platform* platform, bool transfer_as_literal, - const XlaCompiler::ShapeRepresentationFn& shape_representation_fn); + const XlaCompiler::ShapeRepresentationFn& shape_representation_fn, + const PaddedShapeFn& padded_shape_fn); ~XlaDevice() override; Allocator* GetAllocator(AllocatorAttributes attr) override; @@ -110,6 +124,7 @@ class XlaDevice : public LocalDevice { Tensor* tensor) override; xla::LocalClient* client() const; + const Metadata& metadata() { return xla_metadata_; } xla::StatusOr GetStream(); // If not already set, create and set GpuDeviceInfo. diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index 26842fbe5c..c0d86a28c7 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -49,7 +49,8 @@ Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options, XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options, name_prefix, registration, /*transfer_as_literal=*/false, - /*shape_representation_fn=*/{}, &device); + /*shape_representation_fn=*/{}, + /*padded_shape_fn=*/{}, &device); if (!status.ok()) { // Treat failures as non-fatal; there might not be a GPU in the machine. 
VLOG(1) << "Failed to create XLA_GPU device: " << status; diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc index 4146996f63..661187f4a8 100644 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ b/tensorflow/compiler/jit/xla_interpreter_device.cc @@ -48,11 +48,12 @@ Status XlaInterpreterDeviceFactory::CreateDevices( registration.compile_resource_ops = true; std::unique_ptr device; - TF_RETURN_IF_ERROR(XlaDevice::Create( - "Interpreter", DEVICE_XLA_INTERPRETER, 0, DEVICE_INTERPRETER_XLA_JIT, - options, name_prefix, registration, - /*transfer_as_literal=*/false, - /*shape_representation_fn=*/{}, &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create("Interpreter", DEVICE_XLA_INTERPRETER, 0, + DEVICE_INTERPRETER_XLA_JIT, options, + name_prefix, registration, + /*transfer_as_literal=*/false, + /*shape_representation_fn=*/{}, + /*padded_shape_fn=*/{}, &device)); devices->push_back(device.release()); return Status::OK(); } diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 310fe58e05..bda5e26f43 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -21,6 +21,7 @@ set(tf_c_srcs "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h" + "${tensorflow_source_dir}/tensorflow/c/eager/c_api_debug.cc" "${tensorflow_source_dir}/tensorflow/c/eager/tape.h" "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.cc" "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.h" diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index a62af4a06c..ea604647fa 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -27,8 +27,15 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/python/lib/core/ndarray_tensor.h" +// forward declare +struct EagerTensor; + namespace { +// An instance of _EagerTensorProfiler that will receive callbacks about +// events on eager tensors. This is set by TFE_Py_InitEagerTensor, if at all. +PyObject* eager_tensor_profiler = nullptr; + TFE_Context* GetContext(PyObject* ctx) { TFE_Context* context = reinterpret_cast(PyCapsule_GetPointer(ctx, nullptr)); @@ -253,8 +260,45 @@ typedef struct EagerTensor { // to use a TF_Status object. However note that accesses to `status` are not // thread-safe. TF_Status* status; + + PyObject* weakreflist; /* List of weak references */ } EagerTensor; +namespace { + +// Returns true on success - successfully invoked or no profiler registered. +// Returns false if some error occurred. +bool MaybeInvokeCreatedOnEagerTensorProfiler(EagerTensor* created_tensor) { + if (eager_tensor_profiler != nullptr) { +#if PY_MAJOR_VERSION < 3 + PyObject* created_method_name = PyString_InternFromString("created"); +#else + PyObject* created_method_name = PyUnicode_InternFromString("created"); +#endif + if (created_method_name == nullptr) { + return false; + } + PyObject* result = PyObject_CallMethodObjArgs( + eager_tensor_profiler, created_method_name, created_tensor, NULL); + if (result == nullptr) { + LOG(ERROR) << "Invoking created() on EagerTensor profiler failed"; + // While we can potentially continue because the error is related to + // profiling, we choose to return an error because: + // - If profiling is used, the user likely wants to stop execution on + // profiling errors. + // - Error in profiling code might have left some state in an invalid + // form that can lead to an error later on. Better to fail fast. + Py_DECREF(created_method_name); + return false; + } + Py_DECREF(created_method_name); + Py_DECREF(result); + } + return true; +} + +} // namespace + // tp_init for EagerTensor. 
int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { self->id = get_uid(); @@ -266,6 +310,7 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { Py_INCREF(Py_None); self->tensor_shape = Py_None; self->status = TF_NewStatus(); + self->weakreflist = nullptr; PyObject* value; PyObject* context = nullptr; PyObject* device = nullptr; @@ -344,11 +389,22 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { if (handle == nullptr) return -1; } self->handle = handle.release(); + + if (!MaybeInvokeCreatedOnEagerTensorProfiler(self)) { + return -1; + } + return 0; } // tp_dealloc for EagerTensor. void EagerTensor_dealloc(EagerTensor* self) { + // Clear weak references to self. + // Needs to happen before any actual destruction. + if (self->weakreflist != nullptr) { + PyObject_ClearWeakRefs((PyObject*)self); + } + TF_DeleteStatus(self->status); Py_DECREF(self->handle_data); Py_DECREF(self->keras_mask); @@ -574,43 +630,43 @@ static PyTypeObject _EagerTensorType = { // clang-format off PyVarObject_HEAD_INIT(nullptr, 0) // clang-format on - "EagerTensor", /* tp_name */ - sizeof(EagerTensor), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)EagerTensor_dealloc, /* tp_dealloc */ - nullptr, /* tp_print */ - nullptr, /* tp_getattr */ - nullptr, /* tp_setattr */ - nullptr, /* tp_compare */ - nullptr, /* tp_repr */ - nullptr, /* tp_as_number */ - nullptr, /* tp_as_sequence */ - nullptr, /* tp_as_mapping */ - nullptr, /* tp_hash */ - nullptr, /* tp_call */ - nullptr, /* tp_str */ - nullptr, /* tp_getattro */ - nullptr, /* tp_setattro */ - nullptr, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - nullptr, /* tp_doc */ - nullptr, /* tp_traverse */ - nullptr, /* tp_clear */ - nullptr, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - nullptr, /* tp_iter */ - nullptr, /* tp_iternext */ - EagerTensor_methods, /* tp_methods */ - nullptr, /* tp_members */ - EagerTensor_getseters, /* tp_getset */ - nullptr, /* 
tp_base */ - nullptr, /* tp_dict */ - nullptr, /* tp_descr_get */ - nullptr, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)EagerTensor_init, /* tp_init */ - nullptr, /* tp_alloc */ - nullptr, /* tp_new */ + "EagerTensor", /* tp_name */ + sizeof(EagerTensor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)EagerTensor_dealloc, /* tp_dealloc */ + nullptr, /* tp_print */ + nullptr, /* tp_getattr */ + nullptr, /* tp_setattr */ + nullptr, /* tp_compare */ + nullptr, /* tp_repr */ + nullptr, /* tp_as_number */ + nullptr, /* tp_as_sequence */ + nullptr, /* tp_as_mapping */ + nullptr, /* tp_hash */ + nullptr, /* tp_call */ + nullptr, /* tp_str */ + nullptr, /* tp_getattro */ + nullptr, /* tp_setattro */ + nullptr, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + nullptr, /* tp_doc */ + nullptr, /* tp_traverse */ + nullptr, /* tp_clear */ + nullptr, /* tp_richcompare */ + offsetof(EagerTensor, weakreflist), /* tp_weaklistoffset */ + nullptr, /* tp_iter */ + nullptr, /* tp_iternext */ + EagerTensor_methods, /* tp_methods */ + nullptr, /* tp_members */ + EagerTensor_getseters, /* tp_getset */ + nullptr, /* tp_base */ + nullptr, /* tp_dict */ + nullptr, /* tp_descr_get */ + nullptr, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)EagerTensor_init, /* tp_init */ + nullptr, /* tp_alloc */ + nullptr, /* tp_new */ }; #endif @@ -641,6 +697,11 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { t->tensor_shape = Py_None; t->handle = handle; t->status = TF_NewStatus(); + t->weakreflist = nullptr; + + if (!MaybeInvokeCreatedOnEagerTensorProfiler(t)) { + return nullptr; + } } return reinterpret_cast(t); } @@ -720,6 +781,18 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { return reinterpret_cast(EagerTensorType); } +PyObject* TFE_Py_SetEagerTensorProfiler(PyObject* profiler) { + Py_XDECREF(eager_tensor_profiler); + + if (profiler == Py_None) { + eager_tensor_profiler = nullptr; + } else { + eager_tensor_profiler = profiler; + 
Py_INCREF(eager_tensor_profiler); + } + Py_RETURN_NONE; +} + PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim) { if (!PyList_Check(tensors) && !PyTuple_Check(tensors)) { PyErr_SetString(PyExc_TypeError, @@ -792,3 +865,37 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim) { return EagerTensorFromHandle(handle); } + +PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor) { + if (!EagerTensor_CheckExact(tensor)) { + PyErr_SetString( + PyExc_TypeError, + tensorflow::strings::StrCat("Expected an EagerTensors but got type \"", + Py_TYPE(tensor)->tp_name, "\"") + .c_str()); + return nullptr; + } + TFE_TensorHandle* handle = EagerTensor_Handle(tensor); + + auto status = tensorflow::make_safe(TF_NewStatus()); + TFE_TensorDebugInfo* debug_info = + TFE_TensorHandleTensorDebugInfo(handle, status.get()); + if (TF_GetCode(status.get()) != TF_OK) { + PyErr_SetString( + PyExc_RuntimeError, + tensorflow::strings::StrCat("Error retrieving tensor's device shape: ", + TF_Message(status.get())) + .c_str()); + return nullptr; + } + + int rank = TFE_TensorDebugInfoOnDeviceNumDims(debug_info); + PyObject* shape = PyTuple_New(rank); + for (int i = 0; i < rank; ++i) { + tensorflow::int64 dim_size = TFE_TensorDebugInfoOnDeviceDim(debug_info, i); + PyTuple_SET_ITEM(shape, i, PyLong_FromLongLong(dim_size)); + } + TFE_DeleteTensorDebugInfo(debug_info); + + return shape; +} diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index 73fe80e8ca..c502fe9e85 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -115,6 +115,15 @@ TFE_TensorHandle* EagerTensor_Handle(const PyObject* o); // newly created type, or nullptr on error. PyObject* TFE_Py_InitEagerTensor(PyObject* base_class); +// Sets `profiler` as the current profiler to receive callbacks about events +// on eager tensors. Currently, the only reported event is creation. 
+// `profiler` is expected to have a `created(self, eager_tensor)` method that +// takes the created tensor as its single argument. +// Previous profiler, if any, is unset and will not receive any more +// callbacks. +// To unset the profiler, pass Py_None as the value of `profiler`. +PyObject* TFE_Py_SetEagerTensorProfiler(PyObject* profiler); + // Creates a new tape and adds it to the active set. `persistent` must be a // PyBool_Type, i.e either Py_True or Py_False PyObject* TFE_Py_TapeSetNew(PyObject* persistent); @@ -203,4 +212,8 @@ PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); // tensors in `tensors`. PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim); +// Returns the shape of this tensor's on-device representation. +// The shape is represented as a Python tuple of integers. +PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor); + #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 42c708b024..500dc30cc3 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -33,6 +33,7 @@ limitations under the License. %rename("%s") TFE_ContextAsyncClearError; %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; +%rename("%s") TFE_Py_SetEagerTensorProfiler; %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_RegisterGradientFunction; %rename("%s") TFE_Py_RegisterFallbackExceptionClass; @@ -60,6 +61,7 @@ limitations under the License. 
%rename("%s") TFE_ContextOptionsSetAsync; %rename("%s") TFE_DeleteContextOptions; %rename("%s") TFE_Py_TensorShapeSlice; +%rename("%s") TFE_Py_TensorShapeOnDevice; %{ #include "tensorflow/python/eager/pywrap_tfe.h" -- GitLab From 5430f37a9aaf463ffba13e47121fc9b6a5834cd2 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Fri, 25 May 2018 15:11:46 -0700 Subject: [PATCH 166/902] Add warning to LookupOrCreate about reentrancy issue PiperOrigin-RevId: 198110382 --- tensorflow/core/framework/resource_mgr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index 11160127e4..33d4cb77ff 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -131,6 +131,10 @@ class ResourceMgr { // "*resource". Otherwise, invokes creator() to create the resource. // The caller takes the ownership of one ref on "*resource". // + // WARNING: creator() must not call any methods on ResourceMgr during its + // execution, because a non-reentrant lock is held during the creator() call + // in order to guarantee atomicity of LookupOrCreate(). + // // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr template -- GitLab From f69f4f0592accd26c0f49d18e360cc4cdb2ce5b3 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 25 May 2018 16:07:25 -0700 Subject: [PATCH 167/902] Automated g4 rollback of changelist 198087342 PiperOrigin-RevId: 198117552 --- .../estimator_batch/estimator.py | 38 +---- .../estimator_batch/estimator_test.py | 33 +--- .../boosted_trees/estimator_batch/model.py | 8 +- .../boosted_trees/kernels/prediction_ops.cc | 47 +----- .../lib/models/multiple_additive_trees.cc | 11 +- .../lib/models/multiple_additive_trees.h | 7 +- .../models/multiple_additive_trees_test.cc | 47 ++---- .../boosted_trees/ops/prediction_ops.cc | 70 -------- .../python/ops/prediction_ops.py | 1 - .../python/training/functions/gbdt_batch.py | 149 +++++++----------- .../training/functions/gbdt_batch_test.py | 80 ++-------- 11 files changed, 107 insertions(+), 384 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index c8d401bfa6..89d0d611d2 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -41,8 +41,7 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False, - output_leaf_index=False): + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeClassifier estimator instance. Args: @@ -67,14 +66,6 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. - output_leaf_index: whether to output leaf indices along with predictions - during inference. The leaf node indexes are available in predictions - dict by the key 'leaf_index'. For example, - result_dict = classifier.predict(...) 
- for example_prediction_result in result_dict: - # access leaf index list by example_prediction_result["leaf_index"] - # which contains one leaf index per tree - Raises: ValueError: If learner_config is not valid. """ @@ -83,9 +74,7 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): # supports second order derivative. def loss_fn(labels, logits, weights=None): result = losses.per_example_maxent_loss( - labels=labels, - logits=logits, - weights=weights, + labels=labels, logits=logits, weights=weights, num_classes=n_classes) return math_ops.reduce_mean(result[0]) else: @@ -113,7 +102,6 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): 'center_bias': center_bias, 'logits_modifier_function': logits_modifier_function, 'use_core_libs': use_core_libs, - 'output_leaf_index': output_leaf_index, }, model_dir=model_dir, config=config, @@ -136,8 +124,7 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False, - output_leaf_index=False): + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeRegressor estimator instance. Args: @@ -164,13 +151,6 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. - output_leaf_index: whether to output leaf indices along with predictions - during inference. The leaf node indexes are available in predictions - dict by the key 'leaf_index'. For example, - result_dict = classifier.predict(...) 
- for example_prediction_result in result_dict: - # access leaf index list by example_prediction_result["leaf_index"] - # which contains one leaf index per tree """ head = head_lib.regression_head( label_name=label_name, @@ -193,7 +173,6 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, 'use_core_libs': use_core_libs, - 'output_leaf_index': False, }, model_dir=model_dir, config=config, @@ -218,8 +197,7 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): feature_engineering_fn=None, logits_modifier_function=None, center_bias=True, - use_core_libs=False, - output_leaf_index=False): + use_core_libs=False): """Initializes a GradientBoostedDecisionTreeEstimator estimator instance. Args: @@ -242,13 +220,6 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): the bias. use_core_libs: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. - output_leaf_index: whether to output leaf indices along with predictions - during inference. The leaf node indexes are available in predictions - dict by the key 'leaf_index'. For example, - result_dict = classifier.predict(...) 
- for example_prediction_result in result_dict: - # access leaf index list by example_prediction_result["leaf_index"] - # which contains one leaf index per tree """ super(GradientBoostedDecisionTreeEstimator, self).__init__( model_fn=model.model_builder, @@ -262,7 +233,6 @@ class GradientBoostedDecisionTreeEstimator(estimator.Estimator): 'logits_modifier_function': logits_modifier_function, 'center_bias': center_bias, 'use_core_libs': use_core_libs, - 'output_leaf_index': False, }, model_dir=model_dir, config=config, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py index fe91e5293f..0d58317bd5 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py @@ -62,34 +62,12 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): examples_per_layer=3, model_dir=model_dir, config=config, - feature_columns=[contrib_feature_column.real_valued_column("x")], - output_leaf_index=False) + feature_columns=[contrib_feature_column.real_valued_column("x")]) classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) classifier.export(self._export_dir_base) - def testThatLeafIndexIsInPredictions(self): - learner_config = learner_pb2.LearnerConfig() - learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 1 - model_dir = tempfile.mkdtemp() - config = run_config.RunConfig() - - classifier = estimator.GradientBoostedDecisionTreeClassifier( - learner_config=learner_config, - num_trees=1, - examples_per_layer=3, - model_dir=model_dir, - config=config, - feature_columns=[contrib_feature_column.real_valued_column("x")], - output_leaf_index=True) - - classifier.fit(input_fn=_train_input_fn, steps=15) - result_dict = classifier.predict(input_fn=_eval_input_fn) - for prediction_item in result_dict: - self.assertTrue("leaf_index" 
in prediction_item) - def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 @@ -109,8 +87,7 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True, - output_leaf_index=False) + use_core_libs=True) model.fit(input_fn=_train_input_fn, steps=15) model.evaluate(input_fn=_eval_input_fn, steps=1) @@ -130,8 +107,7 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True, - output_leaf_index=False) + use_core_libs=True) classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) @@ -151,8 +127,7 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_dir=model_dir, config=config, feature_columns=[core_feature_column.numeric_column("x")], - use_core_libs=True, - output_leaf_index=False) + use_core_libs=True) regressor.fit(input_fn=_train_input_fn, steps=15) regressor.evaluate(input_fn=_eval_input_fn, steps=1) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index 1ee8911989..15ab6d8145 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -63,8 +63,6 @@ def model_builder(features, labels, mode, params, config): num_trees = params["num_trees"] use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] - output_leaf_index = params["output_leaf_index"] - if features is None: raise ValueError("At least one feature must be specified.") @@ -98,8 +96,7 @@ def model_builder(features, labels, mode, params, config): feature_columns=feature_columns, 
logits_dimension=head.logits_dimension, features=training_features, - use_core_columns=use_core_libs, - output_leaf_index=output_leaf_index) + use_core_columns=use_core_libs) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] @@ -130,9 +127,6 @@ def model_builder(features, labels, mode, params, config): labels=labels, train_op_fn=_train_op_fn, logits=logits) - if output_leaf_index and gbdt_batch.LEAF_INDEX in predictions_dict: - model_fn_ops.predictions[gbdt_batch.LEAF_INDEX] = predictions_dict[ - gbdt_batch.LEAF_INDEX] if num_trees: if center_bias: num_trees += 1 diff --git a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc index dcce8bc650..b3fe38614e 100644 --- a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc @@ -59,7 +59,6 @@ const char* kApplyDropoutAttributeName = "apply_dropout"; const char* kApplyAveragingAttributeName = "apply_averaging"; const char* kDropoutInfoOutputTensorName = "drop_out_tree_indices_weights"; const char* kPredictionsTensorName = "predictions"; -const char* kLeafIndexTensorName = "leaf_index"; void CalculateTreesToInclude( const boosted_trees::trees::DecisionTreeEnsembleConfig& config, @@ -171,16 +170,15 @@ class GradientTreesPredictionOp : public OpKernel { core::ScopedUnref unref_me(ensemble_resource); if (use_locking_) { tf_shared_lock l(*ensemble_resource->get_mutex()); - DoCompute(context, ensemble_resource, false); + DoCompute(context, ensemble_resource); } else { - DoCompute(context, ensemble_resource, false); + DoCompute(context, ensemble_resource); } } - protected: - virtual void DoCompute(OpKernelContext* context, - DecisionTreeEnsembleResource* ensemble_resource, - const bool is_output_leaf_index) { + private: + void DoCompute(OpKernelContext* context, + DecisionTreeEnsembleResource* 
ensemble_resource) { // Read dense float features list; OpInputList dense_float_features_list; OP_REQUIRES_OK(context, TensorUtils::ReadDenseFloatFeatures( @@ -269,14 +267,6 @@ class GradientTreesPredictionOp : public OpKernel { &output_predictions_t)); auto output_predictions = output_predictions_t->matrix(); - // Allocate output leaf index matrix. - Tensor* output_leaf_index_t = nullptr; - if (is_output_leaf_index) { - OP_REQUIRES_OK(context, context->allocate_output( - kLeafIndexTensorName, - {batch_size, ensemble_resource->num_trees()}, - &output_leaf_index_t)); - } // Run predictor. thread::ThreadPool* const worker_threads = context->device()->tensorflow_cpu_worker_threads()->workers; @@ -298,13 +288,11 @@ class GradientTreesPredictionOp : public OpKernel { i, weight * (num_ensembles - i + start_averaging) / num_ensembles); } MultipleAdditiveTrees::Predict(adjusted, trees_to_include, batch_features, - worker_threads, output_predictions, - output_leaf_index_t); + worker_threads, output_predictions); } else { MultipleAdditiveTrees::Predict( ensemble_resource->decision_tree_ensemble(), trees_to_include, - batch_features, worker_threads, output_predictions, - output_leaf_index_t); + batch_features, worker_threads, output_predictions); } // Output dropped trees and original weights. 
@@ -314,6 +302,7 @@ class GradientTreesPredictionOp : public OpKernel { {2, static_cast(dropped_trees.size())}, &output_dropout_info_t)); auto output_dropout_info = output_dropout_info_t->matrix(); + for (int32 i = 0; i < dropped_trees.size(); ++i) { output_dropout_info(0, i) = dropped_trees[i]; output_dropout_info(1, i) = original_weights[i]; @@ -337,26 +326,6 @@ class GradientTreesPredictionOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("GradientTreesPrediction").Device(DEVICE_CPU), GradientTreesPredictionOp); -// GradientTreesPredictionVerboseOp is derived from GradientTreesPredictionOp -// and have an additional output of tensor of rank 2 containing leaf ids for -// each tree where an instance ended up with. -class GradientTreesPredictionVerboseOp : public GradientTreesPredictionOp { - public: - explicit GradientTreesPredictionVerboseOp(OpKernelConstruction* const context) - : GradientTreesPredictionOp(context) {} - - protected: - void DoCompute(OpKernelContext* context, - DecisionTreeEnsembleResource* ensemble_resource, - bool is_output_leaf_index) override { - GradientTreesPredictionOp::DoCompute(context, ensemble_resource, true); - } -}; - -REGISTER_KERNEL_BUILDER( - Name("GradientTreesPredictionVerbose").Device(DEVICE_CPU), - GradientTreesPredictionVerboseOp); - class GradientTreesPartitionExamplesOp : public OpKernel { public: explicit GradientTreesPartitionExamplesOp(OpKernelConstruction* const context) diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc index ee664f1ba6..43b00d4c6d 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.cc @@ -26,8 +26,7 @@ void MultipleAdditiveTrees::Predict( const std::vector& trees_to_include, const boosted_trees::utils::BatchFeatures& features, tensorflow::thread::ThreadPool* const worker_threads, - 
tensorflow::TTypes::Matrix output_predictions, - Tensor* output_leaf_indices) { + tensorflow::TTypes::Matrix output_predictions) { // Zero out predictions as the model is additive. output_predictions.setZero(); @@ -39,8 +38,7 @@ void MultipleAdditiveTrees::Predict( // Lambda for doing a block of work. auto update_predictions = [&config, &features, &trees_to_include, - &output_predictions, - &output_leaf_indices](int64 start, int64 end) { + &output_predictions](int64 start, int64 end) { auto examples_iterable = features.examples_iterable(start, end); for (const auto& example : examples_iterable) { for (const int32 tree_idx : trees_to_include) { @@ -49,11 +47,6 @@ void MultipleAdditiveTrees::Predict( const float tree_weight = config.tree_weights(tree_idx); const int leaf_idx = trees::DecisionTree::Traverse(tree, 0, example); QCHECK(leaf_idx >= 0) << "Invalid tree: " << tree.DebugString(); - // Checks if output leaf tree index is required. - if (output_leaf_indices != nullptr) { - output_leaf_indices->matrix()(example.example_idx, tree_idx) = - leaf_idx; - } const auto& leaf_node = tree.nodes(leaf_idx); QCHECK(leaf_node.has_leaf()) << "Invalid leaf node: " << leaf_node.DebugString(); diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h index be7c1555c0..cc3dc226cd 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees.h @@ -33,17 +33,12 @@ class MultipleAdditiveTrees { public: // Predict runs tree ensemble on the given batch and updates // output predictions accordingly, for the given list of trees. - // output_leaf_indices is a pointer to a 2 dimensional tensor. If it is not - // null, this method fills output_leaf_indices with a per-tree leaf id where - // each of the instances from 'features' ended up in. Its shape is num - // examples X num of trees. 
When nullptr, leaf ids are not output of trees. static void Predict( const boosted_trees::trees::DecisionTreeEnsembleConfig& config, const std::vector& trees_to_include, const boosted_trees::utils::BatchFeatures& features, tensorflow::thread::ThreadPool* const worker_threads, - tensorflow::TTypes::Matrix output_predictions, - Tensor* output_leaf_indices); + tensorflow::TTypes::Matrix output_predictions); }; } // namespace models diff --git a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc index caad023ca6..4ca18bedb1 100644 --- a/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/models/multiple_additive_trees_test.cc @@ -62,7 +62,7 @@ TEST_F(MultipleAdditiveTreesTest, Empty) { tensorflow::thread::ThreadPool threads(tensorflow::Env::Default(), "test", kNumThreadsSingleThreaded); MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_EQ(0, output_matrix(0, 0)); EXPECT_EQ(0, output_matrix(1, 0)); } @@ -99,38 +99,17 @@ TEST_F(MultipleAdditiveTreesTest, SingleClass) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix, - nullptr); + batch_features_, &threads, output_matrix); EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (bias) + 0.2 (leaf 2). EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1). } - // Normal case with leaf node. - { - // Initialize output leaf inedx tensor, since leaf index is positive in this - // case, initialize with the value of -1. Since there are 2 examples and - // there are 2 trees, initialize leaf output index by 2 * 2. 
- auto output_leaf_index_tensor = AsTensor({-1, -1, -1, -1}, {2, 2}); - MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix, - &output_leaf_index_tensor); - EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (bias) + 0.2 (leaf 2). - EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1). - EXPECT_FLOAT_EQ(0, output_leaf_index_tensor.matrix()( - 0, 0)); // 1st leaf for the first example - EXPECT_FLOAT_EQ(0, output_leaf_index_tensor.matrix()( - 1, 0)); // 1st leaf for the second example - EXPECT_FLOAT_EQ(2, output_leaf_index_tensor.matrix()( - 0, 1)); // 2nd leaf for the first example - EXPECT_FLOAT_EQ(1, output_leaf_index_tensor.matrix()( - 1, 1)); // 2nd leaf for the second example - } // Weighted case { DecisionTreeEnsembleConfig weighted = tree_ensemble_config; weighted.set_tree_weights(0, 6.0); weighted.set_tree_weights(1, 3.2); MultipleAdditiveTrees::Predict(weighted, {0, 1}, batch_features_, &threads, - output_matrix, nullptr); + output_matrix); // -0.4 (bias) + 0.2 (leaf 2). EXPECT_FLOAT_EQ(-0.4f * 6 + 0.2 * 3.2, output_matrix(0, 0)); // -0.4 (bias) + 0.9 (leaf 1). @@ -139,21 +118,21 @@ TEST_F(MultipleAdditiveTreesTest, SingleClass) { // Drop first tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {1}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(0.2f, output_matrix(0, 0)); // 0.2 (leaf 2). EXPECT_FLOAT_EQ(0.9f, output_matrix(1, 0)); // 0.9 (leaf 1). } // Drop second tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias). EXPECT_FLOAT_EQ(-0.4f, output_matrix(1, 0)); // -0.4 (bias). } // Drop all trees. 
{ MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(0.0, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.0, output_matrix(1, 0)); } @@ -193,8 +172,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix, - nullptr); + batch_features_, &threads, output_matrix); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias) EXPECT_FLOAT_EQ(-0.5f, output_matrix(0, 1)); // -0.7 (bias) + 0.2 (leaf 2) EXPECT_FLOAT_EQ(0.5f, output_matrix(1, 0)); // -0.4 (bias) + 0.9 (leaf 1) @@ -206,7 +184,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { weighted.set_tree_weights(0, 6.0); weighted.set_tree_weights(1, 3.2); MultipleAdditiveTrees::Predict(weighted, {0, 1}, batch_features_, &threads, - output_matrix, nullptr); + output_matrix); // bias EXPECT_FLOAT_EQ(-0.4f * 6, output_matrix(0, 0)); // bias + leaf 2 @@ -219,7 +197,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Dropout first tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {1}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(0.0, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.2f, output_matrix(0, 1)); // 0.2 (leaf 2) EXPECT_FLOAT_EQ(0.9f, output_matrix(1, 0)); // 0.9 (leaf 2) @@ -228,7 +206,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Dropout second tree. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 0)); // -0.4 (bias) EXPECT_FLOAT_EQ(-0.7f, output_matrix(0, 1)); // -0.7 (bias) EXPECT_FLOAT_EQ(-0.4f, output_matrix(1, 0)); // -0.4 (bias) @@ -237,7 +215,7 @@ TEST_F(MultipleAdditiveTreesTest, MultiClass) { // Drop both trees. 
{ MultipleAdditiveTrees::Predict(tree_ensemble_config, {}, batch_features_, - &threads, output_matrix, nullptr); + &threads, output_matrix); EXPECT_FLOAT_EQ(0.0f, output_matrix(0, 0)); EXPECT_FLOAT_EQ(0.0f, output_matrix(0, 1)); EXPECT_FLOAT_EQ(0.0f, output_matrix(1, 0)); @@ -280,8 +258,7 @@ TEST_F(MultipleAdditiveTreesTest, DenseLeaves) { // Normal case. { MultipleAdditiveTrees::Predict(tree_ensemble_config, {0, 1}, - batch_features_, &threads, output_matrix, - nullptr); + batch_features_, &threads, output_matrix); EXPECT_FLOAT_EQ(-0.2f, output_matrix(0, 0)); // -0.4 (tree1) + 0.2 (leaf 2) EXPECT_FLOAT_EQ(-0.4f, output_matrix(0, 1)); // -0.7 (tree1) + 0.3 (leaf 2) EXPECT_FLOAT_EQ(3.4f, output_matrix(0, 2)); // 3.0 -(tree1) + 0.4 (leaf 2) diff --git a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc index 6491d58794..d66f645f62 100644 --- a/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/prediction_ops.cc @@ -40,24 +40,6 @@ static Status ApplyGradientTreesPredictionShapeFn(InferenceContext* c) { return Status::OK(); } -static Status ApplyGradientTreesPredictionVerboseShapeFn(InferenceContext* c) { - string learner_config_str; - c->GetAttr("learner_config", &learner_config_str).IgnoreError(); - LearnerConfig learner_config; - ParseProtoUnlimited(&learner_config, learner_config_str); - - bool reduce_dim; - c->GetAttr("reduce_dim", &reduce_dim).IgnoreError(); - // Sets the shape of the output as a matrix. - c->set_output(0, {c->Matrix(InferenceContext::kUnknownDim, - reduce_dim ? 
learner_config.num_classes() - 1 - : learner_config.num_classes())}); - c->set_output(1, {c->UnknownShape()}); - c->set_output(2, {c->Matrix(InferenceContext::kUnknownDim, - InferenceContext::kUnknownDim)}); - return Status::OK(); -} - REGISTER_OP("GradientTreesPrediction") .Attr("learner_config: string") .Attr("num_dense_float_features: int >= 0") @@ -108,58 +90,6 @@ drop_out_tree_indices_weights: Tensor of Rank 2 containing dropped trees indices and original weights of those trees during prediction. )doc"); -REGISTER_OP("GradientTreesPredictionVerbose") - .Attr("learner_config: string") - .Attr("num_dense_float_features: int >= 0") - .Attr("num_sparse_float_features: int >= 0") - .Attr("num_sparse_int_features: int >= 0") - .Attr("use_locking: bool = false") - .Attr("apply_dropout: bool") - .Attr("apply_averaging: bool") - .Attr("center_bias: bool") - .Attr("reduce_dim: bool") - .Input("tree_ensemble_handle: resource") - .Input("seed: int64") - .Input("dense_float_features: num_dense_float_features * float") - .Input("sparse_float_feature_indices: num_sparse_float_features * int64") - .Input("sparse_float_feature_values: num_sparse_float_features * float") - .Input("sparse_float_feature_shapes: num_sparse_float_features * int64") - .Input("sparse_int_feature_indices: num_sparse_int_features * int64") - .Input("sparse_int_feature_values: num_sparse_int_features * int64") - .Input("sparse_int_feature_shapes: num_sparse_int_features * int64") - .Output("predictions: float") - .Output("drop_out_tree_indices_weights: float") - .Output("leaf_index: int32") - .SetShapeFn(ApplyGradientTreesPredictionVerboseShapeFn) - .Doc(R"doc( -Runs multiple additive regression forests predictors on input instances -and computes the final prediction for each class, and outputs a matrix of -leaf ids per each tree in an ensemble. - -learner_config: Config for the learner of type LearnerConfig proto. 
Prediction -ops for now uses only LearningRateDropoutDrivenConfig config from the learner. -num_dense_float_features: Number of dense float features. -num_sparse_float_features: Number of sparse float features. -num_sparse_int_features: Number of sparse int features. -use_locking: Whether to use locking. -seed: random seed to be used for dropout. -reduce_dim: whether to reduce the dimension (legacy impl) or not. -apply_dropout: whether to apply dropout during prediction. -apply_averaging: whether averaging of tree ensembles should take place. If set -to true, will be based on AveragingConfig from learner_config. -tree_ensemble_handle: The handle to the tree ensemble. -dense_float_features: Rank 2 Tensors containing dense float feature values. -sparse_float_feature_indices: Rank 2 Tensors containing sparse float indices. -sparse_float_feature_values: Rank 1 Tensors containing sparse float values. -sparse_float_feature_shapes: Rank 1 Tensors containing sparse float shapes. -sparse_int_feature_indices: Rank 2 Tensors containing sparse int indices. -sparse_int_feature_values: Rank 1 Tensors containing sparse int values. -sparse_int_feature_shapes: Rank 1 Tensors containing sparse int shapes. -predictions: Rank 2 Tensor containing predictions per example per class. -drop_out_tree_indices_weights: Tensor of Rank 2 containing dropped trees indices -leaf_index: tensor of rank 2 containing leaf ids for each tree where an instance ended up. 
-)doc"); - REGISTER_OP("GradientTreesPartitionExamples") .Attr("num_dense_float_features: int >= 0") .Attr("num_sparse_float_features: int >= 0") diff --git a/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py b/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py index 7f6e55ae58..58f0d36b0f 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/prediction_ops.py @@ -21,5 +21,4 @@ from __future__ import print_function from tensorflow.contrib.boosted_trees.python.ops import boosted_trees_ops_loader from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_partition_examples from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_prediction -from tensorflow.contrib.boosted_trees.python.ops.gen_prediction_ops import gradient_trees_prediction_verbose # pylint: enable=unused-import diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 35ccb45f5a..5dd2e0c7f2 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -58,7 +58,6 @@ NUM_LAYERS_ATTEMPTED = "num_layers" NUM_TREES_ATTEMPTED = "num_trees" NUM_USED_HANDLERS = "num_used_handlers" USED_HANDLERS_MASK = "used_handlers_mask" -LEAF_INDEX = "leaf_index" _FEATURE_NAME_TEMPLATE = "%s_%d" @@ -72,25 +71,18 @@ def _get_column_by_index(tensor, indices): return array_ops.reshape(array_ops.gather(p_flat, i_flat), [shape[0], -1]) -def _make_predictions_dict(stamp, - logits, - partition_ids, - ensemble_stats, - used_handlers, - output_leaf_index=False, - leaf_index=None): +def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats, + used_handlers): """Returns predictions for the given logits and n_classes. Args: stamp: The ensemble stamp. 
- logits: A rank 2 `Tensor` with shape [batch_size, n_classes - 1]. that - contains predictions when no dropout was applied. + logits: A rank 2 `Tensor` with shape [batch_size, n_classes - 1]. + that contains predictions when no dropout was applied. partition_ids: A rank 1 `Tensor` with shape [batch_size]. ensemble_stats: A TreeEnsembleStatsOp result tuple. used_handlers: A TreeEnsembleUsedHandlerOp result tuple of an int and a - boolean mask. - leaf_index: A boolean variable indicating whether to output leaf index into - predictions dictionary. + boolean mask.. Returns: A dict of predictions. @@ -103,8 +95,6 @@ def _make_predictions_dict(stamp, result[NUM_TREES_ATTEMPTED] = ensemble_stats.attempted_trees result[NUM_USED_HANDLERS] = used_handlers.num_used_handlers result[USED_HANDLERS_MASK] = used_handlers.used_handlers_mask - if output_leaf_index: - result[LEAF_INDEX] = leaf_index return result @@ -278,8 +268,7 @@ class GradientBoostedDecisionTreeModel(object): features, logits_dimension, feature_columns=None, - use_core_columns=False, - output_leaf_index=False): + use_core_columns=False): """Construct a new GradientBoostedDecisionTreeModel function. Args: @@ -287,15 +276,13 @@ class GradientBoostedDecisionTreeModel(object): num_ps_replicas: Number of parameter server replicas, can be 0. ensemble_handle: A handle to the ensemble variable. center_bias: Whether to center the bias before growing trees. - examples_per_layer: Number of examples to accumulate before growing a tree - layer. It can also be a function that computes the number of examples - based on the depth of the layer that's being built. + examples_per_layer: Number of examples to accumulate before growing + a tree layer. It can also be a function that computes the number of + examples based on the depth of the layer that's being built. learner_config: A learner config. features: `dict` of `Tensor` objects. logits_dimension: An int, the dimension of logits. feature_columns: A list of feature columns. 
- output_leaf_index: A boolean variable indicating whether to output leaf - index into predictions dictionary. Raises: ValueError: if inputs are not valid. @@ -372,7 +359,6 @@ class GradientBoostedDecisionTreeModel(object): self._learner_config.multi_class_strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS and learner_config.num_classes == 2) - self._output_leaf_index = output_leaf_index def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode): """Runs prediction and returns a dictionary of the prediction results. @@ -402,44 +388,22 @@ class GradientBoostedDecisionTreeModel(object): # Make sure ensemble stats run. This will check that the ensemble has # the right stamp. with ops.control_dependencies(ensemble_stats): - leaf_matrix = [] - # Only used in infer (predict), not used in train and eval. - if self._output_leaf_index and mode == learn.ModeKeys.INFER: - predictions, _, leaf_matrix = ( - prediction_ops).gradient_trees_prediction_verbose( - ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=mode != learn.ModeKeys.TRAIN, - use_locking=True, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim) - - else: - predictions, _ = prediction_ops.gradient_trees_prediction( - ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=mode != learn.ModeKeys.TRAIN, - use_locking=True, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim) + predictions, _ = prediction_ops.gradient_trees_prediction( + ensemble_handle, + seed, 
+ self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=mode != learn.ModeKeys.TRAIN, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim) partition_ids = prediction_ops.gradient_trees_partition_examples( ensemble_handle, self._dense_floats, @@ -452,8 +416,7 @@ class GradientBoostedDecisionTreeModel(object): use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, partition_ids, - ensemble_stats, used_handlers, - self._output_leaf_index, leaf_matrix) + ensemble_stats, used_handlers) def predict(self, mode): """Returns predictions given the features and mode. @@ -558,7 +521,7 @@ class GradientBoostedDecisionTreeModel(object): aggregation_method=None)[0] strategy = self._learner_config.multi_class_strategy - class_id = -1 + class_id = constant_op.constant(-1, dtype=dtypes.int32) # Handle different multiclass strategies. if strategy == learner_pb2.LearnerConfig.TREE_PER_CLASS: # We build one vs rest trees. @@ -612,31 +575,39 @@ class GradientBoostedDecisionTreeModel(object): # Get the weights for each example for quantiles calculation, weights = self._get_weights(hessian_shape, squeezed_hessians) - regularization_config = self._learner_config.regularization - min_node_weight = self._learner_config.constraints.min_node_weight # Create all handlers ensuring resources are evenly allocated across PS. 
fc_name_idx = 0 handlers = [] init_stamp_token = constant_op.constant(0, dtype=dtypes.int64) + l1_regularization = constant_op.constant( + self._learner_config.regularization.l1, dtypes.float32) + l2_regularization = constant_op.constant( + self._learner_config.regularization.l2, dtypes.float32) + tree_complexity_regularization = constant_op.constant( + self._learner_config.regularization.tree_complexity, dtypes.float32) + min_node_weight = constant_op.constant( + self._learner_config.constraints.min_node_weight, dtypes.float32) + epsilon = 0.01 + num_quantiles = 100 + strategy_tensor = constant_op.constant(strategy) with ops.device(self._get_replica_device_setter(worker_device)): # Create handlers for dense float columns for dense_float_column_idx in range(len(self._dense_floats)): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.DenseSplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=dense_float_column_idx, - epsilon=0.01, - num_quantiles=100, + epsilon=epsilon, + num_quantiles=num_quantiles, dense_float_column=self._dense_floats[dense_float_column_idx], name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -645,14 +616,13 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( ordinal_split_handler.SparseSplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + 
l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=sparse_float_column_idx, - epsilon=0.01, - num_quantiles=100, + epsilon=epsilon, + num_quantiles=num_quantiles, sparse_float_column=sparse_tensor.SparseTensor( self._sparse_float_indices[sparse_float_column_idx], self._sparse_float_values[sparse_float_column_idx], @@ -660,7 +630,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -669,10 +639,9 @@ class GradientBoostedDecisionTreeModel(object): fc_name = self._fc_names[fc_name_idx] handlers.append( categorical_split_handler.EqualitySplitHandler( - l1_regularization=regularization_config.l1, - l2_regularization=regularization_config.l2, - tree_complexity_regularization=( - regularization_config.tree_complexity), + l1_regularization=l1_regularization, + l2_regularization=l2_regularization, + tree_complexity_regularization=tree_complexity_regularization, min_node_weight=min_node_weight, feature_column_group_id=sparse_int_column_idx, sparse_int_column=sparse_tensor.SparseTensor( @@ -682,7 +651,7 @@ class GradientBoostedDecisionTreeModel(object): name=fc_name, gradient_shape=gradient_shape, hessian_shape=hessian_shape, - multiclass_strategy=strategy, + multiclass_strategy=strategy_tensor, init_stamp_token=init_stamp_token)) fc_name_idx += 1 @@ -804,6 +773,7 @@ class GradientBoostedDecisionTreeModel(object): empty_hessians = constant_op.constant( [], dtype=dtypes.float32, shape=empty_hess_shape) + active_handlers = array_ops.unstack(active_handlers, axis=0) for handler_idx in range(len(handlers)): handler = handlers[handler_idx] is_active = active_handlers[handler_idx] @@ -1014,7 +984,7 @@ class 
GradientBoostedDecisionTreeModel(object): # This is a workaround for the slowness of graph building in tf.cond. # See (b/36554864). split_sizes = array_ops.reshape( - array_ops.shape_n(partition_ids_list), [-1]) + array_ops.shape_n(partition_ids_list), [len(partition_ids_list)]) partition_ids = array_ops.concat(partition_ids_list, axis=0) gains = array_ops.concat(gains_list, axis=0) split_infos = array_ops.concat(split_info_list, axis=0) @@ -1079,8 +1049,11 @@ class GradientBoostedDecisionTreeModel(object): # Update ensemble. update_ops = [are_all_splits_ready] - update_model = control_flow_ops.cond(continue_centering, _center_bias_fn, - _grow_ensemble_fn) + if self._center_bias: + update_model = control_flow_ops.cond(continue_centering, + _center_bias_fn, _grow_ensemble_fn) + else: + update_model = _grow_ensemble_fn() update_ops.append(update_model) # Update ensemble stats. diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 0665c6c63e..289fb195db 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -19,15 +19,18 @@ from __future__ import division from __future__ import print_function from google.protobuf import text_format + from tensorflow.contrib import layers from tensorflow.contrib.boosted_trees.proto import learner_pb2 from tensorflow.contrib.boosted_trees.proto import tree_config_pb2 from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.boosted_trees.python.utils import losses + +from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib from 
tensorflow.contrib.learn.python.learn.estimators import model_fn -from tensorflow.python.feature_column import feature_column_lib as core_feature_column + from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -725,8 +728,8 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertEquals(len(output.tree_weights), 0) self.assertEquals(stamp_token.eval(), 0) - def testPredictFnWithLeafIndexAdvancedLeft(self): - """Tests the predict function with output leaf ids.""" + def testPredictFn(self): + """Tests the predict function.""" with self.test_session() as sess: # Create ensemble with one bias node. ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() @@ -734,61 +737,12 @@ class GbdtTest(test_util.TensorFlowTestCase): """ trees { nodes { - dense_float_binary_split { - threshold: 1.0 - left_id: 1 - right_id: 2 - } - node_metadata { - gain: 0 - } - } - nodes { - leaf { - vector { - value: 0.25 - } - } - } - nodes { - leaf { - vector { - value: 0.0 - } - } - } - } - tree_weights: 1.0 - tree_metadata { - num_tree_weight_updates: 1 - num_layers_grown: 1 - is_finalized: true - } - trees { - nodes { - dense_float_binary_split { - threshold: 0.99 - left_id: 1 - right_id: 2 - } - node_metadata { - gain: 0 - } - } - nodes { - leaf { - vector { - value: 0.25 - } - } - } - nodes { - leaf { - vector { - value: 0.0 - } + leaf { + vector { + value: 0.25 } } + } } tree_weights: 1.0 tree_metadata { @@ -809,8 +763,7 @@ class GbdtTest(test_util.TensorFlowTestCase): learner_config.constraints.max_tree_depth = 1 learner_config.constraints.min_node_weight = 0 features = {} - features["dense_float"] = array_ops.constant( - [[0.0], [1.0], [1.1], [2.0]], dtype=dtypes.float32) + features["dense_float"] = array_ops.ones([4, 1], dtypes.float32) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=False, num_ps_replicas=0, @@ -819,20 +772,15 @@ class 
GbdtTest(test_util.TensorFlowTestCase): examples_per_layer=1, learner_config=learner_config, logits_dimension=1, - features=features, - output_leaf_index=True) + features=features) # Create predict op. - mode = model_fn.ModeKeys.INFER + mode = model_fn.ModeKeys.EVAL predictions_dict = sess.run(gbdt_model.predict(mode)) self.assertEquals(predictions_dict["ensemble_stamp"], 3) - # here are how the first two numbers in expected results are calculated, - # 0.5 = 0.25 + 0.25, and 0.25 = 0.25 + 0 self.assertAllClose(predictions_dict["predictions"], - [[0.5], [0.25], [0], [0]]) + [[0.25], [0.25], [0.25], [0.25]]) self.assertAllClose(predictions_dict["partition_ids"], [0, 0, 0, 0]) - self.assertAllClose(predictions_dict["leaf_index"], - [[1, 1], [1, 2], [2, 2], [2, 2]]) def testTrainFnMulticlassFullHessian(self): """Tests the GBDT train for multiclass full hessian.""" -- GitLab From d40a5c7cc4d510902c8d2bb0981438c62397ab81 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 16:43:29 -0700 Subject: [PATCH 168/902] Extracts the 'remove reverse node' optimization into its own method. PiperOrigin-RevId: 198122165 --- .../grappler/optimizers/constant_folding.cc | 100 ++++++++++-------- .../grappler/optimizers/constant_folding.h | 4 + 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index d38f5a9e81..df32d4a25d 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1695,7 +1695,6 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } } - // Remove RandomShuffle op if it is scalar or first dimension is of size 1. 
if (use_shape_info && IsRandomShuffle(*node) && !properties->GetInputProperties(node->name()).empty()) { @@ -1709,47 +1708,14 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - // Remove Reverse op over dimensions with size 1. - if (use_shape_info && node->op() == "ReverseV2" && - properties->GetInputProperties(node->name()).size() >= 2) { - const auto& shape = properties->GetInputProperties(node->name())[0].shape(); - if (shape.unknown_rank()) { - // Not optimizable. - return Status::OK(); - } - const auto& a = properties->GetInputProperties(node->name())[1]; - if (TensorShape::IsValid(a.shape()) && a.has_value()) { - Tensor axis(a.dtype(), a.shape()); - if (!axis.FromProto(a.value())) { - return errors::InvalidArgument("Cannot parse tensor from proto: ", - a.value().DebugString()); - } - std::set target_axes; - for (int j = 0; j < axis.NumElements(); ++j) { - // value of axis can be negative. - if (axis.dtype() == DT_INT64) { - target_axes.insert((axis.vec()(j) + shape.dim_size()) % - shape.dim_size()); - } else { - target_axes.insert((axis.vec()(j) + shape.dim_size()) % - shape.dim_size()); - } - } - - // The node is replaceable iff - // unknown_rank == false && - // (dim_size == 0 || all dims have size 1 || - // all dims with > 1 size are not in target_axes) - bool replaceable = !shape.unknown_rank(); - for (int j = 0; replaceable && j < shape.dim_size(); ++j) { - replaceable &= shape.dim(j).size() == 1 || - target_axes.find(j) == target_axes.end(); - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } - } + bool remove_reverse_successful = false; + Status remove_reverse_status = + RemoveReverse(*properties, use_shape_info, optimized_graph, node, + &remove_reverse_successful); + if (!remove_reverse_status.ok()) { + return remove_reverse_status; + } else if (remove_reverse_successful) { + return Status::OK(); } bool simplify_slice_successful = false; @@ -1865,6 
+1831,56 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +Status ConstantFolding::RemoveReverse(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, + bool* success) { + if (use_shape_info && node->op() == "ReverseV2" && + properties.GetInputProperties(node->name()).size() >= 2) { + const auto& shape = properties.GetInputProperties(node->name())[0].shape(); + if (shape.unknown_rank()) { + // Not optimizable. + return Status::OK(); + } + const auto& a = properties.GetInputProperties(node->name())[1]; + if (TensorShape::IsValid(a.shape()) && a.has_value()) { + Tensor axis(a.dtype(), a.shape()); + if (!axis.FromProto(a.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + a.value().DebugString()); + } + std::set target_axes; + for (int j = 0; j < axis.NumElements(); ++j) { + // value of axis can be negative. + if (axis.dtype() == DT_INT64) { + target_axes.insert((axis.vec()(j) + shape.dim_size()) % + shape.dim_size()); + } else { + target_axes.insert((axis.vec()(j) + shape.dim_size()) % + shape.dim_size()); + } + } + + // The node is replaceable iff + // unknown_rank == false && + // (dim_size == 0 || all dims have size 1 || + // all dims with > 1 size are not in target_axes) + bool replaceable = !shape.unknown_rank(); + for (int j = 0; replaceable && j < shape.dim_size(); ++j) { + replaceable &= shape.dim(j).size() == 1 || + target_axes.find(j) == target_axes.end(); + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + *success = true; + return Status::OK(); + } + } + } + *success = false; + return Status::OK(); +} + Status ConstantFolding::SimplifySlice(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 
2da63950d6..9a3ea03552 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -190,6 +190,10 @@ class ConstantFolding : public GraphOptimizer { // Simplifies a Slice operation to an Identity operation if applicable. Status SimplifySlice(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, bool* success); + + // Removes Reverse op over dimensions with size 1. + Status RemoveReverse(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node, bool* success); // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From e19edb4f9706632b399a99760f84e09f78b5987f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 25 May 2018 16:43:40 -0700 Subject: [PATCH 169/902] Ignore while loops instead of mangling them in the automatic control dependencies. PiperOrigin-RevId: 198122188 --- tensorflow/python/eager/function.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b46e0612c3..23d87fb394 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1215,6 +1215,9 @@ class AutomaticControlDependencies(object): # test that it works. Support while loops. Support init_scope escaping from # this. for op in new_operations: + # TODO(apassos) make this code safely support while loops. 
+ if isinstance(op._control_flow_context, control_flow_ops.WhileContext): # pylint: disable=protected-access + continue control_inputs = set() # Ensure stateful ops run if (op.type not in self._graph._registered_ops # pylint: disable=protected-access -- GitLab From e140ab8d07dd6aec70f61c0c6939506f6e67ac5e Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 25 May 2018 17:05:33 -0700 Subject: [PATCH 170/902] [tf.data] Fixing concurrency issue in `map_and_batch`. PiperOrigin-RevId: 198124860 --- tensorflow/core/kernels/data/map_and_batch_dataset_op.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 879bb40331..f41a810b07 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -211,6 +211,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { + mutex_lock external_l(external_mu_); mutex_lock l(mu_); EnsureRunnerThreadStarted(ctx); BatchResult* result = &batch_results_[ComputeIndex(input_batch_)]; @@ -220,6 +221,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { protected: Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock external_l(external_mu_); mutex_lock l(mu_); // Wait for all in-flight calls to complete. 
while (num_calls_ > 0) { @@ -243,6 +245,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { Status RestoreInternal(IteratorContext* ctx, IteratorStateReader* reader) override { + mutex_lock external_l(external_mu_); mutex_lock l(mu_); TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); TF_RETURN_IF_ERROR( @@ -629,6 +632,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + // Used for coordination between the main thread, the runner thread, and + // the callback threads. mutex mu_; // Used for coordination between the main thread, the runner thread, and // the callback threads. In particular, the runner thread should only @@ -636,6 +641,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { // user specified level of parallelism and there are slots available in // the `batch_results_` buffer. condition_variable cond_var_; + // Used for serializing external parallelism. + mutex external_mu_ ACQUIRED_BEFORE(mu_); // Counts the number of outstanding calls for this batch. int64 num_calls_ GUARDED_BY(mu_) = 0; // Counts the total number of calls. -- GitLab From 06717b77e05bd602d10fe40f4519dbb105fabd5c Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 25 May 2018 17:12:49 -0700 Subject: [PATCH 171/902] Provide a step container to OpKernelContexts when running eagerly. This lets us run some ops that require step containers (e.g. TensorArray-related ops). 
Before change: Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------- BM_CreateGraph 61292 80915 8581 BM_RunGraph 7899 13398 51251 BM_CreateAndDestroySession 2588 2594 269838 BM_KernelAndDeviceInit 2971 2976 235908 BM_KernelAndDeviceRun 505 506 1000000 After change: Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------- BM_CreateGraph 78295 105539 8698 BM_RunGraph 9907 16988 47908 BM_CreateAndDestroySession 2773 2778 247635 BM_KernelAndDeviceInit 2678 2682 270054 BM_KernelAndDeviceRun 553 554 1000000 PiperOrigin-RevId: 198125630 --- tensorflow/core/common_runtime/eager/kernel_and_device.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index a63b2b9711..2a43a31c02 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -86,6 +87,11 @@ Status KernelAndDevice::Run(std::vector* input_tensors, [](std::function f) { f(); }; params.runner = &runner; + ScopedStepContainer step_container(0, [this](const string& name) { + device_->resource_manager()->Cleanup(name).IgnoreError(); + }); + params.step_container = &step_container; + OpKernelContext context(¶ms); if (kernel_->def().op() == "_Recv") { -- GitLab From 8fcc95ebf42ed8eea543ec2edf1a1ed1c62ca7e8 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 25 May 2018 17:22:11 -0700 Subject: [PATCH 172/902] Enable while loop constant sinking for GPU To avoid keeping constants in while loop bodies after optimization (where they may cause extra copies) we run a late pass of LICM that has been asked to hoist constants when it can. 
PiperOrigin-RevId: 198126497 --- tensorflow/compiler/xla/service/BUILD | 1 + tensorflow/compiler/xla/service/gpu/BUILD | 2 + .../compiler/xla/service/gpu/gpu_compiler.cc | 12 +++ .../while_loop_invariant_code_motion.cc | 27 ++++++- .../while_loop_invariant_code_motion.h | 16 ++++ .../while_loop_invariant_code_motion_test.cc | 73 +++++++++++++++++++ 6 files changed, 127 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 749873e560..2976bdb9e9 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2862,6 +2862,7 @@ tf_cc_test( ":while_loop_invariant_code_motion", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index ffb1af2d87..2794930248 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -546,6 +546,8 @@ cc_library( "//tensorflow/compiler/xla/service:reshape_mover", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", + "//tensorflow/compiler/xla/service:while_loop_constant_sinking", + "//tensorflow/compiler/xla/service:while_loop_invariant_code_motion", "//tensorflow/compiler/xla/service:while_loop_simplifier", "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination", "//tensorflow/compiler/xla/service/gpu:cudnn_batchnorm_rewriter", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 5ef422c90b..b857219807 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -73,6 +73,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/reshape_mover.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" +#include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" +#include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h" #include "tensorflow/compiler/xla/service/while_loop_simplifier.h" #include "tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -176,6 +178,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, /*is_layout_sensitive=*/false, [](const Shape&, const Shape&) { return false; }); pass.AddPass(); + pass.AddPass(); pass.AddPass(); pass.AddPass(); pass.AddPass(); @@ -274,6 +277,15 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status()); } } + + { + // Do an aggressive LICM pass over while loops. In particular, this hoists + // constants that were sunk by WhileLoopConstantSinking. Leaving them in + // the while loop may result in unnecessary copies. + HloPassPipeline pipeline("while-loop-licm"); + pipeline.AddPass(true); + TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); + } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 321fdeb1ea..09ddcffb22 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -98,14 +98,17 @@ static void CreateLoopInvariantCopy( // Returns true if `instruction` is worth hoisting only if it lets us hoist some // instruction using it. The rationale is that hoisting these instructions will // prevent simplification and fusion in the while body. 
-static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { +bool WhileLoopInvariantCodeMotion::NotWorthHoistingIndividually( + const HloInstruction& instruction) { switch (instruction.opcode()) { default: return false; + case HloOpcode::kConstant: + return !hoist_constants_; + case HloOpcode::kBitcast: case HloOpcode::kBroadcast: - case HloOpcode::kConstant: case HloOpcode::kReshape: case HloOpcode::kReverse: case HloOpcode::kSlice: @@ -115,7 +118,8 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { } } -static StatusOr TryHoistingInvariantInstructionsFromWhileBody( +StatusOr +WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody( HloInstruction* while_instr) { auto print_no_metadata = HloPrintOptions{}.set_print_metadata(false); @@ -161,12 +165,16 @@ static StatusOr TryHoistingInvariantInstructionsFromWhileBody( } } - if (unhoisted_invariant_instructions.empty()) { + if (unhoisted_invariant_instructions.empty() && !hoist_constants_) { // There are no obviously loop invariant elements in the state being // threaded through the while loop so give up. In theory this precondition // is too strong -- we could have code that e.g. permutes the elements in // the while state but uses a select to pick the same value on every // iteration. + // + // If we were asked to hoist constants, we need to scan the while body for + // constants even if we didn't find any loop invariant values in the while + // state tuple. 
return false; } @@ -243,6 +251,9 @@ static StatusOr TryHoistingInvariantInstructionsFromWhileBody( } StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { + VLOG(2) << "HLO module before WhileLoopConstantSinking:"; + XLA_VLOG_LINES(2, module->ToString()); + bool changed = false; std::vector while_instrs; for (auto* comp : module->computations()) { @@ -270,6 +281,14 @@ StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { TryHoistingInvariantInstructionsFromWhileBody(while_instr)); changed |= result; } + + if (changed) { + VLOG(2) << "HLO module after WhileLoopConstantSinking:"; + XLA_VLOG_LINES(2, module->ToString()); + } else { + VLOG(2) << "HLO module unchanged after WhileLoopConstantSinking"; + } + return changed; } } // namespace xla diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h index 8c4b765b00..8e6cc87875 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h @@ -27,12 +27,28 @@ namespace xla { class WhileLoopInvariantCodeMotion : public HloPassInterface { public: + // If `hoist_constants` is true then constants are always hoisted out of while + // loop bodies. Otherwise they are only hoisted out if they enable other + // non-trivial computations to be hoisted out. + // + // Setting `hoist_constants` to false can be helpful if LICM is run in the mid + // level HLO pipeline because hoisting constants out of while loop bodies can + // break optimizations like constant folding. 
+ explicit WhileLoopInvariantCodeMotion(bool hoist_constants = false) + : hoist_constants_(hoist_constants) {} ~WhileLoopInvariantCodeMotion() override = default; tensorflow::StringPiece name() const override { return "while-loop-invariant-code-motion"; } StatusOr Run(HloModule* module) override; + + private: + bool NotWorthHoistingIndividually(const HloInstruction& instruction); + StatusOr TryHoistingInvariantInstructionsFromWhileBody( + HloInstruction* while_instr); + + bool hoist_constants_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc index 799340fda9..e1ec12192f 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/core/status_test_util.h" namespace xla { @@ -438,5 +439,77 @@ TEST_F(WhileLoopInvariantCodeMotionTest, BodyHasNonTupleRoot) { EXPECT_FALSE(simplified_loop); } +const char* const kConstantHoistingTestCase = R"( +HloModule ModuleWithWhile + +body { + p_body = (f32[2]{0}) parameter(0) + p_body.1 = f32[2]{0} get-tuple-element(p_body), index=0 + const = f32[2]{0} constant({3, 4}) + add.0 = f32[2]{0} add(p_body.1, const) + ROOT root = (f32[2]{0}) tuple(add.0) +} + +condition { + p_cond = (f32[2]{0}) parameter(0) + ROOT result = pred[] constant(true) +} + +ENTRY entry { + const_0 = f32[2]{0} constant({1, 2}) + while_init = (f32[2]{0}) tuple(const_0) + ROOT while = (f32[2]{0}) while(while_init), condition=condition, body=body +} +)"; + +TEST_F(WhileLoopInvariantCodeMotionTest, HoistsConstantWhenAsked) { + 
ParseAndVerifyModule(kConstantHoistingTestCase); + + TF_ASSERT_OK_AND_ASSIGN( + bool simplified_loop, + WhileLoopInvariantCodeMotion{/*hoist_constants=*/true}.Run(&module())); + EXPECT_TRUE(simplified_loop); + + HloComputation* while_body = module().GetComputationWithName("wide.body"); + ASSERT_NE(while_body, nullptr); + + // We expect the while body to be the equivalent of: + // + // wide.body { + // wide_param.1 = (f32[2]{0}, f32[2]{0}) parameter(0) + // get-tuple-element.1 = f32[2]{0} get-tuple-element(wide_param.1), index=0 + // tuple.1 = (f32[2]{0}) tuple(get-tuple-element.1) + // get-tuple-element.4 = f32[2]{0} get-tuple-element(tuple.1), index=0 + // get-tuple-element.7 = f32[2]{0} get-tuple-element(wide_param.1), index=1 + // add.1 = f32[2]{0} add(get-tuple-element.4, get-tuple-element.7) + // tuple.3 = (f32[2]{0}) tuple(add.1) + // get-tuple-element.8 = f32[2]{0} get-tuple-element(tuple.3), index=0 + // get-tuple-element.9 = f32[2]{0} get-tuple-element(wide_param.1), index=1 + // ROOT tuple.4 = (f32[2]{0}, f32[2]{0}) tuple(get-tuple-element.8, + // get-tuple-element.9) + // } + + auto wide_param_1 = op::Parameter(0); + auto get_tuple_element_1 = op::GetTupleElement(wide_param_1, 0); + auto tuple_1 = op::Tuple(get_tuple_element_1); + auto get_tuple_element_4 = op::GetTupleElement(tuple_1, 0); + auto get_tuple_element_7 = op::GetTupleElement(wide_param_1, 1); + auto add_1 = op::Add(get_tuple_element_4, get_tuple_element_7); + auto tuple_3 = op::Tuple(add_1); + auto get_tuple_element_8 = op::GetTupleElement(tuple_3, 0); + auto get_tuple_element_9 = op::GetTupleElement(wide_param_1, 1); + auto tuple_4 = op::Tuple(get_tuple_element_8, get_tuple_element_9); + + EXPECT_THAT(while_body->root_instruction(), tuple_4); +} + +TEST_F(WhileLoopInvariantCodeMotionTest, DoesNotHoistConstantByDefault) { + ParseAndVerifyModule(kConstantHoistingTestCase); + + TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop, + WhileLoopInvariantCodeMotion{}.Run(&module())); + 
EXPECT_FALSE(simplified_loop); +} + } // namespace } // namespace xla -- GitLab From 36d77e84af08ded6c9b964d900231f7d04d507a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 17:22:17 -0700 Subject: [PATCH 173/902] Turn on heuristic (mostly-NHWC) convolution layout assignment for (V100, fp16) by default. Also increase the column reduction tile size to reduce atomic operations. PiperOrigin-RevId: 198126505 --- tensorflow/compiler/xla/service/gpu/gpu_options.cc | 4 ++-- .../compiler/xla/service/gpu/ir_emitter_unnested.cc | 10 +++++++--- tensorflow/compiler/xla/tests/BUILD | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/compiler/xla/service/gpu/gpu_options.cc index 174aaf122c..35b4b4e20b 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_options.cc @@ -20,8 +20,8 @@ namespace xla { namespace gpu { bool ConvUseLayoutHeuristic(const HloModuleConfig& config) { - return config.debug_options().xla_backend_extra_options().count( - "xla_gpu_experimental_conv_use_layout_heuristic"); + return !config.debug_options().xla_backend_extra_options().count( + "xla_gpu_experimental_conv_disable_layout_heuristic"); } } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index d07d197784..ae4e305b80 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1171,9 +1171,13 @@ Status IrEmitterUnnested::EmitColumnReduction( // 4567 // Numbers indicate tile IDs. // // Each tile is first partially reduced to a scalar by a thread, and then the - // scalar is accumulated to the output vector using atomic operations. We - // choose 16 as the tile size, which matches Eigen's ColumnReduceKernel. 
- constexpr int64 kTileSize = 16; + // scalar is accumulated to the output vector using atomic operations. + // + // We choose 128 as the tile size based on empirical evidence. It's big enough + // to reduce the amount of atomic adds in the end, maximizing the memory + // bandwidth. + constexpr int64 kTileSize = 128; + // If the height is not a multiple of the tile size, we pad the bottom of the // input matrix. const int64 height_in_tiles = CeilOfRatio(height, kTileSize); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 1a12fd0113..a62d49e9c7 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -808,7 +808,7 @@ xla_test( name = "convolution_test_gpu_alternative_layout", timeout = "long", srcs = ["convolution_test.cc"], - backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_use_layout_heuristic"]}, + backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_disable_layout_heuristic"]}, backends = ["gpu"], shard_count = 25, deps = CONVOLUTION_TEST_DEPS, -- GitLab From 5ef609b6e542dc1e3f0eaf195a1f8d8d4e7ff8af Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 25 May 2018 17:29:37 -0700 Subject: [PATCH 174/902] [TF:XLA] Add direct implementation of AssignVariableOp for XLA devices. This allows us to avoid an XLA compilation and tensor copies when assigning to a variable placed on an XLA device. 
PiperOrigin-RevId: 198127062 --- tensorflow/compiler/jit/xla_device_context.cc | 59 ++++++++++++-- tensorflow/compiler/jit/xla_device_context.h | 9 ++- tensorflow/compiler/jit/xla_device_ops.cc | 81 +++++++++++++++++++ tensorflow/compiler/jit/xla_device_ops.h | 12 +++ .../compiler/tests/variable_ops_test.py | 19 +++++ .../compiler/tf2xla/kernels/variable_ops.cc | 2 +- 6 files changed, 172 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index c718125a38..71e63b110b 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -54,7 +54,13 @@ XlaTransferManager::XlaTransferManager( client_(client), transfer_manager_(client->backend().transfer_manager()), transfer_as_literal_(transfer_as_literal), - shape_representation_fn_(std::move(shape_representation_fn)) {} + shape_representation_fn_(std::move(shape_representation_fn)) { + if (!shape_representation_fn_) { + shape_representation_fn_ = [](const TensorShape& shape, DataType dtype) { + return shape; + }; + } +} Status XlaTransferManager::TransferLiteralToDevice( const Tensor& host_tensor, Tensor* device_tensor) const { @@ -113,13 +119,8 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, XlaTensor* xla_tensor = XlaTensor::FromTensor(device_tensor); CHECK(xla_tensor); - TensorShape shape; - if (shape_representation_fn_) { - shape = shape_representation_fn_(device_tensor->shape(), - device_tensor->dtype()); - } else { - shape = device_tensor->shape(); - } + TensorShape shape = shape_representation_fn_(device_tensor->shape(), + device_tensor->dtype()); if (!xla_tensor->has_shaped_buffer()) { Status s = xla_tensor->AllocateShapedBuffer( device_tensor->dtype(), shape, client_, @@ -203,6 +204,42 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor, done(Status::OK()); } +void XlaTransferManager::CopyDeviceTensorToDevice(const Tensor& 
src_tensor, + Tensor* dst_tensor, + const StatusCallback& done) { + // TODO(phawkins): replace this code with an asynchronous implementation. + auto body = [&]() { + if (src_tensor.NumElements() == 0) { + return Status::OK(); + } + XlaTensor* xla_src = XlaTensor::FromTensor(&src_tensor); + XlaTensor* xla_dst = XlaTensor::FromTensor(dst_tensor); + CHECK(xla_src && xla_dst) + << "Missing destination tensor for device-to-device copy"; + if (!xla_dst->has_shaped_buffer()) { + TensorShape shape = + shape_representation_fn_(src_tensor.shape(), src_tensor.dtype()); + TF_RETURN_IF_ERROR( + xla_dst->AllocateShapedBuffer(src_tensor.dtype(), shape, client_, + stream_->parent()->device_ordinal())); + } + TF_RETURN_IF_ERROR( + xla_dst->shaped_buffer().buffers().ForEachMutableElementWithStatus( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + const se::DeviceMemoryBase& from_buffer = + xla_src->shaped_buffer().buffers().element(index); + CHECK_EQ(buffer->size(), from_buffer.size()); + if (!stream_->parent()->SynchronousMemcpy(buffer, from_buffer, + buffer->size())) { + return errors::Internal("Device to device memcpy failed"); + } + return Status::OK(); + })); + return Status::OK(); + }; + done(body()); +} + XlaDeviceContext::XlaDeviceContext( se::Stream* stream, xla::LocalClient* client, bool transfer_as_literal, XlaCompiler::ShapeRepresentationFn shape_representation_fn) @@ -224,4 +261,10 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, done); } +void XlaDeviceContext::CopyDeviceTensorToDevice(const Tensor& src_tensor, + Tensor* dst_tensor, + const StatusCallback& done) { + manager_.CopyDeviceTensorToDevice(src_tensor, dst_tensor, done); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h index 9af9655868..ee346e5653 100644 --- a/tensorflow/compiler/jit/xla_device_context.h +++ b/tensorflow/compiler/jit/xla_device_context.h @@ -55,6 +55,10 @@ 
class XlaTransferManager { void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece tensor_name, Device* device, Tensor* cpu_tensor, StatusCallback done); + + void CopyDeviceTensorToDevice(const Tensor& src_tensor, Tensor* dst_tensor, + const StatusCallback& done); + se::Stream* stream() const { return stream_; } private: @@ -72,7 +76,7 @@ class XlaTransferManager { xla::TransferManager* transfer_manager_; // True if we must use XLA's TransferManager for correct device transfers. const bool transfer_as_literal_; - const XlaCompiler::ShapeRepresentationFn shape_representation_fn_; + XlaCompiler::ShapeRepresentationFn shape_representation_fn_; }; // DeviceContext for operators assigned to XlaDevice devices. The @@ -90,6 +94,9 @@ class XlaDeviceContext : public DeviceContext { void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece tensor_name, Device* device, Tensor* cpu_tensor, StatusCallback done) override; + void CopyDeviceTensorToDevice(const Tensor& src_tensor, Tensor* dst_tensor, + const StatusCallback& done); + se::Stream* stream() const override { return manager_.stream(); } private: diff --git a/tensorflow/compiler/jit/xla_device_ops.cc b/tensorflow/compiler/jit/xla_device_ops.cc index f68dba6b6a..5ecb1afa7b 100644 --- a/tensorflow/compiler/jit/xla_device_ops.cc +++ b/tensorflow/compiler/jit/xla_device_ops.cc @@ -15,7 +15,10 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_device_ops.h" +#include + #include "tensorflow/compiler/jit/xla_device_context.h" +#include "tensorflow/compiler/jit/xla_tensor.h" namespace tensorflow { @@ -26,4 +29,82 @@ void XlaDeviceDummyOp::Compute(OpKernelContext* ctx) { << type_string() << " on an XLA device. 
This should never happen."; } +XlaAssignVariableOp::XlaAssignVariableOp(OpKernelConstruction* c) + : AsyncOpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_)); +} + +void XlaAssignVariableOp::ComputeAsync(OpKernelContext* context, + DoneCallback done) { + OP_REQUIRES_ASYNC(context, dtype_ == context->input(1).dtype(), + errors::InvalidArgument( + "Variable and value dtypes don't match; respectively, ", + dtype_, " and ", context->input(1).dtype()), + done); + Var* variable = nullptr; + OP_REQUIRES_OK_ASYNC( + context, + LookupOrCreateResource( + context, HandleFromInput(context, 0), &variable, + [this, context](Var** ptr) { + *ptr = new Var(dtype_); + PersistentTensor unused; + Tensor* tmp; + AllocatorAttributes attr; + TF_RETURN_IF_ERROR(context->allocate_persistent( + dtype_, context->input(1).shape(), &unused, &tmp, attr)); + *(*ptr)->tensor() = *tmp; + return Status::OK(); + }), + done); + core::ScopedUnref s(variable); + + OP_REQUIRES_ASYNC(context, variable->tensor()->dtype() == dtype_, + errors::InvalidArgument( + "Trying to assign variable with wrong dtype. Expected ", + DataTypeString(variable->tensor()->dtype()), " got ", + DataTypeString(dtype_)), + done); + + const Tensor& value = context->input(1); + AllocatorAttributes attr; + + // Copying is unnecessary if we are the last user of the value tensor, we can + // just adopt the input tensor's buffer instead. + std::unique_ptr input_alias = context->forward_input( + 1, /*output_index=*/OpKernelContext::Params::kNoReservation, dtype_, + value.shape(), DEVICE_MEMORY, attr); + mutex_lock ml(*variable->mu()); + variable->is_initialized = true; + if (input_alias) { + *variable->tensor() = *input_alias; + done(); + return; + } + + // Need to copy, but maybe we can re-use variable's buffer? 
+ if (!XlaTensor::RefCountIsOne(*variable->tensor()) || + !variable->tensor()->shape().IsSameSize(value.shape())) { + // Copy to new buffer + PersistentTensor unused; + Tensor* tmp; + OP_REQUIRES_OK_ASYNC(context, + context->allocate_persistent(dtype_, value.shape(), + &unused, &tmp, attr), + done); + *variable->tensor() = *tmp; + } + + XlaDeviceContext* device_context = + static_cast(context->op_device_context()); + + variable->Ref(); + device_context->CopyDeviceTensorToDevice( + value, variable->tensor(), [context, variable, done](Status status) { + variable->Unref(); + context->SetStatus(status); + done(); + }); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 536325774b..b27c32e9bc 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -42,6 +42,15 @@ class XlaDeviceDummyOp : public OpKernel { void Compute(OpKernelContext* ctx) override; }; +class XlaAssignVariableOp : public AsyncOpKernel { + public: + explicit XlaAssignVariableOp(OpKernelConstruction* c); + void ComputeAsync(OpKernelContext* context, DoneCallback done) override; + + private: + DataType dtype_; +}; + #define REGISTER_XLA_LAUNCH_KERNEL(DEVICE, KERNEL, TYPES) \ REGISTER_KERNEL_BUILDER(Name("XlaLaunch") \ .Device(DEVICE) \ @@ -78,6 +87,9 @@ class XlaDeviceDummyOp : public OpKernel { REGISTER_KERNEL_BUILDER( \ Name("ReadVariableOp").Device(DEVICE).HostMemory("resource"), \ ReadVariableOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("AssignVariableOp").Device(DEVICE).HostMemory("resource"), \ + XlaAssignVariableOp); \ REGISTER_KERNEL_BUILDER(Name("ControlTrigger").Device(DEVICE), \ ControlTriggerOp); \ REGISTER_KERNEL_BUILDER(Name("Switch").Device(DEVICE).HostMemory("pred"), \ diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py index 8ecad00f6e..2c09b03d5a 100644 --- a/tensorflow/compiler/tests/variable_ops_test.py 
+++ b/tensorflow/compiler/tests/variable_ops_test.py @@ -187,6 +187,25 @@ class VariableOpsTest(XLATestCase): rtol=1e-4) self.assertAllClose(np.array([1.9, 2.9], dtype=np.float32), vb, rtol=1e-4) + def testWriteOfAliasedTensor(self): + for dtype in self.numeric_types: + init = np.array([[1, 2j], [3, 4]]).astype(dtype) + update = np.array([[7, 1j], [2, 11]]).astype(dtype) + with self.test_session() as sess, self.test_scope(): + v = resource_variable_ops.ResourceVariable(init) + sess.run(variables.variables_initializer([v])) + p = array_ops.placeholder(dtype) + q = array_ops.identity(p) + x = v.read_value() + # Writes the value of 'p' to 'v', but keeps a reference to the original + # value of 'v' so the variable update cannot reuse its buffer. + with ops.control_dependencies([x]): + y = v.assign(q) + result = sess.run([x, y, q], {p: update}) + self.assertAllClose(init, result[0]) + self.assertAllClose(update, result[1]) + self.assertAllClose(update, result[2]) + class StridedSliceAssignChecker(object): """Compares the results of a slice assignment using Tensorflow and numpy.""" diff --git a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc index 631cd4471b..a163fa0a5b 100644 --- a/tensorflow/compiler/tf2xla/kernels/variable_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/variable_ops.cc @@ -67,7 +67,7 @@ class AssignVariableOp : public XlaOpKernel { ctx->AssignVariable(0, ctx->input_type(1), ctx->Input(1))); } }; -REGISTER_XLA_OP(Name("AssignVariableOp"), AssignVariableOp); +REGISTER_XLA_OP(Name("AssignVariableOp").CompilationOnly(), AssignVariableOp); class AssignAddVariableOp : public XlaOpKernel { public: -- GitLab From a6eb244b2b8ee4d9592a705c4bc0771e4d708565 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 25 May 2018 17:37:01 -0700 Subject: [PATCH 175/902] Minor eager performance improvements - remove linear regression dependence on global step. 
This speeds things up a lot for the benchmark (since it removes a bunch of unnecessary code), but is obviously not a fair comparison. I think it's worth doing, since I don't see any reason to have a global step in eager. - nn_ops dropout had an unnecessary convert_to_tensor, convert back to numpy (with a GPU this would copy out, copy back). - cudnn_recurrent reshape would always fall back to the slow path - so I just converted it to be in the fast path - this will be low impact. - tensor_shape should not generate a new object every time - remove unnecessary list creation and searching in some dtypes functions PiperOrigin-RevId: 198127757 --- .../linear_regression/linear_regression.py | 6 ++-- tensorflow/python/framework/dtypes.py | 28 +++++++++---------- tensorflow/python/framework/tensor_shape.py | 5 +++- .../python/keras/layers/cudnn_recurrent.py | 6 ++-- tensorflow/python/ops/nn_ops.py | 19 +++++++++---- 5 files changed, 39 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py index 2259c20741..099b712fc0 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py +++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py @@ -75,7 +75,6 @@ def fit(model, dataset, optimizer, verbose=False, logdir=None): mse = lambda xs, ys: mean_square_loss(model, xs, ys) loss_and_grads = tfe.implicit_value_and_gradients(mse) - tf.train.get_or_create_global_step() if logdir: # Support for TensorBoard summaries. 
Once training has started, use: # tensorboard --logdir= @@ -87,12 +86,13 @@ def fit(model, dataset, optimizer, verbose=False, logdir=None): if verbose: print("Iteration %d: loss = %s" % (i, loss.numpy())) - optimizer.apply_gradients(grads, global_step=tf.train.get_global_step()) + optimizer.apply_gradients(grads) if logdir: with summary_writer.as_default(): with tf.contrib.summary.always_record_summaries(): - tf.contrib.summary.scalar("loss", loss) + tf.contrib.summary.scalar("loss", loss, step=i) + tf.contrib.summary.scalar("step", i, step=i) def synthetic_dataset(w, b, noise_level, batch_size, num_batches): diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py index 7f9ef53457..c3f70df7d8 100644 --- a/tensorflow/python/framework/dtypes.py +++ b/tensorflow/python/framework/dtypes.py @@ -120,11 +120,7 @@ class DType(object): @property def is_numpy_compatible(self): - numpy_incompatible = [ - types_pb2.DT_VARIANT, types_pb2.DT_VARIANT_REF, types_pb2.DT_RESOURCE, - types_pb2.DT_RESOURCE_REF - ] - return self._type_enum not in numpy_incompatible + return self._type_enum not in _NUMPY_INCOMPATIBLE @property def as_numpy_dtype(self): @@ -162,7 +158,7 @@ class DType(object): @property def is_quantized(self): """Returns whether this is a quantized data type.""" - return self.base_dtype in [qint8, quint8, qint16, quint16, qint32] + return self.base_dtype in _QUANTIZED_DTYPES_NO_REF @property def is_unsigned(self): @@ -401,6 +397,11 @@ quint16_ref = DType(types_pb2.DT_QUINT16_REF) qint32_ref = DType(types_pb2.DT_QINT32_REF) bfloat16_ref = DType(types_pb2.DT_BFLOAT16_REF) +_NUMPY_INCOMPATIBLE = frozenset([ + types_pb2.DT_VARIANT, types_pb2.DT_VARIANT_REF, types_pb2.DT_RESOURCE, + types_pb2.DT_RESOURCE_REF +]) + # Maintain an intern table so that we don't have to create a large # number of small objects. 
_INTERN_TABLE = { @@ -645,10 +646,10 @@ _TF_TO_NP = { _np_bfloat16, } -QUANTIZED_DTYPES = frozenset([ - qint8, quint8, qint16, quint16, qint32, qint8_ref, quint8_ref, qint16_ref, - quint16_ref, qint32_ref -]) +_QUANTIZED_DTYPES_NO_REF = frozenset([qint8, quint8, qint16, quint16, qint32]) +_QUANTIZED_DTYPES_REF = frozenset( + [qint8_ref, quint8_ref, qint16_ref, quint16_ref, qint32_ref]) +QUANTIZED_DTYPES = _QUANTIZED_DTYPES_REF.union(_QUANTIZED_DTYPES_NO_REF) tf_export("QUANTIZED_DTYPES").export_constant(__name__, "QUANTIZED_DTYPES") _PYTHON_TO_TF = { @@ -662,10 +663,9 @@ def as_dtype(type_value): """Converts the given `type_value` to a `DType`. Args: - type_value: A value that can be converted to a `tf.DType` - object. This may currently be a `tf.DType` object, a - [`DataType` - enum](https://www.tensorflow.org/code/tensorflow/core/framework/types.proto), + type_value: A value that can be converted to a `tf.DType` object. This may + currently be a `tf.DType` object, a [`DataType` + enum](https://www.tensorflow.org/code/tensorflow/core/framework/types.proto), a string type name, or a `numpy.dtype`. 
Returns: diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 0dd29460ed..c9be3d5005 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -961,9 +961,12 @@ def unknown_shape(ndims=None): return TensorShape([Dimension(None)] * ndims) +_SCALAR_SHAPE = TensorShape([]) + + def scalar(): """Returns a shape representing a scalar.""" - return TensorShape([]) + return _SCALAR_SHAPE def vector(length): diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py index 5c4a2dbe92..ad6594279d 100644 --- a/tensorflow/python/keras/layers/cudnn_recurrent.py +++ b/tensorflow/python/keras/layers/cudnn_recurrent.py @@ -20,6 +20,7 @@ from __future__ import print_function import collections +from tensorflow.python.framework import constant_op from tensorflow.python.keras import backend as K from tensorflow.python.keras import constraints from tensorflow.python.keras import initializers @@ -71,10 +72,11 @@ class _CuDNNRNN(RNN): self.constants_spec = None self._states = None self._num_constants = None + self._vector_shape = constant_op.constant([-1]) def _canonical_to_params(self, weights, biases): - weights = [array_ops.reshape(x, (-1,)) for x in weights] - biases = [array_ops.reshape(x, (-1,)) for x in biases] + weights = [array_ops.reshape(x, self._vector_shape) for x in weights] + biases = [array_ops.reshape(x, self._vector_shape) for x in biases] return array_ops.concat(weights + biases, axis=0) def call(self, inputs, mask=None, training=None, initial_state=None): diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 09a4425436..a0b55eb077 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2311,13 +2311,22 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 
1: raise ValueError("keep_prob must be a scalar tensor or a float in the " "range (0, 1], got %g" % keep_prob) - keep_prob = ops.convert_to_tensor( - keep_prob, dtype=x.dtype, name="keep_prob") - keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar()) - # Do nothing if we know keep_prob == 1 - if tensor_util.constant_value(keep_prob) == 1: + # Early return if nothing needs to be dropped. + if isinstance(keep_prob, float) and keep_prob == 1: return x + if context.executing_eagerly(): + if isinstance(keep_prob, ops.EagerTensor): + if keep_prob.numpy() == 1: + return x + else: + keep_prob = ops.convert_to_tensor( + keep_prob, dtype=x.dtype, name="keep_prob") + keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar()) + + # Do nothing if we know keep_prob == 1 + if tensor_util.constant_value(keep_prob) == 1: + return x noise_shape = _get_noise_shape(x, noise_shape) -- GitLab From b4423efd55c5e463dd70d6975aa3a9d0f260011b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 25 May 2018 17:46:19 -0700 Subject: [PATCH 176/902] Add a type-erased broadcast implementation to xla::Literal And use this in HLO evaluator. Since broadcast only moves bytes around we don't need a type specialized implementation. I'll use this in a later change. 
PiperOrigin-RevId: 198128524 --- tensorflow/compiler/xla/literal_util.cc | 41 +++++++++++++++++++ tensorflow/compiler/xla/literal_util.h | 6 +++ tensorflow/compiler/xla/literal_util_test.cc | 30 ++++++++++++++ .../compiler/xla/service/hlo_evaluator.cc | 22 ++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 + .../xla/service/hlo_evaluator_typed_visitor.h | 30 -------------- tensorflow/compiler/xla/shape_util.h | 23 +++++++++++ 7 files changed, 124 insertions(+), 30 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 4c560767dc..7563cc1e34 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -807,6 +807,47 @@ std::unique_ptr LiteralBase::Relayout( return result; } +StatusOr> LiteralBase::Broadcast( + const Shape& result_shape, + tensorflow::gtl::ArraySlice dimensions) const { + if (!ShapeUtil::IsArray(shape())) { + return InvalidArgument("Broadcast only supports arrays."); + } + + for (int64 i = 0; i < dimensions.size(); i++) { + TF_RET_CHECK(shape().dimensions(i) == + result_shape.dimensions(dimensions[i])); + } + + std::unique_ptr result = MakeUnique(result_shape); + + // scratch_source_index is temporary storage space for the computed index into + // the input literal. We put it here to avoid allocating an std::vector in + // every iteration of ShapeUtil::ForEachIndex. 
+ std::vector scratch_source_index(shape().dimensions_size()); + + char* dest_data = static_cast(result->untyped_data()); + const char* source_data = static_cast(untyped_data()); + const int64 primitive_size = + ShapeUtil::ByteSizeOfPrimitiveType(shape().element_type()); + + ShapeUtil::ForEachIndex( + result_shape, [&](tensorflow::gtl::ArraySlice output_index) { + for (int64 i = 0; i < dimensions.size(); ++i) { + scratch_source_index[i] = output_index[dimensions[i]]; + } + int64 dest_index = IndexUtil::MultidimensionalIndexToLinearIndex( + result_shape, output_index); + int64 source_index = IndexUtil::MultidimensionalIndexToLinearIndex( + shape(), scratch_source_index); + memcpy(dest_data + primitive_size * dest_index, + source_data + primitive_size * source_index, primitive_size); + return true; + }); + + return std::move(result); +} + StatusOr> LiteralBase::Reshape( tensorflow::gtl::ArraySlice dimensions) const { if (!ShapeUtil::IsArray(shape())) { diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 609dc7a3ac..2ca9060cc7 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -277,6 +277,12 @@ class LiteralBase { StatusOr> Reshape( tensorflow::gtl::ArraySlice dimensions) const; + // Creates a new literal by broadcasting this literal with `dimensions` to + // yield a literal of shape `result_shape`. + StatusOr> Broadcast( + const Shape& result_shape, + tensorflow::gtl::ArraySlice dimensions) const; + // Creates a new literal by reordering the dimensions of this literal. 
// The given `permutation` must be a permutation of the dimension numbers // in the original literal, and it specifies the order of the new dimensions diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 77f979a0d7..f127cee0fd 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -1810,5 +1810,35 @@ TEST_F(LiteralUtilTest, GetSparseElementAsString) { tensorflow::strings::StrCat("(", float{3.0}, ", ", float{4.0}, ")")); } +TEST_F(LiteralUtilTest, BroadcastVectorToMatrix0) { + std::unique_ptr literal = Literal::CreateR1({1, 2}); + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr broadcasted_literal, + literal->Broadcast( + /*result_shape=*/ShapeUtil::MakeShape(S64, {2, 2}), + /*dimensions=*/{0})); + EXPECT_EQ(*broadcasted_literal, *Literal::CreateR2({{1, 1}, {2, 2}})); +} + +TEST_F(LiteralUtilTest, BroadcastVectorToMatrix1) { + std::unique_ptr literal = Literal::CreateR1({1, 2}); + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr broadcasted_literal, + literal->Broadcast( + /*result_shape=*/ShapeUtil::MakeShape(S64, {2, 2}), + /*dimensions=*/{1})); + EXPECT_EQ(*broadcasted_literal, *Literal::CreateR2({{1, 2}, {1, 2}})); +} + +TEST_F(LiteralUtilTest, BroadcastScalarToMatrix) { + std::unique_ptr literal = Literal::CreateR0(9); + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr broadcasted_literal, + literal->Broadcast( + /*result_shape=*/ShapeUtil::MakeShape(S32, {2, 2}), + /*dimensions=*/{})); + EXPECT_EQ(*broadcasted_literal, *Literal::CreateR2({{9, 9}, {9, 9}})); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index fa59a5fb20..2a8de02298 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -859,6 +859,28 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) { return Status::OK(); } +Status 
HloEvaluator::HandleBroadcast(HloInstruction* broadcast) { + const Literal& operand = GetEvaluatedLiteralFor(broadcast->operand(0)); + + TF_RET_CHECK(broadcast->dimensions().size() == + ShapeUtil::Rank(operand.shape())) + << "broadcast dimensions is of size: " << broadcast->dimensions().size() + << " and rank of operand_to_broadcast is: " + << ShapeUtil::Rank(operand.shape()); + // Checks that operand's dimensions are the same as the broadcast's + // dimensions along the dimensions to be broadcasted. + for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) == + operand.shape().dimensions(i)); + } + + TF_ASSIGN_OR_RETURN( + evaluated_[broadcast], + operand.Broadcast(broadcast->shape(), broadcast->dimensions())); + + return Status::OK(); +} + Status HloEvaluator::HandleGetTupleElement(HloInstruction* get_tuple_element) { const auto result_shape = get_tuple_element->shape(); const int64 index = get_tuple_element->tuple_index(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 566d53a414..2b72ff158f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -166,6 +166,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleSelect(HloInstruction* select) override; + Status HandleBroadcast(HloInstruction* broadcast) override; + // Returns the already-evaluated literal result for the instruction. // A Constant instruction is considered evaluated and its literal will be // returned directly without looking up the cache. 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index e37d651c95..82ee77e1ae 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -161,36 +161,6 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return HandleRound(round); } - Status HandleBroadcast(HloInstruction* broadcast) override { - const Literal& operand_to_broadcast = - parent_->GetEvaluatedLiteralFor(broadcast->operand(0)); - std::vector broadcast_indices( - ShapeUtil::Rank(broadcast->operand(0)->shape()), 0); - - TF_RET_CHECK(broadcast->dimensions().size() == - ShapeUtil::Rank(operand_to_broadcast.shape())) - << "broadcast dimensions is of size: " << broadcast->dimensions().size() - << " and rank of operand_to_broadcast is: " - << ShapeUtil::Rank(operand_to_broadcast.shape()); - // Checks that operand's dimensions are the same as the broadcast's - // dimensions along the dimensions to be broadcasted. 
- for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) == - operand_to_broadcast.shape().dimensions(i)); - } - - auto output = MakeUnique(broadcast->shape()); - TF_RETURN_IF_ERROR(output->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; - } - return operand_to_broadcast.Get(broadcast_indices); - })); - parent_->evaluated_[broadcast] = std::move(output); - return Status::OK(); - } - template < typename NativeT, typename std::enable_if::value>::type* = nullptr> diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 6f5765849a..cf40068b33 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -629,6 +630,28 @@ class ShapeUtil { .IgnoreError(); } + // These convenience wrappers don't take `base`, `count` and `incr` + // explicitly, but iterate over every element in `shape` instead. 
+ + template + static Status ForEachIndexWithStatus(const Shape& shape, + const FnType& visitor_function) { + std::vector base(shape.dimensions_size()); + std::vector incr(shape.dimensions_size(), 1); + return ForEachIndexWithStatus(shape, base, + /*count=*/AsInt64Slice(shape.dimensions()), + incr, visitor_function); + } + + template + static void ForEachIndex(const Shape& shape, const FnType& visitor_function) { + ForEachIndexWithStatus(shape, + [&](tensorflow::gtl::ArraySlice indices) { + return StatusOr(visitor_function(indices)); + }) + .IgnoreError(); + } + // A parallel version of ForEachIndex(WithStatus). This can only be used if // the visitor_function is thread-safe and the order of iteration does not // matter. -- GitLab From 0daf2a1ce15e7604f15fc252b93c4120574b4ec0 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 25 May 2018 19:18:30 -0700 Subject: [PATCH 177/902] [Hlo Graphviz] Always show metadata as tooltips Always show metadata as tooltips. PiperOrigin-RevId: 198134430 --- .../compiler/xla/service/hlo_graph_dumper.cc | 31 +++++++------------ .../compiler/xla/service/hlo_graph_dumper.h | 3 +- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 81f8743dca..4bf89be441 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -321,13 +321,11 @@ optional MatchTrivialComputation(const HloComputation* computation) { class HloDotDumper { public: HloDotDumper(const HloComputation* computation, tensorflow::StringPiece label, - const DebugOptions& debug_options, bool show_metadata, - bool show_backend_config, const HloExecutionProfile* profile, - NodeFilter filter) + const DebugOptions& debug_options, bool show_backend_config, + const HloExecutionProfile* profile, NodeFilter filter) : computation_(computation), label_(std::string(label)), 
debug_options_(debug_options), - show_metadata_(show_metadata), show_backend_config_(show_backend_config), profile_(profile), filter_(std::move(filter)) {} @@ -395,7 +393,6 @@ class HloDotDumper { const HloComputation* computation_; // never null const string label_; // overall name for the graph const DebugOptions& debug_options_; - const bool show_metadata_; const bool show_backend_config_; const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; @@ -798,9 +795,9 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { } } - return Printf(R"(%s [label=<%s>, shape=%s, tooltip=" ", %s];)" + return Printf(R"(%s [label=<%s>, shape=%s, tooltip="%s", %s];)" "\n", - InstructionId(instr), node_body, node_shape, + InstructionId(instr), node_body, node_shape, node_metadata, NodeColorAttributes(color)); } @@ -1068,10 +1065,6 @@ string HloDotDumper::GetInstructionNodeLabel(const HloInstruction* instr) { } string HloDotDumper::GetInstructionNodeMetadata(const HloInstruction* instr) { - if (!show_metadata_) { - return ""; - } - std::vector lines; if (!instr->metadata().op_name().empty()) { lines.push_back(HtmlLikeStringSanitize(instr->metadata().op_name())); @@ -1446,7 +1439,7 @@ string ExportGraph(const string& graph, string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile, - bool show_metadata, bool show_backend_config) { + bool show_backend_config) { GraphRendererInterface::GraphKind graph_kind; string graph; if (debug_options.xla_hlo_dump_as_graphdef()) { @@ -1457,8 +1450,8 @@ string DumpGraph(const HloComputation& computation, const string& label, graph_kind = GraphRendererInterface::TF_GRAPHDEF; } else { graph = - HloDotDumper(&computation, label, debug_options, show_metadata, - show_backend_config, hlo_execution_profile, NodeFilter()) + HloDotDumper(&computation, label, debug_options, show_backend_config, + hlo_execution_profile, 
NodeFilter()) .Dump(); graph_kind = GraphRendererInterface::DOT_GRAPH; } @@ -1470,15 +1463,15 @@ string DumpGraph(const HloComputation& computation, const string& label, } string DumpNeighborhoodAround(const HloInstruction& node, int radius, - bool show_metadata, bool show_backend_config) { + bool show_backend_config) { auto debug_options = node.GetModule()->config().debug_options(); string label = StrCat("Neighborhood of ", radius, " nodes around ", node.name()); NodeFilter filter = MakeNodeFilter(&node, radius); - string graph = HloDotDumper(node.parent(), label, debug_options, - show_metadata, show_backend_config, - /*profile=*/nullptr, filter) - .Dump(); + string graph = + HloDotDumper(node.parent(), label, debug_options, show_backend_config, + /*profile=*/nullptr, filter) + .Dump(); return ExportGraph(graph, GraphRendererInterface::DOT_GRAPH, debug_options); } diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index fc8e1468ac..0b11f34abb 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -56,7 +56,7 @@ string MaybeDumpHloModule(const HloModule& module, const string& label, string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile = nullptr, - bool show_metadata = false, bool show_backend_config = false); + bool show_backend_config = false); // Like DumpGraph, but renders only nodes "near" the given node in the graph. // @@ -64,7 +64,6 @@ string DumpGraph(const HloComputation& computation, const string& label, // (roughly) corresponds to the max distance a node may be from the primary node // before it's omitted from the graph. 
string DumpNeighborhoodAround(const HloInstruction& node, int radius, - bool show_metadata = false, bool show_backend_config = false); // Dumps the HloModule::ToString() as a file into the provided directory path -- GitLab From 32e21641d774ba783cdb2312bf4971b9481f57ca Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 25 May 2018 19:21:57 -0700 Subject: [PATCH 178/902] Add support for unary and binary ops to indexed tensor analysis I've added a TODO to clean up the use of ValueOrDie which I will address in an immediately following CL. PiperOrigin-RevId: 198134579 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_evaluator.cc | 29 +++ .../compiler/xla/service/hlo_evaluator.h | 6 + .../xla/service/indexed_array_analysis.cc | 209 +++++++++++++++--- .../xla/service/indexed_array_analysis.h | 7 +- .../service/indexed_array_analysis_test.cc | 178 ++++++++++++++- tensorflow/compiler/xla/util.h | 5 + 7 files changed, 406 insertions(+), 29 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 2976bdb9e9..5472f9a637 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2927,6 +2927,7 @@ cc_library( hdrs = ["indexed_array_analysis.h"], deps = [ ":hlo", + ":hlo_evaluator", ":hlo_pass", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 2a8de02298..e90eb0669d 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -309,6 +309,35 @@ StatusOr> HloEvaluator::EvaluateWithSubstitutions( return result; } +StatusOr> HloEvaluator::EvaluateElementwiseBinaryOp( + HloOpcode opcode, const Literal& lhs, const Literal& rhs) { + std::unique_ptr lhs_instr = + HloInstruction::CreateConstant(lhs.CloneToUnique()); + std::unique_ptr rhs_instr = + 
HloInstruction::CreateConstant(rhs.CloneToUnique()); + + std::unique_ptr cloned_instruction = + HloInstruction::CreateBinary(lhs.shape(), opcode, lhs_instr.get(), + rhs_instr.get()); + auto result = Evaluate(cloned_instruction.get()); + + cloned_instruction->DetachFromOperands(); + return result; +} + +StatusOr> HloEvaluator::EvaluateElementwiseUnaryOp( + HloOpcode opcode, const Literal& operand) { + std::unique_ptr operand_instr = + HloInstruction::CreateConstant(operand.CloneToUnique()); + + std::unique_ptr cloned_instruction = + HloInstruction::CreateUnary(operand.shape(), opcode, operand_instr.get()); + auto result = Evaluate(cloned_instruction.get()); + + cloned_instruction->DetachFromOperands(); + return result; +} + Status HloEvaluator::HandleParameter(HloInstruction* parameter) { CHECK_LT(parameter->parameter_number(), arg_literals_.size()); const Literal* input_literal = arg_literals_[parameter->parameter_number()]; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 2b72ff158f..b53d5644de 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -109,6 +109,12 @@ class HloEvaluator : public DfsHloVisitorWithDefault { const std::unordered_map& substitutions); + StatusOr> EvaluateElementwiseBinaryOp( + HloOpcode opcode, const Literal& lhs, const Literal& rhs); + + StatusOr> EvaluateElementwiseUnaryOp( + HloOpcode opcode, const Literal& operand); + protected: // Make HloEvaluatorTypedVisitor a friend because it is logically part of this // class. diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index b74f05e080..5d870f9fc4 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/indexed_array_analysis.h" #include "tensorflow/compiler/xla/map_util.h" +#include "tensorflow/compiler/xla/service/hlo_evaluator.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -32,7 +33,9 @@ using tensorflow::gtl::ArraySlice; using tensorflow::str_util::Join; } // namespace -string IndexedArrayAnalysis::ToString(Array* root) { +// TODO(sanjoy): Make this pass StatusOr safe. + +string IndexedArrayAnalysis::ToString(Array* root, bool print_constants) { switch (root->kind()) { case Array::kUnknown: { auto* unknown_tensor = root->as(); @@ -41,6 +44,12 @@ string IndexedArrayAnalysis::ToString(Array* root) { } case Array::kConstant: { + if (print_constants) { + string contents = root->as()->literal()->ToString(); + return tensorflow::strings::StrCat( + "(constant ", ShapeUtil::HumanString(root->shape()), " ", contents, + ")"); + } return tensorflow::strings::StrCat( "(constant ", ShapeUtil::HumanString(root->shape()), ")"); } @@ -52,9 +61,10 @@ string IndexedArrayAnalysis::ToString(Array* root) { ? 
"scalar-indexed-const" : "scalar-indexed"; return tensorflow::strings::StrCat( - "(", name, " ", ToString(indexed_array->source()), " ", - ToString(indexed_array->indices()), " ", indexed_array->source_dim(), - "->[", Join(indexed_array->output_dims(), ","), "])"); + "(", name, " ", ToString(indexed_array->source(), print_constants), + " ", ToString(indexed_array->indices(), print_constants), " ", + indexed_array->source_dim(), "->[", + Join(indexed_array->output_dims(), ","), "])"); } } } @@ -115,23 +125,25 @@ void IndexedArrayAnalysis::TraverseAndPopulateCache( Analysis::Array* IndexedArrayAnalysis::ComputeArrayFor( const HloInstruction* instr) { Array* computed_array; - switch (instr->opcode()) { - default: - computed_array = nullptr; - break; - case HloOpcode::kConstant: - computed_array = ComputeArrayForConstant(instr->literal()); - break; - case HloOpcode::kGather: - computed_array = ComputeArrayForGather( - instr->shape(), instr->gather_dimension_numbers(), - instr->gather_window_bounds(), FindOrDie(cache_, instr->operand(0)), - FindOrDie(cache_, instr->operand(1))); - break; - case HloOpcode::kReshape: - computed_array = ComputeArrayForReshape( - instr->shape(), FindOrDie(cache_, instr->operand(0))); - break; + if (instr->IsElementwise() && instr->operand_count() == 1) { + computed_array = ComputeArrayForElementwiseUnaryOp( + instr, FindOrDie(cache_, instr->operand(0))); + } else if (instr->IsElementwise() && instr->operand_count() == 2) { + computed_array = ComputeArrayForElementwiseBinaryOp( + instr, FindOrDie(cache_, instr->operand(0)), + FindOrDie(cache_, instr->operand(1))); + } else if (instr->opcode() == HloOpcode::kConstant) { + computed_array = ComputeArrayForConstant(instr->literal()); + } else if (instr->opcode() == HloOpcode::kGather) { + computed_array = ComputeArrayForGather( + instr->shape(), instr->gather_dimension_numbers(), + instr->gather_window_bounds(), FindOrDie(cache_, instr->operand(0)), + FindOrDie(cache_, instr->operand(1))); + } 
else if (instr->opcode() == HloOpcode::kReshape) { + computed_array = ComputeArrayForReshape( + instr->shape(), FindOrDie(cache_, instr->operand(0))); + } else { + computed_array = nullptr; } if (!computed_array) { @@ -166,14 +178,14 @@ ScalarIndexedArray* IndexedArrayAnalysis::FoldGatherOfGather( IndexComponent::Ungathered); // Simulate the first gather. - simulated_index.erase(simulated_index.begin() + source->source_dim()); + EraseAt(&simulated_index, source->source_dim()); for (int64 gather_dim : source->output_dims()) { simulated_index.insert(simulated_index.begin() + gather_dim, IndexComponent::GatheredFirst); } // Simulate the second gather. - simulated_index.erase(simulated_index.begin() + source_dim); + EraseAt(&simulated_index, source_dim); for (int64 output_dim : output_dims) { simulated_index.insert(simulated_index.begin() + output_dim, IndexComponent::GatheredSecond); @@ -463,8 +475,7 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( int64 output_dim = scalar_indexed->output_dims()[i]; int64 output_dim_after_reshape = MapPassthroughOperandDimToResultDim( reshape_passthrough_dims, output_dim); - new_scalar_indexed_source_shape.erase( - new_scalar_indexed_source_shape.begin() + output_dim_after_reshape); + EraseAt(&new_scalar_indexed_source_shape, output_dim_after_reshape); } // After this, we need to add in the dimension that will be the source @@ -541,6 +552,154 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( output_dims_for_new_scalar_indexed_node, shape); } +Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( + const HloInstruction* instr, Array* lhs, Array* rhs) { + // Try to fold BinaryOp(Broadcast(Const0), ScalarIndexed(Const1, Indices)) + // => ScalarIndexed(BinaryOp(Broadcast'(Const0), Const1), Indices) + // + // We can do this if every output dimension from the scalar-indexed node is a + // broadcasted dimension for the broadcast node. 
Informally, the precondition + // means Broadcast(Const0)[IDX] is solely a function of the components of IDX + // that are not output-dims for the scalar-indexed node. In other words, for + // every assignment to the non-output dims in IDX we have a "constant" LHS to + // the BinaryOp. This transform propagates this "constant" to the source for + // the scalar-indexed node. + + ScalarIndexedConstantArray* lhs_scalar_indexed_const = + dynamic_cast(lhs); + ScalarIndexedConstantArray* rhs_scalar_indexed_const = + dynamic_cast(rhs); + + bool lhs_is_indexed; + + // One of the operands must be scalar-indexed and the other must be a + // broadcast of a constant. + if (lhs_scalar_indexed_const && !rhs_scalar_indexed_const) { + lhs_is_indexed = true; + } else if (rhs_scalar_indexed_const && !lhs_scalar_indexed_const) { + lhs_is_indexed = false; + } else { + return nullptr; + } + + ScalarIndexedConstantArray* scalar_indexed_const = + lhs_is_indexed ? lhs_scalar_indexed_const : rhs_scalar_indexed_const; + UnknownArray* candidate_broadcast_array = + dynamic_cast(lhs_is_indexed ? rhs : lhs); + if (!candidate_broadcast_array || + candidate_broadcast_array->instruction().opcode() != + HloOpcode::kBroadcast) { + return nullptr; + } + + const HloInstruction* broadcast_instr = + &candidate_broadcast_array->instruction(); + const HloInstruction* broadcast_const_operand = broadcast_instr->operand(0); + if (broadcast_const_operand->opcode() != HloOpcode::kConstant) { + return nullptr; + } + + ArraySlice broadcast_dims = broadcast_instr->dimensions(); + auto is_broadcasted_dim = [&](int64 output_dim) { + return c_find(broadcast_dims, output_dim) == broadcast_dims.end(); + }; + + // All of the output dims must be "broadcasted" dims for the other operand. 
+ if (!c_all_of(scalar_indexed_const->output_dims(), is_broadcasted_dim)) { + return nullptr; + } + + // To figure out the broadcast dimensions for the (constant) source for the + // scalar-indexed node, we "simulate" the index transformation done by the + // existing broadcast: + enum class IndexComponent { Broadcasted, NotBroadcasted }; + std::vector simulated_index( + broadcast_instr->shape().dimensions_size(), IndexComponent::Broadcasted); + for (int64 broadcast_dim : broadcast_dims) { + simulated_index[broadcast_dim] = IndexComponent::NotBroadcasted; + } + + // The scalar-indexed node "removes" the source dim and "inserts" the output + // dims. We do the opposite here to undo the scalar-indexed operation. + ArraySlice output_dims = scalar_indexed_const->output_dims(); + for (int64 i = output_dims.size() - 1; i >= 0; --i) { + CHECK(simulated_index[output_dims[i]] == IndexComponent::Broadcasted); + EraseAt(&simulated_index, output_dims[i]); + } + + InsertAt(&simulated_index, scalar_indexed_const->source_dim(), + IndexComponent::Broadcasted); + + // new_inner_broadcast_dims holds the broadcast dimensions for the inner + // BinaryOp(Broadcast'(Const0), Const1). We now translate simulated_index to + // new_inner_broadcast_dims. 
+ std::vector new_inner_broadcast_dims; + for (int64 i = 0; i < simulated_index.size(); i++) { + if (simulated_index[i] == IndexComponent::NotBroadcasted) { + new_inner_broadcast_dims.push_back(i); + } + } + + // inner_broadcast_result is the Broadcast'(Const0) bit in + // BinaryOp(Broadcast'(Const0), Const1) + std::unique_ptr inner_broadcast_result = + broadcast_const_operand->literal() + .Broadcast(scalar_indexed_const->source()->shape(), + new_inner_broadcast_dims) + .ConsumeValueOrDie(); + + // literal_for_new_source is BinaryOp(Broadcast'(Const0), Const1) + const Literal* literal_for_new_source; + if (lhs_is_indexed) { + literal_for_new_source = + TakeOwnership(HloEvaluator{} + .EvaluateElementwiseBinaryOp( + instr->opcode(), scalar_indexed_const->literal(), + *inner_broadcast_result) + .ConsumeValueOrDie()); + } else { + literal_for_new_source = + TakeOwnership(HloEvaluator{} + .EvaluateElementwiseBinaryOp( + instr->opcode(), *inner_broadcast_result, + scalar_indexed_const->literal()) + .ConsumeValueOrDie()); + } + + ConstantArray* new_source = Construct(literal_for_new_source); + return Construct( + new_source, scalar_indexed_const->indices(), + scalar_indexed_const->source_dim(), + std::vector(scalar_indexed_const->output_dims().begin(), + scalar_indexed_const->output_dims().end()), + scalar_indexed_const->shape()); +} + +Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( + const HloInstruction* instr, Array* operand) { + auto* scalar_indexed_const = + dynamic_cast(operand); + if (operand == nullptr) { + return nullptr; + } + + // Fold UnaryOp(ScalarIndexed(Const, Indices)) + // => ScalarIndexed(UnaryOp(Const), Indices) + + Literal* literal_for_new_source = + TakeOwnership(HloEvaluator{} + .EvaluateElementwiseUnaryOp( + instr->opcode(), scalar_indexed_const->literal()) + .ConsumeValueOrDie()); + ConstantArray* new_source = Construct(literal_for_new_source); + return Construct( + new_source, scalar_indexed_const->indices(), + 
scalar_indexed_const->source_dim(), + std::vector(scalar_indexed_const->output_dims().begin(), + scalar_indexed_const->output_dims().end()), + scalar_indexed_const->shape()); +} + tensorflow::StringPiece IndexedArrayAnalysisPrinterPass::name() const { return "indexed-array-analysis-printer-pass"; } diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index 35d454ab77..8c1f616fab 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -223,7 +223,7 @@ class IndexedArrayAnalysis { Array* GetArrayFor(const HloInstruction* instr); // Pretty-prints the expression rooted at `root`. - string ToString(Array* root); + string ToString(Array* root, bool print_constants = false); private: // Helper function that ensures that every HLO instruction that is @@ -268,6 +268,11 @@ class IndexedArrayAnalysis { Array* ComputeArrayForReshape(const Shape& shape, Array* operand); + Array* ComputeArrayForElementwiseBinaryOp(const HloInstruction* instr, + Array* lhs, Array* rhs); + Array* ComputeArrayForElementwiseUnaryOp(const HloInstruction* instr, + Array* operand); + template T* Construct(Args&&... 
args) { T* new_tensor = new T(std::forward(args)...); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index e1090df942..76e7e7086c 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -23,12 +23,27 @@ class IndexedArrayAnalysisTest : public HloVerifiedTestBase { protected: void AssertArrayForRootExpressionIs(const string& hlo_text, const string& root_expression) { + AssertArrayForRootExpressionIsImpl(hlo_text, root_expression, + /*print_constants=*/false); + } + + void AssertArrayWithConstantsForRootExpressionIs( + const string& hlo_text, const string& root_expression) { + AssertArrayForRootExpressionIsImpl(hlo_text, root_expression, + /*print_constants=*/true); + } + + private: + void AssertArrayForRootExpressionIsImpl(const string& hlo_text, + const string& root_expression, + bool print_constants) { IndexedArrayAnalysis indexed_tensor_analysis; ParseAndVerifyModule(hlo_text); - string result = - indexed_tensor_analysis.ToString(indexed_tensor_analysis.GetArrayFor( - module().entry_computation()->root_instruction())); + string result = indexed_tensor_analysis.ToString( + indexed_tensor_analysis.GetArrayFor( + module().entry_computation()->root_instruction()), + print_constants); LOG(INFO) << result; ASSERT_EQ(result, root_expression); } @@ -298,5 +313,162 @@ ENTRY main { AssertArrayForRootExpressionIs(hlo_text, "%reshape"); } + +TEST_F(IndexedArrayAnalysisTest, UnaryOpOfGather) { + string hlo_text = R"( +HloModule UnaryOpOfGather + +ENTRY main { + operand = f32[3,4] constant(f32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + indices = s32[5] parameter(0) + gather = f32[5,4] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT tanh = f32[5,4] tanh(gather) +} +)"; + + 
AssertArrayWithConstantsForRootExpressionIs(hlo_text, 1 + R"( +(scalar-indexed-const (constant f32[3,4] f32[3,4] { + { 0.761594176, 0.964027584, 0.995054781, 0.999329329 }, + { 0.761594176, 0.995054781, 0.964027584, 0.999329329 }, + { 0.999329329, 0.995054781, 0.964027584, 0.761594176 } +}) %indices 0->[0]))"); +} + +TEST_F(IndexedArrayAnalysisTest, AddBroadcastedScalarWithGather) { + string hlo_text = R"( +HloModule AddBroadcastedScalarWithGather + +ENTRY main { + gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + constant = s32[] constant(5) + constant_broadcasted = s32[5,4] broadcast(constant), dimensions={} + indices = s32[5] parameter(0) + gather = s32[5,4] gather(gather_operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT add = s32[5,4] add(gather, constant_broadcasted) +} +)"; + + AssertArrayWithConstantsForRootExpressionIs(hlo_text, 1 + R"( +(scalar-indexed-const (constant s32[3,4] s32[3,4] { + { 6, 7, 8, 9 }, + { 6, 8, 7, 9 }, + { 9, 8, 7, 6 } +}) %indices 0->[0]))"); +} + +TEST_F(IndexedArrayAnalysisTest, + SubtractBroadcastedScalarWithGather_GatherIsLhs) { + string hlo_text = R"( +HloModule SubtractBroadcastedScalarWithGather + +ENTRY main { + gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + constant = s32[] constant(5) + constant_broadcasted = s32[5,4] broadcast(constant), dimensions={} + indices = s32[5] parameter(0) + gather = s32[5,4] gather(gather_operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT sub = s32[5,4] subtract(gather, constant_broadcasted) +} +)"; + + AssertArrayWithConstantsForRootExpressionIs(hlo_text, 1 + R"( +(scalar-indexed-const (constant s32[3,4] s32[3,4] { + { -4, -3, -2, -1 }, + { -4, -2, -3, -1 }, + { -1, -2, -3, -4 } +}) %indices 0->[0]))"); +} + 
+TEST_F(IndexedArrayAnalysisTest, + SubtractBroadcastedScalarWithGather_GatherIsRhs) { + string hlo_text = R"( +HloModule SubtractBroadcastedScalarWithGather + +ENTRY main { + gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + constant = s32[] constant(5) + constant_broadcasted = s32[5,4] broadcast(constant), dimensions={} + indices = s32[5] parameter(0) + gather = s32[5,4] gather(gather_operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT sub = s32[5,4] subtract(constant_broadcasted, gather) +} +)"; + + AssertArrayWithConstantsForRootExpressionIs(hlo_text, 1 + R"( +(scalar-indexed-const (constant s32[3,4] s32[3,4] { + { 4, 3, 2, 1 }, + { 4, 2, 3, 1 }, + { 1, 2, 3, 4 } +}) %indices 0->[0]))"); +} + +TEST_F(IndexedArrayAnalysisTest, AddBroadcastedVectorWithGather) { + string hlo_text = R"( +HloModule AddBroadcastedVectorWithGather + +ENTRY main { + gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + constant_vect = s32[4] constant({10,11,12,13}) + constant_broadcasted = s32[5,4] broadcast(constant_vect), dimensions={1} + indices = s32[5] parameter(0) + gather = s32[5,4] gather(gather_operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT add = s32[5,4] add(gather, constant_broadcasted) +} +)"; + + AssertArrayWithConstantsForRootExpressionIs(hlo_text, 1 + R"( +(scalar-indexed-const (constant s32[3,4] s32[3,4] { + { 11, 13, 15, 17 }, + { 11, 14, 14, 17 }, + { 14, 14, 14, 14 } +}) %indices 0->[0]))"); +} + +TEST_F(IndexedArrayAnalysisTest, AddBroadcastedVectorWithGather_Negative) { + string hlo_text = R"( +HloModule AddBroadcastedVectorWithGather + +ENTRY main { + gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}}) + constant_vect = s32[5] constant({10,11,12,13,14}) + constant_broadcasted = 
s32[5,4] broadcast(constant_vect), dimensions={0} + indices = s32[5] parameter(0) + gather = s32[5,4] gather(gather_operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1,4} + ROOT add = s32[5,4] add(gather, constant_broadcasted) +} +)"; + + AssertArrayForRootExpressionIs(hlo_text, "%add"); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index 6ca0c02c66..7303640726 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -537,6 +537,11 @@ void InsertAt(C* c, int64 index, Value&& value) { c->insert(c->begin() + index, std::forward(value)); } +template +void EraseAt(C* c, int64 index) { + c->erase(c->begin() + index); +} + // Returns true if `x` fits in 32-bits. template bool IsInt32(T x) { -- GitLab From 336d77ea19be48efad6025f824a58f89a87ce097 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 25 May 2018 20:23:31 -0700 Subject: [PATCH 179/902] [XLA] Don't compute relative error when the expected value is 0. In literal_comparison, don't try to compute a relative error when the expected value is 0, because doing so would mean that the only acceptable value *is* zero, which probably isn't what you mean. PiperOrigin-RevId: 198137414 --- tensorflow/compiler/xla/literal_comparison.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/literal_comparison.cc b/tensorflow/compiler/xla/literal_comparison.cc index bf9679cafe..a588f4a03d 100644 --- a/tensorflow/compiler/xla/literal_comparison.cc +++ b/tensorflow/compiler/xla/literal_comparison.cc @@ -317,7 +317,15 @@ class NearComparator { rel_error = std::numeric_limits::infinity(); } else { abs_error = FpAbsoluteValue(actual - expected); - rel_error = abs_error / FpAbsoluteValue(expected); + // If the expected result is exactly zero, don't compute relative error; + // that's meaningless. 
+ // + // TODO(b/80321728): Come up with a better way to handle this case. + if (expected == NativeT{}) { + rel_error = 0; + } else { + rel_error = abs_error / FpAbsoluteValue(expected); + } } const bool is_abs_mismatch = abs_error > error_.abs; const bool is_rel_mismatch = rel_error > error_.rel; -- GitLab From 16b5e21ef4be2ace560b1c5308dd08a298603594 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 21:38:56 -0700 Subject: [PATCH 180/902] Use dict(locals()) instead of distribution_util.parent_frame_arguments. This will be much faster at object construction time (going forward we'll figure out a way to make this a function to call). PiperOrigin-RevId: 198141184 --- .../contrib/distributions/python/ops/autoregressive.py | 2 +- .../contrib/distributions/python/ops/batch_reshape.py | 3 +-- tensorflow/contrib/distributions/python/ops/binomial.py | 2 +- tensorflow/contrib/distributions/python/ops/cauchy.py | 3 +-- tensorflow/contrib/distributions/python/ops/chi2.py | 5 ++--- .../contrib/distributions/python/ops/deterministic.py | 3 +-- tensorflow/contrib/distributions/python/ops/geometric.py | 2 +- tensorflow/contrib/distributions/python/ops/gumbel.py | 3 +-- tensorflow/contrib/distributions/python/ops/half_normal.py | 3 +-- tensorflow/contrib/distributions/python/ops/independent.py | 3 +-- .../contrib/distributions/python/ops/inverse_gamma.py | 4 ++-- tensorflow/contrib/distributions/python/ops/logistic.py | 3 +-- tensorflow/contrib/distributions/python/ops/mixture.py | 2 +- .../contrib/distributions/python/ops/mixture_same_family.py | 2 +- tensorflow/contrib/distributions/python/ops/mvn_diag.py | 4 ++-- .../distributions/python/ops/mvn_diag_plus_low_rank.py | 2 +- .../contrib/distributions/python/ops/mvn_full_covariance.py | 3 +-- .../contrib/distributions/python/ops/mvn_linear_operator.py | 2 +- tensorflow/contrib/distributions/python/ops/mvn_tril.py | 2 +- .../contrib/distributions/python/ops/negative_binomial.py | 2 +- 
.../contrib/distributions/python/ops/onehot_categorical.py | 2 +- tensorflow/contrib/distributions/python/ops/poisson.py | 2 +- .../contrib/distributions/python/ops/poisson_lognormal.py | 2 +- .../distributions/python/ops/quantized_distribution.py | 2 +- .../contrib/distributions/python/ops/relaxed_bernoulli.py | 2 +- .../distributions/python/ops/relaxed_onehot_categorical.py | 2 +- tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py | 2 +- .../distributions/python/ops/vector_diffeomixture.py | 2 +- .../distributions/python/ops/vector_exponential_diag.py | 2 +- .../python/ops/vector_exponential_linear_operator.py | 2 +- .../contrib/distributions/python/ops/vector_laplace_diag.py | 2 +- .../python/ops/vector_laplace_linear_operator.py | 2 +- .../distributions/python/ops/vector_sinh_arcsinh_diag.py | 2 +- .../contrib/distributions/python/ops/vector_student_t.py | 2 +- tensorflow/contrib/distributions/python/ops/wishart.py | 6 +++--- tensorflow/python/ops/distributions/bernoulli.py | 2 +- tensorflow/python/ops/distributions/beta.py | 4 ++-- tensorflow/python/ops/distributions/categorical.py | 2 +- tensorflow/python/ops/distributions/dirichlet.py | 2 +- .../python/ops/distributions/dirichlet_multinomial.py | 2 +- tensorflow/python/ops/distributions/distribution.py | 2 +- tensorflow/python/ops/distributions/exponential.py | 5 ++--- tensorflow/python/ops/distributions/gamma.py | 4 ++-- tensorflow/python/ops/distributions/laplace.py | 5 ++--- tensorflow/python/ops/distributions/multinomial.py | 2 +- tensorflow/python/ops/distributions/normal.py | 5 ++--- tensorflow/python/ops/distributions/student_t.py | 4 ++-- .../python/ops/distributions/transformed_distribution.py | 2 +- tensorflow/python/ops/distributions/uniform.py | 3 +-- 49 files changed, 60 insertions(+), 73 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py index d813831bef..11ca90c483 100644 --- 
a/tensorflow/contrib/distributions/python/ops/autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py @@ -144,7 +144,7 @@ class Autoregressive(distribution_lib.Distribution): `distribution_fn(sample0).event_shape.num_elements()` are both `None`. ValueError: if `num_steps < 1`. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: self._distribution_fn = distribution_fn self._sample0 = sample0 diff --git a/tensorflow/contrib/distributions/python/ops/batch_reshape.py b/tensorflow/contrib/distributions/python/ops/batch_reshape.py index c709318f76..4714caad69 100644 --- a/tensorflow/contrib/distributions/python/ops/batch_reshape.py +++ b/tensorflow/contrib/distributions/python/ops/batch_reshape.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import distribution as distribution_lib -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ @@ -103,7 +102,7 @@ class BatchReshape(distribution_lib.Distribution): ValueError: if `batch_shape` size is not the same as a `distribution.batch_shape` size. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) name = name or "BatchReshape" + distribution.name with ops.name_scope(name, values=[batch_shape]) as name: # The unexpanded batch shape may contain up to one dimension of -1. diff --git a/tensorflow/contrib/distributions/python/ops/binomial.py b/tensorflow/contrib/distributions/python/ops/binomial.py index 24b26bf124..e4944beedc 100644 --- a/tensorflow/contrib/distributions/python/ops/binomial.py +++ b/tensorflow/contrib/distributions/python/ops/binomial.py @@ -163,7 +163,7 @@ class Binomial(distribution.Distribution): more of the statistic's batch members are undefined. 
name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = self._maybe_assert_valid_total_count( ops.convert_to_tensor(total_count, name="total_count"), diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index f5ffdd8731..23b6a83c17 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -29,7 +29,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ "Cauchy", @@ -121,7 +120,7 @@ class Cauchy(distribution.Distribution): Raises: TypeError: if `loc` and `scale` have different `dtype`. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): diff --git a/tensorflow/contrib/distributions/python/ops/chi2.py b/tensorflow/contrib/distributions/python/ops/chi2.py index 08cdc15828..686ae1ba74 100644 --- a/tensorflow/contrib/distributions/python/ops/chi2.py +++ b/tensorflow/contrib/distributions/python/ops/chi2.py @@ -25,7 +25,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import gamma -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ @@ -84,7 +83,7 @@ class Chi2(gamma.Gamma): more of the statistic's batch members are undefined. 
name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) # Even though all stats of chi2 are defined for valid parameters, this is # not true in the parent class "gamma." therefore, passing # allow_nan_stats=True @@ -120,7 +119,7 @@ class Chi2WithAbsDf(Chi2): validate_args=False, allow_nan_stats=True, name="Chi2WithAbsDf"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[df]) as name: super(Chi2WithAbsDf, self).__init__( df=math_ops.floor( diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py index 6d7d6d307b..c44c76a133 100644 --- a/tensorflow/contrib/distributions/python/ops/deterministic.py +++ b/tensorflow/contrib/distributions/python/ops/deterministic.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ "Deterministic", @@ -87,7 +86,7 @@ class _BaseDeterministic(distribution.Distribution): Raises: ValueError: If `loc` is a scalar. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, atol, rtol]) as name: loc = ops.convert_to_tensor(loc, name="loc") if is_vector and validate_args: diff --git a/tensorflow/contrib/distributions/python/ops/geometric.py b/tensorflow/contrib/distributions/python/ops/geometric.py index 446cff6ec2..e1e42ee95d 100644 --- a/tensorflow/contrib/distributions/python/ops/geometric.py +++ b/tensorflow/contrib/distributions/python/ops/geometric.py @@ -85,7 +85,7 @@ class Geometric(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index ed9ea6f4f3..9d94fd11c6 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -29,7 +29,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util class _Gumbel(distribution.Distribution): @@ -125,7 +124,7 @@ class _Gumbel(distribution.Distribution): Raises: TypeError: if loc and scale are different dtypes. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py index 7e12767f6d..9c96254d1c 100644 --- a/tensorflow/contrib/distributions/python/ops/half_normal.py +++ b/tensorflow/contrib/distributions/python/ops/half_normal.py @@ -31,7 +31,6 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import special_math -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ @@ -106,7 +105,7 @@ class HalfNormal(distribution.Distribution): if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py index fa89fff3b7..cd6eaa8407 100644 --- a/tensorflow/contrib/distributions/python/ops/independent.py +++ b/tensorflow/contrib/distributions/python/ops/independent.py @@ -29,7 +29,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import distribution as distribution_lib from tensorflow.python.ops.distributions import kullback_leibler -from tensorflow.python.ops.distributions import util as distribution_util class Independent(distribution_lib.Distribution): @@ -117,7 +116,7 @@ class Independent(distribution_lib.Distribution): ValueError: if `reinterpreted_batch_ndims` exceeds `distribution.batch_ndims` """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) name = name or "Independent" + distribution.name self._distribution = distribution with ops.name_scope(name) as name: diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index 85e8e10466..208057b34d 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -125,7 +125,7 @@ class InverseGamma(distribution.Distribution): Raises: TypeError: if `concentration` and `rate` are different dtypes. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), @@ -280,7 +280,7 @@ class InverseGammaWithSoftplusConcentrationRate(InverseGamma): validate_args=False, allow_nan_stats=True, name="InverseGammaWithSoftplusConcentrationRate"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration, rate]) as name: super(InverseGammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 0103283259..27aa863440 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -31,7 +31,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util class Logistic(distribution.Distribution): @@ -120,7 +119,7 @@ class Logistic(distribution.Distribution): Raises: TypeError: if loc and scale are different dtypes. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index d54f30dc63..bfb53a06c0 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -116,7 +116,7 @@ class Mixture(distribution.Distribution): matching static batch shapes, or all components do not have matching static event shapes. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) if not isinstance(cat, categorical.Categorical): raise TypeError("cat must be a Categorical distribution, but saw: %s" % cat) diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py index c7c90cf875..112eefd369 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py +++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py @@ -130,7 +130,7 @@ class MixtureSameFamily(distribution.Distribution): ValueError: if `mixture_distribution` categories does not equal `components_distribution` rightmost batch shape. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: self._mixture_distribution = mixture_distribution self._components_distribution = components_distribution diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py index cad398582b..d2beb2aff0 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py @@ -193,7 +193,7 @@ class MultivariateNormalDiag( Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): @@ -224,7 +224,7 @@ class MultivariateNormalDiagWithSoftplusScale(MultivariateNormalDiag): validate_args=False, allow_nan_stats=True, name="MultivariateNormalDiagWithSoftplusScale"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[scale_diag]) as name: super(MultivariateNormalDiagWithSoftplusScale, self).__init__( loc=loc, diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index 1c11594df3..5117379b04 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -215,7 +215,7 @@ class MultivariateNormalDiagPlusLowRank( Raises: ValueError: if at most `scale_identity_multiplier` is specified. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) def _convert_to_tensor(x, name): return None if x is None else ops.convert_to_tensor(x, name=name) with ops.name_scope(name) as name: diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py index 47d7d13cf3..57f47db50c 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py @@ -24,7 +24,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops.distributions import util as distribution_util __all__ = [ @@ -156,7 +155,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): Raises: ValueError: if neither `loc` nor `covariance_matrix` are specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) # Convert the covariance_matrix up to a scale_tril and call MVNTriL. with ops.name_scope(name) as name: diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index 79916fef8d..6a0383db02 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -170,7 +170,7 @@ class MultivariateNormalLinearOperator( ValueError: if `scale` is unspecified. 
TypeError: if not `scale.dtype.is_floating` """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not scale.dtype.is_floating: diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index d6b0ed994e..c809ef3c1c 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -179,7 +179,7 @@ class MultivariateNormalTriL( Raises: ValueError: if neither `loc` nor `scale_tril` are specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) def _convert_to_tensor(x, name): return None if x is None else ops.convert_to_tensor(x, name=name) if loc is None and scale_tril is None: diff --git a/tensorflow/contrib/distributions/python/ops/negative_binomial.py b/tensorflow/contrib/distributions/python/ops/negative_binomial.py index 1085c56dc8..2bd11e24b3 100644 --- a/tensorflow/contrib/distributions/python/ops/negative_binomial.py +++ b/tensorflow/contrib/distributions/python/ops/negative_binomial.py @@ -90,7 +90,7 @@ class NegativeBinomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py index a4b9f3b78d..3e44c10fab 100644 --- a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py @@ -115,7 +115,7 @@ class OneHotCategorical(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( name=name, logits=logits, probs=probs, validate_args=validate_args, diff --git a/tensorflow/contrib/distributions/python/ops/poisson.py b/tensorflow/contrib/distributions/python/ops/poisson.py index b345394021..04de8106ee 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson.py +++ b/tensorflow/contrib/distributions/python/ops/poisson.py @@ -93,7 +93,7 @@ class Poisson(distribution.Distribution): TypeError: if `rate` is not a float-type. TypeError: if `log_rate` is not a float-type. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[rate]) as name: if (rate is None) == (log_rate is None): raise ValueError("Must specify exactly one of `rate` and `log_rate`.") diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index fe72091d7d..7b10ba998f 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -255,7 +255,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): TypeError: if `quadrature_grid` and `quadrature_probs` have different base `dtype`. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: if loc is not None: loc = ops.convert_to_tensor(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py index 584d2c385f..5ac6c34b53 100644 --- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py @@ -263,7 +263,7 @@ class QuantizedDistribution(distributions.Distribution): `Distribution` or continuous. NotImplementedError: If the base distribution does not implement `cdf`. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) values = ( list(distribution.parameters.values()) + [low, high]) diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py index 0362996e68..4182ca2b56 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py @@ -165,7 +165,7 @@ class RelaxedBernoulli(transformed_distribution.TransformedDistribution): Raises: ValueError: If both `probs` and `logits` are passed, or if neither. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[logits, probs, temperature]) as name: with ops.control_dependencies([check_ops.assert_positive(temperature)] if validate_args else []): diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index 910c430ae7..5414f347cd 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -162,7 +162,7 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[logits, probs, temperature]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index f04dc8da39..a764544932 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -132,7 +132,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale, skewness, tailweight]) as name: diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index cd6d749959..8d4914e16c 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -395,7 +395,7 @@ class VectorDiffeomixture(distribution_lib.Distribution): ValueError: if `not distribution.is_scalar_batch`. ValueError: if `not distribution.is_scalar_event`. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[mix_loc, temperature]) as name: if not scale or len(scale) < 2: raise ValueError("Must specify list (or list-like object) of scale " diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py index 3465d66b30..a75b3f3df1 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py @@ -175,7 +175,7 @@ class VectorExponentialDiag( Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py index 2c31b01984..a7d4c55be9 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py @@ -175,7 +175,7 @@ class VectorExponentialLinearOperator( ValueError: if `scale` is unspecified. 
TypeError: if not `scale.dtype.is_floating` """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not scale.dtype.is_floating: diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py index 6a36018d6f..4a53e7a621 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_diag.py @@ -210,7 +210,7 @@ class VectorLaplaceDiag( Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name): with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py index 97e5c76d80..0566e04fec 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py @@ -191,7 +191,7 @@ class VectorLaplaceLinearOperator( ValueError: if `scale` is unspecified. 
TypeError: if not `scale.dtype.is_floating` """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) if scale is None: raise ValueError("Missing required `scale` parameter.") if not scale.dtype.is_floating: diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index ff5ca45257..bb33cd0762 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -163,7 +163,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): Raises: ValueError: if at most `scale_identity_multiplier` is specified. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope( name, diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py index 4742f75218..21f84dcbde 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py +++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py @@ -175,7 +175,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) graph_parents = [df, loc, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_factor, scale_perturb_diag] with ops.name_scope(name) as name: diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index f555867e7f..88d4280759 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -107,7 +107,7 @@ class _WishartLinearOperator(distribution.Distribution): ValueError: if df < k, where scale operator event shape is `(k, k)` """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) self._cholesky_input_output_matrices = cholesky_input_output_matrices with ops.name_scope(name) as name: with ops.name_scope("init", values=[df, scale_operator]): @@ -530,7 +530,7 @@ class WishartCholesky(_WishartLinearOperator): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[scale]) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) @@ -646,7 +646,7 @@ class WishartFull(_WishartLinearOperator): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py index d7fb3f1f78..84d9d40a35 100644 --- a/tensorflow/python/ops/distributions/bernoulli.py +++ b/tensorflow/python/ops/distributions/bernoulli.py @@ -71,7 +71,7 @@ class Bernoulli(distribution.Distribution): Raises: ValueError: If p and logits are passed, or if neither are passed. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py index b697848600..f28f76b6c4 100644 --- a/tensorflow/python/ops/distributions/beta.py +++ b/tensorflow/python/ops/distributions/beta.py @@ -150,7 +150,7 @@ class Beta(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration1, concentration0]) as name: self._concentration1 = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration1, name="concentration1"), @@ -321,7 +321,7 @@ class BetaWithSoftplusConcentration(Beta): validate_args=False, allow_nan_stats=True, name="BetaWithSoftplusConcentration"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration1, concentration0]) as name: super(BetaWithSoftplusConcentration, self).__init__( diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py index bbdc8c455a..b88a0518b6 100644 --- a/tensorflow/python/ops/distributions/categorical.py +++ b/tensorflow/python/ops/distributions/categorical.py @@ -182,7 +182,7 @@ class Categorical(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 8d0d1d860b..1ab58c1450 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -154,7 +154,7 @@ class Dirichlet(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration]) as name: self._concentration = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration, name="concentration"), diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py index 3a35e0caa0..5350c82847 100644 --- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py +++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py @@ -191,7 +191,7 @@ class DirichletMultinomial(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[total_count, concentration]) as name: # Broadcasting works because: # * The broadcasting convention is to prepend dimensions of size [1], and diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index a6579e3246..0db4749507 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -525,7 +525,7 @@ class Distribution(_BaseDistribution): """Dictionary of parameters used to instantiate this `Distribution`.""" # Remove "self", "__class__", or other special variables. These can appear # if the subclass used: - # `parameters = distribution_util.parent_frame_arguments()`. + # `parameters = dict(locals())`. 
return dict((k, v) for k, v in self._parameters.items() if not k.startswith("__") and k != "self") diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py index 1e08f48d52..24bc3f3d3e 100644 --- a/tensorflow/python/ops/distributions/exponential.py +++ b/tensorflow/python/ops/distributions/exponential.py @@ -27,7 +27,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import gamma -from tensorflow.python.ops.distributions import util as distribution_util from tensorflow.python.util.tf_export import tf_export @@ -91,7 +90,7 @@ class Exponential(gamma.Gamma): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) # Even though all statistics of are defined for valid inputs, this is not # true in the parent class "Gamma." Therefore, passing # allow_nan_stats=True @@ -144,7 +143,7 @@ class ExponentialWithSoftplusRate(Exponential): validate_args=False, allow_nan_stats=True, name="ExponentialWithSoftplusRate"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[rate]) as name: super(ExponentialWithSoftplusRate, self).__init__( rate=nn.softplus(rate, name="softplus_rate"), diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py index 7ca690d9d2..163a27f758 100644 --- a/tensorflow/python/ops/distributions/gamma.py +++ b/tensorflow/python/ops/distributions/gamma.py @@ -126,7 +126,7 @@ class Gamma(distribution.Distribution): Raises: TypeError: if `concentration` and `rate` are different dtypes. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), @@ -261,7 +261,7 @@ class GammaWithSoftplusConcentrationRate(Gamma): validate_args=False, allow_nan_stats=True, name="GammaWithSoftplusConcentrationRate"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[concentration, rate]) as name: super(GammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, diff --git a/tensorflow/python/ops/distributions/laplace.py b/tensorflow/python/ops/distributions/laplace.py index ee3a6a40ff..be17cf2527 100644 --- a/tensorflow/python/ops/distributions/laplace.py +++ b/tensorflow/python/ops/distributions/laplace.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import special_math -from tensorflow.python.ops.distributions import util as distribution_util from tensorflow.python.util.tf_export import tf_export @@ -101,7 +100,7 @@ class Laplace(distribution.Distribution): Raises: TypeError: if `loc` and `scale` are of different dtype. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): @@ -218,7 +217,7 @@ class LaplaceWithSoftplusScale(Laplace): validate_args=False, allow_nan_stats=True, name="LaplaceWithSoftplusScale"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: super(LaplaceWithSoftplusScale, self).__init__( loc=loc, diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 036ba45ccc..d0943e8eee 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -182,7 +182,7 @@ class Multinomial(distribution.Distribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = ops.convert_to_tensor(total_count, name="total_count") if validate_args: diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py index 0620aae10d..d0a987ba7c 100644 --- a/tensorflow/python/ops/distributions/normal.py +++ b/tensorflow/python/ops/distributions/normal.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import kullback_leibler from tensorflow.python.ops.distributions import special_math -from tensorflow.python.ops.distributions import util as distribution_util from tensorflow.python.util.tf_export import tf_export @@ -132,7 +131,7 @@ class Normal(distribution.Distribution): Raises: TypeError: if `loc` and `scale` have different `dtype`. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): @@ -244,7 +243,7 @@ class NormalWithSoftplusScale(Normal): validate_args=False, allow_nan_stats=True, name="NormalWithSoftplusScale"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[scale]) as name: super(NormalWithSoftplusScale, self).__init__( loc=loc, diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py index 9330b930b5..20a2d16181 100644 --- a/tensorflow/python/ops/distributions/student_t.py +++ b/tensorflow/python/ops/distributions/student_t.py @@ -157,7 +157,7 @@ class StudentT(distribution.Distribution): Raises: TypeError: if loc and scale are different dtypes. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[df, loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(df)] if validate_args else []): @@ -349,7 +349,7 @@ class StudentTWithAbsDfSoftplusScale(StudentT): validate_args=False, allow_nan_stats=True, name="StudentTWithAbsDfSoftplusScale"): - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[df, scale]) as name: super(StudentTWithAbsDfSoftplusScale, self).__init__( df=math_ops.floor(math_ops.abs(df)), diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py index c2674bd6e5..e80bf9ee42 100644 --- a/tensorflow/python/ops/distributions/transformed_distribution.py +++ b/tensorflow/python/ops/distributions/transformed_distribution.py @@ -252,7 +252,7 @@ class TransformedDistribution(distribution_lib.Distribution): name: Python `str` name prefixed to Ops created by this class. Default: `bijector.name + distribution.name`. 
""" - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) name = name or (("" if bijector is None else bijector.name) + distribution.name) with ops.name_scope(name, values=[event_shape, batch_shape]) as name: diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index dfa10331e3..e66c4a37e7 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -29,7 +29,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util from tensorflow.python.util.tf_export import tf_export @@ -103,7 +102,7 @@ class Uniform(distribution.Distribution): Raises: InvalidArgumentError: if `low >= high` and `validate_args=False`. """ - parameters = distribution_util.parent_frame_arguments() + parameters = dict(locals()) with ops.name_scope(name, values=[low, high]) as name: with ops.control_dependencies([ check_ops.assert_less( -- GitLab From 31c544c8e5b04f4bcf544018fcfcacbee5294025 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 25 May 2018 22:46:46 -0700 Subject: [PATCH 181/902] DepthwiseConv Optimizations PiperOrigin-RevId: 198144118 --- .../lite/kernels/internal/optimized/depthwiseconv_uint8.h | 4 +++- .../internal/optimized/depthwiseconv_uint8_3x3_filter.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index b85e6c49e0..3fd00c8930 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1697,7 +1697,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, #endif TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); -#ifdef __aarch64__ +// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on +// Jetson TX-2. This compiler does not support the offsetof() macro. +#if defined(__aarch64__) && !defined(GOOGLE_L4T) // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. if (Fast3x3FilterKernelSupported( diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 51fbd54906..8cd72239e9 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -23,7 +23,9 @@ limitations under the License. namespace tflite { namespace optimized_ops { -#ifdef __aarch64__ +// Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on +// Jetson TX-2. This compiler does not support the offsetof() macro. 
+#if defined(__aarch64__) && !defined(GOOGLE_L4T) // clang-format gets confused with this file and ends up formatting lines to // be larger than 80 characters. Turn off here and back on at the end of the -- GitLab From fce6e5f095c54001527f7efed25d075f31978d3d Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Fri, 25 May 2018 23:44:11 -0700 Subject: [PATCH 182/902] Fix the issue where returned Status is not used. PiperOrigin-RevId: 198146500 --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 32b211dcd1..96e0700862 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2534,7 +2534,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Build the TRT op // TODO(sami,ben,jie): proper naming! tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - SetInputList(s, &op_builder, &input_names, &input_dtypes); + TF_RETURN_IF_ERROR(SetInputList(s, &op_builder, &input_names, &input_dtypes)); std::vector segment_names; segment_names.reserve(s.subgraph_node_ids.size()); @@ -2632,7 +2632,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - SetInputList(s, &op_builder, &input_names, &input_dtypes); + TF_RETURN_IF_ERROR(SetInputList(s, &op_builder, &input_names, &input_dtypes)); VLOG(0) << "Finished op preparation"; -- GitLab From 5acba9b600d5463dd4b542c7f606c02da6bc6f6c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 26 May 2018 08:25:12 -0700 Subject: [PATCH 183/902] Extracts the 'remove random shuffle node' optimization into its own method. 
PiperOrigin-RevId: 198169790 --- .../grappler/optimizers/constant_folding.cc | 32 ++++++++++++------- .../grappler/optimizers/constant_folding.h | 5 +++ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index df32d4a25d..fed5d873c8 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1695,17 +1695,9 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } } - // Remove RandomShuffle op if it is scalar or first dimension is of size 1. - if (use_shape_info && IsRandomShuffle(*node) && - !properties->GetInputProperties(node->name()).empty()) { - const auto& shape = properties->GetInputProperties(node->name())[0].shape(); - // The node is replaceable iff - // unknown_rank == false && (dim_size == 0 || first dim is of size 1) - if (!shape.unknown_rank() && - (shape.dim_size() == 0 || shape.dim(0).size() == 1)) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } + + if (RemoveRandomShuffle(*properties, use_shape_info, optimized_graph, node)) { + return Status::OK(); } bool remove_reverse_successful = false; @@ -1831,6 +1823,24 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::RemoveRandomShuffle(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, + NodeDef* node) { + if (use_shape_info && IsRandomShuffle(*node) && + !properties.GetInputProperties(node->name()).empty()) { + const auto& shape = properties.GetInputProperties(node->name())[0].shape(); + // The node is replaceable iff + // unknown_rank == false && (dim_size == 0 || first dim is of size 1) + if (!shape.unknown_rank() && + (shape.dim_size() == 0 || shape.dim(0).size() == 1)) { + ReplaceOperationWithIdentity(0, properties, node, 
optimized_graph); + return true; + } + } + return false; +} + Status ConstantFolding::RemoveReverse(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 9a3ea03552..c760b05480 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -194,6 +194,11 @@ class ConstantFolding : public GraphOptimizer { // Removes Reverse op over dimensions with size 1. Status RemoveReverse(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node, bool* success); + + // Removes RandomShuffle op if it is scalar or first dimension is of size 1. + bool RemoveRandomShuffle(const GraphProperties& properties, + bool use_shape_info, GraphDef* optimized_graph, + NodeDef* node); // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; -- GitLab From 2561c4000afcead84823ebead70498533e5ebebb Mon Sep 17 00:00:00 2001 From: braincodercn Date: Sun, 27 May 2018 00:25:10 +0800 Subject: [PATCH 184/902] Fix build error Fix build error: 'function' in namespace 'std' does not name a template type --- tensorflow/python/util/util.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 8bfc0ec958..0f465eda4f 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/python/util/util.h" +#include #include #include -- GitLab From f2177855323f11e4f9620638e238691c57000373 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sun, 27 May 2018 10:49:12 -0700 Subject: [PATCH 185/902] TPUEstimator.export_savedmodel() saves a SavedModel with both TPU and CPU graphs. PiperOrigin-RevId: 198229550 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 1 + .../contrib/tpu/python/tpu/tpu_estimator.py | 213 +++++++++++++++++- tensorflow/python/estimator/estimator.py | 26 ++- 3 files changed, 228 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index f531ae5fad..7d165fdd6e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -330,6 +330,7 @@ def outside_compilation(computation, args=None): Returns: The Tensors returned by computation. """ + args = [] if args is None else args graph = ops.get_default_graph() # If we are in a TPUReplicateContext, signal that we are now diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index f0c7564175..f27375637a 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -46,6 +46,7 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -61,6 +62,7 @@ from tensorflow.python.ops import summary_ops_v2 as contrib_summary from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.saved_model import tag_constants from tensorflow.python.summary import summary from 
tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import evaluation @@ -71,6 +73,7 @@ from tensorflow.python.util import function_utils from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect + _INITIAL_LOSS = 1e7 _ZERO_LOSS = 0. _TPU_ESTIMATOR = 'tpu_estimator' @@ -81,6 +84,7 @@ _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _ONE_GIGABYTE = 1024 * 1024 * 1024 _TPU_ENQUEUE_OPS = '_tpu_enqueue_ops' _TPU_TRAIN_OP = '_tpu_train_op' +_REWRITE_FOR_INFERENCE_MODE = '_rewrite_for_inference' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY] @@ -1773,8 +1777,45 @@ class TPUEstimator(estimator_lib.Estimator): Exporting ========= - Exporting `SavedModel` support on TPU is not yet implemented. So, - `export_savedmodel` is executed on CPU, even if `use_tpu` is true. + `export_savedmodel` exports 2 metagraphs, one with `tag_constants.SERVING`, + and another with `tag_constants.SERVING` and `tag_constants.TPU`. + At serving time, these tags are used to select metagraph to load. + + Before running the graph on TPU, TPU system needs to be initialized. If + TensorFlow Serving model-server is used, this is done automatically. If + not, please call `session.run(tpu.initialize_system())`. + + `tpu.outside_compilation` can be used to wrap TPU incompatible ops in + `model_fn`. + + Example: + ---------------- + + ``` + def model_fn(features, labels, mode, config, params): + ... + logits = ... + export_outputs = { + 'logits': export_output_lib.PredictOutput( + {'logits': logits}) + } + + def host_call(logits): + class_ids = math_ops.argmax(logits) + classes = string_ops.as_string(class_ids) + export_outputs['classes'] = + export_output_lib.ClassificationOutput(classes=classes) + + tpu.outside_compilation(host_call, [logits]) + + ... + ``` + + Current limitations: + -------------------- + + 1. Outside compilation does not work yet (b/79991729). 
+ """ def __init__(self, @@ -1903,6 +1944,103 @@ class TPUEstimator(estimator_lib.Estimator): self._is_input_fn_invoked = None + def _add_meta_graph_for_mode(self, + builder, + input_receiver_fn_map, + checkpoint_path, + strip_default_attrs, + save_variables=True, + mode=model_fn_lib.ModeKeys.PREDICT, + export_tags=None): + if mode != model_fn_lib.ModeKeys.PREDICT: + raise NotImplementedError( + 'TPUEstimator only handles mode PREDICT for export_savedmodel(); ' + 'got {}.'.format(mode)) + + super(TPUEstimator, self)._add_meta_graph_for_mode(builder, + input_receiver_fn_map, + checkpoint_path, + strip_default_attrs, + save_variables, + mode=mode) + + input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: + input_receiver_fn_map[mode]} + export_tags = [tag_constants.SERVING, tag_constants.TPU] + mode = _REWRITE_FOR_INFERENCE_MODE + super(TPUEstimator, self)._add_meta_graph_for_mode(builder, + input_receiver_fn_map, + checkpoint_path, + strip_default_attrs, + save_variables=False, + mode=mode, + export_tags=export_tags) + + def _call_model_fn(self, features, labels, mode, config): + if mode == _REWRITE_FOR_INFERENCE_MODE: + return self._call_model_fn_for_inference(features, labels, mode, config) + else: + return super(TPUEstimator, self)._call_model_fn( + features, labels, mode, config) + + def _call_model_fn_for_inference(self, features, labels, mode, config): + """Wraps `_call_model_fn` for `export_savedmodel`.""" + if mode != _REWRITE_FOR_INFERENCE_MODE: + raise ValueError('mode must be {}; ' + 'got {}.'.format(_REWRITE_FOR_INFERENCE_MODE, mode)) + + capture = _CapturedObject() + + def computation(): + """Compute tpu tensors used in export_outputs. + + Passed to rewrite_for_inference so that model_fn will be called under + the rewriting contexts. Only tpu tensors are returned, but export_outputs + and scaffold are captured. + + Returns: + A list of Tensors used in export_outputs and not marked for + outside_compilation. 
+ """ + # We should only call model fn once and it should be inside `computation` + # so that building the graph will happen under `rewrite_for_inference`. + mode = model_fn_lib.ModeKeys.PREDICT + estimator_spec = self._call_model_fn(features, labels, mode, config) + + # We pick the TPU tensors out from `export_output` and later return them + # from `computation` for rewriting. + tensors_dict = collections.OrderedDict( + (k, _export_output_to_tensors(v)) + for k, v in six.iteritems(estimator_spec.export_outputs) + ) + tensors = nest.flatten(tensors_dict) + tpu_tensors = [t for t in tensors if _is_tpu_tensor(t)] + + # We cannot return anything other than `tpu_tensors` here so we capture + # the rest for later use. + capture.capture((estimator_spec, tensors_dict, tensors)) + return tpu_tensors + + tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation) + estimator_spec, tensors_dict, tensors = capture.get() + + # Reconstruct `tensors`, but with `tpu_tensors` replaced with + # `tpu_tensors_on_cpu`. + new_tensors = [ + tpu_tensors_on_cpu.pop(0) if _is_tpu_tensor(t) else t + for t in tensors + ] + # Reconstruct `tensors_dict`. + new_tensors_dict = nest.pack_sequence_as(tensors_dict, new_tensors) + # Reconstruct `export_outputs`. + export_outputs = estimator_spec.export_outputs + new_export_outputs = collections.OrderedDict( + (k, _clone_export_output_with_tensors(export_outputs[k], v)) + for k, v in six.iteritems(new_tensors_dict) + ) + + return estimator_spec._replace(export_outputs=new_export_outputs) + def _create_global_step(self, graph): """Creates a global step suitable for TPUs. 
@@ -2278,6 +2416,76 @@ class TPUEstimator(estimator_lib.Estimator): return _model_fn +def _is_tpu_tensor(tensor): + if not isinstance(tensor, ops.Tensor): + return False + try: + tensor.op.get_attr(tpu._OUTSIDE_COMPILATION_ATTR) # pylint: disable=protected-access + except ValueError: + return True + else: + return False + + +def _export_output_to_tensors(export_output): + """Get a list of `Tensors` used in `export_output`. + + Args: + export_output: an `ExportOutput` object such as `ClassificationOutput`, + `RegressionOutput`, or `PredictOutput`. + Returns: + a list of tensors used in export_output. + + Raises: + ValueError: if `export_output` is not one of `ClassificationOutput`, + `RegressionOutput`, or `PredictOutput`. + """ + if isinstance(export_output, export_output_lib.ClassificationOutput): + return [export_output.scores, export_output.classes] + elif isinstance(export_output, export_output_lib.RegressionOutput): + return [export_output.value] + elif isinstance(export_output, export_output_lib.PredictOutput): + return export_output.outputs.values() + else: + raise ValueError( + '`export_output` must be have type `ClassificationOutput`, ' + '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output)) + + +def _clone_export_output_with_tensors(export_output, tensors): + """Clones `export_output` but with new `tensors`. + + Args: + export_output: an `ExportOutput` object such as `ClassificationOutput`, + `RegressionOutput`, or `PredictOutput`. + tensors: a list of `Tensors` used to construct a new `export_output`. + + Returns: + A dict similar to `export_output` but with `tensors`. + + Raises: + ValueError: if `export_output` is not one of `ClassificationOutput`, + `RegressionOutput`, or `PredictOutput`. 
+ """ + if isinstance(export_output, export_output_lib.ClassificationOutput): + if len(tensors) != 2: + raise ValueError('tensors must be of length 2; ' + 'got {}.'.format(len(tensors))) + return export_output_lib.ClassificationOutput(*tensors) + elif isinstance(export_output, export_output_lib.RegressionOutput): + if len(tensors) != 1: + raise ValueError('tensors must be of length 1; ' + 'got {}'.format(len(tensors))) + return export_output_lib.RegressionOutput(*tensors) + elif isinstance(export_output, export_output_lib.PredictOutput): + return export_output_lib.PredictOutput( + dict(zip(export_output.outputs.keys(), tensors))) + else: + raise ValueError( + '`export_output` must be have type `ClassificationOutput`, ' + '`RegressionOutput`, or `PredictOutput`; got {}.'.format(export_output)) + + def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): """Executes `model_fn_wrapper` multiple times on all TPU shards.""" iterations_per_loop_var = _create_or_get_iterations_per_loop() @@ -2844,4 +3052,3 @@ def _add_item_to_params(params, key, value): else: # Now params is Python dict. 
params[key] = value - diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 347a760333..331ee7490e 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -212,8 +212,8 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = self._config.device_fn or \ - _get_replica_device_setter(self._config) + self._device_fn = ( + self._config.device_fn or _get_replica_device_setter(self._config)) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') @@ -564,7 +564,8 @@ class Estimator(object): allowed_overrides = set([ '_call_input_fn', '_create_global_step', '_convert_train_steps_to_hooks', '_convert_eval_steps_to_hooks', - '_tf_api_names', '_validate_features_in_predict_input' + '_tf_api_names', '_validate_features_in_predict_input', + '_call_model_fn', '_add_meta_graph_for_mode' ]) estimator_members = set([m for m in Estimator.__dict__.keys() if not m.startswith('__')]) @@ -828,10 +829,14 @@ class Estimator(object): gfile.Rename(temp_export_dir, export_dir) return export_dir - def _add_meta_graph_for_mode( - self, builder, input_receiver_fn_map, checkpoint_path, - strip_default_attrs, save_variables=True, - mode=model_fn_lib.ModeKeys.PREDICT): + def _add_meta_graph_for_mode(self, + builder, + input_receiver_fn_map, + checkpoint_path, + strip_default_attrs, + save_variables=True, + mode=model_fn_lib.ModeKeys.PREDICT, + export_tags=None): # pylint: disable=line-too-long """Loads variables and adds them along with a MetaGraphDef for saving. @@ -850,9 +855,14 @@ class Estimator(object): True for the first call to this function, and the SavedModelBuilder will raise an error if that is not the case. mode: tf.estimator.ModeKeys value indicating which mode will be exported. + export_tags: The set of tags with which to save `MetaGraphDef`. If None, + a default set will be selected to matched the passed mode. 
""" # pylint: enable=line-too-long + if export_tags is None: + export_tags = model_fn_lib.EXPORT_TAG_MAP[mode] input_receiver_fn = input_receiver_fn_map[mode] + with ops.Graph().as_default() as g: self._create_and_assert_global_step(g) random_seed.set_random_seed(self._config.tf_random_seed) @@ -877,8 +887,6 @@ class Estimator(object): with tf_session.Session(config=self._session_config) as session: - export_tags = model_fn_lib.EXPORT_TAG_MAP[mode] - local_init_op = ( estimator_spec.scaffold.local_init_op or monitored_session.Scaffold.default_local_init_op()) -- GitLab From 418b5abda254f11ca54d0439893024c58e2af983 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 27 May 2018 18:43:32 +0000 Subject: [PATCH 186/902] Fix incorrect documentation for `tf.reduce_any` This fix fixes the incorrect documentation for `tf.reduce_any`. The previous description: ``` If `axis` has no entries, all dimensions are reduced, and a tensor with a single element is returned. ``` is not correct. See below: ``` Python 2.7.12 (default, Dec 4 2017, 14:50:18) [GCC 5.4.0 20160609] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> import tensorflow as tf >>> x = tf.constant([[True, True], [False, False]]) >>> v1 = tf.reduce_any(x, []) >>> tf.Session().run(v1) array([[ True, True], [False, False]]) >>> v2 = tf.reduce_any(x, None) >>> tf.Session().run(v2) True >>> ``` Instead, the correct description should be: ``` If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. ``` Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 118b02c6c7..53d5edbf18 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1675,7 +1675,7 @@ def reduce_any(input_tensor, entry in `axis`. 
If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: -- GitLab From 564c146f37a02c3930a0dcc2978c9054664e927e Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 27 May 2018 18:55:23 +0000 Subject: [PATCH 187/902] Fix incorrect documentation for `tf.reduce_all` Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 53d5edbf18..b7e3de7e85 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1617,7 +1617,7 @@ def reduce_all(input_tensor, entry in `axis`. If `keepdims` is true, the reduced dimensions are retained with length 1. - If `axis` has no entries, all dimensions are reduced, and a + If `axis` is None, all dimensions are reduced, and a tensor with a single element is returned. For example: -- GitLab From a8b3027c404dfef87265a2856e982381f0f55cd3 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Sun, 27 May 2018 13:27:28 -0700 Subject: [PATCH 188/902] [XLA] Don't display metadata inline in HLO graph dump. We only want to display it in the tooltip. PiperOrigin-RevId: 198235268 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 4bf89be441..a2cb21c09b 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -788,8 +788,8 @@ string HloDotDumper::DumpInstruction(const HloInstruction* instr) { } // Build the text that will be displayed inside the node. 
string node_body = node_label; - for (const string& s : {trivial_subcomputation, node_metadata, - node_backend_config, extra_info, inlined_constants}) { + for (const string& s : {trivial_subcomputation, node_backend_config, + extra_info, inlined_constants}) { if (!s.empty()) { StrAppend(&node_body, "
", s); } -- GitLab From d0e31cd4b00f30f5ffb9753f5f1e79f8940b0734 Mon Sep 17 00:00:00 2001 From: "candy.dc" Date: Mon, 28 May 2018 16:53:59 +0800 Subject: [PATCH 189/902] Fix typo --- tensorflow/core/kernels/sparse_matmul_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index a1f9667b78..866c5dcd52 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1490,7 +1490,7 @@ inline void LibxsmmSparseMatMul::Compute( #endif // TENSORFLOW_USE_LIBXSMM -// Here is a an overview of the SparseMatMul code. Note that we assume that the +// Here is an overview of the SparseMatMul code. Note that we assume that the // left matrix is sparse. // // The matrix "left" is divided into a grid with blocksize of (M, KL). Each -- GitLab From 7c8b25a64d393099272df70224bed1b51d8dfbae Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 May 2018 05:21:13 -0700 Subject: [PATCH 190/902] Relax compatibility checks for Protocol Buffer classes, to not rely on the exact layout of the generated classes. The definition of the message is checked instead. 
PiperOrigin-RevId: 198292780 --- .../tensorflow.-attr-value.-list-value.pbtxt | 172 ++++---- .../api/golden/tensorflow.-attr-value.pbtxt | 265 +++++++------ ...ow.-config-proto.-device-count-entry.pbtxt | 99 +---- ...nsorflow.-config-proto.-experimental.pbtxt | 86 +--- .../api/golden/tensorflow.-config-proto.pbtxt | 278 +++++++------ .../tools/api/golden/tensorflow.-event.pbtxt | 180 ++++----- .../golden/tensorflow.-g-p-u-options.pbtxt | 196 ++++----- .../api/golden/tensorflow.-graph-def.pbtxt | 122 ++---- .../golden/tensorflow.-graph-options.pbtxt | 173 +++----- .../golden/tensorflow.-histogram-proto.pbtxt | 152 +++---- .../api/golden/tensorflow.-log-message.pbtxt | 152 ++----- ...meta-graph-def.-collection-def-entry.pbtxt | 100 +---- ...rflow.-meta-graph-def.-meta-info-def.pbtxt | 148 +++---- ...-meta-graph-def.-signature-def-entry.pbtxt | 100 +---- .../golden/tensorflow.-meta-graph-def.pbtxt | 239 ++++++----- ...nsorflow.-name-attr-list.-attr-entry.pbtxt | 100 +---- .../golden/tensorflow.-name-attr-list.pbtxt | 120 ++---- .../tensorflow.-node-def.-attr-entry.pbtxt | 100 +---- .../api/golden/tensorflow.-node-def.pbtxt | 150 +++---- .../tensorflow.-optimizer-options.pbtxt | 200 ++++------ .../api/golden/tensorflow.-run-metadata.pbtxt | 109 ++--- ...ensorflow.-run-options.-experimental.pbtxt | 86 +--- .../api/golden/tensorflow.-run-options.pbtxt | 205 ++++------ .../api/golden/tensorflow.-session-log.pbtxt | 146 ++----- ...rflow.-summary-metadata.-plugin-data.pbtxt | 96 +---- .../golden/tensorflow.-summary-metadata.pbtxt | 126 ++---- .../golden/tensorflow.-summary.-audio.pbtxt | 126 ++---- .../golden/tensorflow.-summary.-image.pbtxt | 116 ++---- .../golden/tensorflow.-summary.-value.pbtxt | 180 ++++----- .../api/golden/tensorflow.-summary.pbtxt | 230 ++++++----- .../tensorflow.-tensor-info.-coo-sparse.pbtxt | 106 +---- .../api/golden/tensorflow.-tensor-info.pbtxt | 149 +++---- ...flow.profiler.-advice-proto.-checker.pbtxt | 86 +--- 
...ofiler.-advice-proto.-checkers-entry.pbtxt | 100 +---- .../tensorflow.profiler.-advice-proto.pbtxt | 123 ++---- ...graph-node-proto.-input-shapes-entry.pbtxt | 100 +---- ...ensorflow.profiler.-graph-node-proto.pbtxt | 373 +++++++++--------- ...low.profiler.-multi-graph-node-proto.pbtxt | 288 ++++++-------- ...er.-op-log-proto.-id-to-string-entry.pbtxt | 99 +---- .../tensorflow.profiler.-op-log-proto.pbtxt | 120 ++---- .../golden/tensorflow.summary.-event.pbtxt | 180 ++++----- .../tensorflow.summary.-session-log.pbtxt | 146 ++----- ...sorflow.summary.-summary-description.pbtxt | 86 +--- .../tensorflow.summary.-summary.-audio.pbtxt | 126 ++---- .../tensorflow.summary.-summary.-image.pbtxt | 116 ++---- .../tensorflow.summary.-summary.-value.pbtxt | 180 ++++----- .../golden/tensorflow.summary.-summary.pbtxt | 230 ++++++----- ...sorflow.summary.-tagged-run-metadata.pbtxt | 96 +---- .../golden/tensorflow.train.-bytes-list.pbtxt | 86 +--- .../tensorflow.train.-cluster-def.pbtxt | 87 +--- .../golden/tensorflow.train.-example.pbtxt | 87 +--- .../tensorflow.train.-feature-list.pbtxt | 87 +--- ...n.-feature-lists.-feature-list-entry.pbtxt | 100 +---- .../tensorflow.train.-feature-lists.pbtxt | 110 ++---- .../golden/tensorflow.train.-feature.pbtxt | 115 ++---- ...rflow.train.-features.-feature-entry.pbtxt | 100 +---- .../golden/tensorflow.train.-features.pbtxt | 110 ++---- .../golden/tensorflow.train.-float-list.pbtxt | 89 +---- .../golden/tensorflow.train.-int64-list.pbtxt | 89 +---- ...nsorflow.train.-job-def.-tasks-entry.pbtxt | 99 +---- .../golden/tensorflow.train.-job-def.pbtxt | 119 ++---- .../golden/tensorflow.train.-saver-def.pbtxt | 178 +++------ .../tensorflow.train.-sequence-example.pbtxt | 98 +---- .../golden/tensorflow.train.-server-def.pbtxt | 128 ++---- tensorflow/tools/api/lib/api_objects.proto | 7 + .../api/lib/python_object_to_proto_visitor.py | 15 +- .../tools/api/tests/api_compatibility_test.py | 20 + 67 files changed, 2947 insertions(+), 6033 
deletions(-) diff --git a/tensorflow/tools/api/golden/tensorflow.-attr-value.-list-value.pbtxt b/tensorflow/tools/api/golden/tensorflow.-attr-value.-list-value.pbtxt index 0fb1aaba28..f1dffd5952 100644 --- a/tensorflow/tools/api/golden/tensorflow.-attr-value.-list-value.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-attr-value.-list-value.pbtxt @@ -1,108 +1,70 @@ path: "tensorflow.AttrValue.ListValue" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "B_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FUNC_FIELD_NUMBER" - mtype: "" - } - member { - name: "F_FIELD_NUMBER" - mtype: "" - } - member { - name: "I_FIELD_NUMBER" - mtype: "" - } - member { - name: "SHAPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "S_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_FIELD_NUMBER" - mtype: "" - } - member { - name: "TYPE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "ListValue" + field { 
+ name: "s" + number: 2 + label: LABEL_REPEATED + type: TYPE_BYTES + } + field { + name: "i" + number: 3 + label: LABEL_REPEATED + type: TYPE_INT64 + options { + packed: true + } + } + field { + name: "f" + number: 4 + label: LABEL_REPEATED + type: TYPE_FLOAT + options { + packed: true + } + } + field { + name: "b" + number: 5 + label: LABEL_REPEATED + type: TYPE_BOOL + options { + packed: true + } + } + field { + name: "type" + number: 6 + label: LABEL_REPEATED + type: TYPE_ENUM + type_name: ".tensorflow.DataType" + options { + packed: true + } + } + field { + name: "shape" + number: 7 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + field { + name: "tensor" + number: 8 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + } + field { + name: "func" + number: 9 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.NameAttrList" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-attr-value.pbtxt b/tensorflow/tools/api/golden/tensorflow.-attr-value.pbtxt index e7a3a1f02f..6ccd64f428 100644 --- a/tensorflow/tools/api/golden/tensorflow.-attr-value.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-attr-value.pbtxt @@ -1,120 +1,151 @@ path: "tensorflow.AttrValue" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "B_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FUNC_FIELD_NUMBER" - mtype: "" - } - member { - name: "F_FIELD_NUMBER" - mtype: "" - } - member { - name: "I_FIELD_NUMBER" - mtype: "" - } - member { - name: "LIST_FIELD_NUMBER" - mtype: "" - } - member { - name: "ListValue" - mtype: "" - } - member { - name: "PLACEHOLDER_FIELD_NUMBER" - mtype: "" - } - member { - name: "SHAPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "S_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_FIELD_NUMBER" - mtype: "" - } - member { - name: 
"TYPE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "AttrValue" + field { + name: "s" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "i" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + oneof_index: 0 + } + field { + name: "f" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + oneof_index: 0 + } + field { + name: "b" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_BOOL + oneof_index: 0 + } + field { + name: "type" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.DataType" + oneof_index: 0 + } + field { + name: "shape" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + oneof_index: 0 + } + field { + name: "tensor" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + oneof_index: 0 + } + field { + name: "list" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.AttrValue.ListValue" + 
oneof_index: 0 + } + field { + name: "func" + number: 10 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.NameAttrList" + oneof_index: 0 + } + field { + name: "placeholder" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_STRING + oneof_index: 0 + } + nested_type { + name: "ListValue" + field { + name: "s" + number: 2 + label: LABEL_REPEATED + type: TYPE_BYTES + } + field { + name: "i" + number: 3 + label: LABEL_REPEATED + type: TYPE_INT64 + options { + packed: true + } + } + field { + name: "f" + number: 4 + label: LABEL_REPEATED + type: TYPE_FLOAT + options { + packed: true + } + } + field { + name: "b" + number: 5 + label: LABEL_REPEATED + type: TYPE_BOOL + options { + packed: true + } + } + field { + name: "type" + number: 6 + label: LABEL_REPEATED + type: TYPE_ENUM + type_name: ".tensorflow.DataType" + options { + packed: true + } + } + field { + name: "shape" + number: 7 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + field { + name: "tensor" + number: 8 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + } + field { + name: "func" + number: 9 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.NameAttrList" + } + } + oneof_decl { + name: "value" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.-device-count-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.-device-count-entry.pbtxt index 29bb3be35c..d9b1426828 100644 --- a/tensorflow/tools/api/golden/tensorflow.-config-proto.-device-count-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.-device-count-entry.pbtxt @@ -1,84 +1,21 @@ path: "tensorflow.ConfigProto.DeviceCountEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } 
- member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "DeviceCountEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt index 0a0669e10c..9e09a8d48e 100644 --- a/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.-experimental.pbtxt @@ -1,80 +1,12 @@ path: "tensorflow.ConfigProto.Experimental" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COLLECTIVE_GROUP_LEADER_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - 
name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Experimental" + field { + name: "collective_group_leader" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt index 0d53d1c2b9..4af4ed70ef 100644 --- a/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-config-proto.pbtxt @@ -1,148 +1,136 @@ path: "tensorflow.ConfigProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ALLOW_SOFT_PLACEMENT_FIELD_NUMBER" - mtype: "" - } - member { - name: "CLUSTER_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DEVICE_COUNT_FIELD_NUMBER" - mtype: "" - } - member { - name: "DEVICE_FILTERS_FIELD_NUMBER" - mtype: "" - } - member { - name: "DeviceCountEntry" - mtype: "" - } - member { - name: "EXPERIMENTAL_FIELD_NUMBER" - mtype: "" - } - member { - name: "Experimental" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "GPU_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: 
"GRAPH_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "INTER_OP_PARALLELISM_THREADS_FIELD_NUMBER" - mtype: "" - } - member { - name: "INTRA_OP_PARALLELISM_THREADS_FIELD_NUMBER" - mtype: "" - } - member { - name: "ISOLATE_SESSION_STATE_FIELD_NUMBER" - mtype: "" - } - member { - name: "LOG_DEVICE_PLACEMENT_FIELD_NUMBER" - mtype: "" - } - member { - name: "OPERATION_TIMEOUT_IN_MS_FIELD_NUMBER" - mtype: "" - } - member { - name: "PLACEMENT_PERIOD_FIELD_NUMBER" - mtype: "" - } - member { - name: "RPC_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "SESSION_INTER_OP_THREAD_POOL_FIELD_NUMBER" - mtype: "" - } - member { - name: "USE_PER_SESSION_THREADS_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "ConfigProto" + field { + name: "device_count" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.ConfigProto.DeviceCountEntry" + } + field { + name: "intra_op_parallelism_threads" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: 
"inter_op_parallelism_threads" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "use_per_session_threads" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "session_inter_op_thread_pool" + number: 12 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.ThreadPoolOptionProto" + } + field { + name: "placement_period" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "device_filters" + number: 4 + label: LABEL_REPEATED + type: TYPE_STRING + } + field { + name: "gpu_options" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GPUOptions" + } + field { + name: "allow_soft_placement" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "log_device_placement" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "graph_options" + number: 10 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphOptions" + } + field { + name: "operation_timeout_in_ms" + number: 11 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "rpc_options" + number: 13 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.RPCOptions" + } + field { + name: "cluster_def" + number: 14 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.ClusterDef" + } + field { + name: "isolate_session_state" + number: 15 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "experimental" + number: 16 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.ConfigProto.Experimental" + } + nested_type { + name: "DeviceCountEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + options { + map_entry: true + } + } + nested_type { + name: "Experimental" + field { + name: "collective_group_leader" + number: 1 + label: LABEL_OPTIONAL + 
type: TYPE_STRING + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-event.pbtxt b/tensorflow/tools/api/golden/tensorflow.-event.pbtxt index 9bf8c12428..3b75a1735b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-event.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-event.pbtxt @@ -1,112 +1,74 @@ path: "tensorflow.Event" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FILE_VERSION_FIELD_NUMBER" - mtype: "" - } - member { - name: "GRAPH_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "LOG_MESSAGE_FIELD_NUMBER" - mtype: "" - } - member { - name: "META_GRAPH_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "SESSION_LOG_FIELD_NUMBER" - mtype: "" - } - member { - name: "STEP_FIELD_NUMBER" - mtype: "" - } - member { - name: "SUMMARY_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAGGED_RUN_METADATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "WALL_TIME_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: 
"__init__" +tf_proto { + descriptor { + name: "Event" + field { + name: "wall_time" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "step" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "file_version" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + oneof_index: 0 + } + field { + name: "graph_def" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "summary" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary" + oneof_index: 0 + } + field { + name: "log_message" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.LogMessage" + oneof_index: 0 + } + field { + name: "session_log" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SessionLog" + oneof_index: 0 + } + field { + name: "tagged_run_metadata" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TaggedRunMetadata" + oneof_index: 0 + } + field { + name: "meta_graph_def" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + oneof_decl { + name: "what" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt index 875d802a9c..f819b174c0 100644 --- a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt @@ -1,116 +1,86 @@ path: "tensorflow.GPUOptions" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ALLOCATOR_TYPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "ALLOW_GROWTH_FIELD_NUMBER" - mtype: "" - } - member { - name: "DEFERRED_DELETION_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "EXPERIMENTAL_FIELD_NUMBER" - mtype: "" - } - member { - name: "Experimental" - mtype: "" - } - member { - name: "Extensions" - mtype: "" 
- } - member { - name: "FORCE_GPU_COMPATIBLE_FIELD_NUMBER" - mtype: "" - } - member { - name: "PER_PROCESS_GPU_MEMORY_FRACTION_FIELD_NUMBER" - mtype: "" - } - member { - name: "POLLING_ACTIVE_DELAY_USECS_FIELD_NUMBER" - mtype: "" - } - member { - name: "POLLING_INACTIVE_DELAY_MSECS_FIELD_NUMBER" - mtype: "" - } - member { - name: "VISIBLE_DEVICE_LIST_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "GPUOptions" + field { + name: "per_process_gpu_memory_fraction" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "allow_growth" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "allocator_type" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "deferred_deletion_bytes" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "visible_device_list" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "polling_active_delay_usecs" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + 
field { + name: "polling_inactive_delay_msecs" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "force_gpu_compatible" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "experimental" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GPUOptions.Experimental" + } + nested_type { + name: "Experimental" + field { + name: "virtual_devices" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.GPUOptions.Experimental.VirtualDevices" + } + field { + name: "use_unified_memory" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + nested_type { + name: "VirtualDevices" + field { + name: "memory_limit_mb" + number: 1 + label: LABEL_REPEATED + type: TYPE_FLOAT + } + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-graph-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.-graph-def.pbtxt index 1495e847cb..19eccff03d 100644 --- a/tensorflow/tools/api/golden/tensorflow.-graph-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-graph-def.pbtxt @@ -1,92 +1,36 @@ path: "tensorflow.GraphDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "LIBRARY_FIELD_NUMBER" - mtype: "" - } - member { - name: "NODE_FIELD_NUMBER" - mtype: "" - } - member { - name: "VERSIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "VERSION_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } 
- member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "GraphDef" + field { + name: "node" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.NodeDef" + } + field { + name: "versions" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.VersionDef" + } + field { + name: "version" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + options { + deprecated: true + } + } + field { + name: "library" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.FunctionDefLibrary" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-graph-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-graph-options.pbtxt index 0844f891ca..a9f99bc171 100644 --- a/tensorflow/tools/api/golden/tensorflow.-graph-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-graph-options.pbtxt @@ -1,112 +1,67 @@ path: "tensorflow.GraphOptions" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "BUILD_COST_MODEL_AFTER_FIELD_NUMBER" - mtype: "" - } - member { - name: "BUILD_COST_MODEL_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ENABLE_BFLOAT16_SENDRECV_FIELD_NUMBER" - mtype: "" - } - member { - name: "ENABLE_RECV_SCHEDULING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "INFER_SHAPES_FIELD_NUMBER" - mtype: "" - } - member { - name: "OPTIMIZER_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "PLACE_PRUNED_GRAPH_FIELD_NUMBER" - mtype: "" 
- } - member { - name: "REWRITE_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TIMELINE_STEP_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "GraphOptions" + field { + name: "enable_recv_scheduling" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "optimizer_options" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.OptimizerOptions" + } + field { + name: "build_cost_model" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "build_cost_model_after" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "infer_shapes" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "place_pruned_graph" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "enable_bfloat16_sendrecv" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "timeline_step" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: 
"rewrite_options" + number: 10 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.RewriterConfig" + } + reserved_range { + start: 1 + end: 2 + } + reserved_name: "skip_common_subexpression_elimination" } } diff --git a/tensorflow/tools/api/golden/tensorflow.-histogram-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.-histogram-proto.pbtxt index 2567d2fe60..d4402f330b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-histogram-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-histogram-proto.pbtxt @@ -1,104 +1,54 @@ path: "tensorflow.HistogramProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "BUCKET_FIELD_NUMBER" - mtype: "" - } - member { - name: "BUCKET_LIMIT_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "MAX_FIELD_NUMBER" - mtype: "" - } - member { - name: "MIN_FIELD_NUMBER" - mtype: "" - } - member { - name: "NUM_FIELD_NUMBER" - mtype: "" - } - member { - name: "SUM_FIELD_NUMBER" - mtype: "" - } - member { - name: "SUM_SQUARES_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: 
"SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "HistogramProto" + field { + name: "min" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "max" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "num" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "sum" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "sum_squares" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "bucket_limit" + number: 6 + label: LABEL_REPEATED + type: TYPE_DOUBLE + options { + packed: true + } + } + field { + name: "bucket" + number: 7 + label: LABEL_REPEATED + type: TYPE_DOUBLE + options { + packed: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-log-message.pbtxt b/tensorflow/tools/api/golden/tensorflow.-log-message.pbtxt index a43c5eb7e3..5023aa96bf 100644 --- a/tensorflow/tools/api/golden/tensorflow.-log-message.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-log-message.pbtxt @@ -1,112 +1,46 @@ path: "tensorflow.LogMessage" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DEBUGGING" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ERROR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FATAL" - mtype: "" - } - member { - name: "INFO" - mtype: "" - } - member { - name: "LEVEL_FIELD_NUMBER" - mtype: "" - } - member { - name: "Level" - mtype: "" - } - member { - name: "MESSAGE_FIELD_NUMBER" - mtype: "" - } - member { - name: "UNKNOWN" - mtype: "" - } - member { - name: "WARN" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - 
member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "LogMessage" + field { + name: "level" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.LogMessage.Level" + } + field { + name: "message" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + enum_type { + name: "Level" + value { + name: "UNKNOWN" + number: 0 + } + value { + name: "DEBUGGING" + number: 10 + } + value { + name: "INFO" + number: 20 + } + value { + name: "WARN" + number: 30 + } + value { + name: "ERROR" + number: 40 + } + value { + name: "FATAL" + number: 50 + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-collection-def-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-collection-def-entry.pbtxt index 3572126fbf..0ba09bec4b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-collection-def-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-collection-def-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.MetaGraphDef.CollectionDefEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: 
"ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "CollectionDefEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.CollectionDef" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt index b0e9831154..41c62a407b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-meta-info-def.pbtxt @@ -1,104 +1,50 @@ path: "tensorflow.MetaGraphDef.MetaInfoDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ANY_INFO_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "META_GRAPH_VERSION_FIELD_NUMBER" - mtype: "" - } - member { - name: "STRIPPED_DEFAULT_ATTRS_FIELD_NUMBER" - 
mtype: "" - } - member { - name: "STRIPPED_OP_LIST_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAGS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSORFLOW_GIT_VERSION_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSORFLOW_VERSION_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "MetaInfoDef" + field { + name: "meta_graph_version" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "stripped_op_list" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.OpList" + } + field { + name: "any_info" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".google.protobuf.Any" + } + field { + name: "tags" + number: 4 + label: LABEL_REPEATED + type: TYPE_STRING + } + field { + name: "tensorflow_version" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tensorflow_git_version" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "stripped_default_attrs" + number: 7 + label: 
LABEL_OPTIONAL + type: TYPE_BOOL + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-signature-def-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-signature-def-entry.pbtxt index 48fccac99d..73dc414a77 100644 --- a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-signature-def-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.-signature-def-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.MetaGraphDef.SignatureDefEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SignatureDefEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SignatureDef" + } + options { + 
map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.pbtxt index 3e683a8715..d71c2358c9 100644 --- a/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-meta-graph-def.pbtxt @@ -1,112 +1,133 @@ path: "tensorflow.MetaGraphDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ASSET_FILE_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "COLLECTION_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "CollectionDefEntry" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "GRAPH_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "META_INFO_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "MetaInfoDef" - mtype: "" - } - member { - name: "SAVER_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "SIGNATURE_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "SignatureDefEntry" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - 
member_method { - name: "__init__" +tf_proto { + descriptor { + name: "MetaGraphDef" + field { + name: "meta_info_def" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.MetaGraphDef.MetaInfoDef" + } + field { + name: "graph_def" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + field { + name: "saver_def" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SaverDef" + } + field { + name: "collection_def" + number: 4 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.MetaGraphDef.CollectionDefEntry" + } + field { + name: "signature_def" + number: 5 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.MetaGraphDef.SignatureDefEntry" + } + field { + name: "asset_file_def" + number: 6 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.AssetFileDef" + } + nested_type { + name: "MetaInfoDef" + field { + name: "meta_graph_version" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "stripped_op_list" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.OpList" + } + field { + name: "any_info" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".google.protobuf.Any" + } + field { + name: "tags" + number: 4 + label: LABEL_REPEATED + type: TYPE_STRING + } + field { + name: "tensorflow_version" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tensorflow_git_version" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "stripped_default_attrs" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + } + nested_type { + name: "CollectionDefEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.CollectionDef" + } + options { + map_entry: 
true + } + } + nested_type { + name: "SignatureDefEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SignatureDef" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-name-attr-list.-attr-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.-name-attr-list.-attr-entry.pbtxt index 2750bd780c..b119b20877 100644 --- a/tensorflow/tools/api/golden/tensorflow.-name-attr-list.-attr-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-name-attr-list.-attr-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.NameAttrList.AttrEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "AttrEntry" + field { + 
name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.AttrValue" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-name-attr-list.pbtxt b/tensorflow/tools/api/golden/tensorflow.-name-attr-list.pbtxt index d10faf67d0..fcdb411ffc 100644 --- a/tensorflow/tools/api/golden/tensorflow.-name-attr-list.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-name-attr-list.pbtxt @@ -1,88 +1,38 @@ path: "tensorflow.NameAttrList" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ATTR_FIELD_NUMBER" - mtype: "" - } - member { - name: "AttrEntry" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "NameAttrList" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + 
name: "attr" + number: 2 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.NameAttrList.AttrEntry" + } + nested_type { + name: "AttrEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.AttrValue" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-node-def.-attr-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.-node-def.-attr-entry.pbtxt index b1b62d60f1..622e4c3d0f 100644 --- a/tensorflow/tools/api/golden/tensorflow.-node-def.-attr-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-node-def.-attr-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.NodeDef.AttrEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: 
"__init__" +tf_proto { + descriptor { + name: "AttrEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.AttrValue" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-node-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.-node-def.pbtxt index b812b4df2b..646fa8abb9 100644 --- a/tensorflow/tools/api/golden/tensorflow.-node-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-node-def.pbtxt @@ -1,100 +1,56 @@ path: "tensorflow.NodeDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ATTR_FIELD_NUMBER" - mtype: "" - } - member { - name: "AttrEntry" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DEVICE_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "INPUT_FIELD_NUMBER" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "OP_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - 
name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "NodeDef" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "op" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "input" + number: 3 + label: LABEL_REPEATED + type: TYPE_STRING + } + field { + name: "device" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "attr" + number: 5 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.NodeDef.AttrEntry" + } + nested_type { + name: "AttrEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.AttrValue" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-optimizer-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-optimizer-options.pbtxt index 6cac5c4d99..3ccf9d459b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-optimizer-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-optimizer-options.pbtxt @@ -1,132 +1,74 @@ path: "tensorflow.OptimizerOptions" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DEFAULT" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DO_COMMON_SUBEXPRESSION_ELIMINATION_FIELD_NUMBER" - mtype: "" - } - member { - name: "DO_CONSTANT_FOLDING_FIELD_NUMBER" - mtype: "" - } - member { - name: "DO_FUNCTION_INLINING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "GLOBAL_JIT_LEVEL_FIELD_NUMBER" - mtype: "" - } - member { - name: "GlobalJitLevel" - mtype: "" - } - member { - name: "L0" - mtype: "" - } - member { - name: "L1" - mtype: "" - } - member { - name: "Level" - mtype: "" - } - member { - name: "MAX_FOLDED_CONSTANT_IN_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "OFF" 
- mtype: "" - } - member { - name: "ON_1" - mtype: "" - } - member { - name: "ON_2" - mtype: "" - } - member { - name: "OPT_LEVEL_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "OptimizerOptions" + field { + name: "do_common_subexpression_elimination" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "do_constant_folding" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "max_folded_constant_in_bytes" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "do_function_inlining" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "opt_level" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.OptimizerOptions.Level" + } + field { + name: "global_jit_level" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.OptimizerOptions.GlobalJitLevel" + } + enum_type { + name: "Level" + value { + name: "L1" + number: 0 + } + value { + name: "L0" + number: -1 + } + } + 
enum_type { + name: "GlobalJitLevel" + value { + name: "DEFAULT" + number: 0 + } + value { + name: "OFF" + number: -1 + } + value { + name: "ON_1" + number: 1 + } + value { + name: "ON_2" + number: 2 + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-run-metadata.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-metadata.pbtxt index 808fa0fa21..1287940326 100644 --- a/tensorflow/tools/api/golden/tensorflow.-run-metadata.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-run-metadata.pbtxt @@ -1,88 +1,27 @@ path: "tensorflow.RunMetadata" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COST_GRAPH_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "PARTITION_GRAPHS_FIELD_NUMBER" - mtype: "" - } - member { - name: "STEP_STATS_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "RunMetadata" + field { + name: "step_stats" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: 
".tensorflow.StepStats" + } + field { + name: "cost_graph" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.CostGraphDef" + } + field { + name: "partition_graphs" + number: 3 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt index 6a5e46a0b8..537e73aa89 100644 --- a/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-run-options.-experimental.pbtxt @@ -1,80 +1,12 @@ path: "tensorflow.RunOptions.Experimental" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COLLECTIVE_GRAPH_KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Experimental" + field { + name: "collective_graph_key" + number: 1 + label: 
LABEL_OPTIONAL + type: TYPE_INT64 + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt index 65e55883da..cec04a2bf0 100644 --- a/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-run-options.pbtxt @@ -1,128 +1,83 @@ path: "tensorflow.RunOptions" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DEBUG_OPTIONS_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "EXPERIMENTAL_FIELD_NUMBER" - mtype: "" - } - member { - name: "Experimental" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FULL_TRACE" - mtype: "" - } - member { - name: "HARDWARE_TRACE" - mtype: "" - } - member { - name: "INTER_OP_THREAD_POOL_FIELD_NUMBER" - mtype: "" - } - member { - name: "NO_TRACE" - mtype: "" - } - member { - name: "OUTPUT_PARTITION_GRAPHS_FIELD_NUMBER" - mtype: "" - } - member { - name: "REPORT_TENSOR_ALLOCATIONS_UPON_OOM_FIELD_NUMBER" - mtype: "" - } - member { - name: "SOFTWARE_TRACE" - mtype: "" - } - member { - name: "TIMEOUT_IN_MS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TRACE_LEVEL_FIELD_NUMBER" - mtype: "" - } - member { - name: "TraceLevel" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - 
member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "RunOptions" + field { + name: "trace_level" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.RunOptions.TraceLevel" + } + field { + name: "timeout_in_ms" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "inter_op_thread_pool" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "output_partition_graphs" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "debug_options" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.DebugOptions" + } + field { + name: "report_tensor_allocations_upon_oom" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "experimental" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.RunOptions.Experimental" + } + nested_type { + name: "Experimental" + field { + name: "collective_graph_key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + } + enum_type { + name: "TraceLevel" + value { + name: "NO_TRACE" + number: 0 + } + value { + name: "SOFTWARE_TRACE" + number: 1 + } + value { + name: "HARDWARE_TRACE" + number: 2 + } + value { + name: "FULL_TRACE" + number: 3 + } + } + reserved_range { + start: 4 + end: 5 + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-session-log.pbtxt b/tensorflow/tools/api/golden/tensorflow.-session-log.pbtxt index ec66d7f335..259f241874 100644 --- a/tensorflow/tools/api/golden/tensorflow.-session-log.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-session-log.pbtxt @@ -1,108 +1,44 @@ path: "tensorflow.SessionLog" -tf_class { - is_instance: "" - is_instance: "" - member { - name: 
"CHECKPOINT" - mtype: "" - } - member { - name: "CHECKPOINT_PATH_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "MSG_FIELD_NUMBER" - mtype: "" - } - member { - name: "START" - mtype: "" - } - member { - name: "STATUS_FIELD_NUMBER" - mtype: "" - } - member { - name: "STATUS_UNSPECIFIED" - mtype: "" - } - member { - name: "STOP" - mtype: "" - } - member { - name: "SessionStatus" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SessionLog" + field { + name: "status" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.SessionLog.SessionStatus" + } + field { + name: "checkpoint_path" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "msg" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + enum_type { + name: "SessionStatus" + value { + name: "STATUS_UNSPECIFIED" + number: 0 + } + value { + name: "START" + number: 1 + } + value { + name: "STOP" + number: 2 + } + value 
{ + name: "CHECKPOINT" + number: 3 + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-summary-metadata.-plugin-data.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary-metadata.-plugin-data.pbtxt index 067f02ce8c..a66b74b315 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary-metadata.-plugin-data.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary-metadata.-plugin-data.pbtxt @@ -1,84 +1,18 @@ path: "tensorflow.SummaryMetadata.PluginData" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CONTENT_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "PLUGIN_NAME_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "PluginData" + field { + name: "plugin_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "content" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } } } diff --git 
a/tensorflow/tools/api/golden/tensorflow.-summary-metadata.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary-metadata.pbtxt index b9156521cc..c02575b962 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary-metadata.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary-metadata.pbtxt @@ -1,92 +1,40 @@ path: "tensorflow.SummaryMetadata" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DISPLAY_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "PLUGIN_DATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "PluginData" - mtype: "" - } - member { - name: "SUMMARY_DESCRIPTION_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SummaryMetadata" + field { + name: "plugin_data" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SummaryMetadata.PluginData" + } + field { + name: "display_name" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + 
field { + name: "summary_description" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + nested_type { + name: "PluginData" + field { + name: "plugin_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "content" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-summary.-audio.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary.-audio.pbtxt index 781010d75e..94f712073e 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary.-audio.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary.-audio.pbtxt @@ -1,96 +1,36 @@ path: "tensorflow.Summary.Audio" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CONTENT_TYPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ENCODED_AUDIO_STRING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "LENGTH_FRAMES_FIELD_NUMBER" - mtype: "" - } - member { - name: "NUM_CHANNELS_FIELD_NUMBER" - mtype: "" - } - member { - name: "SAMPLE_RATE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - 
name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Audio" + field { + name: "sample_rate" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + } + field { + name: "num_channels" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "length_frames" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "encoded_audio_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + field { + name: "content_type" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-summary.-image.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary.-image.pbtxt index feb9c7ee92..fc1acb483b 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary.-image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary.-image.pbtxt @@ -1,92 +1,30 @@ path: "tensorflow.Summary.Image" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COLORSPACE_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ENCODED_IMAGE_STRING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "HEIGHT_FIELD_NUMBER" - mtype: "" - } - member { - name: "WIDTH_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - 
member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Image" + field { + name: "height" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "width" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "colorspace" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "encoded_image_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-summary.-value.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary.-value.pbtxt index ffb4f45fc5..feb84b6ee9 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary.-value.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary.-value.pbtxt @@ -1,112 +1,74 @@ path: "tensorflow.Summary.Value" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "AUDIO_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "HISTO_FIELD_NUMBER" - mtype: "" - } - member { - name: "IMAGE_FIELD_NUMBER" - mtype: "" - } - member { - name: "METADATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "NODE_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "OBSOLETE_OLD_STYLE_HISTOGRAM_FIELD_NUMBER" - mtype: "" - } - member { - name: "SIMPLE_VALUE_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAG_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - 
member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Value" + field { + name: "node_name" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tag" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "metadata" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SummaryMetadata" + } + field { + name: "simple_value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + oneof_index: 0 + } + field { + name: "obsolete_old_style_histogram" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "image" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Image" + oneof_index: 0 + } + field { + name: "histo" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.HistogramProto" + oneof_index: 0 + } + field { + name: "audio" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Audio" + oneof_index: 0 + } + field { + name: "tensor" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + oneof_index: 0 + } + oneof_decl { + name: "value" + } } } diff --git 
a/tensorflow/tools/api/golden/tensorflow.-summary.pbtxt b/tensorflow/tools/api/golden/tensorflow.-summary.pbtxt index 38de17fa9e..b2bdff7171 100644 --- a/tensorflow/tools/api/golden/tensorflow.-summary.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-summary.pbtxt @@ -1,92 +1,144 @@ path: "tensorflow.Summary" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "Audio" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "Image" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member { - name: "Value" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Summary" + field { + name: "value" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Value" + } + nested_type { + name: "Image" + field { + name: "height" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "width" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: 
"colorspace" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "encoded_image_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + } + nested_type { + name: "Audio" + field { + name: "sample_rate" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + } + field { + name: "num_channels" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "length_frames" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "encoded_audio_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + field { + name: "content_type" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + } + nested_type { + name: "Value" + field { + name: "node_name" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tag" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "metadata" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SummaryMetadata" + } + field { + name: "simple_value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + oneof_index: 0 + } + field { + name: "obsolete_old_style_histogram" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "image" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Image" + oneof_index: 0 + } + field { + name: "histo" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.HistogramProto" + oneof_index: 0 + } + field { + name: "audio" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Audio" + oneof_index: 0 + } + field { + name: "tensor" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + oneof_index: 0 + } + oneof_decl { + name: "value" + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.-tensor-info.-coo-sparse.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.-tensor-info.-coo-sparse.pbtxt index 425c35e067..0064c8460c 100644 --- a/tensorflow/tools/api/golden/tensorflow.-tensor-info.-coo-sparse.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-tensor-info.-coo-sparse.pbtxt @@ -1,88 +1,24 @@ path: "tensorflow.TensorInfo.CooSparse" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DENSE_SHAPE_TENSOR_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "INDICES_TENSOR_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUES_TENSOR_NAME_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "CooSparse" + field { + name: "values_tensor_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "indices_tensor_name" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "dense_shape_tensor_name" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git 
a/tensorflow/tools/api/golden/tensorflow.-tensor-info.pbtxt b/tensorflow/tools/api/golden/tensorflow.-tensor-info.pbtxt index 41ea393be5..63566c808e 100644 --- a/tensorflow/tools/api/golden/tensorflow.-tensor-info.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-tensor-info.pbtxt @@ -1,96 +1,59 @@ path: "tensorflow.TensorInfo" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COO_SPARSE_FIELD_NUMBER" - mtype: "" - } - member { - name: "CooSparse" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DTYPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_SHAPE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "TensorInfo" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + oneof_index: 0 + } + field { + name: "coo_sparse" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: 
".tensorflow.TensorInfo.CooSparse" + oneof_index: 0 + } + field { + name: "dtype" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.DataType" + } + field { + name: "tensor_shape" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + nested_type { + name: "CooSparse" + field { + name: "values_tensor_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "indices_tensor_name" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "dense_shape_tensor_name" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + } + oneof_decl { + name: "encoding" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checker.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checker.pbtxt index bd5c36f390..e09c44cc9c 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checker.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checker.pbtxt @@ -1,80 +1,12 @@ path: "tensorflow.profiler.AdviceProto.Checker" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "REPORTS_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: 
"ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Checker" + field { + name: "reports" + number: 2 + label: LABEL_REPEATED + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checkers-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checkers-entry.pbtxt index 7c8c68e155..8746243549 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checkers-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.-checkers-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.profiler.AdviceProto.CheckersEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - 
member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "CheckersEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.AdviceProto.Checker" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.pbtxt index 1b789f4fc9..a8a8858ccd 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-advice-proto.pbtxt @@ -1,88 +1,41 @@ path: "tensorflow.profiler.AdviceProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CHECKERS_FIELD_NUMBER" - mtype: "" - } - member { - name: "Checker" - mtype: "" - } - member { - name: "CheckersEntry" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: 
"SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "AdviceProto" + field { + name: "checkers" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.AdviceProto.CheckersEntry" + } + nested_type { + name: "CheckersEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.AdviceProto.Checker" + } + options { + map_entry: true + } + } + nested_type { + name: "Checker" + field { + name: "reports" + number: 2 + label: LABEL_REPEATED + type: TYPE_STRING + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.-input-shapes-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.-input-shapes-entry.pbtxt index f0b9605bee..afec73f537 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.-input-shapes-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.-input-shapes-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.profiler.GraphNodeProto.InputShapesEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - 
member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "InputShapesEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt index b80896a8a0..3c83177005 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-graph-node-proto.pbtxt @@ -1,188 +1,191 @@ path: "tensorflow.profiler.GraphNodeProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ACCELERATOR_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "CHILDREN_FIELD_NUMBER" - mtype: "" - } - member { - name: "CPU_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "DEVICES_FIELD_NUMBER" - mtype: "" - } - member { - name: "EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FLOAT_OPS_FIELD_NUMBER" - mtype: "" - } - member { - name: "INPUT_SHAPES_FIELD_NUMBER" - mtype: "" - } - member { - name: "InputShapesEntry" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "OUTPUT_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "PARAMETERS_FIELD_NUMBER" - mtype: "" - } 
- member { - name: "PEAK_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "REQUESTED_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "RESIDUAL_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "RUN_COUNT_FIELD_NUMBER" - mtype: "" - } - member { - name: "SHAPES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_VALUE_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_ACCELERATOR_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_CPU_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_DEFINITION_COUNT_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_FLOAT_OPS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_OUTPUT_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_PARAMETERS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_PEAK_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_REQUESTED_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_RESIDUAL_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_RUN_COUNT_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - 
name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "GraphNodeProto" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tensor_value" + number: 15 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.TFProfTensorProto" + } + field { + name: "run_count" + number: 21 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "exec_micros" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "accelerator_exec_micros" + number: 17 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "cpu_exec_micros" + number: 18 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "requested_bytes" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "peak_bytes" + number: 24 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "residual_bytes" + number: 25 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "output_bytes" + number: 26 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "parameters" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "float_ops" + number: 13 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "devices" + number: 10 + label: LABEL_REPEATED + type: TYPE_STRING + } + field { + name: "total_definition_count" + number: 23 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_run_count" + number: 22 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_exec_micros" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_accelerator_exec_micros" + number: 19 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_cpu_exec_micros" + number: 20 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_requested_bytes" + number: 7 + label: LABEL_OPTIONAL + 
type: TYPE_INT64 + } + field { + name: "total_peak_bytes" + number: 27 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_residual_bytes" + number: 28 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_output_bytes" + number: 29 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_parameters" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_float_ops" + number: 14 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "shapes" + number: 11 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + field { + name: "input_shapes" + number: 16 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.GraphNodeProto.InputShapesEntry" + } + field { + name: "children" + number: 12 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.GraphNodeProto" + } + nested_type { + name: "InputShapesEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorShapeProto" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt index 33deff6497..2b08a05437 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-multi-graph-node-proto.pbtxt @@ -1,160 +1,134 @@ path: "tensorflow.profiler.MultiGraphNodeProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "ACCELERATOR_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "CHILDREN_FIELD_NUMBER" - mtype: "" - } - member { - name: "CPU_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: 
"EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FLOAT_OPS_FIELD_NUMBER" - mtype: "" - } - member { - name: "GRAPH_NODES_FIELD_NUMBER" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "OUTPUT_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "PARAMETERS_FIELD_NUMBER" - mtype: "" - } - member { - name: "PEAK_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "REQUESTED_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "RESIDUAL_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_ACCELERATOR_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_CPU_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_EXEC_MICROS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_FLOAT_OPS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_OUTPUT_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_PARAMETERS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_PEAK_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_REQUESTED_BYTES_FIELD_NUMBER" - mtype: "" - } - member { - name: "TOTAL_RESIDUAL_BYTES_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } 
- member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "MultiGraphNodeProto" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "exec_micros" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "accelerator_exec_micros" + number: 12 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "cpu_exec_micros" + number: 13 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "requested_bytes" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "peak_bytes" + number: 16 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "residual_bytes" + number: 17 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "output_bytes" + number: 18 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "parameters" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "float_ops" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_exec_micros" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_accelerator_exec_micros" + number: 14 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_cpu_exec_micros" + number: 15 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_requested_bytes" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_peak_bytes" + number: 19 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_residual_bytes" + number: 20 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_output_bytes" + number: 21 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_parameters" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "total_float_ops" 
+ number: 9 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "graph_nodes" + number: 10 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.GraphNodeProto" + } + field { + name: "children" + number: 11 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.MultiGraphNodeProto" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.-id-to-string-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.-id-to-string-entry.pbtxt index 8c4727cf35..b3adc50c7e 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.-id-to-string-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.-id-to-string-entry.pbtxt @@ -1,84 +1,21 @@ path: "tensorflow.profiler.OpLogProto.IdToStringEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: 
"WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "IdToStringEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.pbtxt index 1071a82b5c..7510c566ba 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-op-log-proto.pbtxt @@ -1,88 +1,38 @@ path: "tensorflow.profiler.OpLogProto" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "ID_TO_STRING_FIELD_NUMBER" - mtype: "" - } - member { - name: "IdToStringEntry" - mtype: "" - } - member { - name: "LOG_ENTRIES_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + 
descriptor { + name: "OpLogProto" + field { + name: "log_entries" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.OpLogEntry" + } + field { + name: "id_to_string" + number: 2 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.tfprof.OpLogProto.IdToStringEntry" + } + nested_type { + name: "IdToStringEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-event.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-event.pbtxt index ab3449d80f..eb99d0f533 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-event.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-event.pbtxt @@ -1,112 +1,74 @@ path: "tensorflow.summary.Event" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FILE_VERSION_FIELD_NUMBER" - mtype: "" - } - member { - name: "GRAPH_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "LOG_MESSAGE_FIELD_NUMBER" - mtype: "" - } - member { - name: "META_GRAPH_DEF_FIELD_NUMBER" - mtype: "" - } - member { - name: "SESSION_LOG_FIELD_NUMBER" - mtype: "" - } - member { - name: "STEP_FIELD_NUMBER" - mtype: "" - } - member { - name: "SUMMARY_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAGGED_RUN_METADATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "WALL_TIME_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - 
member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Event" + field { + name: "wall_time" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_DOUBLE + } + field { + name: "step" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "file_version" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + oneof_index: 0 + } + field { + name: "graph_def" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "summary" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary" + oneof_index: 0 + } + field { + name: "log_message" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.LogMessage" + oneof_index: 0 + } + field { + name: "session_log" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SessionLog" + oneof_index: 0 + } + field { + name: "tagged_run_metadata" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TaggedRunMetadata" + oneof_index: 0 + } + field { + name: "meta_graph_def" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + oneof_decl { + name: "what" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-session-log.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-session-log.pbtxt index 92ca4872ca..73de73869c 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.summary.-session-log.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-session-log.pbtxt @@ -1,108 +1,44 @@ path: "tensorflow.summary.SessionLog" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CHECKPOINT" - mtype: "" - } - member { - name: "CHECKPOINT_PATH_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "MSG_FIELD_NUMBER" - mtype: "" - } - member { - name: "START" - mtype: "" - } - member { - name: "STATUS_FIELD_NUMBER" - mtype: "" - } - member { - name: "STATUS_UNSPECIFIED" - mtype: "" - } - member { - name: "STOP" - mtype: "" - } - member { - name: "SessionStatus" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SessionLog" + field { + name: "status" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.SessionLog.SessionStatus" + } + field { + name: "checkpoint_path" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + 
field { + name: "msg" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + enum_type { + name: "SessionStatus" + value { + name: "STATUS_UNSPECIFIED" + number: 0 + } + value { + name: "START" + number: 1 + } + value { + name: "STOP" + number: 2 + } + value { + name: "CHECKPOINT" + number: 3 + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-summary-description.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-summary-description.pbtxt index f93da2196a..4a8b59cf02 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-summary-description.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-summary-description.pbtxt @@ -1,80 +1,12 @@ path: "tensorflow.summary.SummaryDescription" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "TYPE_HINT_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SummaryDescription" + field { + name: "type_hint" + number: 1 + 
label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-audio.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-audio.pbtxt index 605e305e82..8b271cf58f 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-audio.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-audio.pbtxt @@ -1,96 +1,36 @@ path: "tensorflow.summary.Summary.Audio" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CONTENT_TYPE_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ENCODED_AUDIO_STRING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "LENGTH_FRAMES_FIELD_NUMBER" - mtype: "" - } - member { - name: "NUM_CHANNELS_FIELD_NUMBER" - mtype: "" - } - member { - name: "SAMPLE_RATE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Audio" + field { + name: "sample_rate" + number: 1 + label: LABEL_OPTIONAL + type: 
TYPE_FLOAT + } + field { + name: "num_channels" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "length_frames" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "encoded_audio_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + field { + name: "content_type" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-image.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-image.pbtxt index 0646972196..dbbc02dd05 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-image.pbtxt @@ -1,92 +1,30 @@ path: "tensorflow.summary.Summary.Image" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "COLORSPACE_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "ENCODED_IMAGE_STRING_FIELD_NUMBER" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "HEIGHT_FIELD_NUMBER" - mtype: "" - } - member { - name: "WIDTH_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: 
"SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Image" + field { + name: "height" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "width" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "colorspace" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "encoded_image_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-value.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-value.pbtxt index b319cd03d9..4176171cd9 100644 --- a/tensorflow/tools/api/golden/tensorflow.summary.-summary.-value.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-summary.-value.pbtxt @@ -1,112 +1,74 @@ path: "tensorflow.summary.Summary.Value" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "AUDIO_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "HISTO_FIELD_NUMBER" - mtype: "" - } - member { - name: "IMAGE_FIELD_NUMBER" - mtype: "" - } - member { - name: "METADATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "NODE_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "OBSOLETE_OLD_STYLE_HISTOGRAM_FIELD_NUMBER" - mtype: "" - } - member { - name: "SIMPLE_VALUE_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAG_FIELD_NUMBER" - mtype: "" - } - member { - name: "TENSOR_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - 
} - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Value" + field { + name: "node_name" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tag" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "metadata" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SummaryMetadata" + } + field { + name: "simple_value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + oneof_index: 0 + } + field { + name: "obsolete_old_style_histogram" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "image" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Image" + oneof_index: 0 + } + field { + name: "histo" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.HistogramProto" + oneof_index: 0 + } + field { + name: "audio" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Audio" + oneof_index: 0 + } + field { + name: "tensor" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + oneof_index: 0 + } + oneof_decl { + name: "value" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-summary.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-summary.pbtxt index 132ef1b7d2..d6c5e3a87a 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.summary.-summary.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-summary.pbtxt @@ -1,92 +1,144 @@ path: "tensorflow.summary.Summary" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "Audio" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "Image" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member { - name: "Value" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Summary" + field { + name: "value" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Value" + } + nested_type { + name: "Image" + field { + name: "height" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "width" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "colorspace" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "encoded_image_string" + number: 4 + label: 
LABEL_OPTIONAL + type: TYPE_BYTES + } + } + nested_type { + name: "Audio" + field { + name: "sample_rate" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + } + field { + name: "num_channels" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "length_frames" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT64 + } + field { + name: "encoded_audio_string" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } + field { + name: "content_type" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + } + nested_type { + name: "Value" + field { + name: "node_name" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tag" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "metadata" + number: 9 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.SummaryMetadata" + } + field { + name: "simple_value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + oneof_index: 0 + } + field { + name: "obsolete_old_style_histogram" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_BYTES + oneof_index: 0 + } + field { + name: "image" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Image" + oneof_index: 0 + } + field { + name: "histo" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.HistogramProto" + oneof_index: 0 + } + field { + name: "audio" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Summary.Audio" + oneof_index: 0 + } + field { + name: "tensor" + number: 8 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.TensorProto" + oneof_index: 0 + } + oneof_decl { + name: "value" + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.summary.-tagged-run-metadata.pbtxt b/tensorflow/tools/api/golden/tensorflow.summary.-tagged-run-metadata.pbtxt index 4dce20819d..27c8873320 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.summary.-tagged-run-metadata.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.summary.-tagged-run-metadata.pbtxt @@ -1,84 +1,18 @@ path: "tensorflow.summary.TaggedRunMetadata" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "RUN_METADATA_FIELD_NUMBER" - mtype: "" - } - member { - name: "TAG_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "TaggedRunMetadata" + field { + name: "tag" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "run_metadata" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BYTES + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-bytes-list.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-bytes-list.pbtxt index 8cf52b817f..87e4f160e5 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-bytes-list.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-bytes-list.pbtxt @@ -1,80 
+1,12 @@ path: "tensorflow.train.BytesList" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "BytesList" + field { + name: "value" + number: 1 + label: LABEL_REPEATED + type: TYPE_BYTES + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-cluster-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-cluster-def.pbtxt index 93ff856b09..f9de26839f 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-cluster-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-cluster-def.pbtxt @@ -1,80 +1,13 @@ path: "tensorflow.train.ClusterDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "JOB_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - 
member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "ClusterDef" + field { + name: "job" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.JobDef" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-example.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-example.pbtxt index f7215a2037..23c30f1ef4 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-example.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-example.pbtxt @@ -1,80 +1,13 @@ path: "tensorflow.train.Example" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FEATURES_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: 
"HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Example" + field { + name: "features" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Features" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-feature-list.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-feature-list.pbtxt index 3ad98354d6..2a8b3714fc 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-feature-list.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-feature-list.pbtxt @@ -1,80 +1,13 @@ path: "tensorflow.train.FeatureList" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FEATURE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } 
- member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "FeatureList" + field { + name: "feature" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.Feature" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.-feature-list-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.-feature-list-entry.pbtxt index cd171f4ca3..cd1d56e606 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.-feature-list-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.-feature-list-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.train.FeatureLists.FeatureListEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - 
name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "FeatureListEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.FeatureList" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.pbtxt index 3d95017d58..3c183a6476 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-feature-lists.pbtxt @@ -1,84 +1,32 @@ path: "tensorflow.train.FeatureLists" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FEATURE_LIST_FIELD_NUMBER" - mtype: "" - } - member { - name: "FeatureListEntry" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - 
name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "FeatureLists" + field { + name: "feature_list" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.FeatureLists.FeatureListEntry" + } + nested_type { + name: "FeatureListEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.FeatureList" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-feature.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-feature.pbtxt index 9cca132bba..5d0eb871c2 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-feature.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-feature.pbtxt @@ -1,88 +1,33 @@ path: "tensorflow.train.Feature" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "BYTES_LIST_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FLOAT_LIST_FIELD_NUMBER" - mtype: "" - } - member { - name: "INT64_LIST_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: 
"SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Feature" + field { + name: "bytes_list" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.BytesList" + oneof_index: 0 + } + field { + name: "float_list" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.FloatList" + oneof_index: 0 + } + field { + name: "int64_list" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Int64List" + oneof_index: 0 + } + oneof_decl { + name: "kind" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-features.-feature-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-features.-feature-entry.pbtxt index 858aee0341..f912005f1c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-features.-feature-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-features.-feature-entry.pbtxt @@ -1,84 +1,22 @@ path: "tensorflow.train.Features.FeatureEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - 
member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "FeatureEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Feature" + } + options { + map_entry: true + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-features.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-features.pbtxt index 49cd12153b..b788ca1d57 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-features.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-features.pbtxt @@ -1,84 +1,32 @@ path: "tensorflow.train.Features" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FEATURE_FIELD_NUMBER" - mtype: "" - } - member { - name: "FeatureEntry" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: 
"RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Features" + field { + name: "feature" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.Features.FeatureEntry" + } + nested_type { + name: "FeatureEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Feature" + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-float-list.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-float-list.pbtxt index e3f01334b5..55d3b46f20 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-float-list.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-float-list.pbtxt @@ -1,80 +1,15 @@ path: "tensorflow.train.FloatList" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: 
"RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "FloatList" + field { + name: "value" + number: 1 + label: LABEL_REPEATED + type: TYPE_FLOAT + options { + packed: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-int64-list.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-int64-list.pbtxt index 8917dc122c..1de92b3ab7 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-int64-list.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-int64-list.pbtxt @@ -1,80 +1,15 @@ path: "tensorflow.train.Int64List" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "Int64List" + 
field { + name: "value" + number: 1 + label: LABEL_REPEATED + type: TYPE_INT64 + options { + packed: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-job-def.-tasks-entry.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-job-def.-tasks-entry.pbtxt index ac6d81541a..58115590a5 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-job-def.-tasks-entry.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-job-def.-tasks-entry.pbtxt @@ -1,84 +1,21 @@ path: "tensorflow.train.JobDef.TasksEntry" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "KEY_FIELD_NUMBER" - mtype: "" - } - member { - name: "VALUE_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "TasksEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + options { + map_entry: true + } } } 
diff --git a/tensorflow/tools/api/golden/tensorflow.train.-job-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-job-def.pbtxt index ce34537fa1..d7eb505e27 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-job-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-job-def.pbtxt @@ -1,88 +1,37 @@ path: "tensorflow.train.JobDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "TASKS_FIELD_NUMBER" - mtype: "" - } - member { - name: "TasksEntry" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "JobDef" + field { + name: "name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "tasks" + number: 2 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.JobDef.TasksEntry" + } + nested_type { + name: "TasksEntry" + field { + name: "key" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + 
field { + name: "value" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + options { + map_entry: true + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-saver-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-saver-def.pbtxt index 84498a64f5..4ec99469e4 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-saver-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-saver-def.pbtxt @@ -1,120 +1,64 @@ path: "tensorflow.train.SaverDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CheckpointFormatVersion" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FILENAME_TENSOR_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "KEEP_CHECKPOINT_EVERY_N_HOURS_FIELD_NUMBER" - mtype: "" - } - member { - name: "LEGACY" - mtype: "" - } - member { - name: "MAX_TO_KEEP_FIELD_NUMBER" - mtype: "" - } - member { - name: "RESTORE_OP_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "SAVE_TENSOR_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "SHARDED_FIELD_NUMBER" - mtype: "" - } - member { - name: "V1" - mtype: "" - } - member { - name: "V2" - mtype: "" - } - member { - name: "VERSION_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: 
"RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SaverDef" + field { + name: "filename_tensor_name" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "save_tensor_name" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "restore_op_name" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "max_to_keep" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "sharded" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } + field { + name: "keep_checkpoint_every_n_hours" + number: 6 + label: LABEL_OPTIONAL + type: TYPE_FLOAT + } + field { + name: "version" + number: 7 + label: LABEL_OPTIONAL + type: TYPE_ENUM + type_name: ".tensorflow.SaverDef.CheckpointFormatVersion" + } + enum_type { + name: "CheckpointFormatVersion" + value { + name: "LEGACY" + number: 0 + } + value { + name: "V1" + number: 1 + } + value { + name: "V2" + number: 2 + } + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-sequence-example.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-sequence-example.pbtxt index 9ab9553702..6a4553bbc1 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-sequence-example.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-sequence-example.pbtxt @@ -1,84 +1,20 @@ path: "tensorflow.train.SequenceExample" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CONTEXT_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "FEATURE_LISTS_FIELD_NUMBER" - mtype: "" - } - member_method { - name: "ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - 
member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "SequenceExample" + field { + name: "context" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.Features" + } + field { + name: "feature_lists" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.FeatureLists" + } } } diff --git a/tensorflow/tools/api/golden/tensorflow.train.-server-def.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-server-def.pbtxt index af0a3b73cc..83ee7b3eb9 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-server-def.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-server-def.pbtxt @@ -1,96 +1,38 @@ path: "tensorflow.train.ServerDef" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "CLUSTER_FIELD_NUMBER" - mtype: "" - } - member { - name: "DEFAULT_SESSION_CONFIG_FIELD_NUMBER" - mtype: "" - } - member { - name: "DESCRIPTOR" - mtype: "" - } - member { - name: "Extensions" - mtype: "" - } - member { - name: "JOB_NAME_FIELD_NUMBER" - mtype: "" - } - member { - name: "PROTOCOL_FIELD_NUMBER" - mtype: "" - } - member { - name: "TASK_INDEX_FIELD_NUMBER" - mtype: "" - } - member_method { - name: 
"ByteSize" - } - member_method { - name: "Clear" - } - member_method { - name: "ClearExtension" - } - member_method { - name: "ClearField" - } - member_method { - name: "CopyFrom" - } - member_method { - name: "DiscardUnknownFields" - } - member_method { - name: "FindInitializationErrors" - } - member_method { - name: "FromString" - } - member_method { - name: "HasExtension" - } - member_method { - name: "HasField" - } - member_method { - name: "IsInitialized" - } - member_method { - name: "ListFields" - } - member_method { - name: "MergeFrom" - } - member_method { - name: "MergeFromString" - } - member_method { - name: "ParseFromString" - } - member_method { - name: "RegisterExtension" - } - member_method { - name: "SerializePartialToString" - } - member_method { - name: "SerializeToString" - } - member_method { - name: "SetInParent" - } - member_method { - name: "WhichOneof" - } - member_method { - name: "__init__" +tf_proto { + descriptor { + name: "ServerDef" + field { + name: "cluster" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.ClusterDef" + } + field { + name: "job_name" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "task_index" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_INT32 + } + field { + name: "default_session_config" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.ConfigProto" + } + field { + name: "protocol" + number: 5 + label: LABEL_OPTIONAL + type: TYPE_STRING + } } } diff --git a/tensorflow/tools/api/lib/api_objects.proto b/tensorflow/tools/api/lib/api_objects.proto index 0966a5f1d5..7dcde0bbc3 100644 --- a/tensorflow/tools/api/lib/api_objects.proto +++ b/tensorflow/tools/api/lib/api_objects.proto @@ -1,5 +1,7 @@ syntax = "proto2"; +import "google/protobuf/descriptor.proto"; + package third_party.tensorflow.tools.api; message TFAPIMember { @@ -24,8 +26,13 @@ message TFAPIClass { repeated TFAPIMethod member_method = 3; }; +message 
TFAPIProto { + optional google.protobuf.DescriptorProto descriptor = 1; +}; + message TFAPIObject { optional string path = 1; optional TFAPIModule tf_module = 2; optional TFAPIClass tf_class = 3; + optional TFAPIProto tf_proto = 4; }; diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py index 0b30f7b4d1..1cf330e702 100644 --- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py +++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf import message from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect @@ -101,6 +102,11 @@ def _SanitizedMRO(obj): return return_list +def _IsProtoClass(obj): + """Returns whether the passed obj is a Protocol Buffer class.""" + return isinstance(obj, type) and issubclass(obj, message.Message) + + class PythonObjectToProtoVisitor(object): """A visitor that summarizes given python objects as protobufs.""" @@ -153,6 +159,13 @@ class PythonObjectToProtoVisitor(object): # Store the constructed module object. self._protos[lib_path] = api_objects_pb2.TFAPIObject( path=lib_path, tf_module=module_obj) + elif _IsProtoClass(parent): + proto_obj = api_objects_pb2.TFAPIProto() + parent.DESCRIPTOR.CopyToProto(proto_obj.descriptor) + + # Store the constructed proto object. + self._protos[lib_path] = api_objects_pb2.TFAPIObject( + path=lib_path, tf_proto=proto_obj) elif tf_inspect.isclass(parent): # Construct a class. class_obj = api_objects_pb2.TFAPIClass() @@ -161,7 +174,7 @@ class PythonObjectToProtoVisitor(object): if name in parent_corner_cases: # If we have an empty entry, skip this object. 
if parent_corner_cases[name]: - module_obj.member.add(**(parent_corner_cases[name])) + class_obj.member.add(**(parent_corner_cases[name])) else: _AddMember(name, child, class_obj) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 1ad6b6d1c0..90375a794f 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -35,6 +35,7 @@ import unittest import tensorflow as tf +from google.protobuf import message from google.protobuf import text_format from tensorflow.python.lib.io import file_io @@ -195,6 +196,25 @@ class ApiCompatibilityTest(test.TestCase): else: logging.info('No differences found between API and golden.') + def testNoSubclassOfMessage(self): + + def Visit(path, parent, unused_children): + """A Visitor that crashes on subclasses of generated proto classes.""" + # If the traversed object is a proto Message class + if not (isinstance(parent, type) and + issubclass(parent, message.Message)): + return + if parent is message.Message: + return + # Check that it is a direct subclass of Message. + if message.Message not in parent.__bases__: + raise NotImplementedError( + 'Object tf.%s is a subclass of a generated proto Message. ' + 'They are not yet supported by the API tools.' % path) + visitor = public_api.PublicAPIVisitor(Visit) + visitor.do_not_descend_map['tf'].append('contrib') + traverse.traverse(tf, visitor) + @unittest.skipUnless( sys.version_info.major == 2, 'API compabitility test goldens are generated using python2.') -- GitLab From 2d83e131b930581b15a50538a020bda30af08ad4 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Mon, 28 May 2018 06:32:04 -0700 Subject: [PATCH 191/902] Sort variables in C++ instead of Python. 
PiperOrigin-RevId: 198298103 --- tensorflow/python/eager/backprop.py | 21 +++++++---------- tensorflow/python/eager/pywrap_tfe.h | 3 ++- tensorflow/python/eager/pywrap_tfe_src.cc | 28 +++++++++++++++++++---- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 2d859dca16..b2e6c60021 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -207,16 +207,14 @@ def implicit_val_and_grad(f): f.__name__)) finally: tape.pop_tape(this_tape) - # Sorting variables by id, which is monotonically increasing in construction - # order. This ensures unique order across executions. - # TODO(josh11b): Move the sort to the C++ implementation in pywrap_tfe_src.cc. - variables = list(sorted(this_tape.watched_variables(), - key=lambda v: v.handle._id)) # pylint: disable=protected-access - sources = [x.handle for x in variables] - - if not sources: + # Note: variables are returned in construction order. This ensures unique + # order across executions. + variables = this_tape.watched_variables() + if not variables: raise ValueError("No trainable variables were accessed while the " "function was being computed.") + + sources = [v.handle for v in variables] grad = imperative_grad.imperative_grad(_default_vspace, this_tape, nest.flatten(end_node), @@ -801,11 +799,8 @@ class GradientTape(object): self._push_tape() def watched_variables(self): - # Sorting variables by id, which is monotonically increasing in construction - # order. This ensures unique order across executions. - # TODO(josh11b): Move the sort to the C++ implementation in pywrap_tfe_src.cc. 
- return list(sorted(self._tape.watched_variables(), - key=lambda v: v.handle._id)) # pylint: disable=protected-access + """Returns variables watched by this tape in order of construction.""" + return self._tape.watched_variables() def gradient(self, target, sources, output_gradients=None): """Computes the gradient using operations recorded in context of this tape. diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index c502fe9e85..a916a75f00 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -197,7 +197,8 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, PyObject* results, PyObject* name); -// Returns the set of variables watched by the given tape. +// Returns all variables watched by the given tape in the order those variables +// were created. PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); // Returns an EagerTensor of dimension [len(`tensors`)] containing diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9bbb6f5941..52b90504f3 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -873,6 +873,22 @@ static tensorflow::DataType FastTensorDtype(PyObject* tensor) { return static_cast(id); } +static tensorflow::int64 FastHandleId(PyObject* variable) { + PyObject* handle = PyObject_GetAttrString(variable, "handle"); + if (handle == nullptr) { + return -1; + } + tensorflow::int64 id = FastTensorId(handle); + Py_DECREF(handle); + return id; +} + +struct CompareByHandleId { + bool operator()(PyObject* lhs, PyObject* rhs) { + return FastHandleId(lhs) < FastHandleId(rhs); + } +}; + class GradientTape : public tensorflow::eager::GradientTape { public: @@ -904,12 +920,12 @@ class GradientTape } } - const std::unordered_set WatchedVariables() { + const std::set WatchedVariables() { return watched_variables_; } private: - std::unordered_set 
watched_variables_; + std::set watched_variables_; }; typedef struct { @@ -1201,11 +1217,13 @@ void TFE_Py_TapeSetWatchVariable(PyObject* variable) { } PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) { - const std::unordered_set& watched_variables = + const auto& watched_variables = reinterpret_cast(tape)->tape->WatchedVariables(); - PyObject* result = PySet_New(nullptr); + PyObject* result = PyTuple_New(watched_variables.size()); + Py_ssize_t pos = 0; for (PyObject* variable : watched_variables) { - PySet_Add(result, variable); + PyTuple_SET_ITEM(result, pos++, variable); + Py_INCREF(variable); } return result; } -- GitLab From 69095610798ec7def94fc453dfeaff758e0ee9cd Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Mon, 28 May 2018 21:50:21 +0800 Subject: [PATCH 192/902] generate-pc.sh: add option to set libdir Signed-off-by: Jason Zaman --- tensorflow/c/generate-pc.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh index 02a6a58b61..7184ad68fb 100755 --- a/tensorflow/c/generate-pc.sh +++ b/tensorflow/c/generate-pc.sh @@ -15,10 +15,12 @@ # ============================================================================== TF_PREFIX='/usr/local' +LIBDIR='lib' usage() { echo "Usage: $0 OPTIONS" echo -e "-p, --prefix\tset installation prefix (default: /usr/local)" + echo -e "-l, --libdir\tset lib directory (default: lib)" echo -e "-v, --version\tset TensorFlow version" echo -e "-h, --help\tdisplay this message" } @@ -26,7 +28,7 @@ usage() { [ $# == 0 ] && usage && exit 0 # read the options -ARGS=$(getopt -o p:v:h --long prefix:,version:,help -n $0 -- "$@") +ARGS=$(getopt -o p:l:v:h --long prefix:,libdir:,version:,help -n $0 -- "$@") eval set -- "$ARGS" # extract options and their arguments into variables. 
@@ -38,6 +40,11 @@ while true ; do "") shift 2 ;; *) TF_PREFIX=$2 ; shift 2 ;; esac ;; + -l|--libdir) + case "$2" in + "") shift 2 ;; + *) LIBDIR=$2 ; shift 2 ;; + esac ;; -v|--version) case "$2" in "") shift 2 ;; @@ -55,7 +62,7 @@ echo "Generating pkgconfig file for TensorFlow $TF_VERSION in $TF_PREFIX" cat << EOF > tensorflow.pc prefix=${TF_PREFIX} exec_prefix=\${prefix} -libdir=\${exec_prefix}/lib +libdir=\${exec_prefix}/${LIBDIR} includedir=\${prefix}/include Name: TensorFlow -- GitLab From b05a6b5c4cb685b19b8c09693d40d4743af79dea Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 28 May 2018 09:33:49 -0700 Subject: [PATCH 193/902] Adding tf.data optimization for rewriting `map(...).batch(...)` to `map_and_batch(...)`. PiperOrigin-RevId: 198310806 --- .../core/grappler/optimizers/data/BUILD | 42 ++++ .../optimizers/data/map_and_batch_fusion.cc | 133 +++++++++++++ .../optimizers/data/map_and_batch_fusion.h | 46 +++++ .../data/map_and_batch_fusion_test.cc | 184 ++++++++++++++++++ 4 files changed, 405 insertions(+) create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 29ebb9a69e..d3fe7df583 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -32,3 +32,45 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +cc_library( + name = "map_and_batch_fusion", + srcs = ["map_and_batch_fusion.cc"], + hdrs = [ + "map_and_batch_fusion.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:graph_view", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + 
"//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + ] + tf_protos_all(), +) + +tf_cc_test( + name = "map_and_batch_fusion_test", + srcs = ["map_and_batch_fusion_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + ":map_and_batch_fusion", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + ], +) + +cc_library( + name = "data", + visibility = ["//visibility:public"], + deps = [ + ":map_and_batch_fusion", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc new file mode 100644 index 0000000000..5b8df61c48 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc @@ -0,0 +1,133 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h" + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/graph_view.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { +namespace grappler { + +Status MapAndBatchFusion::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) { + *output = item.graph; + GraphView graph(output); + std::set nodes_to_delete; + for (const NodeDef& node : item.graph.node()) { + if (node.op() != "BatchDataset") { + continue; + } + + // Use a more descriptive variable name now that we now the node type. + NodeDef batch_node(node); + GraphView::InputPort input_port = graph.GetInputPort(batch_node.name(), 0); + NodeDef* node2 = graph.GetRegularFanin(input_port).node; + if (node2->op() != "MapDataset" && node2->op() != "ParallelMapDataset") { + continue; + } + + // Use a more descriptive variable name now that we now the node type. + NodeDef* map_node = node2; + NodeDef* new_node = output->mutable_node()->Add(); + new_node->set_op("MapAndBatchDatasetV2"); + new_node->set_name( + strings::StrCat("MapAndBatchDatasetV2/_", output->node_size())); + + // Set the `input` input argument. + new_node->add_input(map_node->input(0)); + + // Set the `other_arguments` input arguments. 
+ int num_other_args; + if (map_node->op() == "ParallelMapDataset") { + num_other_args = map_node->input_size() - 2; + } else { + num_other_args = map_node->input_size() - 1; + } + for (int i = 0; i < num_other_args; i++) { + new_node->add_input(map_node->input(i + 1)); + } + + // Set the `batch_size` input argument. + new_node->add_input(batch_node.input(1)); + + // Set the `num_parallel_calls` input argument. + if (map_node->op() == "ParallelMapDataset") { + // The type of the `num_parallel_calls` argument in ParallelMapDataset + // and MapAndBatchDataset is different (int32 and int64 respectively) + // so we cannot reuse the same Const node and thus create a new one. + NodeDef* v = graph.GetNode(map_node->input(map_node->input_size() - 1)); + NodeDef* tmp; + TF_RETURN_IF_ERROR(graph_utils::AddScalarConstNode( + v->attr().at("value").tensor().int_val(0), output, &tmp)); + new_node->add_input(tmp->name()); + } else { + NodeDef* tmp; + TF_RETURN_IF_ERROR( + graph_utils::AddScalarConstNode(1, output, &tmp)); + new_node->add_input(tmp->name()); + } + + // Set the `drop_remainder` input argument. + { + NodeDef* tmp; + TF_RETURN_IF_ERROR( + graph_utils::AddScalarConstNode(false, output, &tmp)); + new_node->add_input(tmp->name()); + } + + // Set `f` and `Targuments` attributes. + new_node->mutable_attr()->insert(map_node->attr().begin(), + map_node->attr().end()); + // Set `output_types` and `output_shapes` attributes. + new_node->mutable_attr()->insert(batch_node.attr().begin(), + batch_node.attr().end()); + + // Mark the `Map` and `Batch` nodes for removal. + nodes_to_delete.insert(map_node->name()); + nodes_to_delete.insert(batch_node.name()); + + // Update the input of the outputs of the `Batch` node to use + // `MapAndBatch`. 
+ GraphView::OutputPort output_port = + graph.GetOutputPort(batch_node.name(), 0); + auto fanout = graph.GetFanout(output_port); + for (auto it = fanout.begin(); it != fanout.end(); ++it) { + NodeDef* node = it->node; + node->set_input(0, new_node->name()); + } + } + TF_RETURN_IF_ERROR(graph_utils::DeleteNodes(nodes_to_delete, output)); + return Status::OK(); +} + +void MapAndBatchFusion::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, + double result) { + // no-op +} + +REGISTER_GRAPH_OPTIMIZER_AS(MapAndBatchFusion, "map_and_batch_fusion"); + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h new file mode 100644 index 0000000000..a5a4d91df6 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_FUSION_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_FUSION_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +class MapAndBatchFusion : public CustomGraphOptimizer { + public: + MapAndBatchFusion() {} + ~MapAndBatchFusion() override {} + + string name() const override { return "map_and_batch_fusion"; }; + + Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer* config = + nullptr) override { + return Status::OK(); + } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_FUSION_H_ diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc new file mode 100644 index 0000000000..51e7f37e7e --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h" + +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(MapAndBatchFusionTest, FuseMapAndBatchNodesIntoOne) { + std::vector> empty_attributes; + + GrapplerItem item; + GraphDef *graph = &item.graph; + NodeDef *start_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); + NodeDef *stop_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(10, graph, &stop_node)); + NodeDef *step_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(1, graph, &step_node)); + + std::vector range_inputs(3); + range_inputs[0] = start_node->name(); + range_inputs[1] = stop_node->name(); + range_inputs[2] = step_node->name(); + NodeDef *range_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, + empty_attributes, graph, &range_node)); + NodeDef *captured_input_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode( + "hello", graph, &captured_input_node)); + + std::vector map_inputs(2); + map_inputs[0] = range_node->name(); + map_inputs[1] = captured_input_node->name(); + NodeDef *map_node; + TF_ASSERT_OK(graph_utils::AddNode("", "MapDataset", map_inputs, + empty_attributes, graph, &map_node)); + + NodeDef *batch_size_node; + TF_ASSERT_OK( + graph_utils::AddScalarConstNode(5, graph, &batch_size_node)); + std::vector batch_inputs(2); + batch_inputs[0] = map_node->name(); + batch_inputs[1] = batch_size_node->name(); + NodeDef *batch_node; + TF_ASSERT_OK(graph_utils::AddNode("", "BatchDataset", batch_inputs, + empty_attributes, graph, &batch_node)); + + MapAndBatchFusion optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + 
+ EXPECT_FALSE(graph_utils::ContainsNodeWithName(map_node->name(), output)); + EXPECT_FALSE(graph_utils::ContainsNodeWithName(batch_node->name(), output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output)); + NodeDef map_and_batch_node = + output.node(graph_utils::FindNodeWithOp("MapAndBatchDatasetV2", output)); + EXPECT_EQ(map_and_batch_node.input_size(), 5); + EXPECT_EQ(map_and_batch_node.input(0), map_node->input(0)); + EXPECT_EQ(map_and_batch_node.input(1), map_node->input(1)); + EXPECT_EQ(map_and_batch_node.input(2), batch_node->input(1)); + NodeDef num_parallel_calls_node = output.node( + graph_utils::FindNodeWithName(map_and_batch_node.input(3), output)); + EXPECT_EQ(num_parallel_calls_node.attr().at("value").tensor().int64_val(0), + 1); + NodeDef drop_remainder_node = output.node( + graph_utils::FindNodeWithName(map_and_batch_node.input(4), output)); + EXPECT_EQ(drop_remainder_node.attr().at("value").tensor().bool_val(0), false); +} + +TEST(MapAndBatchFusionTest, FuseParallelMapAndBatchNodesIntoOne) { + std::vector> empty_attributes; + + GrapplerItem item; + GraphDef *graph = &item.graph; + NodeDef *start_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); + NodeDef *stop_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(10, graph, &stop_node)); + NodeDef *step_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(1, graph, &step_node)); + + std::vector range_inputs(3); + range_inputs[0] = start_node->name(); + range_inputs[1] = stop_node->name(); + range_inputs[2] = step_node->name(); + NodeDef *range_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, + empty_attributes, graph, &range_node)); + NodeDef *captured_input_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode( + "hello", graph, &captured_input_node)); + NodeDef *num_parallel_calls_node; + TF_ASSERT_OK( + graph_utils::AddScalarConstNode(2, graph, &num_parallel_calls_node)); + + std::vector map_inputs(3); + 
map_inputs[0] = range_node->name(); + map_inputs[1] = captured_input_node->name(); + map_inputs[2] = num_parallel_calls_node->name(); + NodeDef *map_node; + TF_ASSERT_OK(graph_utils::AddNode("", "ParallelMapDataset", map_inputs, + empty_attributes, graph, &map_node)); + + NodeDef *batch_size_node; + TF_ASSERT_OK( + graph_utils::AddScalarConstNode(5, graph, &batch_size_node)); + std::vector batch_inputs(2); + batch_inputs[0] = map_node->name(); + batch_inputs[1] = batch_size_node->name(); + NodeDef *batch_node; + TF_ASSERT_OK(graph_utils::AddNode("", "BatchDataset", batch_inputs, + empty_attributes, graph, &batch_node)); + + MapAndBatchFusion optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_FALSE(graph_utils::ContainsNodeWithName(map_node->name(), output)); + EXPECT_FALSE(graph_utils::ContainsNodeWithName(batch_node->name(), output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output)); + NodeDef map_and_batch_node = + output.node(graph_utils::FindNodeWithOp("MapAndBatchDatasetV2", output)); + EXPECT_EQ(map_and_batch_node.input_size(), 5); + EXPECT_EQ(map_and_batch_node.input(0), map_node->input(0)); + EXPECT_EQ(map_and_batch_node.input(1), map_node->input(1)); + EXPECT_EQ(map_and_batch_node.input(2), batch_node->input(1)); + NodeDef num_parallel_calls_node2 = output.node( + graph_utils::FindNodeWithName(map_and_batch_node.input(3), output)); + EXPECT_EQ(num_parallel_calls_node2.attr().at("value").tensor().int64_val(0), + 2); + NodeDef drop_remainder_node = output.node( + graph_utils::FindNodeWithName(map_and_batch_node.input(4), output)); + EXPECT_EQ(drop_remainder_node.attr().at("value").tensor().bool_val(0), false); +} + +TEST(MapAndBatchFusionTest, NoChange) { + std::vector> empty_attributes; + + GrapplerItem item; + GraphDef *graph = &item.graph; + NodeDef *start_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(0, graph, &start_node)); + NodeDef *stop_node; + 
TF_ASSERT_OK(graph_utils::AddScalarConstNode(10, graph, &stop_node)); + NodeDef *step_node; + TF_ASSERT_OK(graph_utils::AddScalarConstNode(1, graph, &step_node)); + + std::vector range_inputs(3); + range_inputs[0] = start_node->name(); + range_inputs[1] = stop_node->name(); + range_inputs[2] = step_node->name(); + NodeDef *range_node; + TF_ASSERT_OK(graph_utils::AddNode("", "RangeDataset", range_inputs, + empty_attributes, graph, &range_node)); + + MapAndBatchFusion optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_TRUE(graph_utils::Compare(*graph, output)); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow -- GitLab From e62f3e5ff68aad1ddef2b581b98a90125e740ddd Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 28 May 2018 22:16:46 -0700 Subject: [PATCH 194/902] Make IndexedArrayAnalysis behave well around StatusOr PiperOrigin-RevId: 198348355 --- .../xla/service/indexed_array_analysis.cc | 111 ++++++++++-------- .../xla/service/indexed_array_analysis.h | 36 +++--- .../service/indexed_array_analysis_test.cc | 12 +- 3 files changed, 88 insertions(+), 71 deletions(-) diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 5d870f9fc4..21af9a615c 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -33,8 +33,6 @@ using tensorflow::gtl::ArraySlice; using tensorflow::str_util::Join; } // namespace -// TODO(sanjoy): Make this pass StatusOr safe. 
- string IndexedArrayAnalysis::ToString(Array* root, bool print_constants) { switch (root->kind()) { case Array::kUnknown: { @@ -69,18 +67,18 @@ string IndexedArrayAnalysis::ToString(Array* root, bool print_constants) { } } -Analysis::Array* IndexedArrayAnalysis::GetArrayFor( +StatusOr IndexedArrayAnalysis::GetArrayFor( const HloInstruction* instr) { auto it = cache_.find(instr); if (it != cache_.end()) { return it->second; } - TraverseAndPopulateCache(instr); + TF_RETURN_IF_ERROR(TraverseAndPopulateCache(instr)); return FindOrDie(cache_, instr); } -void IndexedArrayAnalysis::TraverseAndPopulateCache( +Status IndexedArrayAnalysis::TraverseAndPopulateCache( const HloInstruction* root) { // Depth first search over the DAG, invoking ComputeArrayFor in post order. // The HLO instructions already in the cache are considered leaves. @@ -116,32 +114,42 @@ void IndexedArrayAnalysis::TraverseAndPopulateCache( case kVisited: stack.pop_back(); - InsertOrDie(&cache_, instr, ComputeArrayFor(instr)); + TF_ASSIGN_OR_RETURN(Array * array, ComputeArrayFor(instr)); + InsertOrDie(&cache_, instr, array); break; } } while (!stack.empty()); + + return Status::OK(); } -Analysis::Array* IndexedArrayAnalysis::ComputeArrayFor( +StatusOr IndexedArrayAnalysis::ComputeArrayFor( const HloInstruction* instr) { Array* computed_array; if (instr->IsElementwise() && instr->operand_count() == 1) { - computed_array = ComputeArrayForElementwiseUnaryOp( - instr, FindOrDie(cache_, instr->operand(0))); + TF_ASSIGN_OR_RETURN(computed_array, + ComputeArrayForElementwiseUnaryOp( + instr, FindOrDie(cache_, instr->operand(0)))); } else if (instr->IsElementwise() && instr->operand_count() == 2) { - computed_array = ComputeArrayForElementwiseBinaryOp( - instr, FindOrDie(cache_, instr->operand(0)), - FindOrDie(cache_, instr->operand(1))); + TF_ASSIGN_OR_RETURN(computed_array, + ComputeArrayForElementwiseBinaryOp( + instr, FindOrDie(cache_, instr->operand(0)), + FindOrDie(cache_, instr->operand(1)))); } else if 
(instr->opcode() == HloOpcode::kConstant) { - computed_array = ComputeArrayForConstant(instr->literal()); + TF_ASSIGN_OR_RETURN(computed_array, + ComputeArrayForConstant(instr->literal())); } else if (instr->opcode() == HloOpcode::kGather) { - computed_array = ComputeArrayForGather( - instr->shape(), instr->gather_dimension_numbers(), - instr->gather_window_bounds(), FindOrDie(cache_, instr->operand(0)), - FindOrDie(cache_, instr->operand(1))); + TF_ASSIGN_OR_RETURN( + computed_array, + ComputeArrayForGather(instr->shape(), instr->gather_dimension_numbers(), + instr->gather_window_bounds(), + FindOrDie(cache_, instr->operand(0)), + FindOrDie(cache_, instr->operand(1)))); } else if (instr->opcode() == HloOpcode::kReshape) { - computed_array = ComputeArrayForReshape( - instr->shape(), FindOrDie(cache_, instr->operand(0))); + TF_ASSIGN_OR_RETURN( + computed_array, + ComputeArrayForReshape(instr->shape(), + FindOrDie(cache_, instr->operand(0)))); } else { computed_array = nullptr; } @@ -153,12 +161,12 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayFor( return computed_array; } -Analysis::Array* IndexedArrayAnalysis::ComputeArrayForConstant( +StatusOr IndexedArrayAnalysis::ComputeArrayForConstant( const Literal& literal) { return Construct(&literal); } -ScalarIndexedArray* IndexedArrayAnalysis::FoldGatherOfGather( +StatusOr IndexedArrayAnalysis::FoldGatherOfGather( ScalarIndexedArray* source, Array* indices, int64 source_dim, tensorflow::gtl::ArraySlice output_dims, Shape shape) { // We want to transform Gather(Gather(A, X), Y) => Gather(A, Gather(X, Y)). 
@@ -224,7 +232,7 @@ ScalarIndexedArray* IndexedArrayAnalysis::FoldGatherOfGather( std::move(shape)); } -Analysis::Array* IndexedArrayAnalysis::ComputeArrayForGather( +StatusOr IndexedArrayAnalysis::ComputeArrayForGather( const Shape& shape, const GatherDimensionNumbers& dim_numbers, tensorflow::gtl::ArraySlice window_bounds, Array* source, Array* indices) { @@ -397,7 +405,7 @@ int64 FindSourcePositionForPassthroughResultDim(ArraySlice operand_shape, }; // namespace -Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( +StatusOr IndexedArrayAnalysis::ComputeArrayForReshape( const Shape& shape, Array* operand) { auto* scalar_indexed = dynamic_cast(operand); if (!scalar_indexed) { @@ -541,10 +549,12 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( std::back_inserter(output_dims_for_new_scalar_indexed_node), map_passthrough_operand_dim_to_result_dim); - Array* new_scalar_indexed_source = ComputeArrayForConstant( - *TakeOwnership(scalar_indexed->literal() - .Reshape(new_scalar_indexed_source_shape) - .ValueOrDie())); + TF_ASSIGN_OR_RETURN(const Literal* new_scalar_indexed_source_literal, + TakeOwnership(scalar_indexed->literal().Reshape( + new_scalar_indexed_source_shape))); + TF_ASSIGN_OR_RETURN( + Array * new_scalar_indexed_source, + ComputeArrayForConstant(*new_scalar_indexed_source_literal)); return ConstructScalarIndexedArray( new_scalar_indexed_source, scalar_indexed->indices(), @@ -552,7 +562,8 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForReshape( output_dims_for_new_scalar_indexed_node, shape); } -Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( +StatusOr +IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( const HloInstruction* instr, Array* lhs, Array* rhs) { // Try to fold BinaryOp(Broadcast(Const0), ScalarIndexed(Const1, Indices)) // => ScalarIndexed(BinaryOp(Broadcast'(Const0), Const1), Indices) @@ -642,28 +653,25 @@ Analysis::Array* 
IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( // inner_broadcast_result is the Broadcast'(Const0) bit in // BinaryOp(Broadcast'(Const0), Const1) - std::unique_ptr inner_broadcast_result = - broadcast_const_operand->literal() - .Broadcast(scalar_indexed_const->source()->shape(), - new_inner_broadcast_dims) - .ConsumeValueOrDie(); + TF_ASSIGN_OR_RETURN( + std::unique_ptr inner_broadcast_result, + broadcast_const_operand->literal().Broadcast( + scalar_indexed_const->source()->shape(), new_inner_broadcast_dims)); // literal_for_new_source is BinaryOp(Broadcast'(Const0), Const1) const Literal* literal_for_new_source; if (lhs_is_indexed) { - literal_for_new_source = - TakeOwnership(HloEvaluator{} - .EvaluateElementwiseBinaryOp( - instr->opcode(), scalar_indexed_const->literal(), - *inner_broadcast_result) - .ConsumeValueOrDie()); + TF_ASSIGN_OR_RETURN( + literal_for_new_source, + TakeOwnership(HloEvaluator{}.EvaluateElementwiseBinaryOp( + instr->opcode(), scalar_indexed_const->literal(), + *inner_broadcast_result))); } else { - literal_for_new_source = - TakeOwnership(HloEvaluator{} - .EvaluateElementwiseBinaryOp( - instr->opcode(), *inner_broadcast_result, - scalar_indexed_const->literal()) - .ConsumeValueOrDie()); + TF_ASSIGN_OR_RETURN( + literal_for_new_source, + TakeOwnership(HloEvaluator{}.EvaluateElementwiseBinaryOp( + instr->opcode(), *inner_broadcast_result, + scalar_indexed_const->literal()))); } ConstantArray* new_source = Construct(literal_for_new_source); @@ -675,7 +683,8 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( scalar_indexed_const->shape()); } -Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( +StatusOr +IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( const HloInstruction* instr, Array* operand) { auto* scalar_indexed_const = dynamic_cast(operand); @@ -686,11 +695,9 @@ Analysis::Array* IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( // Fold 
UnaryOp(ScalarIndexed(Const, Indices)) // => ScalarIndexed(UnaryOp(Const), Indices) - Literal* literal_for_new_source = - TakeOwnership(HloEvaluator{} - .EvaluateElementwiseUnaryOp( - instr->opcode(), scalar_indexed_const->literal()) - .ConsumeValueOrDie()); + TF_ASSIGN_OR_RETURN(Literal * literal_for_new_source, + TakeOwnership(HloEvaluator{}.EvaluateElementwiseUnaryOp( + instr->opcode(), scalar_indexed_const->literal()))); ConstantArray* new_source = Construct(literal_for_new_source); return Construct( new_source, scalar_indexed_const->indices(), @@ -712,7 +719,7 @@ StatusOr IndexedArrayAnalysisPrinterPass::Run(HloModule* module) { IndexedArrayAnalysis analysis; for (auto* computation : module->MakeNonfusionComputations()) { for (auto* instr : computation->instructions()) { - auto* t = analysis.GetArrayFor(instr); + TF_ASSIGN_OR_RETURN(Analysis::Array * t, analysis.GetArrayFor(instr)); if (!dynamic_cast(t) && !dynamic_cast(t)) { VLOG(2) << instr->ToString() << " -> " << analysis.ToString(t); } diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index 8c1f616fab..561832ab59 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -220,7 +220,7 @@ class IndexedArrayAnalysis { // NB! By inspecting the implementation, you may be able to infer a stronger // caching guarantee than what is mentioned above. Nevertheless, what is // stated above is the contract. - Array* GetArrayFor(const HloInstruction* instr); + StatusOr GetArrayFor(const HloInstruction* instr); // Pretty-prints the expression rooted at `root`. string ToString(Array* root, bool print_constants = false); @@ -228,18 +228,18 @@ class IndexedArrayAnalysis { private: // Helper function that ensures that every HLO instruction that is // transitively used by `root` has an entry in `cache_`. 
- void TraverseAndPopulateCache(const HloInstruction* root); + Status TraverseAndPopulateCache(const HloInstruction* root); // Creates an Array instance for `instr` under the assumption that all // operations of `instr` are present in `cache_`. - Array* ComputeArrayFor(const HloInstruction* instr); + StatusOr ComputeArrayFor(const HloInstruction* instr); - Array* ComputeArrayForConstant(const Literal& literal); + StatusOr ComputeArrayForConstant(const Literal& literal); - Array* ComputeArrayForGather(const Shape& shape, - const GatherDimensionNumbers& dim_numbers, - tensorflow::gtl::ArraySlice window_bounds, - Array* source, Array* indices); + StatusOr ComputeArrayForGather( + const Shape& shape, const GatherDimensionNumbers& dim_numbers, + tensorflow::gtl::ArraySlice window_bounds, Array* source, + Array* indices); // This tries to fold a ScalarIndexedArray which has another // ScalarIndexedArray as a source into a ScalarIndexedArray that instead has a @@ -262,16 +262,16 @@ class IndexedArrayAnalysis { // // I2 = [I0[i] for i in I1] // G1 = [Arr[i] for i in I2] - ScalarIndexedArray* FoldGatherOfGather( + StatusOr FoldGatherOfGather( ScalarIndexedArray* source, Array* indices, int64 source_dim, tensorflow::gtl::ArraySlice output_dims, Shape shape); - Array* ComputeArrayForReshape(const Shape& shape, Array* operand); + StatusOr ComputeArrayForReshape(const Shape& shape, Array* operand); - Array* ComputeArrayForElementwiseBinaryOp(const HloInstruction* instr, - Array* lhs, Array* rhs); - Array* ComputeArrayForElementwiseUnaryOp(const HloInstruction* instr, - Array* operand); + StatusOr ComputeArrayForElementwiseBinaryOp( + const HloInstruction* instr, Array* lhs, Array* rhs); + StatusOr ComputeArrayForElementwiseUnaryOp( + const HloInstruction* instr, Array* operand); template T* Construct(Args&&... 
args) { @@ -299,6 +299,14 @@ class IndexedArrayAnalysis { return owned_literals_.back().get(); } + StatusOr TakeOwnership( + StatusOr> literal_or_error) { + TF_ASSIGN_OR_RETURN(std::unique_ptr literal, + std::move(literal_or_error)); + owned_literals_.push_back(std::move(literal)); + return owned_literals_.back().get(); + } + std::vector> owned_tensors_; std::vector> owned_literals_; tensorflow::gtl::FlatMap cache_; diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index 76e7e7086c..68f247bfc3 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -40,12 +40,14 @@ class IndexedArrayAnalysisTest : public HloVerifiedTestBase { IndexedArrayAnalysis indexed_tensor_analysis; ParseAndVerifyModule(hlo_text); - string result = indexed_tensor_analysis.ToString( + TF_ASSERT_OK_AND_ASSIGN( + IndexedArrayAnalysis::Array* const array_result, indexed_tensor_analysis.GetArrayFor( - module().entry_computation()->root_instruction()), - print_constants); - LOG(INFO) << result; - ASSERT_EQ(result, root_expression); + module().entry_computation()->root_instruction())); + string string_result = + indexed_tensor_analysis.ToString(array_result, print_constants); + LOG(INFO) << string_result; + ASSERT_EQ(string_result, root_expression); } }; -- GitLab From 64b17a3eed29a0bd976a019375748708bf47c117 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 28 May 2018 23:03:39 -0700 Subject: [PATCH 195/902] Pass HloOpcode instead of HloInstruction; NFC Minor code cleanup change. 
PiperOrigin-RevId: 198351045 --- .../xla/service/indexed_array_analysis.cc | 33 ++++++++++--------- .../xla/service/indexed_array_analysis.h | 8 ++--- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 21af9a615c..11d931cbd4 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -127,14 +127,16 @@ StatusOr IndexedArrayAnalysis::ComputeArrayFor( const HloInstruction* instr) { Array* computed_array; if (instr->IsElementwise() && instr->operand_count() == 1) { - TF_ASSIGN_OR_RETURN(computed_array, - ComputeArrayForElementwiseUnaryOp( - instr, FindOrDie(cache_, instr->operand(0)))); + TF_ASSIGN_OR_RETURN( + computed_array, + ComputeArrayForElementwiseUnaryOp( + instr->opcode(), FindOrDie(cache_, instr->operand(0)))); } else if (instr->IsElementwise() && instr->operand_count() == 2) { - TF_ASSIGN_OR_RETURN(computed_array, - ComputeArrayForElementwiseBinaryOp( - instr, FindOrDie(cache_, instr->operand(0)), - FindOrDie(cache_, instr->operand(1)))); + TF_ASSIGN_OR_RETURN( + computed_array, + ComputeArrayForElementwiseBinaryOp( + instr->opcode(), FindOrDie(cache_, instr->operand(0)), + FindOrDie(cache_, instr->operand(1)))); } else if (instr->opcode() == HloOpcode::kConstant) { TF_ASSIGN_OR_RETURN(computed_array, ComputeArrayForConstant(instr->literal())); @@ -563,8 +565,9 @@ StatusOr IndexedArrayAnalysis::ComputeArrayForReshape( } StatusOr -IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( - const HloInstruction* instr, Array* lhs, Array* rhs) { +IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, + Array* lhs, + Array* rhs) { // Try to fold BinaryOp(Broadcast(Const0), ScalarIndexed(Const1, Indices)) // => ScalarIndexed(BinaryOp(Broadcast'(Const0), Const1), Indices) // @@ -664,14 +667,12 @@ 
IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( TF_ASSIGN_OR_RETURN( literal_for_new_source, TakeOwnership(HloEvaluator{}.EvaluateElementwiseBinaryOp( - instr->opcode(), scalar_indexed_const->literal(), - *inner_broadcast_result))); + opcode, scalar_indexed_const->literal(), *inner_broadcast_result))); } else { TF_ASSIGN_OR_RETURN( literal_for_new_source, TakeOwnership(HloEvaluator{}.EvaluateElementwiseBinaryOp( - instr->opcode(), *inner_broadcast_result, - scalar_indexed_const->literal()))); + opcode, *inner_broadcast_result, scalar_indexed_const->literal()))); } ConstantArray* new_source = Construct(literal_for_new_source); @@ -684,8 +685,8 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp( } StatusOr -IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( - const HloInstruction* instr, Array* operand) { +IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp(HloOpcode opcode, + Array* operand) { auto* scalar_indexed_const = dynamic_cast(operand); if (operand == nullptr) { @@ -697,7 +698,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp( TF_ASSIGN_OR_RETURN(Literal * literal_for_new_source, TakeOwnership(HloEvaluator{}.EvaluateElementwiseUnaryOp( - instr->opcode(), scalar_indexed_const->literal()))); + opcode, scalar_indexed_const->literal()))); ConstantArray* new_source = Construct(literal_for_new_source); return Construct( new_source, scalar_indexed_const->indices(), diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index 561832ab59..ce92fd2919 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -268,10 +268,10 @@ class IndexedArrayAnalysis { StatusOr ComputeArrayForReshape(const Shape& shape, Array* operand); - StatusOr ComputeArrayForElementwiseBinaryOp( - const HloInstruction* instr, Array* lhs, Array* rhs); - StatusOr ComputeArrayForElementwiseUnaryOp( - const 
HloInstruction* instr, Array* operand); + StatusOr ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, + Array* lhs, Array* rhs); + StatusOr ComputeArrayForElementwiseUnaryOp(HloOpcode opcode, + Array* operand); template T* Construct(Args&&... args) { -- GitLab From 23c70a240219da7201806e4ebc1354f568581557 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 28 May 2018 23:55:19 -0700 Subject: [PATCH 196/902] Fix an incorrect precondition check in IndexedArrayAnalysis PiperOrigin-RevId: 198354001 --- .../xla/service/indexed_array_analysis.cc | 2 +- .../service/indexed_array_analysis_test.cc | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 11d931cbd4..8b3fa6c157 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -689,7 +689,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp(HloOpcode opcode, Array* operand) { auto* scalar_indexed_const = dynamic_cast(operand); - if (operand == nullptr) { + if (scalar_indexed_const == nullptr) { return nullptr; } diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc index 68f247bfc3..373556ebeb 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc @@ -472,5 +472,33 @@ ENTRY main { AssertArrayForRootExpressionIs(hlo_text, "%add"); } + +TEST_F(IndexedArrayAnalysisTest, RegularUnaryOp) { + string hlo_text = R"( +HloModule RegularUnaryOp + +ENTRY main { + input = f32[100] parameter(0) + ROOT tanh = f32[100] tanh(input) +} +)"; + + AssertArrayForRootExpressionIs(hlo_text, "%tanh"); +} + +TEST_F(IndexedArrayAnalysisTest, RegularBinaryOp) { + string hlo_text = R"( +HloModule RegularUnaryOp + +ENTRY main { + 
input0 = f32[100] parameter(0) + input1 = f32[100] parameter(1) + ROOT add = f32[100] add(input0, input1) +} +)"; + + AssertArrayForRootExpressionIs(hlo_text, "%add"); +} + } // namespace } // namespace xla -- GitLab From d97695384baad9612e41715cbd7823908ee63bf6 Mon Sep 17 00:00:00 2001 From: Christoph Boeddeker Date: Tue, 29 May 2018 09:00:47 +0200 Subject: [PATCH 197/902] Add a note that stop_gradient in moments does not change the gradient --- tensorflow/python/ops/nn_impl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 783d485892..e2ef1f66b1 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -689,6 +689,9 @@ def moments( # Compute true mean while keeping the dims for proper broadcasting. mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean") # sample variance, not unbiased variance + # Note: stop_gradient does not change the gradient that gets + # backpropagated to the mean from the variance calculation, + # because that gradient is zero variance = math_ops.reduce_mean( math_ops.squared_difference(y, array_ops.stop_gradient(mean)), axes, -- GitLab From 8ecf1ebc5d83e66b29a07113b53c49ef8264703c Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 29 May 2018 00:50:34 -0700 Subject: [PATCH 198/902] Re-apply #18192. 
PiperOrigin-RevId: 198358055 --- tensorflow/java/BUILD | 41 +- tensorflow/java/build_defs.bzl | 1 + tensorflow/java/src/gen/cc/java_defs.h | 80 +-- tensorflow/java/src/gen/cc/op_gen_main.cc | 46 +- tensorflow/java/src/gen/cc/op_generator.cc | 464 +++++++++++++++++- tensorflow/java/src/gen/cc/op_generator.h | 37 +- tensorflow/java/src/gen/cc/op_specs.cc | 423 ++++++++++++++++ tensorflow/java/src/gen/cc/op_specs.h | 173 +++++++ tensorflow/java/src/gen/cc/source_writer.cc | 142 +++--- tensorflow/java/src/gen/cc/source_writer.h | 55 +-- .../java/src/gen/cc/source_writer_test.cc | 155 +++--- tensorflow/java/src/gen/gen_ops.bzl | 74 +-- 12 files changed, 1363 insertions(+), 328 deletions(-) create mode 100644 tensorflow/java/src/gen/cc/op_specs.cc create mode 100644 tensorflow/java/src/gen/cc/op_specs.h diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 78596d147a..19d2133a55 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -60,9 +60,7 @@ java_library( filegroup( name = "java_op_sources", - srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]) + [ - ":java_op_gen_sources", - ], + srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]) + [":java_op_gen_sources"], visibility = [ "//tensorflow/java:__pkg__", ], @@ -70,43 +68,27 @@ filegroup( tf_java_op_gen_srcjar( name = "java_op_gen_sources", - gen_base_package = "org.tensorflow.op", - gen_tool = "java_op_gen_tool", - ops_libs = [ - "array_ops", - "candidate_sampling_ops", - "control_flow_ops", - "data_flow_ops", - "image_ops", - "io_ops", - "linalg_ops", - "logging_ops", - "math_ops", - "nn_ops", - "no_op", - "parsing_ops", - "random_ops", - "sparse_ops", - "state_ops", - "string_ops", - "training_ops", - "user_ops", + api_def_srcs = [ + "//tensorflow/core/api_def:base_api_def", ], + base_package = "org.tensorflow.op", + gen_tool = ":java_op_gen_tool", ) -# Build the gen tool as a library, as it will be linked to a core/ops binary -# file before making it an executable. 
See tf_java_op_gen_srcjar(). -cc_library( +tf_cc_binary( name = "java_op_gen_tool", srcs = [ "src/gen/cc/op_gen_main.cc", ], copts = tf_copts(), + linkopts = ["-lm"], + linkstatic = 1, deps = [ ":java_op_gen_lib", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", + "//tensorflow/core:ops", ], ) @@ -114,11 +96,13 @@ cc_library( name = "java_op_gen_lib", srcs = [ "src/gen/cc/op_generator.cc", + "src/gen/cc/op_specs.cc", "src/gen/cc/source_writer.cc", ], hdrs = [ "src/gen/cc/java_defs.h", "src/gen/cc/op_generator.h", + "src/gen/cc/op_specs.h", "src/gen/cc/source_writer.h", ], copts = tf_copts(), @@ -127,6 +111,9 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:op_gen_lib", + "//tensorflow/core:protos_all_cc", + "@com_googlesource_code_re2//:re2", ], ) diff --git a/tensorflow/java/build_defs.bzl b/tensorflow/java/build_defs.bzl index ab7f60d03d..e1916ca4d9 100644 --- a/tensorflow/java/build_defs.bzl +++ b/tensorflow/java/build_defs.bzl @@ -15,6 +15,7 @@ JAVA_VERSION_OPTS = [ XLINT_OPTS = [ "-Werror", "-Xlint:all", + "-Xlint:-processing", "-Xlint:-serial", "-Xlint:-try", "-Xlint:-classfile", # see b/32750402, go/javac-warnings#classfile diff --git a/tensorflow/java/src/gen/cc/java_defs.h b/tensorflow/java/src/gen/cc/java_defs.h index 59f8beaee7..d9d6f8adc8 100644 --- a/tensorflow/java/src/gen/cc/java_defs.h +++ b/tensorflow/java/src/gen/cc/java_defs.h @@ -16,19 +16,22 @@ limitations under the License. 
#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_ #define TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_ -#include #include +#include +#include +#include namespace tensorflow { namespace java { // An enumeration of different modifiers commonly used in Java enum Modifier { - PUBLIC = (1 << 0), + PACKAGE = 0, + PUBLIC = (1 << 0), PROTECTED = (1 << 1), - PRIVATE = (1 << 2), - STATIC = (1 << 3), - FINAL = (1 << 4), + PRIVATE = (1 << 2), + STATIC = (1 << 3), + FINAL = (1 << 4), }; class Annotation; @@ -72,6 +75,8 @@ class Type { // Reflection API does return Type(Type::PRIMITIVE, "void"); } + static Type Generic(const string& name) { return Type(Type::GENERIC, name); } + static Type Wildcard() { return Type(Type::GENERIC, ""); } static Type Class(const string& name, const string& package = "") { return Type(Type::CLASS, name, package); } @@ -81,9 +86,6 @@ class Type { static Type Enum(const string& name, const string& package = "") { return Type(Type::ENUM, name, package); } - static Type Generic(const string& name = "") { - return Type(Type::GENERIC, name); - } static Type ClassOf(const Type& type) { return Class("Class").add_parameter(type); } @@ -96,11 +98,10 @@ class Type { const Kind& kind() const { return kind_; } const string& name() const { return name_; } const string& package() const { return package_; } - const string& description() const { return description_; } - Type& description(const string& description) { - description_ = description; - return *this; + const string canonical_name() const { + return package_.empty() ? name_ : package_ + "." 
+ name_; } + bool wildcard() const { return name_.empty(); } // only wildcards has no name const std::list& parameters() const { return parameters_; } Type& add_parameter(const Type& parameter) { parameters_.push_back(parameter); @@ -120,14 +121,6 @@ class Type { } return *this; } - // Returns true if "type" is of a known collection type (only a few for now) - bool IsCollection() const { - return name_ == "List" || name_ == "Iterable"; - } - // Returns true if this instance is a wildcard () - bool IsWildcard() const { - return kind_ == GENERIC && name_.empty(); - } protected: Type(Kind kind, const string& name, const string& package = "") @@ -137,7 +130,6 @@ class Type { Kind kind_; string name_; string package_; - string description_; std::list parameters_; std::list annotations_; std::list supertypes_; @@ -180,16 +172,11 @@ class Variable { const string& name() const { return name_; } const Type& type() const { return type_; } bool variadic() const { return variadic_; } - const string& description() const { return description_; } - Variable& description(const string& description) { - description_ = description; - return *this; - } + private: string name_; Type type_; bool variadic_; - string description_; Variable(const string& name, const Type& type, bool variadic) : name_(name), type_(type), variadic_(variadic) {} @@ -210,16 +197,6 @@ class Method { bool constructor() const { return constructor_; } const string& name() const { return name_; } const Type& return_type() const { return return_type_; } - const string& description() const { return description_; } - Method& description(const string& description) { - description_ = description; - return *this; - } - const string& return_description() const { return return_description_; } - Method& return_description(const string& description) { - return_description_ = description; - return *this; - } const std::list& arguments() const { return arguments_; } Method& add_argument(const Variable& var) { 
arguments_.push_back(var); @@ -235,8 +212,6 @@ class Method { string name_; Type return_type_; bool constructor_; - string description_; - string return_description_; std::list arguments_; std::list annotations_; @@ -244,6 +219,33 @@ class Method { : name_(name), return_type_(return_type), constructor_(constructor) {} }; +// A definition of a documentation bloc for a Java element (JavaDoc) +class Javadoc { + public: + static Javadoc Create(const string& brief = "") { return Javadoc(brief); } + const string& brief() const { return brief_; } + const string& details() const { return details_; } + Javadoc& details(const string& details) { + details_ = details; + return *this; + } + const std::list>& tags() const { return tags_; } + Javadoc& add_tag(const string& tag, const string& text) { + tags_.push_back(std::make_pair(tag, text)); + return *this; + } + Javadoc& add_param_tag(const string& name, const string& text) { + return add_tag("param", name + " " + text); + } + + private: + string brief_; + string details_; + std::list> tags_; + + explicit Javadoc(const string& brief) : brief_(brief) {} +}; + } // namespace java } // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc index bea99f3d7f..0d9e0883af 100644 --- a/tensorflow/java/src/gen/cc/op_gen_main.cc +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -36,49 +36,43 @@ const char kUsageHeader[] = "Operation wrappers are generated under the path specified by the " "'--output_dir' argument. This path can be absolute or relative to the\n" "current working directory and will be created if it does not exists.\n\n" - "The '--lib_name' argument is used to classify the set of operations. If " - "the chosen name contains more than one word, it must be provided in \n" - "snake_case. This value is declined into other meaningful names, such as " - "the group and package of the generated operations. 
For example,\n" - "'--lib_name=my_lib' generates the operations under the " - "'org.tensorflow.op.mylib' package and add them to the 'myLib()' operator\n" - "group.\n\n" - "Note that the operator group assigned to the generated wrappers is just " - "an annotation tag at this stage. Operations will not be available " - "through\n" - "the 'org.tensorflow.op.Ops' API as a group until the generated classes " - "are compiled using an appropriate annotation processor.\n\n" - "Finally, the '--base_package' overrides the default parent package " - "under which the generated subpackage and classes are to be located.\n\n"; + "Note that the operations will not be available through the " + "'org.tensorflow.op.Ops' API until the generated classes are compiled\n" + "using an appropriate annotation processor.\n\n" + "The '--base_package' overrides the default parent package under which " + "the generated subpackage and classes are to be located.\n\n" + "Finally, the `--api_dirs` argument takes a list of comma-separated " + "directories of API definitions can be provided to override default\n" + "values found in the ops definitions. 
Directories are ordered by priority " + "(the last having precedence over the first).\n\n"; } // namespace java } // namespace tensorflow int main(int argc, char* argv[]) { - tensorflow::string lib_name; tensorflow::string output_dir; tensorflow::string base_package = "org.tensorflow.op"; + tensorflow::string api_dirs_str; std::vector flag_list = { tensorflow::Flag("output_dir", &output_dir, "Root directory into which output files are generated"), - tensorflow::Flag( - "lib_name", &lib_name, - "A name, in snake_case, used to classify this set of operations"), tensorflow::Flag( "base_package", &base_package, - "Package parent to the generated subpackage and classes")}; + "Package parent to the generated subpackage and classes"), + tensorflow::Flag( + "api_dirs", &api_dirs_str, + "List of directories that contains the ops api definitions")}; tensorflow::string usage = tensorflow::java::kUsageHeader; usage += tensorflow::Flags::Usage(argv[0], flag_list); bool parsed_flags_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); tensorflow::port::InitMain(usage.c_str(), &argc, &argv); - QCHECK(parsed_flags_ok && !lib_name.empty() && !output_dir.empty()) << usage; - - tensorflow::java::OpGenerator generator; + QCHECK(parsed_flags_ok && !output_dir.empty()) << usage; + std::vector api_dirs = tensorflow::str_util::Split( + api_dirs_str, ",", tensorflow::str_util::SkipEmpty()); + tensorflow::java::OpGenerator generator(api_dirs); tensorflow::OpList ops; - tensorflow::OpRegistry::Global()->Export(true, &ops); - tensorflow::Status status = - generator.Run(ops, lib_name, base_package, output_dir); - TF_QCHECK_OK(status); + tensorflow::OpRegistry::Global()->Export(false, &ops); + TF_CHECK_OK(generator.Run(ops, base_package, output_dir)); return 0; } diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index def06baf2d..debd95fc62 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ 
b/tensorflow/java/src/gen/cc/op_generator.cc @@ -13,54 +13,466 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include +#include +#include +#include #include +#include +#include "tensorflow/core/framework/op_gen_lib.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/java/src/gen/cc/java_defs.h" #include "tensorflow/java/src/gen/cc/op_generator.h" +#include "tensorflow/java/src/gen/cc/op_specs.h" +#include "tensorflow/java/src/gen/cc/source_writer.h" namespace tensorflow { namespace java { namespace { -string CamelCase(const string& str, char delimiter, bool upper) { - string result; - bool cap = upper; - for (string::const_iterator it = str.begin(); it != str.end(); ++it) { - const char c = *it; - if (c == delimiter) { - cap = true; - } else if (cap) { - result += toupper(c); - cap = false; +const char* kLicense = + "/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved.\n" + "\n" + "Licensed under the Apache License, Version 2.0 (the \"License\");\n" + "you may not use this file except in compliance with the License.\n" + "You may obtain a copy of the License at\n" + "\n" + " http://www.apache.org/licenses/LICENSE-2.0\n" + "\n" + "Unless required by applicable law or agreed to in writing, software\n" + "distributed under the License is distributed on an \"AS IS\" BASIS,\n" + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" + "See the License for the specific language governing permissions and\n" + "limitations under the License.\n" + "=======================================================================*/" + "\n"; + +// There is three different modes to render an op class, depending on the +// number and type of outputs it has: +// +// DEFAULT: This mode does not provide any specialization for the op class, it +// is applied when the operation does not comply with any other mode +// +// OPERAND: The op class implements the Operand interface, allowing an +// instance to be passed directly in input to another operation +// +// LIST_OPERAND: The op class implements the Iterable> interface, +// allowing an instance to be passed directly as a list input to +// another operation +// +enum RenderMode { DEFAULT, OPERAND, LIST_OPERAND }; + +void AddArgument(const Variable& var, const string& description, + Method* method_out, Javadoc* javadoc_out) { + method_out->add_argument(var); + javadoc_out->add_param_tag(var.name(), description); +} + +void CollectOpDependencies(const OpSpec& op, RenderMode mode, + std::list* out) { + out->push_back(Type::Class("Operation", "org.tensorflow")); + out->push_back(Type::Class("OperationBuilder", "org.tensorflow")); + out->push_back(Type::Class("Scope", "org.tensorflow.op")); + if (mode == OPERAND) { + out->push_back(Type::Class("Output", "org.tensorflow")); + } else if (mode == LIST_OPERAND) { + out->push_back(Type::Interface("Iterator", "java.util")); + 
} + // Don't pay attention to duplicate types in the dependency list, they will + // be filtered out by the SourceWriter. + for (const ArgumentSpec& input : op.inputs()) { + out->push_back(input.var().type()); + if (input.iterable()) { + out->push_back(Type::Class("Operands", "org.tensorflow.op")); + } + } + for (const ArgumentSpec& output : op.outputs()) { + out->push_back(output.var().type()); + if (output.iterable()) { + out->push_back(Type::Class("Arrays", "java.util")); + } + } + for (const AttributeSpec& attribute : op.attributes()) { + out->push_back(attribute.var().type()); + out->push_back(attribute.jni_type()); + } + for (const AttributeSpec& optional_attribute : op.optional_attributes()) { + out->push_back(optional_attribute.var().type()); + } +} + +void WriteSetAttrDirective(const AttributeSpec& attr, bool optional, + SourceWriter* writer) { + string var_name = optional ? "opts." + attr.var().name() : attr.var().name(); + if (attr.iterable()) { + string array_name = attr.var().name() + "Array"; + writer->AppendType(attr.jni_type()) + .Append("[] " + array_name + " = new ") + .AppendType(attr.jni_type()) + .Append("[" + var_name + ".size()];") + .EndLine() + .BeginBlock("for (int i = 0; i < " + array_name + ".length; ++i)") + .Append(array_name + "[i] = "); + if (attr.type().kind() == Type::GENERIC) { + writer->Append("DataType.fromClass(" + var_name + ".get(i));"); + } else { + writer->Append(var_name + ".get(i);"); + } + writer->EndLine() + .EndBlock() + .Append("opBuilder.setAttr(\"" + attr.op_def_name() + "\", ") + .Append(array_name + ");") + .EndLine(); + } else { + writer->Append("opBuilder.setAttr(\"" + attr.op_def_name() + "\", "); + if (attr.var().type().name() == "Class") { + writer->Append("DataType.fromClass(" + var_name + "));"); } else { - result += c; + writer->Append(var_name + ");"); } + writer->EndLine(); } - return result; } -} // namespace +void RenderFactoryMethods(const OpSpec& op, const Type& op_class, + SourceWriter* writer) { + 
Method factory = Method::Create("create", op_class); + Javadoc factory_doc = + Javadoc::Create("Factory method to create a class to wrap a new " + + op_class.name() + " operation to the graph."); + Variable scope = + Variable::Create("scope", Type::Class("Scope", "org.tensorflow.op")); + AddArgument(scope, "current graph scope", &factory, &factory_doc); + for (const ArgumentSpec& input : op.inputs()) { + AddArgument(input.var(), input.description(), &factory, &factory_doc); + } + for (const AttributeSpec& attr : op.attributes()) { + AddArgument(attr.var(), attr.description(), &factory, &factory_doc); + } + if (!op.optional_attributes().empty()) { + AddArgument(Variable::Varargs("options", Type::Class("Options")), + "carries optional attributes values", &factory, &factory_doc); + } + factory_doc.add_tag("return", "a new instance of " + op_class.name()); -OpGenerator::OpGenerator() : env(Env::Default()) {} + writer->BeginMethod(factory, PUBLIC | STATIC, &factory_doc); + writer->Append("OperationBuilder opBuilder = scope.graph().opBuilder(\"" + + op.graph_op_name() + "\", scope.makeOpName(\"" + + op_class.name() + "\"));"); + writer->EndLine(); + for (const ArgumentSpec& input : op.inputs()) { + if (input.iterable()) { + writer->Append("opBuilder.addInputList(Operands.asOutputs(" + + input.var().name() + "));"); + writer->EndLine(); + } else { + writer->Append("opBuilder.addInput(" + input.var().name() + + ".asOutput());"); + writer->EndLine(); + } + } + for (const AttributeSpec& attribute : op.attributes()) { + WriteSetAttrDirective(attribute, false, writer); + } + if (!op.optional_attributes().empty()) { + writer->BeginBlock("if (options != null)") + .BeginBlock("for (Options opts : options)"); + for (const AttributeSpec& attribute : op.optional_attributes()) { + writer->BeginBlock("if (opts." 
+ attribute.var().name() + " != null)"); + WriteSetAttrDirective(attribute, true, writer); + writer->EndBlock(); + } + writer->EndBlock().EndBlock(); + } + writer->Append("return new ") + .AppendType(op_class) + .Append("(opBuilder.build());") + .EndLine(); + writer->EndMethod(); +} -OpGenerator::~OpGenerator() {} +void RenderConstructor(const OpSpec& op, const Type& op_class, + SourceWriter* writer) { + Variable operation = + Variable::Create("operation", Type::Class("Operation", "org.tensorflow")); + Method constructor = Method::ConstructorFor(op_class).add_argument(operation); + for (const ArgumentSpec& output : op.outputs()) { + if (output.iterable() && !output.type().wildcard()) { + constructor.add_annotation( + Annotation::Create("SuppressWarnings").attributes("\"unchecked\"")); + break; + } + } + writer->BeginMethod(constructor, PRIVATE) + .Append("super(operation);") + .EndLine(); + if (!op.outputs().empty()) { + writer->Append("int outputIdx = 0;").EndLine(); + for (const ArgumentSpec& output : op.outputs()) { + if (output.iterable()) { + string var_length = output.var().name() + "Length"; + writer->Append("int " + var_length) + .Append(" = operation.outputListLength(\"" + output.op_def_name() + + "\");") + .EndLine() + .Append(output.var().name() + " = Arrays.asList("); + if (!output.type().wildcard()) { + writer->Append("(") + .AppendType(output.var().type().parameters().front()) + .Append("[])"); + } + writer->Append("operation.outputList(outputIdx, " + var_length + "));") + .EndLine() + .Append("outputIdx += " + var_length + ";") + .EndLine(); + } else { + writer + ->Append(output.var().name() + " = operation.output(outputIdx++);") + .EndLine(); + } + } + } + writer->EndMethod(); +} -Status OpGenerator::Run(const OpList& ops, const string& lib_name, - const string& base_package, const string& output_dir) { - const string package = - base_package + '.' 
+ str_util::StringReplace(lib_name, "_", "", true); - const string package_path = - output_dir + '/' + str_util::StringReplace(package, ".", "/", true); - const string group = CamelCase(lib_name, '_', false); +void RenderGettersAndSetters(const OpSpec& op, SourceWriter* writer) { + for (const AttributeSpec& attr : op.optional_attributes()) { + Method setter = Method::Create(attr.var().name(), Type::Class("Options")); + Javadoc setter_doc = Javadoc::Create(); + AddArgument(attr.var(), attr.description(), &setter, &setter_doc); + writer->BeginMethod(setter, PUBLIC | STATIC, &setter_doc) + .Append("return new Options()." + attr.var().name() + "(" + + attr.var().name() + ");") + .EndLine() + .EndMethod(); + } + for (const ArgumentSpec& output : op.outputs()) { + Method getter = Method::Create(output.var().name(), output.var().type()); + Javadoc getter_doc = Javadoc::Create(output.description()); + writer->BeginMethod(getter, PUBLIC, &getter_doc) + .Append("return " + output.var().name() + ";") + .EndLine() + .EndMethod(); + } +} - if (!env->FileExists(package_path).ok()) { - TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); +void RenderInterfaceImpl(const OpSpec& op, RenderMode mode, + SourceWriter* writer) { + ArgumentSpec output = op.outputs().front(); + + if (mode == OPERAND) { + bool cast2obj = output.type().wildcard(); + Type return_type = + Type::Class("Output", "org.tensorflow") + .add_parameter(cast2obj ? 
Type::Class("Object") : output.type()); + Method as_output = Method::Create("asOutput", return_type) + .add_annotation(Annotation::Create("Override")); + if (cast2obj) { + as_output.add_annotation( + Annotation::Create("SuppressWarnings").attributes("\"unchecked\"")); + } + writer->BeginMethod(as_output, PUBLIC); + if (cast2obj) { + writer->Append("return (").AppendType(return_type).Append(") "); + } else { + writer->Append("return "); + } + writer->Append(output.var().name() + ";").EndLine().EndMethod(); + + } else if (mode == LIST_OPERAND) { + Type operand = Type::Interface("Operand", "org.tensorflow"); + if (output.type().wildcard()) { + operand.add_parameter(Type::Class("Object")); + } else { + operand.add_parameter(output.type()); + } + Type return_type = + Type::Interface("Iterator", "java.util").add_parameter(operand); + Method iterator = + Method::Create("iterator", return_type) + .add_annotation(Annotation::Create("Override")) + .add_annotation(Annotation::Create("SuppressWarnings") + .attributes("{\"rawtypes\", \"unchecked\"}")); + // cast the output list using a raw List + writer->BeginMethod(iterator, PUBLIC) + .Append("return (" + return_type.name() + ") ") + .Append(output.var().name() + ".iterator();") + .EndLine() + .EndMethod(); + } +} + +void RenderOptionsClass(const OpSpec& op, const Type& op_class, + SourceWriter* writer) { + Type options_class = Type::Class("Options"); + Javadoc options_doc = Javadoc::Create("Optional attributes for {@link " + + op_class.canonical_name() + "}"); + writer->BeginInnerType(options_class, PUBLIC | STATIC, &options_doc); + for (const AttributeSpec& attr : op.optional_attributes()) { + Method setter = Method::Create(attr.var().name(), options_class); + Javadoc setter_doc = Javadoc::Create(); + AddArgument(attr.var(), attr.description(), &setter, &setter_doc); + writer->BeginMethod(setter, PUBLIC, &setter_doc) + .Append("this." 
+ attr.var().name() + " = " + attr.var().name() + ";") + .EndLine() + .Append("return this;") + .EndLine() + .EndMethod(); + } + writer->EndLine(); + for (const AttributeSpec& optional_attribute : op.optional_attributes()) { + writer->WriteField(optional_attribute.var(), PRIVATE); } + Method constructor = Method::ConstructorFor(options_class); + writer->BeginMethod(constructor, PRIVATE).EndMethod(); + writer->EndType(); +} + +inline Type ClassOf(const EndpointSpec& endpoint, const string& base_package) { + return Type::Class( + endpoint.name(), + base_package + "." + str_util::Lowercase(endpoint.package())); +} - LOG(INFO) << "Generating Java wrappers for '" << lib_name << "' operations"; - // TODO(karllessard) generate wrappers from list of ops +void GenerateOp(const OpSpec& op, const EndpointSpec& endpoint, + const string& base_package, const string& output_dir, + Env* env) { + Type op_class( + ClassOf(endpoint, base_package) + .add_supertype(Type::Class("PrimitiveOp", "org.tensorflow.op"))); + Javadoc op_javadoc(endpoint.javadoc()); + // op interfaces + RenderMode mode = DEFAULT; + if (op.outputs().size() == 1) { + const ArgumentSpec& output = op.outputs().front(); + Type operand_type(output.type().wildcard() ? 
Type::Class("Object") + : output.type()); + Type operand_inf(Type::Interface("Operand", "org.tensorflow") + .add_parameter(operand_type)); + if (output.iterable()) { + mode = LIST_OPERAND; + op_class.add_supertype(Type::IterableOf(operand_inf)); + } else { + mode = OPERAND; + op_class.add_supertype(operand_inf); + } + } + // op generic parameters + std::set generics; + for (const ArgumentSpec& output : op.outputs()) { + if (output.type().kind() == Type::GENERIC && !output.type().wildcard() && + generics.find(output.type().name()) == generics.end()) { + op_class.add_parameter(output.type()); + op_javadoc.add_param_tag( + "<" + output.type().name() + ">", + "data type for {@code " + output.var().name() + "()} output"); + generics.insert(output.type().name()); + } + } + // op annotations + op_class.add_annotation( + Annotation::Create("Generated", "javax.annotation") + .attributes("value = \"TensorFlow Java Op Generator\"")); + if (endpoint.deprecated()) { + op_class.add_annotation(Annotation::Create("Deprecated")); + string explanation; + if (!op.endpoints().front().deprecated()) { + explanation = + "use {@link " + + ClassOf(op.endpoints().front(), base_package).canonical_name() + + "} instead"; + } else { + explanation = op.deprecation_explanation(); + } + op_javadoc.add_tag("deprecated", explanation); + } + if (!op.hidden()) { + // expose the op in the Ops Graph API only if it is visible + op_class.add_annotation( + Annotation::Create("Operator", "org.tensorflow.op.annotation") + .attributes("group = \"" + endpoint.package() + "\"")); + } + // create op class file + const string op_dir_name = io::JoinPath( + output_dir, str_util::StringReplace(op_class.package(), ".", "/", true)); + if (!env->FileExists(op_dir_name).ok()) { + TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(op_dir_name)) + << op_dir_name; + } + const string op_file_name = op_class.name() + ".java"; + std::unique_ptr op_file; + TF_CHECK_OK( + env->NewWritableFile(io::JoinPath(op_dir_name, 
op_file_name), &op_file)) + << op_file_name; + + // render endpoint source code + SourceFileWriter writer(op_file.get()); + std::list dependencies; + CollectOpDependencies(op, mode, &dependencies); + writer.Write(kLicense).EndLine().BeginType(op_class, PUBLIC | FINAL, + &dependencies, &op_javadoc); + if (!op.optional_attributes().empty()) { + RenderOptionsClass(op, op_class, &writer); + } + RenderFactoryMethods(op, op_class, &writer); + RenderGettersAndSetters(op, &writer); + if (mode != DEFAULT) { + RenderInterfaceImpl(op, mode, &writer); + } + writer.EndLine(); + for (const ArgumentSpec& output : op.outputs()) { + writer.WriteField(output.var(), PRIVATE); + } + RenderConstructor(op, op_class, &writer); + writer.EndType(); +} + +bool CanGenerateOp(const OpDef& op_def, const ApiDef& api_def) { + if (api_def.visibility() == ApiDef::SKIP) { + return false; + } + for (const auto& attr : op_def.attr()) { + if (attr.type() == "func") { + return false; // TODO(karllessard) add support for function attributes + } + } + return true; +} + +} // namespace + +Status OpGenerator::Run(const OpList& op_list, const string& base_package, + const string& output_dir) { + ApiDefMap api_map(op_list); + if (!api_dirs_.empty()) { + // Only load api files that correspond to the requested "op_list" + for (const auto& op : op_list.op()) { + for (const auto& api_def_dir : api_dirs_) { + const std::string api_def_file_pattern = + io::JoinPath(api_def_dir, "api_def_" + op.name() + ".pbtxt"); + if (env_->FileExists(api_def_file_pattern).ok()) { + TF_CHECK_OK(api_map.LoadFile(env_, api_def_file_pattern)) + << api_def_file_pattern; + } + } + } + } + api_map.UpdateDocs(); + for (const auto& op_def : op_list.op()) { + const ApiDef* api_def = api_map.GetApiDef(op_def.name()); + if (CanGenerateOp(op_def, *api_def)) { + OpSpec op(OpSpec::Create(op_def, *api_def)); + for (const EndpointSpec& endpoint : op.endpoints()) { + GenerateOp(op, endpoint, base_package, output_dir, env_); + } + } + } return 
Status::OK(); } diff --git a/tensorflow/java/src/gen/cc/op_generator.h b/tensorflow/java/src/gen/cc/op_generator.h index 4b55ed3ed9..05decd6b54 100644 --- a/tensorflow/java/src/gen/cc/op_generator.h +++ b/tensorflow/java/src/gen/cc/op_generator.h @@ -17,34 +17,39 @@ limitations under the License. #define TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ #include +#include -#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/api_def.pb.h" +#include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/java/src/gen/cc/op_specs.h" namespace tensorflow { namespace java { -/// \brief A generator of Java operation wrappers. -/// -/// Such generator is normally ran only once per executable, outputting -/// wrappers for the all registered operations it has been compiled with. -/// Nonetheless, it is designed to support multiple runs, giving a different -/// list of operations on each cycle. +// A generator of Java operation wrappers. +// +// This generator takes a list of ops definitions in input and outputs +// a Java Op wrapper for each of them in the provided directory. The same +// generator instance can be invoked multiple times with a different list of +// ops definitions. class OpGenerator { public: - OpGenerator(); - virtual ~OpGenerator(); + explicit OpGenerator(const std::vector& api_dirs, + Env* env = Env::Default()) + : api_dirs_(api_dirs), env_(env) {} - /// \brief Generates wrappers for the given list of 'ops'. - /// - /// Output files are generated in //, - /// where 'lib_package' is derived from 'lib_name'. - Status Run(const OpList& ops, const string& lib_name, - const string& base_package, const string& output_dir); + // Generates wrappers for the given list of 'ops'. + // + // Output files are generated in //, + // where 'op_package' is derived from ops endpoints. 
+ Status Run(const OpList& op_list, const string& base_package, + const string& output_dir); private: - Env* env; + const std::vector api_dirs_; + Env* env_; }; } // namespace java diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc new file mode 100644 index 0000000000..181fd4c5e3 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -0,0 +1,423 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "re2/re2.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/java/src/gen/cc/op_specs.h" + +namespace tensorflow { +namespace java { +namespace { + +inline bool IsRealNumbers(const AttrValue& values) { + if (!values.has_list()) { + return RealNumberTypes().Contains(values.type()); + } + for (int i = 0; i < values.list().type_size(); ++i) { + if (!RealNumberTypes().Contains(values.list().type(i))) { + return false; + } + } + return true; +} + +class TypeResolver { + public: + explicit TypeResolver(const OpDef& op_def) : op_def_(op_def) {} + + // Returns the class type of an input/output argument + // + // For example, if the argument's datatype is DT_STRING, this method will + // return 
"java.lang.String", so the argument can become "Operand" + // in the Ops API + Type TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out); + + // Returns types of an input attribute + // + // The first element of the pair is the class type of this attribute while + // the second is its JNI/primitive type equivalent, required for explicit + // unboxing. + // + // For example, if the attribute is of type "float", this method will return + // , so the attribute can be used as a "Float" object + // in the Ops API and casted to a "float" when passing through the JNI layer. + std::pair TypesOf(const OpDef_AttrDef& attr_def, + bool* iterable_out); + + // Returns true if the type of this attribute has already been resolved + bool IsAttributeVisited(const string& attr_name) { + return visited_attrs_.find(attr_name) != visited_attrs_.cend(); + } + + private: + const OpDef op_def_; + std::map visited_attrs_; + char next_generic_letter_ = 'T'; + + std::pair MakeTypePair(const Type& type, const Type& jni_type) { + return std::make_pair(type, jni_type); + } + std::pair MakeTypePair(const Type& type) { + return std::make_pair(type, type); + } + Type NextGeneric() { + char generic_letter = next_generic_letter_++; + if (next_generic_letter_ > 'Z') { + next_generic_letter_ = 'A'; + } + return Type::Generic(string(1, generic_letter)); + } +}; + +Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { + *iterable_out = false; + if (!arg_def.number_attr().empty()) { + // when number_attr is set, argument has to be a list of tensors + *iterable_out = true; + visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); + } + Type type = Type::Wildcard(); + if (arg_def.type() != DataType::DT_INVALID) { + // resolve type from DataType + switch (arg_def.type()) { + case DataType::DT_BOOL: + type = Type::Class("Boolean"); + break; + case DataType::DT_STRING: + type = Type::Class("String"); + break; + case DataType::DT_FLOAT: + type = Type::Class("Float"); 
+ break; + case DataType::DT_DOUBLE: + type = Type::Class("Double"); + break; + case DataType::DT_UINT8: + type = Type::Class("UInt8", "org.tensorflow.types"); + break; + case DataType::DT_INT32: + type = Type::Class("Integer"); + break; + case DataType::DT_INT64: + type = Type::Class("Long"); + break; + case DataType::DT_RESOURCE: + // TODO(karllessard) create a Resource utility class that could be + // used to store a resource and its type (passed in a second argument). + // For now, we need to force a wildcard and we will unfortunately lose + // track of the resource type. + break; + default: + // Any other datatypes does not have a equivalent in Java and must + // remain a wildcard (e.g. DT_COMPLEX64, DT_QINT8, ...) + break; + } + } else if (!arg_def.type_attr().empty()) { + // resolve type from attribute (if already visited, retrieve its type) + if (IsAttributeVisited(arg_def.type_attr())) { + type = visited_attrs_.at(arg_def.type_attr()); + } else { + for (const auto& attr_def : op_def_.attr()) { + if (attr_def.name() == arg_def.type_attr()) { + type = TypesOf(attr_def, iterable_out).first; + break; + } + } + } + } else if (!arg_def.type_list_attr().empty()) { + // type is a list of tensors that can be of different data types, so leave + // it as a list of wildcards + *iterable_out = true; + visited_attrs_.insert(std::make_pair(arg_def.type_list_attr(), type)); + + } else { + LOG(FATAL) << "Cannot resolve data type of argument \"" << arg_def.name() + << "\" in operation \"" << op_def_.name() << "\""; + } + return type; +} + +std::pair TypeResolver::TypesOf(const OpDef_AttrDef& attr_def, + bool* iterable_out) { + std::pair types = MakeTypePair(Type::Wildcard()); + *iterable_out = false; + StringPiece attr_type = attr_def.type(); + if (str_util::ConsumePrefix(&attr_type, "list(")) { + attr_type.remove_suffix(1); // remove closing brace + *iterable_out = true; + } + if (attr_type == "string") { + types = MakeTypePair(Type::Class("String")); + + } else if 
(attr_type == "int") { + types = MakeTypePair(Type::Class("Long"), Type::Long()); + + } else if (attr_type == "float") { + types = MakeTypePair(Type::Class("Float"), Type::Float()); + + } else if (attr_type == "bool") { + types = MakeTypePair(Type::Class("Boolean"), Type::Boolean()); + + } else if (attr_type == "shape") { + types = MakeTypePair(Type::Class("Shape", "org.tensorflow")); + + } else if (attr_type == "tensor") { + types = MakeTypePair(Type::Class("Tensor", "org.tensorflow") + .add_parameter(Type::Wildcard())); + + } else if (attr_type == "type") { + Type type = *iterable_out ? Type::Wildcard() : NextGeneric(); + if (IsRealNumbers(attr_def.allowed_values())) { + type.add_supertype(Type::Class("Number")); + } + types = MakeTypePair(type, Type::Enum("DataType", "org.tensorflow")); + + } else { + LOG(FATAL) << "Cannot resolve data type for attribute \"" << attr_type + << "\" in operation \"" << op_def_.name() << "\""; + } + visited_attrs_.insert(std::make_pair(attr_def.name(), types.first)); + return types; +} + +string SnakeToCamelCase(const string& str, bool upper = false) { + string result; + bool cap = upper; + for (string::const_iterator it = str.begin(); it != str.end(); ++it) { + const char c = *it; + if (c == '_') { + cap = true; + } else if (cap) { + result += toupper(c); + cap = false; + } else { + result += c; + } + } + return result; +} + +bool FindAndCut(string* input, const RE2& expr, string* before_match, + string* ret_match = nullptr) { + string match; + if (!RE2::PartialMatch(*input, expr, &match)) return false; + *before_match = input->substr(0, input->find(match)); + *input = input->substr(before_match->size() + match.size()); + if (ret_match != nullptr) *ret_match = match; + return true; +} + +string ParseDocumentation(const string& inp) { + std::stringstream javadoc_text; + + // TODO(karllessard) This is a very minimalist utility method for converting + // markdown syntax, as found in ops descriptions, to Javadoc/html tags. 
Check + // for alternatives to increase the level of support for markups. + std::vector markups_subexpr; + markups_subexpr.push_back("\n+\\*\\s+"); // lists + markups_subexpr.push_back("\n{2,}"); // paragraphs + markups_subexpr.push_back("`{3,}\\s*[^\\s\n]*\\s*\n"); // code blocks + markups_subexpr.push_back("`+"); // inlined code and code blocks + markups_subexpr.push_back("\\*{1,2}\\b"); // text emphasis + markups_subexpr.push_back("\\["); // hyperlinks + const RE2 markup_expr("(" + str_util::Join(markups_subexpr, "|") + ")"); + + bool in_list = false; + string input = inp; + while (true) { + string text, markup; + if (!FindAndCut(&input, markup_expr, &text, &markup)) { + javadoc_text << input; + break; // end of loop + } + javadoc_text << text; + if (str_util::StartsWith(markup, "\n")) { + javadoc_text << "\n"; + if (str_util::StrContains(markup, "*")) { + // new list item + javadoc_text << (in_list ? "\n" : "
    \n") << "
  • \n"; + in_list = true; + } else if (in_list) { + // end of list + javadoc_text << "
  • \n
\n"; + in_list = false; + } else if (!str_util::StartsWith(input, "```")) { + // new paragraph (not required if a
 block follows)
+        javadoc_text << "

\n"; + } + } else if (str_util::StartsWith(markup, "```")) { + // code blocks + if (FindAndCut(&input, "(```\\s*\n*)", &text)) { + javadoc_text << "

{@code\n" << text << "}
\n"; + } else { + javadoc_text << markup; + } + } else if (str_util::StartsWith("(" + markup + ")", "`")) { + // inlined code + if (FindAndCut(&input, markup, &text)) { + javadoc_text << "{@code " << text << "}"; + } else { + javadoc_text << markup; + } + } else if (markup == "**") { + // text emphasis (strong) + if (FindAndCut(&input, "(\\b\\*{2})", &text)) { + javadoc_text << "" << ParseDocumentation(text) << ""; + } else { + javadoc_text << markup; + } + } else if (markup == "*") { + // text emphasis (normal) + if (FindAndCut(&input, "(\\b\\*{1})", &text)) { + javadoc_text << "" << ParseDocumentation(text) << ""; + } else { + javadoc_text << markup; + } + } else if (str_util::StartsWith(markup, "[")) { + // hyperlinks + string label; + string link; + if (RE2::PartialMatch(input, "([^\\[]+)\\]\\((http.+)\\)", &label, + &link) && + str_util::StartsWith(input, label + link)) { + input = input.substr(label.size() + link.size()); + javadoc_text << "" + << ParseDocumentation(label) << ""; + } else { + javadoc_text << markup; + } + } else { + // safe fallback + javadoc_text << markup; + } + } + return javadoc_text.str(); +} + +ArgumentSpec CreateInput(const OpDef_ArgDef& input_def, + const ApiDef::Arg& input_api_def, + TypeResolver* type_resolver) { + bool iterable = false; + Type type = type_resolver->TypeOf(input_def, &iterable); + Type var_type = + Type::Interface("Operand", "org.tensorflow").add_parameter(type); + if (iterable) { + var_type = Type::IterableOf(var_type); + } + return ArgumentSpec( + input_api_def.name(), + Variable::Create(SnakeToCamelCase(input_api_def.rename_to()), var_type), + type, ParseDocumentation(input_api_def.description()), iterable); +} + +AttributeSpec CreateAttribute(const OpDef_AttrDef& attr_def, + const ApiDef::Attr& attr_api_def, + TypeResolver* type_resolver) { + bool iterable = false; + std::pair types = type_resolver->TypesOf(attr_def, &iterable); + Type var_type = types.first.kind() == Type::GENERIC + ? 
Type::Class("Class").add_parameter(types.first) + : types.first; + if (iterable) { + var_type = Type::ListOf(var_type); + } + return AttributeSpec( + attr_api_def.name(), + Variable::Create(SnakeToCamelCase(attr_api_def.rename_to()), var_type), + types.first, types.second, ParseDocumentation(attr_api_def.description()), + iterable, attr_api_def.has_default_value()); +} + +ArgumentSpec CreateOutput(const OpDef_ArgDef& output_def, + const ApiDef::Arg& output_api, + TypeResolver* type_resolver) { + bool iterable = false; + Type type = type_resolver->TypeOf(output_def, &iterable); + Type var_type = Type::Class("Output", "org.tensorflow").add_parameter(type); + if (iterable) { + var_type = Type::ListOf(var_type); + } + return ArgumentSpec( + output_api.name(), + Variable::Create(SnakeToCamelCase(output_api.rename_to()), var_type), + type, ParseDocumentation(output_api.description()), iterable); +} + +EndpointSpec CreateEndpoint(const OpDef& op_def, const ApiDef& api_def, + const ApiDef_Endpoint& endpoint_def) { + std::vector name_tokens = str_util::Split(endpoint_def.name(), "."); + string package; + string name; + if (name_tokens.size() > 1) { + package = name_tokens.at(0); + name = name_tokens.at(1); + } else { + package = "core"; // generate unclassified ops in the 'core' package + name = name_tokens.at(0); + } + return EndpointSpec(package, name, + Javadoc::Create(ParseDocumentation(api_def.summary())) + .details(ParseDocumentation(api_def.description()))); +} + +} // namespace + +OpSpec OpSpec::Create(const OpDef& op_def, const ApiDef& api_def) { + OpSpec op(api_def.graph_op_name(), api_def.visibility() == ApiDef::HIDDEN, + op_def.deprecation().explanation()); + TypeResolver type_resolver(op_def); + for (const string& next_input_name : api_def.arg_order()) { + for (int i = 0; i < op_def.input_arg().size(); ++i) { + if (op_def.input_arg(i).name() == next_input_name) { + op.inputs_.push_back(CreateInput(op_def.input_arg(i), api_def.in_arg(i), + &type_resolver)); + 
break; + } + } + } + for (int i = 0; i < op_def.attr().size(); ++i) { + // do not parse attributes already visited, they have probably been inferred + // before as an input argument type + if (!type_resolver.IsAttributeVisited(op_def.attr(i).name())) { + AttributeSpec attr = + CreateAttribute(op_def.attr(i), api_def.attr(i), &type_resolver); + // attributes with a default value are optional + if (attr.has_default_value() && attr.type().kind() != Type::GENERIC) { + op.optional_attributes_.push_back(attr); + } else { + op.attributes_.push_back(attr); + } + } + } + for (int i = 0; i < op_def.output_arg().size(); ++i) { + op.outputs_.push_back( + CreateOutput(op_def.output_arg(i), api_def.out_arg(i), &type_resolver)); + } + for (const auto& endpoint_def : api_def.endpoint()) { + op.endpoints_.push_back(CreateEndpoint(op_def, api_def, endpoint_def)); + } + return op; +} + +} // namespace java +} // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/op_specs.h b/tensorflow/java/src/gen/cc/op_specs.h new file mode 100644 index 0000000000..ca0ba16745 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_specs.h @@ -0,0 +1,173 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_OP_SPECS_H_ +#define TENSORFLOW_JAVA_SRC_GEN_CC_OP_SPECS_H_ + +#include +#include + +#include "tensorflow/core/framework/api_def.pb.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/java/src/gen/cc/java_defs.h" + +namespace tensorflow { +namespace java { + +class EndpointSpec { + public: + // A specification for an operation endpoint + // + // package: package of this endpoint (from which also derives its package) + // name: name of this endpoint class + // javadoc: the endpoint class documentation + // TODO(annarev): hardcode depcreated to false until deprecated is possible + EndpointSpec(const string& package, const string& name, + const Javadoc& javadoc) + : package_(package), name_(name), javadoc_(javadoc), deprecated_(false) {} + + const string& package() const { return package_; } + const string& name() const { return name_; } + const Javadoc& javadoc() const { return javadoc_; } + bool deprecated() const { return deprecated_; } + + private: + const string package_; + const string name_; + const Javadoc javadoc_; + const bool deprecated_; +}; + +class ArgumentSpec { + public: + // A specification for an operation argument + // + // op_def_name: argument name, as known by TensorFlow core + // var: a variable to represent this argument in Java + // type: the tensor type of this argument + // description: a description of this argument, in javadoc + // iterable: true if this argument is a list + ArgumentSpec(const string& op_def_name, const Variable& var, const Type& type, + const string& description, bool iterable) + : op_def_name_(op_def_name), + var_(var), + type_(type), + description_(description), + iterable_(iterable) {} + + const string& op_def_name() const { return op_def_name_; } + const Variable& var() const { return var_; } + const Type& type() const { 
return type_; } + const string& description() const { return description_; } + bool iterable() const { return iterable_; } + + private: + const string op_def_name_; + const Variable var_; + const Type type_; + const string description_; + const bool iterable_; +}; + +class AttributeSpec { + public: + // A specification for an operation attribute + // + // op_def_name: attribute name, as known by TensorFlow core + // var: a variable to represent this attribute in Java + // type: the type of this attribute + // jni_type: the type of this attribute in JNI layer (see OperationBuilder) + // description: a description of this attribute, in javadoc + // iterable: true if this attribute is a list + // has_default_value: true if this attribute has a default value if not set + AttributeSpec(const string& op_def_name, const Variable& var, + const Type& type, const Type& jni_type, + const string& description, bool iterable, + bool has_default_value) + : op_def_name_(op_def_name), + var_(var), + type_(type), + description_(description), + iterable_(iterable), + jni_type_(jni_type), + has_default_value_(has_default_value) {} + + const string& op_def_name() const { return op_def_name_; } + const Variable& var() const { return var_; } + const Type& type() const { return type_; } + const string& description() const { return description_; } + bool iterable() const { return iterable_; } + const Type& jni_type() const { return jni_type_; } + bool has_default_value() const { return has_default_value_; } + + private: + const string op_def_name_; + const Variable var_; + const Type type_; + const string description_; + const bool iterable_; + const Type jni_type_; + const bool has_default_value_; +}; + +class OpSpec { + public: + // Parses an op definition and its API to produce a specification used for + // rendering its Java wrapper + // + // op_def: Op definition + // api_def: Op API definition + static OpSpec Create(const OpDef& op_def, const ApiDef& api_def); + + const string& 
graph_op_name() const { return graph_op_name_; } + bool hidden() const { return hidden_; } + const string& deprecation_explanation() const { + return deprecation_explanation_; + } + const std::vector endpoints() const { return endpoints_; } + const std::vector& inputs() const { return inputs_; } + const std::vector& outputs() const { return outputs_; } + const std::vector& attributes() const { return attributes_; } + const std::vector& optional_attributes() const { + return optional_attributes_; + } + + private: + // A specification for an operation + // + // graph_op_name: name of this op, as known by TensorFlow core engine + // hidden: true if this op should not be visible through the Graph Ops API + // deprecation_explanation: message to show if all endpoints are deprecated + explicit OpSpec(const string& graph_op_name, bool hidden, + const string& deprecation_explanation) + : graph_op_name_(graph_op_name), + hidden_(hidden), + deprecation_explanation_(deprecation_explanation) {} + + const string graph_op_name_; + const bool hidden_; + const string deprecation_explanation_; + std::vector endpoints_; + std::vector inputs_; + std::vector outputs_; + std::vector attributes_; + std::vector optional_attributes_; +}; + +} // namespace java +} // namespace tensorflow + +#endif // TENSORFLOW_JAVA_SRC_GEN_CC_OP_SPECS_H_ diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc index a02f75ad6e..66401bdba7 100644 --- a/tensorflow/java/src/gen/cc/source_writer.cc +++ b/tensorflow/java/src/gen/cc/source_writer.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include #include -#include +#include +#include #include "tensorflow/java/src/gen/cc/source_writer.h" @@ -83,20 +83,22 @@ SourceWriter& SourceWriter::Append(const StringPiece& str) { } SourceWriter& SourceWriter::AppendType(const Type& type) { - if (type.kind() == Type::Kind::GENERIC && type.name().empty()) { + if (type.wildcard()) { Append("?"); } else { Append(type.name()); - } - if (!type.parameters().empty()) { - Append("<"); - for (const Type& t : type.parameters()) { - if (&t != &type.parameters().front()) { - Append(", "); + if (!type.parameters().empty()) { + Append("<"); + bool first = true; + for (const Type& t : type.parameters()) { + if (!first) { + Append(", "); + } + AppendType(t); + first = false; } - AppendType(t); + Append(">"); } - Append(">"); } return *this; } @@ -107,7 +109,21 @@ SourceWriter& SourceWriter::EndLine() { return *this; } -SourceWriter& SourceWriter::BeginMethod(const Method& method, int modifiers) { +SourceWriter& SourceWriter::BeginBlock(const string& expression) { + if (!expression.empty()) { + Append(expression + " {"); + } else { + Append(newline_ ? 
"{" : " {"); + } + return EndLine().Indent(2); +} + +SourceWriter& SourceWriter::EndBlock() { + return Indent(-2).Append("}").EndLine(); +} + +SourceWriter& SourceWriter::BeginMethod(const Method& method, int modifiers, + const Javadoc* javadoc) { GenericNamespace* generic_namespace = PushGenericNamespace(modifiers); if (!method.constructor()) { generic_namespace->Visit(method.return_type()); @@ -116,8 +132,9 @@ SourceWriter& SourceWriter::BeginMethod(const Method& method, int modifiers) { generic_namespace->Visit(v.type()); } EndLine(); - WriteDoc(method.description(), method.return_description(), - &method.arguments()); + if (javadoc != nullptr) { + WriteJavadoc(*javadoc); + } if (!method.annotations().empty()) { WriteAnnotations(method.annotations()); } @@ -130,11 +147,13 @@ SourceWriter& SourceWriter::BeginMethod(const Method& method, int modifiers) { AppendType(method.return_type()).Append(" "); } Append(method.name()).Append("("); + bool first = true; for (const Variable& v : method.arguments()) { - if (&v != &method.arguments().front()) { + if (!first) { Append(", "); } AppendType(v.type()).Append(v.variadic() ? "... 
" : " ").Append(v.name()); + first = false; } return Append(")").BeginBlock(); } @@ -145,29 +164,36 @@ SourceWriter& SourceWriter::EndMethod() { return *this; } -SourceWriter& SourceWriter::BeginType(const Type& type, - const std::list* dependencies, int modifiers) { +SourceWriter& SourceWriter::BeginType(const Type& type, int modifiers, + const std::list* extra_dependencies, + const Javadoc* javadoc) { if (!type.package().empty()) { Append("package ").Append(type.package()).Append(";").EndLine(); } - if (dependencies != nullptr && !dependencies->empty()) { - TypeImporter type_importer(type.package()); - for (const Type& t : *dependencies) { + TypeImporter type_importer(type.package()); + type_importer.Visit(type); + if (extra_dependencies != nullptr) { + for (const Type& t : *extra_dependencies) { type_importer.Visit(t); } + } + if (!type_importer.imports().empty()) { EndLine(); for (const string& s : type_importer.imports()) { Append("import ").Append(s).Append(";").EndLine(); } } - return BeginInnerType(type, modifiers); + return BeginInnerType(type, modifiers, javadoc); } -SourceWriter& SourceWriter::BeginInnerType(const Type& type, int modifiers) { +SourceWriter& SourceWriter::BeginInnerType(const Type& type, int modifiers, + const Javadoc* javadoc) { GenericNamespace* generic_namespace = PushGenericNamespace(modifiers); generic_namespace->Visit(type); EndLine(); - WriteDoc(type.description()); + if (javadoc != nullptr) { + WriteJavadoc(*javadoc); + } if (!type.annotations().empty()) { WriteAnnotations(type.annotations()); } @@ -200,14 +226,15 @@ SourceWriter& SourceWriter::EndType() { return *this; } -SourceWriter& SourceWriter::WriteFields(const std::list& fields, - int modifiers) { - EndLine(); - for (const Variable& v : fields) { - WriteModifiers(modifiers); - AppendType(v.type()).Append(" ").Append(v.name()).Append(";"); - EndLine(); +SourceWriter& SourceWriter::WriteField(const Variable& field, int modifiers, + const Javadoc* javadoc) { + // If present, 
write field javadoc only as one brief line + if (javadoc != nullptr && !javadoc->brief().empty()) { + Append("/** ").Append(javadoc->brief()).Append(" */").EndLine(); } + WriteModifiers(modifiers); + AppendType(field.type()).Append(" ").Append(field.name()).Append(";"); + EndLine(); return *this; } @@ -228,39 +255,33 @@ SourceWriter& SourceWriter::WriteModifiers(int modifiers) { return *this; } -SourceWriter& SourceWriter::WriteDoc(const string& description, - const string& return_description, const std::list* parameters) { - if (description.empty() && return_description.empty() - && (parameters == nullptr || parameters->empty())) { - return *this; // no doc to write - } +SourceWriter& SourceWriter::WriteJavadoc(const Javadoc& javadoc) { + Append("/**").Prefix(" * ").EndLine(); bool do_line_break = false; - Append("/**").EndLine().Prefix(" * "); - if (!description.empty()) { - Write(description).EndLine(); + if (!javadoc.brief().empty()) { + Write(javadoc.brief()).EndLine(); do_line_break = true; } - if (parameters != nullptr && !parameters->empty()) { + if (!javadoc.details().empty()) { if (do_line_break) { - EndLine(); - do_line_break = false; - } - for (const Variable& v : *parameters) { - Append("@param ").Append(v.name()); - if (!v.description().empty()) { - Append(" ").Write(v.description()); - } - EndLine(); + Append("

").EndLine(); } + Write(javadoc.details()).EndLine(); + do_line_break = true; } - if (!return_description.empty()) { + if (!javadoc.tags().empty()) { if (do_line_break) { EndLine(); - do_line_break = false; } - Append("@return ").Write(return_description).EndLine(); + for (const auto& p : javadoc.tags()) { + Append("@" + p.first); + if (!p.second.empty()) { + Append(" ").Write(p.second); + } + EndLine(); + } } - return Prefix("").Append(" **/").EndLine(); + return Prefix("").Append(" */").EndLine(); } SourceWriter& SourceWriter::WriteAnnotations( @@ -278,14 +299,16 @@ SourceWriter& SourceWriter::WriteAnnotations( SourceWriter& SourceWriter::WriteGenerics( const std::list& generics) { Append("<"); + bool first = true; for (const Type* pt : generics) { - if (pt != generics.front()) { + if (!first) { Append(", "); } Append(pt->name()); if (!pt->supertypes().empty()) { Append(" extends ").AppendType(pt->supertypes().front()); } + first = false; } return Append(">"); } @@ -311,21 +334,20 @@ void SourceWriter::PopGenericNamespace() { void SourceWriter::TypeVisitor::Visit(const Type& type) { DoVisit(type); for (const Type& t : type.parameters()) { - DoVisit(t); + Visit(t); } for (const Annotation& t : type.annotations()) { DoVisit(t); } for (const Type& t : type.supertypes()) { - DoVisit(t); + Visit(t); } } void SourceWriter::GenericNamespace::DoVisit(const Type& type) { // ignore non-generic parameters, wildcards and generics already declared - if (type.kind() == Type::GENERIC - && !type.IsWildcard() - && generic_names_.find(type.name()) == generic_names_.end()) { + if (type.kind() == Type::GENERIC && !type.wildcard() && + generic_names_.find(type.name()) == generic_names_.end()) { declared_types_.push_back(&type); generic_names_.insert(type.name()); } @@ -333,7 +355,7 @@ void SourceWriter::GenericNamespace::DoVisit(const Type& type) { void SourceWriter::TypeImporter::DoVisit(const Type& type) { if (!type.package().empty() && type.package() != current_package_) { - 
imports_.insert(type.package() + '.' + type.name()); + imports_.insert(type.canonical_name()); } } diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h index f011acd30a..de0113bd5b 100644 --- a/tensorflow/java/src/gen/cc/source_writer.h +++ b/tensorflow/java/src/gen/cc/source_writer.h @@ -93,25 +93,22 @@ class SourceWriter { // This method appends a new opening brace to the current data and indent the // next lines according to Google Java Style Guide. The block can optionally // be preceded by an expression (e.g. Append("if(true)").BeginBlock();) - SourceWriter& BeginBlock() { - return Append(newline_ ? "{" : " {").EndLine().Indent(2); - } + SourceWriter& BeginBlock(const string& expression = ""); // Ends the current block of source code. // // This method appends a new closing brace to the current data and outdent the // next lines back to the margin used before BeginBlock() was invoked. - SourceWriter& EndBlock() { - return Indent(-2).Append("}").EndLine(); - } + SourceWriter& EndBlock(); // Begins to write a method. // // This method outputs the signature of the Java method from the data passed - // in the 'method' parameter and starts a new block. Additionnal modifiers can - // also be passed in parameter to define the accesses and the scope of this - // method. - SourceWriter& BeginMethod(const Method& method, int modifiers = 0); + // in the 'method' parameter and starts a new block. Modifiers are also passed + // in parameter to define the access scope of this method and, optionally, + // a Javadoc. + SourceWriter& BeginMethod(const Method& method, int modifiers, + const Javadoc* javadoc = nullptr); // Ends the current method. // @@ -122,22 +119,24 @@ class SourceWriter { // Begins to write the main type of a source file. // // This method outputs the declaration of the Java type from the data passed - // in the 'type' parameter and starts a new block. 
Additionnal modifiers can - // also be passed in parameter to define the accesses and the scope of this - // type. + // in the 'type' parameter and starts a new block. Modifiers are also passed + // in parameter to define the access scope of this type and, optionally, + // a Javadoc. // - // If not null, all types found in the 'dependencies' list will be imported - // before declaring the new type. - SourceWriter& BeginType(const Type& clazz, - const std::list* dependencies, int modifiers = 0); + // If not null, all types found in the 'extra_dependencies' list will be + // imported before declaring the new type. + SourceWriter& BeginType(const Type& type, int modifiers, + const std::list* extra_dependencies = nullptr, + const Javadoc* javadoc = nullptr); // Begins to write a new inner type. // // This method outputs the declaration of the Java type from the data passed - // in the 'type' parameter and starts a new block. Additionnal modifiers can - // also be passed in parameter to define the accesses and the scope of this - // type. - SourceWriter& BeginInnerType(const Type& type, int modifiers = 0); + // in the 'type' parameter and starts a new block. Modifiers are also passed + // in parameter to define the accesses and the scope of this type and, + // optionally, a Javadoc. + SourceWriter& BeginInnerType(const Type& type, int modifiers, + const Javadoc* javadoc = nullptr); // Ends the current type. // @@ -145,13 +144,13 @@ class SourceWriter { // BeginType() or BeginInnerType() prior to this. SourceWriter& EndType(); - // Writes a list of variables as fields of a type. + // Writes a variable as fields of a type. // // This method must be called within the definition of a type (see BeginType() - // or BeginInnerType()). Additional modifiers can also be passed in parameter - // to define the accesses and the scope of those fields. - SourceWriter& WriteFields(const std::list& fields, - int modifiers = 0); + // or BeginInnerType()). 
Modifiers are also be passed in parameter to define + // the accesses and the scope of this field and, optionally, a Javadoc. + SourceWriter& WriteField(const Variable& field, int modifiers, + const Javadoc* javadoc = nullptr); protected: virtual void DoAppend(const StringPiece& str) = 0; @@ -207,9 +206,7 @@ class SourceWriter { std::stack generic_namespaces_; SourceWriter& WriteModifiers(int modifiers); - SourceWriter& WriteDoc(const string& description, - const string& return_description = "", - const std::list* parameters = nullptr); + SourceWriter& WriteJavadoc(const Javadoc& javadoc); SourceWriter& WriteAnnotations(const std::list& annotations); SourceWriter& WriteGenerics(const std::list& generics); GenericNamespace* PushGenericNamespace(int modifiers); diff --git a/tensorflow/java/src/gen/cc/source_writer_test.cc b/tensorflow/java/src/gen/cc/source_writer_test.cc index 4bce2fea70..fb8fc64dff 100644 --- a/tensorflow/java/src/gen/cc/source_writer_test.cc +++ b/tensorflow/java/src/gen/cc/source_writer_test.cc @@ -245,12 +245,17 @@ TEST(StreamTest, Types) { SourceBufferWriter writer; Type generic = Type::Generic("T").add_supertype(Type::Class("Number")); - writer.AppendType(Type::Int()).Append(", ") - .AppendType(Type::Class("String")).Append(", ") - .AppendType(generic).Append(", ") - .AppendType(Type::ListOf(generic)).Append(", ") - .AppendType(Type::ListOf(Type::IterableOf(generic))).Append(", ") - .AppendType(Type::ListOf(Type::Generic())); + writer.AppendType(Type::Int()) + .Append(", ") + .AppendType(Type::Class("String")) + .Append(", ") + .AppendType(generic) + .Append(", ") + .AppendType(Type::ListOf(generic)) + .Append(", ") + .AppendType(Type::ListOf(Type::IterableOf(generic))) + .Append(", ") + .AppendType(Type::ListOf(Type::Wildcard())); const char* expected = "int, String, T, List, List>, List"; @@ -282,7 +287,7 @@ TEST(WriteType, SimpleClass) { SourceBufferWriter writer; Type clazz = Type::Class("Test", "org.tensorflow"); - writer.BeginType(clazz, 
nullptr, PUBLIC).EndType(); + writer.BeginType(clazz, PUBLIC).EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -300,7 +305,7 @@ TEST(WriteType, SimpleClassWithDependencies) { deps.push_back(Type::Class("SamePackageType", "org.tensorflow")); deps.push_back(Type::Class("NoPackageType")); - writer.BeginType(clazz, &deps, PUBLIC).EndType(); + writer.BeginType(clazz, PUBLIC, &deps).EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -313,20 +318,22 @@ TEST(WriteType, SimpleClassWithDependencies) { TEST(WriteType, AnnotatedAndDocumentedClass) { SourceBufferWriter writer; Type clazz = Type::Class("Test", "org.tensorflow"); - clazz.description("This class has a\n

\nmultiline description."); + Javadoc clazz_doc = Javadoc::Create("Javadoc test") + .details("This is a\nmultiline description."); clazz.add_annotation(Annotation::Create("Bean")); clazz.add_annotation(Annotation::Create("SuppressWarnings") .attributes("\"rawtypes\"")); - writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + writer.BeginType(clazz, PUBLIC, nullptr, &clazz_doc).EndType(); const char* expected = "package org.tensorflow;\n\n" "/**\n" - " * This class has a\n" + " * Javadoc test\n" " *

\n" + " * This is a\n" " * multiline description.\n" - " **/\n" + " */\n" "@Bean\n" "@SuppressWarnings(\"rawtypes\")\n" "public class Test {\n}\n"; @@ -339,7 +346,7 @@ TEST(WriteType, ParameterizedClass) { clazz.add_parameter(Type::Generic("T")); clazz.add_parameter(Type::Generic("U").add_supertype(Type::Class("Number"))); - writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + writer.BeginType(clazz, PUBLIC).EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -358,7 +365,7 @@ TEST(WriteType, ParameterizedClassAndSupertypes) { clazz.add_supertype(Type::Interface("Runnable")); clazz.add_supertype(Type::Class("SuperTest").add_parameter(type_t)); - writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + writer.BeginType(clazz, PUBLIC).EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -372,24 +379,23 @@ TEST(WriteType, ParameterizedClassFields) { Type clazz = Type::Class("Test", "org.tensorflow"); Type type_t = Type::Generic("T").add_supertype(Type::Class("Number")); clazz.add_parameter(type_t); - std::list static_fields; - static_fields.push_back(Variable::Create("field1", Type::Class("String"))); - std::list member_fields; - member_fields.push_back(Variable::Create("field2", Type::Class("String"))); - member_fields.push_back(Variable::Create("field3", type_t)); + Variable field1 = Variable::Create("field1", Type::Class("String")); + Variable field2 = Variable::Create("field2", Type::Class("String")); + Variable field3 = Variable::Create("field3", type_t); + Javadoc field3_doc = Javadoc::Create("This variable is documented"); - writer.BeginType(clazz, nullptr, PUBLIC) - .WriteFields(static_fields, STATIC | PUBLIC | FINAL) - .WriteFields(member_fields, PRIVATE) - .EndType(); + writer.BeginType(clazz, PUBLIC) + .WriteField(field1, STATIC | PUBLIC | FINAL) + .WriteField(field2, PRIVATE) + .WriteField(field3, PRIVATE, &field3_doc) + .EndType(); const char* expected = "package org.tensorflow;\n\n" "public class Test {\n" - " \n" " public 
static final String field1;\n" - " \n" " private String field2;\n" + " /** This variable is documented */\n" " private T field3;\n" "}\n"; ASSERT_STREQ(expected, writer.str().data()); @@ -400,10 +406,10 @@ TEST(WriteType, SimpleInnerClass) { Type clazz = Type::Class("Test", "org.tensorflow"); Type inner_class = Type::Class("InnerTest"); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginInnerType(inner_class, PUBLIC) - .EndType() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginInnerType(inner_class, PUBLIC) + .EndType() + .EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -423,10 +429,10 @@ TEST(WriteType, StaticParameterizedInnerClass) { Type inner_class = Type::Class("InnerTest"); inner_class.add_parameter(type_t); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginInnerType(inner_class, PUBLIC | STATIC) - .EndType() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginInnerType(inner_class, PUBLIC | STATIC) + .EndType() + .EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -443,9 +449,10 @@ TEST(WriteMethod, SimpleMethod) { Type clazz = Type::Class("Test", "org.tensorflow"); Method method = Method::Create("doNothing", Type::Void()); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginMethod(method, PUBLIC).EndMethod() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginMethod(method, PUBLIC) + .EndMethod() + .EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -461,24 +468,28 @@ TEST(WriteMethod, AnnotatedAndDocumentedMethod) { SourceBufferWriter writer; Type clazz = Type::Class("Test", "org.tensorflow"); Method method = Method::Create("doNothing", Type::Void()); - method.description("This method has a\n

\nmultiline description."); + Javadoc method_doc = + Javadoc::Create("Javadoc test") + .details("This method has a\nmultiline description."); method.add_annotation(Annotation::Create("Override")); method.add_annotation(Annotation::Create("SuppressWarnings") .attributes("\"rawtypes\"")); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginMethod(method, PUBLIC).EndMethod() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginMethod(method, PUBLIC, &method_doc) + .EndMethod() + .EndType(); const char* expected = "package org.tensorflow;\n\n" "public class Test {\n" " \n" " /**\n" - " * This method has a\n" + " * Javadoc test\n" " *

\n" + " * This method has a\n" " * multiline description.\n" - " **/\n" + " */\n" " @Override\n" " @SuppressWarnings(\"rawtypes\")\n" " public void doNothing() {\n" @@ -490,23 +501,27 @@ TEST(WriteMethod, AnnotatedAndDocumentedMethod) { TEST(WriteMethod, DocumentedMethodWithArguments) { SourceBufferWriter writer; Type clazz = Type::Class("Test", "org.tensorflow"); + Variable reverse = Variable::Create("reverse", Type::Boolean()); Method method = Method::Create("boolToInt", Type::Int()); - method.description("Converts a boolean to an int"); - method.return_description("int value for this boolean"); method.add_argument(Variable::Create("b", Type::Boolean())); - Variable reverse = Variable::Create("reverse", Type::Boolean()); - reverse.description("if true, value is reversed"); method.add_argument(reverse); - - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginMethod(method, PUBLIC) - .Append("if (b && !reverse)") - .BeginBlock() - .Append("return 1;").EndLine() - .EndBlock() - .Append("return 0;").EndLine() - .EndMethod() - .EndType(); + Javadoc method_doc = + Javadoc::Create("Converts a boolean to an int") + .details("This method will convert\na boolean to an int") + .add_param_tag(reverse.name(), "if true, value is reversed") + .add_tag("return", "int value for this boolean"); + + writer.BeginType(clazz, PUBLIC) + .BeginMethod(method, PUBLIC, &method_doc) + .Append("if (b && !reverse)") + .BeginBlock() + .Append("return 1;") + .EndLine() + .EndBlock() + .Append("return 0;") + .EndLine() + .EndMethod() + .EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -514,11 +529,13 @@ TEST(WriteMethod, DocumentedMethodWithArguments) { " \n" " /**\n" " * Converts a boolean to an int\n" + " *

\n" + " * This method will convert\n" + " * a boolean to an int\n" " * \n" - " * @param b\n" " * @param reverse if true, value is reversed\n" " * @return int value for this boolean\n" - " **/\n" + " */\n" " public int boolToInt(boolean b, boolean reverse) {\n" " if (b && !reverse) {\n" " return 1;\n" @@ -536,11 +553,12 @@ TEST(WriteMethod, ParameterizedMethod) { clazz.add_parameter(type_t); Method method = Method::Create("doNothing", type_t); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginMethod(method, PUBLIC) - .Append("return null;").EndLine() - .EndMethod() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginMethod(method, PUBLIC) + .Append("return null;") + .EndLine() + .EndMethod() + .EndType(); const char* expected = "package org.tensorflow;\n\n" @@ -560,11 +578,12 @@ TEST(WriteMethod, StaticParameterizedMethod) { clazz.add_parameter(type_t); Method method = Method::Create("doNothing", type_t); - writer.BeginType(clazz, nullptr, PUBLIC) - .BeginMethod(method, PUBLIC | STATIC) - .Append("return null;").EndLine() - .EndMethod() - .EndType(); + writer.BeginType(clazz, PUBLIC) + .BeginMethod(method, PUBLIC | STATIC) + .Append("return null;") + .EndLine() + .EndMethod() + .EndType(); const char* expected = "package org.tensorflow;\n\n" diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index a6650fc4ea..f4ff34ea03 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -1,62 +1,62 @@ # -*- Python -*- -load("//tensorflow:tensorflow.bzl", - "tf_binary_additional_srcs", - "tf_cc_binary", - "tf_copts") +load( + "//tensorflow:tensorflow.bzl", + "tf_binary_additional_srcs", +) -# Given a list of "ops_libs" (a list of files in the core/ops directory -# without their .cc extensions), generate Java wrapper code for all operations -# found in the ops files. -# Then, combine all those source files into a single archive (.srcjar). 
+# Generate Java wrapper classes for all registered core operations and package +# them into a single source archive (.srcjar). # # For example: -# tf_java_op_gen_srcjar("gen_sources", "gen_tool", "my.package", [ "array_ops", "math_ops" ]) +# tf_java_op_gen_srcjar("gen_sources", ":gen_tool", "my.package") # -# will create a genrule named "gen_sources" that first generate source files: -# ops/src/main/java/my/package/array/*.java -# ops/src/main/java/my/package/math/*.java +# will create a genrule named "gen_sources" that generates source files under +# ops/src/main/java/my/package/**/*.java # -# and then archive those source files in: +# and then archive those source files into # ops/gen_sources.srcjar # def tf_java_op_gen_srcjar(name, gen_tool, - gen_base_package, - ops_libs=[], - ops_libs_pkg="//tensorflow/core", + base_package, + api_def_srcs=[], out_dir="ops/", out_src_dir="src/main/java/", visibility=["//tensorflow/java:__pkg__"]): - gen_tools = [] gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files + srcs = api_def_srcs[:] - # Construct an op generator binary for each ops library. - for ops_lib in ops_libs: - gen_lib = ops_lib[:ops_lib.rfind("_")] - out_gen_tool = out_dir + ops_lib + "_gen_tool" + if not api_def_srcs: + api_def_args_str = "," + else: + api_def_args = [] + for api_def_src in api_def_srcs: + # Add directory of the first ApiDef source to args. + # We are assuming all ApiDefs in a single api_def_src are in the + # same directory. 
+ api_def_args.append( + "$$(dirname $$(echo $(locations " + api_def_src + + ") | cut -d\" \" -f1))") + api_def_args_str = ",".join(api_def_args) - tf_cc_binary( - name=out_gen_tool, - copts=tf_copts(), - linkopts=["-lm"], - linkstatic=1, # Faster to link this one-time-use binary dynamically - deps=[gen_tool, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) - - gen_tools += [":" + out_gen_tool] - gen_cmds += ["$(location :" + out_gen_tool + ")" + - " --output_dir=$(@D)/" + out_src_dir + - " --lib_name=" + gen_lib + - " --base_package=" + gen_base_package] + gen_cmds += ["$(location " + gen_tool + ")" + + " --output_dir=$(@D)/" + out_src_dir + + " --base_package=" + base_package + + " --api_dirs=" + api_def_args_str] # Generate a source archive containing generated code for these ops. gen_srcjar = out_dir + name + ".srcjar" gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) src"] - gen_tools += ["@local_jdk//:jar"] + ["@local_jdk//:jdk"] - gen_tools += tf_binary_additional_srcs() + native.genrule( name=name, + srcs=srcs, outs=[gen_srcjar], - tools=gen_tools, - cmd="&&".join(gen_cmds)) + tools=[ + "@local_jdk//:jar", + "@local_jdk//:jdk", + gen_tool + ] + tf_binary_additional_srcs(), + cmd=" && ".join(gen_cmds)) -- GitLab From cff06379c2e1ac01de3b3c0ca32c3a3037d5b833 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 29 May 2018 08:36:14 -0700 Subject: [PATCH 199/902] Generalize assert_true_mean_equal and assert_true_mean_equal_two_sample to assert_true_mean_in_interval. 
PiperOrigin-RevId: 198400265 --- .../kernel_tests/statistical_testing_test.py | 33 ++++- .../python/ops/statistical_testing.py | 131 +++++++++++++----- 2 files changed, 122 insertions(+), 42 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index ce6cf702d5..4a5a6b5ae1 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -129,16 +129,41 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is not 0.4. - with self.assertRaisesOpError("Mean confidence interval too high"): + with self.assertRaisesOpError("true mean greater than expected"): sess.run(st.assert_true_mean_equal_by_dkwm( samples, 0., 1., 0.4, false_fail_rate=1e-6)) # Test that the test assertion confirms that the mean of the # standard uniform distribution is not 0.6. - with self.assertRaisesOpError("Mean confidence interval too low"): + with self.assertRaisesOpError("true mean smaller than expected"): sess.run(st.assert_true_mean_equal_by_dkwm( samples, 0., 1., 0.6, false_fail_rate=1e-6)) + def test_dkwm_mean_in_interval_one_sample_assertion(self): + rng = np.random.RandomState(seed=0) + num_samples = 5000 + + # Test that the test assertion agrees that the mean of the standard + # uniform distribution is between 0.4 and 0.6. + samples = rng.uniform(size=num_samples).astype(np.float32) + self.evaluate(st.assert_true_mean_in_interval_by_dkwm( + samples, 0., 1., + expected_low=0.4, expected_high=0.6, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not between 0.2 and 0.4. 
+ with self.assertRaisesOpError("true mean greater than expected"): + self.evaluate(st.assert_true_mean_in_interval_by_dkwm( + samples, 0., 1., + expected_low=0.2, expected_high=0.4, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not between 0.6 and 0.8. + with self.assertRaisesOpError("true mean smaller than expected"): + self.evaluate(st.assert_true_mean_in_interval_by_dkwm( + samples, 0., 1., + expected_low=0.6, expected_high=0.8, false_fail_rate=1e-6)) + def test_dkwm_mean_two_sample_assertion(self): rng = np.random.RandomState(seed=0) num_samples = 4000 @@ -172,7 +197,7 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(2, 1). beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) - with self.assertRaisesOpError("samples1 has a smaller mean"): + with self.assertRaisesOpError("true mean smaller than expected"): sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( samples1, 0., 1., beta_high_samples, 0., 1., @@ -190,7 +215,7 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(1, 2). 
beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) - with self.assertRaisesOpError("samples2 has a smaller mean"): + with self.assertRaisesOpError("true mean greater than expected"): sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( samples1, 0., 1., beta_low_samples, 0., 1., diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py index 9c69435fac..3ea9a331c7 100644 --- a/tensorflow/contrib/distributions/python/ops/statistical_testing.py +++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py @@ -140,6 +140,7 @@ __all__ = [ "assert_true_mean_equal_by_dkwm", "min_discrepancy_of_true_means_detectable_by_dkwm", "min_num_samples_for_dkwm_mean_test", + "assert_true_mean_in_interval_by_dkwm", "assert_true_mean_equal_by_dkwm_two_sample", "min_discrepancy_of_true_means_detectable_by_dkwm_two_sample", "min_num_samples_for_dkwm_mean_two_sample_test", @@ -454,20 +455,8 @@ def assert_true_mean_equal_by_dkwm( with ops.name_scope( name, "assert_true_mean_equal_by_dkwm", [samples, low, high, expected, false_fail_rate]): - samples = ops.convert_to_tensor(samples, name="samples") - low = ops.convert_to_tensor(low, name="low") - high = ops.convert_to_tensor(high, name="high") - expected = ops.convert_to_tensor(expected, name="expected") - false_fail_rate = ops.convert_to_tensor( - false_fail_rate, name="false_fail_rate") - samples = _check_shape_dominates(samples, [low, high, expected]) - min_mean, max_mean = true_mean_confidence_interval_by_dkwm( - samples, low, high, error_rate=false_fail_rate) - less_op = check_ops.assert_less( - min_mean, expected, message="Mean confidence interval too high") - with ops.control_dependencies([less_op]): - return check_ops.assert_greater( - max_mean, expected, message="Mean confidence interval too low") + return assert_true_mean_in_interval_by_dkwm( + samples, low, high, expected, expected, false_fail_rate) def 
min_discrepancy_of_true_means_detectable_by_dkwm( @@ -505,12 +494,15 @@ def min_discrepancy_of_true_means_detectable_by_dkwm( some scalar distribution supported on `[low[i], high[i]]` is enough to detect a difference in means of size `discr[i]` or more. Specifically, we guarantee that (a) if the true mean is the expected - mean, `assert_true_mean_equal_by_dkwm` will fail with probability at - most `false_fail_rate / K` (which amounts to `false_fail_rate` if - applied to the whole batch at once), and (b) if the true mean - differs from the expected mean by at least `discr[i]`, - `assert_true_mean_equal_by_dkwm` will pass with probability at most - `false_pass_rate`. + mean (resp. in the expected interval), then `assert_true_mean_equal_by_dkwm` + (resp. `assert_true_mean_in_interval_by_dkwm`) will fail with + probability at most `false_fail_rate / K` (which amounts to + `false_fail_rate` if applied to the whole batch at once), and (b) if + the true mean differs from the expected mean (resp. falls outside + the expected interval) by at least `discr[i]`, + `assert_true_mean_equal_by_dkwm` + (resp. `assert_true_mean_in_interval_by_dkwm`) will pass with + probability at most `false_pass_rate`. The detectable discrepancy scales as @@ -578,12 +570,15 @@ def min_num_samples_for_dkwm_mean_test( some scalar distribution supported on `[low[i], high[i]]` is enough to detect a difference in means of size `discrepancy[i]` or more. Specifically, we guarantee that (a) if the true mean is the expected - mean, `assert_true_mean_equal_by_dkwm` will fail with probability at - most `false_fail_rate / K` (which amounts to `false_fail_rate` if - applied to the whole batch at once), and (b) if the true mean - differs from the expected mean by at least `discrepancy[i]`, - `assert_true_mean_equal_by_dkwm` will pass with probability at most - `false_pass_rate`. + mean (resp. in the expected interval), then `assert_true_mean_equal_by_dkwm` + (resp. 
`assert_true_mean_in_interval_by_dkwm`) will fail with + probability at most `false_fail_rate / K` (which amounts to + `false_fail_rate` if applied to the whole batch at once), and (b) if + the true mean differs from the expected mean (resp. falls outside + the expected interval) by at least `discrepancy[i]`, + `assert_true_mean_equal_by_dkwm` + (resp. `assert_true_mean_in_interval_by_dkwm`) will pass with + probability at most `false_pass_rate`. The required number of samples scales as `O((high[i] - low[i])**2)`, `O(-log(false_fail_rate/K))`, @@ -610,6 +605,76 @@ def min_num_samples_for_dkwm_mean_test( return math_ops.maximum(n1, n2) +def assert_true_mean_in_interval_by_dkwm( + samples, low, high, expected_low, expected_high, + false_fail_rate=1e-6, name=None): + """Asserts the mean of the given distribution is in the given interval. + + More precisely, fails if there is enough evidence (using the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval)) + that the mean of the distribution from which the given samples are + drawn is _outside_ the given interval with statistical significance + `false_fail_rate` or stronger, otherwise passes. If you also want + to check that you are gathering enough evidence that a pass is not + spurious, see `min_num_samples_for_dkwm_mean_test` and + `min_discrepancy_of_true_means_detectable_by_dkwm`. + + Note that `false_fail_rate` is a total false failure rate for all + the assertions in the batch. As such, if the batch is nontrivial, + the assertion will insist on stronger evidence to fail any one member. + + Args: + samples: Floating-point `Tensor` of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `low` and `high`. + The support is bounded: `low <= samples <= high`. + low: Floating-point `Tensor` of lower bounds on the distributions' + supports. 
+ high: Floating-point `Tensor` of upper bounds on the distributions' + supports. + expected_low: Floating-point `Tensor` of lower bounds on the + expected true means. + expected_high: Floating-point `Tensor` of upper bounds on the + expected true means. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of mistakes. + name: A name for this operation (optional). + + Returns: + check: Op that raises `InvalidArgumentError` if any expected mean + interval does not overlap with the corresponding confidence + interval. + """ + with ops.name_scope( + name, "assert_true_mean_in_interval_by_dkwm", + [samples, low, high, expected_low, expected_high, false_fail_rate]): + samples = ops.convert_to_tensor(samples, name="samples") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + expected_low = ops.convert_to_tensor(expected_low, name="expected_low") + expected_high = ops.convert_to_tensor(expected_high, name="expected_high") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + samples = _check_shape_dominates( + samples, [low, high, expected_low, expected_high]) + min_mean, max_mean = true_mean_confidence_interval_by_dkwm( + samples, low, high, false_fail_rate) + # Assert that the interval [min_mean, max_mean] intersects the + # interval [expected_low, expected_high]. This is true if + # max_mean >= expected_low and min_mean <= expected_high. + # By DeMorgan's law, that's also equivalent to + # not (max_mean < expected_low or min_mean > expected_high), + # which is a way of saying the two intervals are not disjoint. 
+ check_confidence_interval_can_intersect = check_ops.assert_greater_equal( + max_mean, expected_low, message="Confidence interval does not " + "intersect: true mean smaller than expected") + with ops.control_dependencies([check_confidence_interval_can_intersect]): + return check_ops.assert_less_equal( + min_mean, expected_high, message="Confidence interval does not " + "intersect: true mean greater than expected") + + def assert_true_mean_equal_by_dkwm_two_sample( samples1, low1, high1, samples2, low2, high2, false_fail_rate=1e-6, name=None): @@ -676,20 +741,10 @@ def assert_true_mean_equal_by_dkwm_two_sample( # and sample counts should be valid; however, because the intervals # scale as O(-log(false_fail_rate)), there doesn't seem to be much # room to win. - min_mean_1, max_mean_1 = true_mean_confidence_interval_by_dkwm( - samples1, low1, high1, false_fail_rate / 2.) min_mean_2, max_mean_2 = true_mean_confidence_interval_by_dkwm( samples2, low2, high2, false_fail_rate / 2.) - # I want to assert - # not (max_mean_1 < min_mean_2 or min_mean_1 > max_mean_2), - # but I think I only have and-combination of asserts, so use DeMorgan. - check_confidence_intervals_can_intersect = check_ops.assert_greater_equal( - max_mean_1, min_mean_2, message="Confidence intervals do not " - "intersect: samples1 has a smaller mean than samples2") - with ops.control_dependencies([check_confidence_intervals_can_intersect]): - return check_ops.assert_less_equal( - min_mean_1, max_mean_2, message="Confidence intervals do not " - "intersect: samples2 has a smaller mean than samples1") + return assert_true_mean_in_interval_by_dkwm( + samples1, low1, high1, min_mean_2, max_mean_2, false_fail_rate / 2.) def min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( -- GitLab From 9f38ecf3bd6c6e96bf3bb56f1e37f6aff180c21e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 29 May 2018 08:45:28 -0700 Subject: [PATCH 200/902] Allow assignment to subscripts in static analysis. 
Move the handling of syntactic unpackings to a generic helper function since the pattern is used in multiple places. Update the type info analyzer to correctly process function arguments. PiperOrigin-RevId: 198401368 --- .../pyct/static_analysis/type_info.py | 55 ++++++++----------- .../pyct/static_analysis/type_info_test.py | 13 +++++ .../contrib/autograph/pyct/transformer.py | 48 ++++++++++++++++ .../autograph/pyct/transformer_test.py | 4 +- 4 files changed, 86 insertions(+), 34 deletions(-) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py index c00946f9c4..d6555dc7e0 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py @@ -136,14 +136,14 @@ class TypeInfoResolver(transformer.Base): def _process_function_arg(self, arg_name): str_name = str(arg_name) + type_holder = arg_name.ast() + self.scope.setval(arg_name, type_holder) if len(self.enclosing_entities) == 1 and str_name in self.context.arg_types: # Forge a node to hold the type information, so that method calls on # it can resolve the type. - type_holder = arg_name.ast() type_string, type_obj = self.context.arg_types[str_name] anno.setanno(type_holder, 'type', type_obj) anno.setanno(type_holder, 'type_fqn', tuple(type_string.split('.'))) - self.scope.setval(arg_name, type_holder) def visit_arg(self, node): self._process_function_arg(anno.getanno(node.arg, anno.Basic.QN)) @@ -167,50 +167,41 @@ class TypeInfoResolver(transformer.Base): anno.getanno(definition, 'element_type')) return node - def _process_variable_assignment(self, source, targets): - # Special case: constructors. 
- if isinstance(source, gast.Call): - func = source.func + def _process_variable_assignment(self, target, value): + # Constructors + if isinstance(value, gast.Call): + func = value.func if anno.hasanno(func, 'live_val'): func_obj = anno.getanno(func, 'live_val') if tf_inspect.isclass(func_obj): - anno.setanno(source, 'is_constructor', True) - anno.setanno(source, 'type', func_obj) - anno.setanno(source, 'type_fqn', anno.getanno(func, 'fqn')) + anno.setanno(value, 'is_constructor', True) + anno.setanno(value, 'type', func_obj) + anno.setanno(value, 'type_fqn', anno.getanno(func, 'fqn')) # TODO(mdan): Raise an error if constructor has side effects. # We can have a whitelist of no-side-effects constructors. # We can also step inside the constructor and further analyze. - # Multiple targets mean multiple assignment. - for target in targets: - # Tuple target means unpacking. - if isinstance(target, (gast.Tuple, gast.List)): - for i, target_item in enumerate(target.elts): - # Two cases here: - # 1. Static unpacking, e.g. a, b = c, d - # 2. Dynamic unpacking, e.g. a, b = c - # The former case is optimized away. 
- if isinstance(source, (gast.Tuple, gast.List)): - source_item = source.elts[i] - else: - source_item = gast.Subscript(source, gast.Index(i), ctx=None) - self._process_variable_assignment(source_item, (target_item,)) - elif isinstance(target, (gast.Name, gast.Attribute)): - target_symbol = anno.getanno(target, anno.Basic.QN) - self.scope.setval(target_symbol, source) - else: - raise ValueError('assignment target has unknown type: %s' % target) + if isinstance(target, (gast.Name, gast.Attribute)): + target_symbol = anno.getanno(target, anno.Basic.QN) + self.scope.setval(target_symbol, value) + elif isinstance(target, gast.Subscript): + pass + else: + raise ValueError('assignment target has unknown type: %s' % target) def visit_With(self, node): - for wi in node.items: - if wi.optional_vars is not None: - self._process_variable_assignment(wi.context_expr, (wi.optional_vars,)) + for item in node.items: + if item.optional_vars is not None: + self.apply_to_single_assignments((item.optional_vars,), + item.context_expr, + self._process_variable_assignment) self.generic_visit(node) return node def visit_Assign(self, node): self.generic_visit(node) - self._process_variable_assignment(node.value, node.targets) + self.apply_to_single_assignments( + node.targets, node.value, self._process_variable_assignment) return node def visit_Call(self, node): diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py index 46b7701624..95cbf5ca79 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py @@ -196,6 +196,19 @@ class TypeInfoResolverTest(test.TestCase): f_ref = node.body[0].body[1].value self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + def test_type_annotation_args(self): + + class Foo(object): + pass + + def test_fn(f): + utils.set_element_type(f, Foo) + return f + + node = 
self._parse_and_analyze(test_fn, {'Foo': Foo, 'utils': utils}) + f_ref = node.body[0].body[1].value + self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + def test_nested_unpacking(self): class Foo(object): diff --git a/tensorflow/contrib/autograph/pyct/transformer.py b/tensorflow/contrib/autograph/pyct/transformer.py index 4db6cc0adf..4c65edb6de 100644 --- a/tensorflow/contrib/autograph/pyct/transformer.py +++ b/tensorflow/contrib/autograph/pyct/transformer.py @@ -103,6 +103,54 @@ class Base(gast.NodeTransformer): results.append(replacement) return results + # TODO(mdan): Once we have error tracing, we may be able to just go to SSA. + def apply_to_single_assignments(self, targets, values, apply_fn): + """Applies a fuction to each individual assignment. + + This function can process a possibly-unpacked (e.g. a, b = c, d) assignment. + It tries to break down the unpacking if possible. In effect, it has the same + effect as passing the assigned values in SSA form to apply_fn. + + Examples: + + The following will result in apply_fn(a, c), apply_fn(b, d): + + a, b = c, d + + The following will result in apply_fn(a, c[0]), apply_fn(b, c[1]): + + a, b = c + + The following will result in apply_fn(a, (b, c)): + + a = b, c + + It uses the visitor pattern to allow subclasses to process single + assignments individually. + + Args: + targets: list, tuple of or individual AST node. Should be used with the + targets field of an ast.Assign node. + values: an AST node. + apply_fn: a function of a single argument, which will be called with the + respective nodes of each single assignment. The signaure is + apply_fn(target, value), no return value. 
+ """ + if not isinstance(targets, (list, tuple)): + targets = (targets,) + for target in targets: + if isinstance(target, (gast.Tuple, gast.List)): + for i in range(len(target.elts)): + target_el = target.elts[i] + if isinstance(values, (gast.Tuple, gast.List)): + value_el = values.elts[i] + else: + value_el = gast.Subscript(values, gast.Index(i), ctx=gast.Store()) + self.apply_to_single_assignments(target_el, value_el, apply_fn) + else: + # TODO(mdan): Look into allowing to rewrite the AST here. + apply_fn(target, values) + def visit(self, node): source_code = self.context.source_code source_file = self.context.source_file diff --git a/tensorflow/contrib/autograph/pyct/transformer_test.py b/tensorflow/contrib/autograph/pyct/transformer_test.py index f96b0dc377..1f1adf4fbd 100644 --- a/tensorflow/contrib/autograph/pyct/transformer_test.py +++ b/tensorflow/contrib/autograph/pyct/transformer_test.py @@ -94,7 +94,7 @@ class TransformerTest(test.TestCase): inner_function, lambda_node), anno.getanno(lambda_expr, 'enclosing_entities')) - def test_statement_info_stack(self): + def test_local_scope_info_stack(self): class TestTransformer(transformer.Base): @@ -142,7 +142,7 @@ class TransformerTest(test.TestCase): self.assertFalse(anno.hasanno(while_node, 'string')) self.assertEqual('1', anno.getanno(while_node, 'test')) - def test_statement_info_stack_checks_integrity(self): + def test_local_scope_info_stack_checks_integrity(self): class TestTransformer(transformer.Base): -- GitLab From 78446bda808364c8b2c0a87d122763aecc7caea2 Mon Sep 17 00:00:00 2001 From: voegtlel Date: Tue, 29 May 2018 18:15:10 +0200 Subject: [PATCH 201/902] Fixed memory leak with py_func (#18292) (#19085) * Fixing memory leak with py_func (#18292) * Fixed memory leak with py_func (#18292) --- .../python/kernel_tests/py_func_test.py | 31 ++++++++++++---- tensorflow/python/ops/script_ops.py | 35 +++++++------------ 2 files changed, 36 insertions(+), 30 deletions(-) diff --git 
a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index b9f44d728a..c89994591c 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import gc import re import numpy as np @@ -432,13 +433,29 @@ class PyFuncTest(test.TestCase): # ----- Tests shared by py_func and eager_py_func ----- def testCleanup(self): - for _ in xrange(1000): - g = ops.Graph() - with g.as_default(): - c = constant_op.constant([1.], dtypes.float32) - _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) - _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) - self.assertTrue(script_ops._py_funcs.size() < 100) + # Delete everything created by previous tests to avoid side effects. + ops.reset_default_graph() + gc.collect() + initial_size = script_ops._py_funcs.size() + # Encapsulate the graph generation, so locals can be deleted. + def make_graphs(): + for _ in xrange(1000): + g = ops.Graph() + with g.as_default(): + c = constant_op.constant([1.], dtypes.float32) + _ = script_ops.py_func(lambda x: x + 1, [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + 1, [c], [dtypes.float32]) + # These ops have a reference to 'c' which has a reference to the graph. + # Checks if the functions are being deleted though the graph is referenced from them. + # (see #18292) + _ = script_ops.py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + _ = script_ops.eager_py_func(lambda x: x + c.shape[0], [c], [dtypes.float32]) + + # Call garbage collector to enforce deletion. 
+ make_graphs() + ops.reset_default_graph() + gc.collect() + self.assertEqual(initial_size, script_ops._py_funcs.size()) # ----- Tests for eager_py_func ----- @test_util.run_in_graph_and_eager_modes() diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index f87c5dc5e3..16c73213d5 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -24,6 +24,7 @@ import threading # Used by py_util.cc to get tracebacks. import traceback # pylint: disable=unused-import +import weakref import numpy as np import six @@ -88,11 +89,14 @@ class FuncRegistry(object): def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) - self._funcs = {} + # Only store weakrefs to the funtions. The strong reference is stored in + # the graph. + self._funcs = weakref.WeakValueDictionary() def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() + # Store a weakref to the function self._funcs[token] = func return token @@ -145,7 +149,7 @@ class FuncRegistry(object): Raises: ValueError: if no function is registered for `token`. """ - func = self._funcs[token] + func = self._funcs.get(token, None) if func is None: raise ValueError("callback %s is not found" % token) if isinstance(func, EagerFunc): @@ -180,19 +184,6 @@ _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) -class CleanupFunc(object): - """A helper class to remove a registered function from _py_funcs.""" - - def __init__(self, token): - self._token = token - - def __del__(self): - if _py_funcs is not None: - # If _py_funcs is None, the program is most likely in shutdown, and the - # _py_funcs object has been destroyed already. 
- _py_funcs.remove(self._token) - - def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): """See documentation for py_func and eager_py_func.""" @@ -216,17 +207,15 @@ def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): # bound to that of the outer graph instead. graph = graph._outer_graph - cleanup = CleanupFunc(token) - # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. - if not hasattr(graph, "_cleanup_py_funcs_used_in_graph"): - graph._cleanup_py_funcs_used_in_graph = [] + if not hasattr(graph, "_py_funcs_used_in_graph"): + graph._py_funcs_used_in_graph = [] - # When `graph` is destroyed, elements in _cleanup_py_funcs_used_in_graph - # will be destroyed and their __del__ will remove the 'token' from - # the funcs registry. - graph._cleanup_py_funcs_used_in_graph.append(cleanup) + # Store a reference to the function in the graph to ensure it stays alive + # as long as the graph lives. When the graph is destroyed, the function + # is left to the garbage collector for destruction as well. + graph._py_funcs_used_in_graph.append(func) # pylint: enable=protected-access if eager: -- GitLab From 550251d0664a5c9f449a5f92c9c42917525a5547 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 29 May 2018 09:15:44 -0700 Subject: [PATCH 202/902] Clean up: remove useless super delegation. PiperOrigin-RevId: 198405670 --- tensorflow/contrib/autograph/converters/builtin_functions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/builtin_functions.py b/tensorflow/contrib/autograph/converters/builtin_functions.py index 317711a866..46e39da16a 100644 --- a/tensorflow/contrib/autograph/converters/builtin_functions.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions.py @@ -31,9 +31,6 @@ class BuiltinFunctionTransformer(transformer.Base): TF equivalent, like `len`. 
""" - def __init__(self, context): - super(BuiltinFunctionTransformer, self).__init__(context) - def _convert_builtin(self, node): template = """ ag__.utils.dynamic_builtin(func, args) -- GitLab From 5dd290ea8ccf3320215fd84e982724c91e72100c Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 29 May 2018 09:46:21 -0700 Subject: [PATCH 203/902] Clean up: handle the hidden additional clause on for loops in a way that's clearer about what it does. PiperOrigin-RevId: 198409797 --- .../autograph/converters/break_statements.py | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/break_statements.py b/tensorflow/contrib/autograph/converters/break_statements.py index 35877224b8..5b7508c9a5 100644 --- a/tensorflow/contrib/autograph/converters/break_statements.py +++ b/tensorflow/contrib/autograph/converters/break_statements.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import gast - from tensorflow.contrib.autograph.pyct import anno from tensorflow.contrib.autograph.pyct import templates from tensorflow.contrib.autograph.pyct import transformer @@ -54,13 +52,9 @@ class BreakStatementTransformer(transformer.Base): def _guard_if_present(self, block, var_name): """Prevents the block from executing if var_name is set.""" - - # If we don't have statements that immediately depend on the break - # we still need to make sure that the break variable remains - # used, in case the break becomes useful in later stages of transformation. - # Not having this broke the break_in_inner_loop test. 
if not block: - block = [gast.Pass()] + return block + template = """ if not var_name: block @@ -73,7 +67,7 @@ class BreakStatementTransformer(transformer.Base): def visit_While(self, node): scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - break_var = self.context.namer.new_symbol('break__', scope.referenced) + break_var = self.context.namer.new_symbol('break_', scope.referenced) node.test = self.visit(node.test) node.body, break_used = self._track_body(node.body, break_var) @@ -81,6 +75,10 @@ class BreakStatementTransformer(transformer.Base): node.orelse = self.visit_block(node.orelse) if break_used: + # Python's else clause only triggers if the loop exited cleanly (e.g. + # break did not trigger). + guarded_orelse = self._guard_if_present(node.orelse, break_var) + template = """ var_name = False while test and not var_name: @@ -88,20 +86,18 @@ class BreakStatementTransformer(transformer.Base): else: orelse """ - # Python's else clause only triggers if the loop exited cleanly (e.g. - # break did not trigger). node = templates.replace( template, var_name=break_var, test=node.test, body=node.body, - orelse=self._guard_if_present(node.orelse, break_var)) + orelse=guarded_orelse) return node def visit_For(self, node): scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - break_var = self.context.namer.new_symbol('break__', scope.referenced) + break_var = self.context.namer.new_symbol('break_', scope.referenced) node.target = self.visit(node.target) node.iter = self.visit(node.iter) @@ -110,19 +106,32 @@ class BreakStatementTransformer(transformer.Base): node.orelse = self.visit_block(node.orelse) if break_used: - node.orelse = self._guard_if_present(node.orelse, break_var) + # Python's else clause only triggers if the loop exited cleanly (e.g. + # break did not trigger). 
+ guarded_orelse = self._guard_if_present(node.orelse, break_var) + extra_test = templates.replace_as_expression( + 'not var_name', var_name=break_var) + + # The extra test is hidden in the AST, which will confuse the static + # analysis. To mitigate that, we insert a no-op statement that ensures + # the control variable is marked as used. + # TODO(mdan): Use a marker instead, e.g. ag__.condition_loop_on(var_name) template = """ var_name = False - for_stmt + for target in iter_: + (var_name,) + body + else: + orelse """ - # Python's else clause only triggers if the loop exited cleanly (e.g. - # break did not trigger). node = templates.replace( template, var_name=break_var, - for_stmt=node) - extra_test = templates.replace_as_expression( - 'not var_name', var_name=break_var) + iter_=node.iter, + target=node.target, + body=node.body, + orelse=guarded_orelse) + anno.setanno(node[1], 'extra_test', extra_test) return node -- GitLab From 6df91ed1c9c33ac0b3cac12680f5f40b07d39beb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 29 May 2018 09:49:42 -0700 Subject: [PATCH 204/902] Eliminate self.test_session in favor of self.evaluate in statistical_testing_test.py. 
PiperOrigin-RevId: 198410306 --- .../kernel_tests/statistical_testing_test.py | 135 ++++++++---------- 1 file changed, 63 insertions(+), 72 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index 4a5a6b5ae1..9c4dfed836 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -98,23 +98,21 @@ class StatisticalTestingTest(test.TestCase): num_samples = 5000 # 5000 samples is chosen to be enough to find discrepancies of # size 0.1 or more with assurance 1e-6, as confirmed here: - with self.test_session() as sess: - d = st.min_discrepancy_of_true_means_detectable_by_dkwm( - num_samples, 0., 1., false_fail_rate=1e-6, false_pass_rate=1e-6) - d = sess.run(d) - self.assertLess(d, 0.1) + d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, 0., 1., false_fail_rate=1e-6, false_pass_rate=1e-6) + d = self.evaluate(d) + self.assertLess(d, 0.1) # Test that the confidence interval computed for the mean includes # 0.5 and excludes 0.4 and 0.6. 
- with self.test_session() as sess: - samples = rng.uniform(size=num_samples).astype(np.float32) - (low, high) = st.true_mean_confidence_interval_by_dkwm( - samples, 0., 1., error_rate=1e-6) - low, high = sess.run([low, high]) - self.assertGreater(low, 0.4) - self.assertLess(low, 0.5) - self.assertGreater(high, 0.5) - self.assertLess(high, 0.6) + samples = rng.uniform(size=num_samples).astype(np.float32) + (low, high) = st.true_mean_confidence_interval_by_dkwm( + samples, 0., 1., error_rate=1e-6) + low, high = self.evaluate([low, high]) + self.assertGreater(low, 0.4) + self.assertLess(low, 0.5) + self.assertGreater(high, 0.5) + self.assertLess(high, 0.6) def test_dkwm_mean_one_sample_assertion(self): rng = np.random.RandomState(seed=0) @@ -123,21 +121,20 @@ class StatisticalTestingTest(test.TestCase): # Test that the test assertion agrees that the mean of the standard # uniform distribution is 0.5. samples = rng.uniform(size=num_samples).astype(np.float32) - with self.test_session() as sess: - sess.run(st.assert_true_mean_equal_by_dkwm( - samples, 0., 1., 0.5, false_fail_rate=1e-6)) - - # Test that the test assertion confirms that the mean of the - # standard uniform distribution is not 0.4. - with self.assertRaisesOpError("true mean greater than expected"): - sess.run(st.assert_true_mean_equal_by_dkwm( - samples, 0., 1., 0.4, false_fail_rate=1e-6)) - - # Test that the test assertion confirms that the mean of the - # standard uniform distribution is not 0.6. - with self.assertRaisesOpError("true mean smaller than expected"): - sess.run(st.assert_true_mean_equal_by_dkwm( - samples, 0., 1., 0.6, false_fail_rate=1e-6)) + self.evaluate(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.5, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.4. 
+ with self.assertRaisesOpError("true mean greater than expected"): + self.evaluate(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.4, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.6. + with self.assertRaisesOpError("true mean smaller than expected"): + self.evaluate(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.6, false_fail_rate=1e-6)) def test_dkwm_mean_in_interval_one_sample_assertion(self): rng = np.random.RandomState(seed=0) @@ -170,20 +167,18 @@ class StatisticalTestingTest(test.TestCase): # 4000 samples is chosen to be enough to find discrepancies of # size 0.2 or more with assurance 1e-6, as confirmed here: - with self.test_session() as sess: - d = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( - num_samples, 0., 1., num_samples, 0., 1., - false_fail_rate=1e-6, false_pass_rate=1e-6) - d = sess.run(d) - self.assertLess(d, 0.2) + d = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + num_samples, 0., 1., num_samples, 0., 1., + false_fail_rate=1e-6, false_pass_rate=1e-6) + d = self.evaluate(d) + self.assertLess(d, 0.2) # Test that the test assertion agrees that the standard # uniform distribution has the same mean as itself. samples1 = rng.uniform(size=num_samples).astype(np.float32) samples2 = rng.uniform(size=num_samples).astype(np.float32) - with self.test_session() as sess: - sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( - samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)) + self.evaluate(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)) def test_dkwm_mean_two_sample_assertion_beta_2_1_false(self): rng = np.random.RandomState(seed=0) @@ -193,15 +188,14 @@ class StatisticalTestingTest(test.TestCase): # As established above, 4000 samples is enough to find discrepancies # of size 0.2 or more with assurance 1e-6. 
- with self.test_session() as sess: - # Test that the test assertion confirms that the mean of the - # standard uniform distribution is different from the mean of beta(2, 1). - beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) - with self.assertRaisesOpError("true mean smaller than expected"): - sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( - samples1, 0., 1., - beta_high_samples, 0., 1., - false_fail_rate=1e-6)) + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(2, 1). + beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) + with self.assertRaisesOpError("true mean smaller than expected"): + self.evaluate(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_high_samples, 0., 1., + false_fail_rate=1e-6)) def test_dkwm_mean_two_sample_assertion_beta_1_2_false(self): rng = np.random.RandomState(seed=0) @@ -211,15 +205,14 @@ class StatisticalTestingTest(test.TestCase): # As established above, 4000 samples is enough to find discrepancies # of size 0.2 or more with assurance 1e-6. - with self.test_session() as sess: - # Test that the test assertion confirms that the mean of the - # standard uniform distribution is different from the mean of beta(1, 2). - beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) - with self.assertRaisesOpError("true mean greater than expected"): - sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( - samples1, 0., 1., - beta_low_samples, 0., 1., - false_fail_rate=1e-6)) + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(1, 2). 
+ beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) + with self.assertRaisesOpError("true mean greater than expected"): + self.evaluate(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_low_samples, 0., 1., + false_fail_rate=1e-6)) def test_dkwm_argument_validity_checking(self): rng = np.random.RandomState(seed=0) @@ -228,18 +221,17 @@ class StatisticalTestingTest(test.TestCase): # Test that the test library complains if the given samples fall # outside the purported bounds. - with self.test_session() as sess: - with self.assertRaisesOpError("maximum value exceeds expectations"): - sess.run(st.true_mean_confidence_interval_by_dkwm( - samples, [[0., 1.]], [[0.5, 1.5]], error_rate=0.5)) - with self.assertRaisesOpError("minimum value falls below expectations"): - sess.run(st.true_mean_confidence_interval_by_dkwm( - samples, [[0.5, 1.5]], [[1., 2.]], error_rate=0.5)) - - # But doesn't complain if they don't. - op = st.true_mean_confidence_interval_by_dkwm( - samples, [[0., 1.]], [[1., 2.]], error_rate=0.5) - _ = sess.run(op) + with self.assertRaisesOpError("maximum value exceeds expectations"): + self.evaluate(st.true_mean_confidence_interval_by_dkwm( + samples, [[0., 1.]], [[0.5, 1.5]], error_rate=0.5)) + with self.assertRaisesOpError("minimum value falls below expectations"): + self.evaluate(st.true_mean_confidence_interval_by_dkwm( + samples, [[0.5, 1.5]], [[1., 2.]], error_rate=0.5)) + + # But doesn't complain if they don't. 
+ op = st.true_mean_confidence_interval_by_dkwm( + samples, [[0., 1.]], [[1., 2.]], error_rate=0.5) + _ = self.evaluate(op) def test_do_maximum_mean(self): n = 117 @@ -248,10 +240,9 @@ class StatisticalTestingTest(test.TestCase): samples = rng.uniform(size=n).astype(np.float32) # Compute the answer in TF using the code under test - with self.test_session() as sess: - envelope_t = ops.convert_to_tensor(envelope) - max_mean = st._do_maximum_mean(samples, envelope_t, 1) - max_mean = sess.run(max_mean) + envelope_t = ops.convert_to_tensor(envelope) + max_mean = st._do_maximum_mean(samples, envelope_t, 1) + max_mean = self.evaluate(max_mean) # Compute the correct answer for this case in numpy. In this # example, `n` and `envelope` are such that `samples[2]` is the -- GitLab From 1b5a1559cc6a7a585728a889611c24bbc0e7e064 Mon Sep 17 00:00:00 2001 From: Guido Zuidhof Date: Tue, 29 May 2018 18:17:51 +0100 Subject: [PATCH 205/902] Add mirror for nasm (#19349) --- tensorflow/workspace.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 07c8f5e416..286459d01c 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -201,6 +201,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): urls = [ "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.12.02.tar.bz2/d15843c3fb7db39af80571ee27ec6fad/nasm-2.12.02.tar.bz2", + "http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", ], sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", strip_prefix = "nasm-2.12.02", -- GitLab From 0acb6b7435f25e798acd59f24e590eeebef7df9a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 29 May 2018 10:17:39 -0700 Subject: [PATCH 206/902] Clarify argument types and relationships in docstrings of statistical_testing.py. 
PiperOrigin-RevId: 198414898 --- .../python/ops/statistical_testing.py | 143 ++++++++++-------- 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py index 3ea9a331c7..c25e8c51d7 100644 --- a/tensorflow/contrib/distributions/python/ops/statistical_testing.py +++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py @@ -210,17 +210,17 @@ def _maximum_mean(samples, envelope, high, name=None): separately. Args: - samples: Floating-point tensor of samples from the distribution(s) + samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `envelope` and `high`. - envelope: Floating-point tensor of sizes of admissible CDF + envelope: Floating-point `Tensor` of sizes of admissible CDF envelopes (i.e., the `eps` above). - high: Floating-point tensor of upper bounds on the distributions' - supports. + high: Floating-point `Tensor` of upper bounds on the distributions' + supports. `samples <= high`. name: A name for this operation (optional). Returns: - bound: Floating-point tensor of upper bounds on the true means. + bound: Floating-point `Tensor` of upper bounds on the true means. Raises: InvalidArgumentError: If some `sample` is found to be larger than @@ -255,17 +255,17 @@ def _minimum_mean(samples, envelope, low, name=None): separately. Args: - samples: Floating-point tensor of samples from the distribution(s) + samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `envelope` and `low`. - envelope: Floating-point tensor of sizes of admissible CDF + envelope: Floating-point `Tensor` of sizes of admissible CDF envelopes (i.e., the `eps` above). 
- low: Floating-point tensor of lower bounds on the distributions' - supports. + low: Floating-point `Tensor` of lower bounds on the distributions' + supports. `samples >= low`. name: A name for this operation (optional). Returns: - bound: Floating-point tensor of lower bounds on the true means. + bound: Floating-point `Tensor` of lower bounds on the true means. Raises: InvalidArgumentError: If some `sample` is found to be smaller than @@ -301,12 +301,12 @@ def _dkwm_cdf_envelope(n, error_rate, name=None): probability above. Args: - n: Tensor of numbers of samples drawn. - error_rate: Floating-point tensor of admissible rates of mistakes. + n: `Tensor` of numbers of samples drawn. + error_rate: Floating-point `Tensor` of admissible rates of mistakes. name: A name for this operation (optional). Returns: - eps: Tensor of maximum distances the true CDF can be from the + eps: `Tensor` of maximum distances the true CDF can be from the empirical CDF. This scales as `O(sqrt(-log(error_rate)))` and as `O(1 / sqrt(n))`. The shape is the broadcast of `n` and `error_rate`. @@ -325,8 +325,8 @@ def _check_shape_dominates(samples, parameters): sample counts end up inflated. Args: - samples: A Tensor whose shape is to be protected against broadcasting. - parameters: A list of Tensors who are parameters for the statistical test. + samples: A `Tensor` whose shape is to be protected against broadcasting. + parameters: A list of `Tensor`s who are parameters for the statistical test. Returns: samples: Return original `samples` with control dependencies attached @@ -370,19 +370,23 @@ def true_mean_confidence_interval_by_dkwm( members. Args: - samples: Floating-point tensor of samples from the distribution(s) + samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low` and `high`. 
- low: Floating-point tensor of lower bounds on the distributions' + The support is bounded: `low <= samples <= high`. + low: Floating-point `Tensor` of lower bounds on the distributions' supports. - high: Floating-point tensor of upper bounds on the distributions' + high: Floating-point `Tensor` of upper bounds on the distributions' supports. - error_rate: *Scalar* admissible total rate of mistakes. + error_rate: *Scalar* floating-point `Tensor` admissible total rate + of mistakes. name: A name for this operation (optional). Returns: - low: A floating-point tensor of stochastic lower bounds on the true means. - high: A floating-point tensor of stochastic upper bounds on the true means. + low: A floating-point `Tensor` of stochastic lower bounds on the + true means. + high: A floating-point `Tensor` of stochastic upper bounds on the + true means. """ with ops.name_scope( name, "true_mean_confidence_interval_by_dkwm", @@ -437,15 +441,17 @@ def assert_true_mean_equal_by_dkwm( the assertion will insist on stronger evidence to fail any one member. Args: - samples: Floating-point tensor of samples from the distribution(s) + samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low` and `high`. - low: Floating-point tensor of lower bounds on the distributions' + The support is bounded: `low <= samples <= high`. + low: Floating-point `Tensor` of lower bounds on the distributions' supports. - high: Floating-point tensor of upper bounds on the distributions' + high: Floating-point `Tensor` of upper bounds on the distributions' supports. - expected: Floating-point tensor of expected true means. - false_fail_rate: *Scalar* admissible total rate of mistakes. + expected: Floating-point `Tensor` of expected true means. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of mistakes. name: A name for this operation (optional). 
Returns: @@ -476,18 +482,20 @@ def min_discrepancy_of_true_means_detectable_by_dkwm( with the same `false_pass_rate`. Args: - n: Tensor of numbers of samples to be drawn from the distributions + n: `Tensor` of numbers of samples to be drawn from the distributions of interest. - low: Floating-point tensor of lower bounds on the distributions' + low: Floating-point `Tensor` of lower bounds on the distributions' supports. - high: Floating-point tensor of upper bounds on the distributions' + high: Floating-point `Tensor` of upper bounds on the distributions' supports. - false_fail_rate: *Scalar* admissible total rate of false failures. - false_pass_rate: *Scalar* admissible rate of false passes. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of false failures. + false_pass_rate: *Scalar* floating-point `Tensor` admissible rate + of false passes. name: A name for this operation (optional). Returns: - discr: Tensor of lower bounds on the distances between true + discr: `Tensor` of lower bounds on the distances between true means detectable by a DKWM-based test. For each batch member `i`, of `K` total, drawing `n[i]` samples from @@ -550,17 +558,19 @@ def min_num_samples_for_dkwm_mean_test( on a scalar distribution supported on `[low, high]`. Args: - discrepancy: Floating-point tensor of desired upper limits on mean + discrepancy: Floating-point `Tensor` of desired upper limits on mean differences that may go undetected with probability higher than `1 - false_pass_rate`. - low: Tensor of lower bounds on the distributions' support. - high: Tensor of upper bounds on the distributions' support. - false_fail_rate: *Scalar* admissible total rate of false failures. - false_pass_rate: *Scalar* admissible rate of false passes. + low: `Tensor` of lower bounds on the distributions' support. + high: `Tensor` of upper bounds on the distributions' support. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of false failures. 
+ false_pass_rate: *Scalar* floating-point `Tensor` admissible rate + of false passes. name: A name for this operation (optional). Returns: - n: Tensor of numbers of samples to be drawn from the distributions + n: `Tensor` of numbers of samples to be drawn from the distributions of interest. The `discrepancy`, `low`, and `high` tensors must have @@ -695,23 +705,26 @@ def assert_true_mean_equal_by_dkwm_two_sample( the assertion will insist on stronger evidence to fail any one member. Args: - samples1: Floating-point tensor of samples from the + samples1: Floating-point `Tensor` of samples from the distribution(s) A. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low1`, `high1`, `low2`, and `high2`. - low1: Floating-point tensor of lower bounds on the supports of the + The support is bounded: `low1 <= samples1 <= high1`. + low1: Floating-point `Tensor` of lower bounds on the supports of the distributions A. - high1: Floating-point tensor of upper bounds on the supports of + high1: Floating-point `Tensor` of upper bounds on the supports of the distributions A. - samples2: Floating-point tensor of samples from the + samples2: Floating-point `Tensor` of samples from the distribution(s) B. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low1`, `high1`, `low2`, and `high2`. - low2: Floating-point tensor of lower bounds on the supports of the + The support is bounded: `low2 <= samples2 <= high2`. + low2: Floating-point `Tensor` of lower bounds on the supports of the distributions B. - high2: Floating-point tensor of upper bounds on the supports of + high2: Floating-point `Tensor` of upper bounds on the supports of the distributions B. - false_fail_rate: *Scalar* admissible total rate of mistakes. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of mistakes. name: A name for this operation (optional). 
Returns: @@ -765,22 +778,24 @@ def min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( with the same `false_pass_rate`. Args: - n1: Tensor of numbers of samples to be drawn from the distributions A. - low1: Floating-point tensor of lower bounds on the supports of the + n1: `Tensor` of numbers of samples to be drawn from the distributions A. + low1: Floating-point `Tensor` of lower bounds on the supports of the distributions A. - high1: Floating-point tensor of upper bounds on the supports of + high1: Floating-point `Tensor` of upper bounds on the supports of the distributions A. - n2: Tensor of numbers of samples to be drawn from the distributions B. - low2: Floating-point tensor of lower bounds on the supports of the + n2: `Tensor` of numbers of samples to be drawn from the distributions B. + low2: Floating-point `Tensor` of lower bounds on the supports of the distributions B. - high2: Floating-point tensor of upper bounds on the supports of + high2: Floating-point `Tensor` of upper bounds on the supports of the distributions B. - false_fail_rate: *Scalar* admissible total rate of false failures. - false_pass_rate: *Scalar* admissible rate of false passes. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of false failures. + false_pass_rate: *Scalar* floating-point `Tensor` admissible rate + of false passes. name: A name for this operation (optional). Returns: - discr: Tensor of lower bounds on the distances between true means + discr: `Tensor` of lower bounds on the distances between true means detectable by a two-sample DKWM-based test. For each batch member `i`, of `K` total, drawing `n1[i]` samples @@ -831,24 +846,26 @@ def min_num_samples_for_dkwm_mean_two_sample_test( (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). 
Args: - discrepancy: Floating-point tensor of desired upper limits on mean + discrepancy: Floating-point `Tensor` of desired upper limits on mean differences that may go undetected with probability higher than `1 - false_pass_rate`. - low1: Floating-point tensor of lower bounds on the supports of the + low1: Floating-point `Tensor` of lower bounds on the supports of the distributions A. - high1: Floating-point tensor of upper bounds on the supports of + high1: Floating-point `Tensor` of upper bounds on the supports of the distributions A. - low2: Floating-point tensor of lower bounds on the supports of the + low2: Floating-point `Tensor` of lower bounds on the supports of the distributions B. - high2: Floating-point tensor of upper bounds on the supports of + high2: Floating-point `Tensor` of upper bounds on the supports of the distributions B. - false_fail_rate: *Scalar* admissible total rate of false failures. - false_pass_rate: *Scalar* admissible rate of false passes. + false_fail_rate: *Scalar* floating-point `Tensor` admissible total + rate of false failures. + false_pass_rate: *Scalar* floating-point `Tensor` admissible rate + of false passes. name: A name for this operation (optional). Returns: - n1: Tensor of numbers of samples to be drawn from the distributions A. - n2: Tensor of numbers of samples to be drawn from the distributions B. + n1: `Tensor` of numbers of samples to be drawn from the distributions A. + n2: `Tensor` of numbers of samples to be drawn from the distributions B. For each batch member `i`, of `K` total, drawing `n1[i]` samples from scalar distribution A supported on `[low1[i], high1[i]]` and `n2[i]` -- GitLab From 70674b950ab48f913ed1c99e48c4162287595d46 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 29 May 2018 10:34:20 -0700 Subject: [PATCH 207/902] Add AnonymousIteratorHandleOp for non-shared Iterator resources Fixes Iterator cleanup when executing eagerly. 
DestroyResourceOp will now remove the last reference from the Iterator resource when it runs (after the last Python reference to an EagerIterator is removed). Previously EagerIterator used IteratorHandleOp to create resource handles, which used one kernel per (unique) shared name since the shared name was an attribute. These kernels each held a reference to their resource, which kept it alive indefinitely. Fixes #19499. PiperOrigin-RevId: 198417997 --- .../base_api/api_def_AnonymousIterator.pbtxt | 13 ++++ .../api_def_AnonymousIterator.pbtxt | 4 + tensorflow/core/kernels/data/iterator_ops.cc | 74 +++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 6 ++ .../kernel_tests/reader_dataset_ops_test.py | 36 +++++++++ tensorflow/python/data/ops/iterator_ops.py | 4 +- 6 files changed, 134 insertions(+), 3 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_AnonymousIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AnonymousIterator.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_AnonymousIterator.pbtxt b/tensorflow/core/api_def/base_api/api_def_AnonymousIterator.pbtxt new file mode 100644 index 0000000000..d8c2ed40a3 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_AnonymousIterator.pbtxt @@ -0,0 +1,13 @@ +op { + graph_op_name: "AnonymousIterator" + out_arg { + name: "handle" + description: <

The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code + * Interpreter}. + */ + public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) { + wrapper = new NativeInterpreterWrapper(mappedByteBuffer); + } + + /** + * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file and + * specifies the number of threads used for inference. + * + *

The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code + * Interpreter}. + */ + public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer, int numThreads) { + wrapper = new NativeInterpreterWrapper(mappedByteBuffer, numThreads); + } + /** * Runs model inference if the model takes only one input, and provides only one output. * @@ -231,5 +253,14 @@ public final class Interpreter implements AutoCloseable { wrapper = null; } + @Override + protected void finalize() throws Throwable { + try { + close(); + } finally { + super.finalize(); + } + } + NativeInterpreterWrapper wrapper; } -- GitLab From 46afa1f0e8a8b269054025aefe9a7d42290f8e8d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 10:49:48 -0700 Subject: [PATCH 397/902] Amend cluster resolver error to suggest oauth2client as a possible issue. PiperOrigin-RevId: 198894470 --- .../python/training/tpu_cluster_resolver.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 880fca4ea6..d44e23aadc 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -170,10 +170,11 @@ class TPUClusterResolver(ClusterResolver): if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('googleapiclient must be installed before using the ' - 'TPU cluster resolver. Execute: `pip install ' - '--upgrade google-api-python-client` to install with ' - 'pip.') + raise ImportError('googleapiclient and oauth2client must be installed ' + 'before using the TPU cluster resolver. 
Execute: ' + '`pip install --upgrade google-api-python-client` ' + 'and `pip install --upgrade oauth2client` to ' + 'install with pip.') final_discovery_url = self._discoveryUrl() or discovery_url if final_discovery_url: -- GitLab From 229a6fbb72a9c2a19113b7bdd85c3662603b4218 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 11:06:22 -0700 Subject: [PATCH 398/902] Printing bools in graphviz. PiperOrigin-RevId: 198897530 --- tensorflow/contrib/lite/toco/dump_graphviz.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/contrib/lite/toco/dump_graphviz.cc b/tensorflow/contrib/lite/toco/dump_graphviz.cc index 3aeebb14f1..8913b5c3ea 100644 --- a/tensorflow/contrib/lite/toco/dump_graphviz.cc +++ b/tensorflow/contrib/lite/toco/dump_graphviz.cc @@ -132,6 +132,12 @@ void AppendArrayVal(string* string, Array const& array, int index) { return; } AppendF(string, "%d", data[index]); + } else if (array.buffer->type == ArrayDataType::kBool) { + const auto& data = array.GetBuffer().data; + if (index >= data.size()) { + return; + } + AppendF(string, "%d", data[index]); } } -- GitLab From 508860fa5b28827e9425db0b3462c0fa8ed34ae5 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Fri, 1 Jun 2018 11:34:57 -0700 Subject: [PATCH 399/902] [TF2XLA] Decompose resize bilinear with large filters to work on dimensions independently.
PiperOrigin-RevId: 198902279 --- tensorflow/compiler/tests/image_ops_test.py | 39 +++- .../tf2xla/kernels/image_resize_ops.cc | 183 +++++++++++++----- 2 files changed, 168 insertions(+), 54 deletions(-) diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 42e637734c..7cf953ef25 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -65,9 +65,7 @@ class RGBToHSVTest(XLATestCase): join1 = array_ops.stack(split1) join2 = array_ops.stack(split2) batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2], - { - batch0: inp - }) + {batch0: inp}) # Verify that processing batch elements together is the same as separate self.assertAllClose(batch1, join1) @@ -401,9 +399,7 @@ class AdjustSaturationTest(XLATestCase): x = array_ops.placeholder(dtypes.float32, shape=x_shape) with self.test_scope(): y_fused = self._adjust_saturation(x, - scale).eval(feed_dict={ - x: x_np - }) + scale).eval(feed_dict={x: x_np}) self.assertAllClose(y_fused, y_baseline, rtol=2e-5, atol=1e-5) @@ -412,7 +408,8 @@ class ResizeBilinearTest(XLATestCase): def _assertForwardOpMatchesExpected(self, image_np, target_shape, - expected=None): + expected=None, + large_tolerance=False): if expected is None: self.fail("expected must be specified") with self.test_session() as sess, self.test_scope(): @@ -420,7 +417,11 @@ class ResizeBilinearTest(XLATestCase): resized = gen_image_ops.resize_bilinear( image, target_shape, align_corners=True) out = sess.run(resized, {image: image_np[np.newaxis, :, :, np.newaxis]}) - self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out) + if large_tolerance: + self.assertAllClose( + expected[np.newaxis, :, :, np.newaxis], out, rtol=0.03, atol=0.1) + else: + self.assertAllClose(expected[np.newaxis, :, :, np.newaxis], out) def _assertBackwardOpMatchesExpected(self, grads_np, @@ -555,6 +556,28 @@ class ResizeBilinearTest(XLATestCase): [[12.5, 27.5, 
21.875], [42.5, 80.0, 57.5], [40.625, 72.5, 50]], dtype=np.float32)) + def testAlignCorners4x4To8x8(self): + self._assertForwardOpMatchesExpected( + (np.array([[0, 1, 2, 3]], dtype=np.float32) + np.array( + [[0], [1], [2], [3]], dtype=np.float32)) * 7.0, [8, 8], + expected=3 * + (np.array([[0, 1, 2, 3, 4, 5, 6, 7]], dtype=np.float32) + np.array( + [[0], [1], [2], [3], [4], [5], [6], [7]], dtype=np.float32)), + large_tolerance=True) + + def testAlignCorners8x8To16x16(self): + self._assertForwardOpMatchesExpected( + (np.array([[0, 1, 2, 3, 4, 5, 6, 7]], dtype=np.float32) + np.array( + [[0], [1], [2], [3], [4], [5], [6], [7]], dtype=np.float32)) * 15.0, + [16, 16], + expected=7 * (np.array( + [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]], + dtype=np.float32) + np.array( + [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], + [12], [13], [14], [15]], + dtype=np.float32)), + large_tolerance=True) + if __name__ == "__main__": test.main() diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc index 9058cbc747..91bff995a1 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc @@ -99,27 +99,34 @@ ResizeConvolutionDims ComputeResizeConvolutionParameters( return dims; } +// Form a 2D convolution kernel like: +// 1 2 3 2 1 +// 2 4 6 4 2 +// 1/9 * 3 6 9 6 3 +// 2 4 6 4 2 +// 1 2 3 2 1 +// by multiplying two 1D kernels of the form: +// 1/3 * [1 2 3 2 1] +// If the 2D kernel would be very large, the 1D kernel can be applied once in +// each dimension due to the symmetry of the kernel along all axis to reduce the +// computational intensity. 
+std::vector Make1DKernel(int64 n) { + std::vector kernel(n * 2 - 1); + for (int64 i = 0; i < n; ++i) { + float v = (i + 1.0f) / n; + kernel[i] = v; + kernel[n * 2 - 2 - i] = v; + } + return kernel; +} + +// Kernels with more than 16 spatial elements are considered intense and the +// kernel should be applied to each dimension independently. +const int64 kMax2DKernelSize = 16; + xla::XlaOp MakeBilinearResizeKernel(xla::XlaBuilder* builder, gtl::ArraySlice kernel_size, int64 channels) { - // Form a 2D convolution kernel like: - // 1 2 3 2 1 - // 2 4 6 4 2 - // 1/9 * 3 6 9 6 3 - // 2 4 6 4 2 - // 1 2 3 2 1 - // by multiplying two 1D kernels of the form: - // 1/3 * [1 2 3 2 1] - auto make_1d_kernel = [](int64 n) { - std::vector kernel(n * 2 - 1); - for (int64 i = 0; i < n; ++i) { - float v = (i + 1.0f) / n; - kernel[i] = v; - kernel[n * 2 - 2 - i] = v; - } - return kernel; - }; - xla::XlaOp channels_iota; // DT_INT32 Iota will always return status::OK(). TF_CHECK_OK( @@ -133,12 +140,37 @@ xla::XlaOp MakeBilinearResizeKernel(xla::XlaBuilder* builder, xla::PrimitiveType::F32); return builder->Mul( builder->Mul(diag, - builder->ConstantR1(make_1d_kernel(kernel_size[1])), + builder->ConstantR1(Make1DKernel(kernel_size[1])), /*broadcast_dimensions=*/{1}), - builder->ConstantR1(make_1d_kernel(kernel_size[0])), + builder->ConstantR1(Make1DKernel(kernel_size[0])), /*broadcast_dimensions=*/{0}); } +xla::XlaOp MakeBilinearResizeKernelInDim(xla::XlaBuilder* builder, + gtl::ArraySlice kernel_size, + int64 channels, int64 dim) { + xla::XlaOp channels_iota; + // DT_INT32 Iota will always return status::OK(). + TF_CHECK_OK( + XlaHelpers::Iota(builder, DataType::DT_INT32, channels, &channels_iota)); + + auto diag = builder->ConvertElementType( + builder->Eq(builder->Broadcast( + channels_iota, + {dim == 0 ? (2 * kernel_size[0] - 1) : 1, + dim == 1 ?
(2 * kernel_size[1] - 1) : 1, channels}), + channels_iota, /*broadcast_dimensions=*/{2}), + xla::PrimitiveType::F32); + if (dim == 1) { + return builder->Mul( + diag, builder->ConstantR1(Make1DKernel(kernel_size[1])), + /*broadcast_dimensions=*/{1}); + } + return builder->Mul(diag, + builder->ConstantR1(Make1DKernel(kernel_size[0])), + /*broadcast_dimensions=*/{0}); +} + xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder, const xla::XlaOp& input, const int num_spatial_dims, @@ -170,15 +202,37 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder, ResizeConvolutionDims dims = ComputeResizeConvolutionParameters(in_size, out_size); - xla::XlaOp kernel = - MakeBilinearResizeKernel(builder, dims.kernel_size, channels); - xla::XlaOp output = builder->ConvGeneralDilated( - input, kernel, dims.stride, - /*padding=*/ - {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, - {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, - /*lhs_dilation=*/dims.kernel_size, - /*rhs_dilation=*/{1, 1}, dimension_numbers); + xla::XlaOp output; + // Split convolutions into independent dimensions if they would be a very + // large kernel.
+ if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) { + xla::XlaOp kernel = + MakeBilinearResizeKernel(builder, dims.kernel_size, channels); + output = builder->ConvGeneralDilated( + input, kernel, dims.stride, + /*padding=*/ + {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, + {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, + /*lhs_dilation=*/dims.kernel_size, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + } else { + xla::XlaOp kernel0 = + MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 0); + output = builder->ConvGeneralDilated( + input, kernel0, {dims.stride[0], 1}, + /*padding=*/ + {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, {0, 0}}, + /*lhs_dilation=*/{dims.kernel_size[0], 1}, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + xla::XlaOp kernel1 = + MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 1); + output = builder->ConvGeneralDilated( + output, kernel1, {1, dims.stride[1]}, + /*padding=*/ + {{0, 0}, {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, + /*lhs_dilation=*/{1, dims.kernel_size[1]}, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + } // Add broadcasts to handle expanding from a size == 1 dimension to a // size > 1 dimension. @@ -214,26 +268,63 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp(xla::XlaBuilder* builder, } dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims); dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1); - xla::XlaOp kernel = - MakeBilinearResizeKernel(builder, dims.kernel_size, channels); + xla::XlaOp output; + if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) { + xla::XlaOp kernel = + MakeBilinearResizeKernel(builder, dims.kernel_size, channels); + + // Broadcast the input kernel where the forward op expanded from a size == 1 + // dimension to a size > 1 dimension. This has the effect of summing the + // gradient contributions in that dimension. 
+ for (int i = 0; i < num_spatial_dims; ++i) { + if (in_size[i] == 1 && grad_size[i] > 1) { + kernel = + builder->Add(kernel, builder->ConstantR1(grad_size[i], 0), + /*broadcast_dimensions=*/{i}); + } + } - // Broadcast the input kernel where the forward op expanded from a size == 1 - // dimension to a size > 1 dimension. This has the effect of summing the - // gradient contributions in that dimension. - for (int i = 0; i < num_spatial_dims; ++i) { - if (in_size[i] == 1 && grad_size[i] > 1) { - kernel = builder->Add(kernel, builder->ConstantR1(grad_size[i], 0), - /*broadcast_dimensions=*/{i}); + output = builder->ConvGeneralDilated( + grad, kernel, /*window_strides=*/dims.kernel_size, + /*padding=*/ + {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, + {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, + /*lhs_dilation=*/dims.stride, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + } else { + xla::XlaOp kernel0 = + MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 0); + xla::XlaOp kernel1 = + MakeBilinearResizeKernelInDim(builder, dims.kernel_size, channels, 1); + + // Broadcast the input kernel where the forward op expanded from a size == 1 + // dimension to a size > 1 dimension. This has the effect of summing the + // gradient contributions in that dimension. 
+ if (in_size[0] == 1 && grad_size[0] > 1) { + kernel0 = + builder->Add(kernel0, builder->ConstantR1(grad_size[0], 0), + /*broadcast_dimensions=*/{0}); + } + if (in_size[1] == 1 && grad_size[1] > 1) { + kernel1 = + builder->Add(kernel1, builder->ConstantR1(grad_size[1], 0), + /*broadcast_dimensions=*/{1}); } - } - xla::XlaOp output = builder->ConvGeneralDilated( - grad, kernel, /*window_strides=*/dims.kernel_size, - /*padding=*/ - {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, - {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, - /*lhs_dilation=*/dims.stride, - /*rhs_dilation=*/{1, 1}, dimension_numbers); + output = builder->ConvGeneralDilated( + grad, kernel0, /*window_strides=*/{dims.kernel_size[0], 1}, + /*padding=*/ + {{dims.kernel_size[0] - 1, dims.kernel_size[0] - 1}, {0, 0}}, + /*lhs_dilation=*/{dims.stride[0], 1}, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + + output = builder->ConvGeneralDilated( + output, kernel1, /*window_strides=*/{1, dims.kernel_size[1]}, + /*padding=*/ + {{0, 0}, {dims.kernel_size[1] - 1, dims.kernel_size[1] - 1}}, + /*lhs_dilation=*/{1, dims.stride[1]}, + /*rhs_dilation=*/{1, 1}, dimension_numbers); + } // If in_size[i] > 1 and grad_size[i] == 1, pad the output in dimension i. // Opposite of the slice performed by the forward op.
-- GitLab From 5fa6409cbb7476697acc07bbd35f1a6c1597c845 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 1 Jun 2018 12:02:05 -0700 Subject: [PATCH 400/902] [TF:XLA] Bump open source llvm revision to r333578 PiperOrigin-RevId: 198906281 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 16c1846e17..0672615d5e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -453,11 +453,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/bf13d093f13a295d71080614c3036ada591201d5.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/bf13d093f13a295d71080614c3036ada591201d5.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/80f62ff390cc9440ef48ccac94ea6f7f51da3b93.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/80f62ff390cc9440ef48ccac94ea6f7f51da3b93.tar.gz", ], - sha256 = "3c5b4538a4df95090693bf6b758e861afc5b8c599592368f9dc57901f7560bd0", - strip_prefix = "llvm-bf13d093f13a295d71080614c3036ada591201d5", + sha256 = "119e7d9687a20103088677d5157cf70352392a423943de3cb549f6e4638edc59", + strip_prefix = "llvm-80f62ff390cc9440ef48ccac94ea6f7f51da3b93", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From 10b2b3b44a6f93f4fd414e8ac450587ece2207ae Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 1 Jun 2018 12:20:08 -0700 Subject: [PATCH 401/902] [TF:XLA] Refactor implementation of TruncatedNormal to avoid redundant computations. Add an additional test. 
PiperOrigin-RevId: 198908904 --- tensorflow/compiler/tests/random_ops_test.py | 7 +++ .../compiler/tf2xla/kernels/random_ops.cc | 62 +++++++++---------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py index d6c93088d4..70be22936a 100644 --- a/tensorflow/compiler/tests/random_ops_test.py +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -76,6 +76,13 @@ class RandomOpsTest(XLATestCase): self.assertTrue((y >= -2).sum() == 1000) self.assertTrue((y < 33).sum() == 1000) + def testTruncatedNormalIsNotConstant(self): + def rng(dtype): + return random_ops.truncated_normal(shape=[2], dtype=dtype) + + # TODO(b/34339814): implement inverse erf support for non-F32 types. + self._testRngIsNotConstant(rng, dtypes.float32) + def testTruncatedNormalIsInRange(self): count = 10000 # TODO(b/34339814): implement inverse erf support for non-F32 types. diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc index 5f5bd58637..39149d56ad 100644 --- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc @@ -17,6 +17,7 @@ limitations under the License. // TODO(misard,phawkins): handle random number generator seeds/states correctly. // TODO(misard,phawkins): add tests. 
+#include "tensorflow/compiler/tf2xla/lib/while_loop.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" @@ -127,13 +128,8 @@ class TruncatedNormalOp : public XlaOpKernel { OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &shape)); xla::Shape xla_shape; OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype, shape, &xla_shape)); - xla::Shape xla_element_shape = - xla::ShapeUtil::MakeShape(xla_shape.element_type(), {}); xla::XlaBuilder* b = ctx->builder(); - xla::XlaOp mean = XlaHelpers::Zero(b, dtype); - xla::XlaOp stddev = XlaHelpers::One(b, dtype); - xla::XlaOp candidate = b->RngNormal(mean, stddev, xla_shape); auto two_sd = [dtype](bool negate, xla::XlaBuilder* b) { return XlaHelpers::FloatLiteral(b, dtype, negate ? -2.0 : 2.0); @@ -151,34 +147,38 @@ class TruncatedNormalOp : public XlaOpKernel { // out_of_range_mask := candidate < mean-2*sd || candidate > mean+2*sd // candidate = select(out_of_range_mask, rng_normal(), candidate) // } - std::unique_ptr test_builder = - b->CreateSubBuilder("truncated_normal_test"); - { - auto* b = test_builder.get(); - xla::XlaOp candidate = b->Parameter(0, xla_shape, "candidate"); - out_of_range_mask(candidate, b); - OP_REQUIRES_OK(ctx, Any(out_of_range_mask(candidate, b), b).status()); - } - - std::unique_ptr body_builder = - b->CreateSubBuilder("truncated_normal_body"); - { - auto* b = body_builder.get(); - xla::XlaOp candidate = b->Parameter(0, xla_shape, "candidate"); - xla::XlaOp to_resample = out_of_range_mask(candidate, b); + std::vector initial_values = { + // The current candidate. + b->Broadcast(XlaHelpers::Zero(b, dtype), shape.dim_sizes()), + // The to_resample mask, where 'true' identifies a location in the + // current candidate that is out of range and must be regenerated. + b->Broadcast(b->ConstantR0(true), shape.dim_sizes()), + // Is any element in the mask true? 
+ b->ConstantR0(true)}; + auto condition = [&](gtl::ArraySlice values, + xla::XlaBuilder* b) -> xla::StatusOr { + // Continue while any element in the mask is true. + return values[2]; + }; + auto body = + [&](gtl::ArraySlice values, + xla::XlaBuilder* b) -> xla::StatusOr> { + xla::XlaOp candidate = values[0]; + xla::XlaOp to_resample = values[1]; xla::XlaOp mean = XlaHelpers::Zero(b, dtype); xla::XlaOp stddev = XlaHelpers::One(b, dtype); - b->Select(to_resample, b->RngNormal(mean, stddev, xla_shape), candidate); - } - - xla::StatusOr test_computation = test_builder->Build(); - OP_REQUIRES_OK(ctx, test_computation.status()); - xla::StatusOr body_computation = body_builder->Build(); - OP_REQUIRES_OK(ctx, body_computation.status()); - xla::XlaOp result = b->While(test_computation.ValueOrDie(), - body_computation.ValueOrDie(), candidate); - - ctx->SetOutput(0, result); + candidate = b->Select(to_resample, b->RngNormal(mean, stddev, xla_shape), + candidate); + // Compute a new to_resample mask, and determine whether any value is + // still out of range. + to_resample = out_of_range_mask(candidate, b); + TF_ASSIGN_OR_RETURN(xla::XlaOp done, Any(to_resample, b)); + return std::vector{candidate, to_resample, done}; + }; + auto result = + XlaWhileLoop(condition, body, initial_values, "truncated_normal", b); + OP_REQUIRES_OK(ctx, result.status()); + ctx->SetOutput(0, result.ValueOrDie()[0]); } }; -- GitLab From eebb9e0449b38703869ae7ccd0aa2c649f9f5aaf Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Fri, 1 Jun 2018 12:29:39 -0700 Subject: [PATCH 402/902] Finished incomplete support for bad usernames in the CI build scripts. ci_build.sh now passes the environment variable to the container, and the with_the_same_user script adds the --force-badname param to addgroup as well. 
(#19699) --- tensorflow/tools/ci_build/builds/with_the_same_user | 2 +- tensorflow/tools/ci_build/ci_build.sh | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index d4bf546d40..b216e3549f 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -40,7 +40,7 @@ if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then ADDUSER_OPTS="--force-badname" fi -getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 072dd6ab99..1f0fd0387a 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -134,6 +134,12 @@ if [[ $? != "0" ]]; then die "ERROR: docker build failed. Dockerfile is at ${DOCKERFILE_PATH}" fi +# If caller wants the with_the_same_user script to allow bad usernames, +# pass the var to the docker environment +if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + CI_BUILD_USER_FORCE_BADNAME_ENV="-e CI_BUILD_USER_FORCE_BADNAME=yes" +fi + # Run the command inside the container. echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..." 
mkdir -p ${WORKSPACE}/bazel-ci_build-cache @@ -148,6 +154,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -e "CI_BUILD_GROUP=$(id -g -n)" \ -e "CI_BUILD_GID=$(id -g)" \ -e "CI_TENSORFLOW_SUBMODULE_PATH=${CI_TENSORFLOW_SUBMODULE_PATH}" \ + ${CI_BUILD_USER_FORCE_BADNAME_ENV} \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ -- GitLab From b812f37e26889bb168fa0279a536b907c3fb5fdd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 12:53:54 -0700 Subject: [PATCH 403/902] TFLite: adding tile and expand_dims ops. PiperOrigin-RevId: 198913026 --- tensorflow/contrib/lite/build_def.bzl | 2 + tensorflow/contrib/lite/builtin_ops.h | 2 + tensorflow/contrib/lite/kernels/BUILD | 31 +++ .../contrib/lite/kernels/expand_dims.cc | 113 ++++++++ .../contrib/lite/kernels/expand_dims_test.cc | 83 ++++++ tensorflow/contrib/lite/kernels/register.cc | 4 + tensorflow/contrib/lite/kernels/tile.cc | 194 +++++++++++++ tensorflow/contrib/lite/kernels/tile_test.cc | 256 ++++++++++++++++++ tensorflow/contrib/lite/model.cc | 4 + tensorflow/contrib/lite/nnapi_delegate.cc | 2 + tensorflow/contrib/lite/schema/schema.fbs | 10 + .../contrib/lite/schema/schema_generated.h | 236 +++++++++++++++- .../contrib/lite/testing/generate_examples.py | 67 +++++ .../contrib/lite/toco/tflite/operator.cc | 38 +++ 14 files changed, 1036 insertions(+), 6 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/expand_dims.cc create mode 100644 tensorflow/contrib/lite/kernels/expand_dims_test.cc create mode 100644 tensorflow/contrib/lite/kernels/tile.cc create mode 100644 tensorflow/contrib/lite/kernels/tile_test.cc diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index b9e40cc50c..aa6a60dc9e 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -205,6 +205,7 @@ def generated_test_models(): "depthwiseconv", "div", "exp", + "expand_dims", "floor", "fully_connected", "fused_batch_norm", @@ 
-245,6 +246,7 @@ def generated_test_models(): "strided_slice", "strided_slice_1d_exhaustive", "sub", + "tile", "topk", "transpose", "transpose_conv", diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index c797e3589a..fc6fdd6eef 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -94,6 +94,8 @@ typedef enum { kTfLiteBuiltinSin = 66, kTfLiteBuiltinTransposeConv = 67, kTfLiteBuiltinSparseToDense = 68, + kTfLiteBuiltinTile = 69, + kTfLiteBuiltinExpandDims = 70, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 0af659b5ca..cf5d0b4ce9 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -147,6 +147,7 @@ cc_library( "embedding_lookup.cc", "embedding_lookup_sparse.cc", "exp.cc", + "expand_dims.cc", "floor.cc", "fully_connected.cc", "gather.cc", @@ -176,6 +177,7 @@ cc_library( "strided_slice.cc", "sub.cc", "svdf.cc", + "tile.cc", "topk_v2.cc", "transpose.cc", "transpose_conv.cc", @@ -858,6 +860,20 @@ tf_cc_test( ], ) +tf_cc_test( + name = "tile_test", + size = "small", + srcs = ["tile_test.cc"], + tags = ["tflite_not_portable_ios"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "comparisons_test", size = "small", @@ -935,6 +951,20 @@ tf_cc_test( ], ) +tf_cc_test( + name = "expand_dims_test", + size = "small", + srcs = ["expand_dims_test.cc"], + tags = ["tflite_not_portable_ios"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "sparse_to_dense_test", size = "small", @@ -942,6 +972,7 @@ 
tf_cc_test( tags = ["tflite_not_portable_ios"], deps = [ ":builtin_ops", + "//tensorflow/contrib/lite:builtin_op_data", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/kernels:test_util", "@com_google_googletest//:gtest", diff --git a/tensorflow/contrib/lite/kernels/expand_dims.cc b/tensorflow/contrib/lite/kernels/expand_dims.cc new file mode 100644 index 0000000000..ed33012864 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/expand_dims.cc @@ -0,0 +1,113 @@ + +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +namespace tflite { +namespace ops { +namespace builtin { +namespace expand_dims { +constexpr int kInput = 0; +constexpr int kAxis = 1; +constexpr int kOutput = 0; + +namespace { +TfLiteStatus ExpandTensorDim(TfLiteContext* context, const TfLiteTensor& input, + int axis, TfLiteTensor* output) { + const TfLiteIntArray& input_dims = *input.dims; + if (axis < 0) { + axis = input_dims.size + 1 + axis; + } + TF_LITE_ENSURE(context, axis >= 0 && axis <= input_dims.size); + + TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_dims.size + 1); + for (int i = 0; i < output_dims->size; ++i) { + if (i < axis) { + output_dims->data[i] = input_dims.data[i]; + } else if (i == axis) { + output_dims->data[i] = 1; + } else { + output_dims->data[i] = input_dims.data[i - 1]; + } + } + + return context->ResizeTensor(context, output, output_dims); +} + +TfLiteStatus GetAxisValueFromTensor(TfLiteContext* context, + const TfLiteTensor& axis, int* axis_value) { + TF_LITE_ENSURE_EQ(context, NumElements(&axis), 1); + switch (axis.type) { + case kTfLiteInt32: + *axis_value = *GetTensorData(&axis); + return kTfLiteOk; + case kTfLiteInt64: + *axis_value = *GetTensorData(&axis); + return kTfLiteOk; + default: + return kTfLiteError; + } +} + +} // namespace + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + const TfLiteTensor* input = GetInput(context, node, kInput); + const TfLiteTensor* axis = GetInput(context, node, kAxis); +
TfLiteTensor* output = GetOutput(context, node, 0); + output->type = input->type; + if (IsConstantTensor(axis)) { + int axis_value; + TF_LITE_ENSURE_OK(context, + GetAxisValueFromTensor(context, *axis, &axis_value)); + return ExpandTensorDim(context, *input, axis_value, output); + } + SetTensorToDynamic(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + // Just copy input to output. + const TfLiteTensor* input = GetInput(context, node, kInput); + TfLiteTensor* output = GetOutput(context, node, 0); + const TfLiteTensor* axis = GetInput(context, node, kAxis); + if (IsDynamicTensor(output)) { + int axis_value; + TF_LITE_ENSURE_OK(context, + GetAxisValueFromTensor(context, *axis, &axis_value)); + TF_LITE_ENSURE_OK(context, + ExpandTensorDim(context, *input, axis_value, output)); + } + memcpy(output->data.raw, input->data.raw, input->bytes); + return kTfLiteOk; +} + +} // namespace expand_dims +TfLiteRegistration* Register_EXPAND_DIMS() { + static TfLiteRegistration r = {nullptr, nullptr, expand_dims::Prepare, + expand_dims::Eval}; + return &r; +} +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/expand_dims_test.cc b/tensorflow/contrib/lite/kernels/expand_dims_test.cc new file mode 100644 index 0000000000..b755e8ce29 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/expand_dims_test.cc @@ -0,0 +1,83 @@ + +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class ExpandDimsOpModel : public SingleOpModel { + public: + ExpandDimsOpModel(std::initializer_list input_shape, + TensorType input_type) { + input_ = AddInput(input_type); + axis_ = AddInput(TensorType_INT32); + output_ = AddOutput(input_type); + SetBuiltinOp(BuiltinOperator_EXPAND_DIMS, BuiltinOptions_ExpandDimsOptions, + 0); + BuildInterpreter({input_shape, {1}}); + } + void SetInputFloat(std::initializer_list data) { + PopulateTensor(input_, data); + } + void SetAxis(int axis) { PopulateTensor(axis_, {axis}); } + std::vector GetValuesFloat() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input_; + int axis_; + int output_; +}; + +TEST(ExpandDimsOpTest, DifferentAxis) { + ExpandDimsOpModel m({2, 2}, TensorType_FLOAT32); + const auto values = {-1.f, 1.f, -2.f, 2.f}; + m.SetInputFloat(values); + m.SetAxis(0); + m.Invoke(); + EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(values)); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2, 2})); + + m.SetAxis(1); + m.Invoke(); + EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(values)); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 2})); + + m.SetAxis(2); + m.Invoke(); + EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(values)); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1})); + + m.SetAxis(-1); + m.Invoke(); + EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(values)); + 
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1})); +} +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 4eea9921b2..c7d72738d6 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -85,11 +85,13 @@ TfLiteRegistration* Register_GREATER_EQUAL(); TfLiteRegistration* Register_LESS(); TfLiteRegistration* Register_LESS_EQUAL(); TfLiteRegistration* Register_FLOOR(); +TfLiteRegistration* Register_TILE(); TfLiteRegistration* Register_NEG(); TfLiteRegistration* Register_SELECT(); TfLiteRegistration* Register_SLICE(); TfLiteRegistration* Register_SIN(); TfLiteRegistration* Register_TRANSPOSE_CONV(); +TfLiteRegistration* Register_EXPAND_DIMS(); TfLiteRegistration* Register_SPARSE_TO_DENSE(); BuiltinOpResolver::BuiltinOpResolver() { @@ -162,6 +164,8 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_SLICE, Register_SLICE()); AddBuiltin(BuiltinOperator_SIN, Register_SIN()); AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSE_CONV()); + AddBuiltin(BuiltinOperator_TILE, Register_TILE()); + AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS()); AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that diff --git a/tensorflow/contrib/lite/kernels/tile.cc b/tensorflow/contrib/lite/kernels/tile.cc new file mode 100644 index 0000000000..af77f07474 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/tile.cc @@ -0,0 +1,194 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +namespace tflite { +namespace ops { +namespace builtin { +namespace tile { + +constexpr int kInputTensor = 0; +constexpr int kInputMultipliers = 1; +constexpr int kOutputTensor = 0; + +namespace { +template +TfLiteIntArray* MultiplyShapeDims(const TfLiteIntArray& shape, + const TfLiteTensor* multipliers, + int num_dimensions) { + const T* multipliers_v = GetTensorData(multipliers); + + TfLiteIntArray* output_shape = TfLiteIntArrayCreate(num_dimensions); + for (int i = 0; i < num_dimensions; ++i) { + output_shape->data[i] = shape.data[i] * multipliers_v[i]; + } + return output_shape; +} + +TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* multipliers = GetInput(context, node, kInputMultipliers); + + const int num_dimensions = NumDimensions(input); + const int num_multipliers = NumElements(multipliers); + TF_LITE_ENSURE_EQ(context, num_dimensions, num_multipliers); + switch (multipliers->type) { + case kTfLiteInt32: + return context->ResizeTensor( + 
context, output, + MultiplyShapeDims(*input->dims, multipliers, + num_dimensions)); + case kTfLiteInt64: + return context->ResizeTensor( + context, output, + MultiplyShapeDims(*input->dims, multipliers, + num_dimensions)); + default: + context->ReportError(context, "Tile not supported multiply tensor type."); + return kTfLiteError; + } +} + +template +void CopyMultipleTimes(const T* in_data, int32_t in_size, int32_t multiplier, + T* out_data) { + for (int i = 0; i < multiplier; ++i) { + const T* in_end = in_data + in_size; + T* new_out_data = std::copy(in_data, in_end, out_data); + in_data = out_data; + out_data = new_out_data; + } +} + +template +std::pair TileOneDimension(const TfLiteIntArray& in_dimensions, + const T* in_data, const M* multipliers, + T* out_data, int dimension) { + const int dimension_size = in_dimensions.data[dimension]; + if (dimension == in_dimensions.size - 1) { + CopyMultipleTimes(in_data, dimension_size, multipliers[dimension], + out_data); + return std::make_pair(dimension_size, + dimension_size * multipliers[dimension]); + } + int total_stride_size = 0, total_tiled_stride_size = 0; + const T* copy_from_data = in_data; + T* copy_to_data = out_data; + for (int i = 0; i < dimension_size; ++i) { + int stride_size = 0, tiled_stride_size = 0; + std::tie(stride_size, tiled_stride_size) = + TileOneDimension(in_dimensions, copy_from_data, multipliers, + copy_to_data, dimension + 1); + copy_from_data += stride_size; + copy_to_data += tiled_stride_size; + total_stride_size += stride_size; + total_tiled_stride_size += tiled_stride_size; + } + CopyMultipleTimes(out_data, total_tiled_stride_size, + multipliers[dimension] - 1, + out_data + total_tiled_stride_size); + return std::make_pair(total_stride_size, + total_tiled_stride_size * multipliers[dimension]); +} + +template +void Tile(const TfLiteIntArray& in_dimensions, const TfLiteTensor* in_data, + const TfLiteTensor* multipliers, TfLiteTensor* out_data) { + // Doing recursively tiling from top to 
down dimension. + switch (multipliers->type) { + case kTfLiteInt32: + TileOneDimension(in_dimensions, GetTensorData(in_data), + GetTensorData(multipliers), + GetTensorData(out_data), 0); + break; + case kTfLiteInt64: + TileOneDimension(in_dimensions, GetTensorData(in_data), + GetTensorData(multipliers), + GetTensorData(out_data), 0); + break; + default: + break; + } +} +} // namespace + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + const TfLiteTensor* multipliers = GetInput(context, node, kInputMultipliers); + // Only int32 and int64 multipliers type is supported. + TF_LITE_ENSURE_MSG(context, + (multipliers->type == kTfLiteInt32) || + (multipliers->type == kTfLiteInt64), + "Tile only supports int32 and int64 mutlipliers."); + + if (IsConstantTensor(multipliers)) { + TF_LITE_ENSURE_OK(context, ResizeOutput(context, node)); + } else { + SetTensorToDynamic(output); + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* multipliers = GetInput(context, node, kInputMultipliers); + + if (IsDynamicTensor(output)) { + TF_LITE_ENSURE_OK(context, ResizeOutput(context, node)); + } + + switch (output->type) { + case kTfLiteFloat32: + Tile(*(input->dims), input, multipliers, output); + break; + case kTfLiteUInt8: + Tile(*(input->dims), input, multipliers, output); + break; + case kTfLiteInt32: + Tile(*(input->dims), input, multipliers, output); + break; + case kTfLiteInt64: + Tile(*(input->dims), input, multipliers, output); + break; + default: + 
context->ReportError(context, "Type is currently not supported by Tile."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace tile +TfLiteRegistration* Register_TILE() { + static TfLiteRegistration r = {nullptr, nullptr, tile::Prepare, tile::Eval}; + return &r; +} +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/tile_test.cc b/tensorflow/contrib/lite/kernels/tile_test.cc new file mode 100644 index 0000000000..a134a75d56 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/tile_test.cc @@ -0,0 +1,256 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; +class TileOpModel : public SingleOpModel { + public: + TileOpModel(std::initializer_list input_shape, TensorType input_type, + TensorType multiply_type) { + input_ = AddInput(input_type); + multipliers_ = AddInput(TensorType_INT32); + output_ = AddOutput(input_type); + SetBuiltinOp(BuiltinOperator_TILE, BuiltinOptions_TileOptions, 0); + BuildInterpreter({input_shape, {static_cast(input_shape.size())}}); + } + + void SetInputFloat(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputUInt8(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputInt32(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputInt64(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetMultipliers(std::initializer_list data) { + PopulateTensor(multipliers_, data); + } + + std::vector GetOutputFloat() { return ExtractVector(output_); } + + std::vector GetOutputUInt8() { return ExtractVector(output_); } + + std::vector GetOutputInt32() { return ExtractVector(output_); } + + std::vector GetOutputInt64() { + return ExtractVector(output_); + } + + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input_; + int multipliers_; + int output_; +}; + +TEST(TileTest, Float32Vector) { + TileOpModel m({3}, TensorType_FLOAT32, TensorType_INT32); + m.SetInputFloat({1.f, 2.f, 3.f}); + m.SetMultipliers({2}); + m.Invoke(); + EXPECT_THAT(m.GetOutputFloat(), + ElementsAreArray({1.f, 2.f, 3.f, 1.f, 2.f, 3.f})); +} + +TEST(TileTest, Float32Matrix) { + 
TileOpModel m({2, 3}, TensorType_FLOAT32, TensorType_INT32); + m.SetInputFloat({ + 11.f, + 12.f, + 13.f, + 21.f, + 22.f, + 23.f, + }); + m.SetMultipliers({2, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputFloat(), ElementsAreArray({ + 11.f, + 12.f, + 13.f, + 21.f, + 22.f, + 23.f, + 11.f, + 12.f, + 13.f, + 21.f, + 22.f, + 23.f, + })); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 3})); +} + +TEST(TileTest, Float32HighDimension) { + TileOpModel m({1, 2, 3}, TensorType_FLOAT32, TensorType_INT32); + m.SetInputFloat({ + 11.f, + 12.f, + 13.f, + 21.f, + 22.f, + 23.f, + }); + m.SetMultipliers({2, 3, 1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutputFloat(), + ElementsAreArray({11.f, 12.f, 13.f, 21.f, 22.f, 23.f, 11.f, 12.f, 13.f, + 21.f, 22.f, 23.f, 11.f, 12.f, 13.f, 21.f, 22.f, 23.f, + 11.f, 12.f, 13.f, 21.f, 22.f, 23.f, 11.f, 12.f, 13.f, + 21.f, 22.f, 23.f, 11.f, 12.f, 13.f, 21.f, 22.f, 23.f})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 6, 3})); +} + +TEST(TileTest, Uint8Matrix) { + TileOpModel m({2, 3}, TensorType_UINT8, TensorType_INT32); + m.SetInputUInt8({ + 11, + 12, + 13, + 21, + 22, + 23, + }); + m.SetMultipliers({2, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputUInt8(), ElementsAreArray({ + 11, + 12, + 13, + 21, + 22, + 23, + 11, + 12, + 13, + 21, + 22, + 23, + })); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 3})); +} + +TEST(TileTest, Int32Matrix) { + TileOpModel m({2, 3}, TensorType_INT32, TensorType_INT32); + m.SetInputInt32({ + 11, + 12, + 13, + 21, + 22, + 23, + }); + m.SetMultipliers({2, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputInt32(), ElementsAreArray({ + 11, + 12, + 13, + 21, + 22, + 23, + 11, + 12, + 13, + 21, + 22, + 23, + })); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 3})); +} + +TEST(TileTest, Int64Matrix) { + TileOpModel m({2, 3}, TensorType_INT64, TensorType_INT32); + m.SetInputInt64({ + 11, + 12, + 13, + 21, + 22, + 23, + }); + m.SetMultipliers({2, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputInt64(), 
ElementsAreArray({ + 11, + 12, + 13, + 21, + 22, + 23, + 11, + 12, + 13, + 21, + 22, + 23, + })); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 3})); +} + +TEST(TileTest, Int64Matrix64Multipliers) { + TileOpModel m({2, 3}, TensorType_INT64, TensorType_INT64); + m.SetInputInt64({ + 11, + 12, + 13, + 21, + 22, + 23, + }); + m.SetMultipliers({2, 1}); + m.Invoke(); + EXPECT_THAT(m.GetOutputInt64(), ElementsAreArray({ + 11, + 12, + 13, + 21, + 22, + 23, + 11, + 12, + 13, + 21, + 22, + 23, + })); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4, 3})); +} +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 6ac41a94bd..ca115a1c59 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -714,6 +714,10 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, error_reporter->Report("DELEGATE op shouldn't exist in model."); return kTfLiteError; } + case BuiltinOperator_EXPAND_DIMS: + case BuiltinOperator_TILE: { + break; + } } return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index fad08bbfe6..d27ab0c033 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -491,6 +491,8 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_SLICE: case tflite::BuiltinOperator_SIN: case tflite::BuiltinOperator_TRANSPOSE_CONV: + case tflite::BuiltinOperator_TILE: + case tflite::BuiltinOperator_EXPAND_DIMS: case tflite::BuiltinOperator_SPARSE_TO_DENSE: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 522eac25b3..7d76134e3d 
100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -146,6 +146,8 @@ enum BuiltinOperator : byte { SIN = 66, TRANSPOSE_CONV = 67, SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, } // Options for the builtin operators. @@ -200,6 +202,8 @@ union BuiltinOptions { SliceOptions, TransposeConvOptions, SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, } enum Padding : byte { SAME, VALID } @@ -421,6 +425,9 @@ table DequantizeOptions { table MaximumMinimumOptions { } +table TileOptions { +} + table ArgMaxOptions { output_type : TensorType; } @@ -452,6 +459,9 @@ table TransposeConvOptions { stride_h:int; } +table ExpandDimsOptions { +} + table SparseToDenseOptions { validate_indices:bool; } diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 746dd26796..0a60fcd3d0 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -151,6 +151,9 @@ struct DequantizeOptionsT; struct MaximumMinimumOptions; struct MaximumMinimumOptionsT; +struct TileOptions; +struct TileOptionsT; + struct ArgMaxOptions; struct ArgMaxOptionsT; @@ -178,6 +181,9 @@ struct SliceOptionsT; struct TransposeConvOptions; struct TransposeConvOptionsT; +struct ExpandDimsOptions; +struct ExpandDimsOptionsT; + struct SparseToDenseOptions; struct SparseToDenseOptionsT; @@ -309,11 +315,13 @@ enum BuiltinOperator { BuiltinOperator_SIN = 66, BuiltinOperator_TRANSPOSE_CONV = 67, BuiltinOperator_SPARSE_TO_DENSE = 68, + BuiltinOperator_TILE = 69, + BuiltinOperator_EXPAND_DIMS = 70, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_SPARSE_TO_DENSE + BuiltinOperator_MAX = BuiltinOperator_EXPAND_DIMS }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[68] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[70] { static BuiltinOperator values[] = { BuiltinOperator_ADD, 
BuiltinOperator_AVERAGE_POOL_2D, @@ -382,7 +390,9 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[68] { BuiltinOperator_SLICE, BuiltinOperator_SIN, BuiltinOperator_TRANSPOSE_CONV, - BuiltinOperator_SPARSE_TO_DENSE + BuiltinOperator_SPARSE_TO_DENSE, + BuiltinOperator_TILE, + BuiltinOperator_EXPAND_DIMS }; return values; } @@ -458,6 +468,8 @@ inline const char **EnumNamesBuiltinOperator() { "SIN", "TRANSPOSE_CONV", "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", nullptr }; return names; @@ -520,11 +532,13 @@ enum BuiltinOptions { BuiltinOptions_SliceOptions = 48, BuiltinOptions_TransposeConvOptions = 49, BuiltinOptions_SparseToDenseOptions = 50, + BuiltinOptions_TileOptions = 51, + BuiltinOptions_ExpandDimsOptions = 52, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_SparseToDenseOptions + BuiltinOptions_MAX = BuiltinOptions_ExpandDimsOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[51] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[53] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -576,7 +590,9 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[51] { BuiltinOptions_SelectOptions, BuiltinOptions_SliceOptions, BuiltinOptions_TransposeConvOptions, - BuiltinOptions_SparseToDenseOptions + BuiltinOptions_SparseToDenseOptions, + BuiltinOptions_TileOptions, + BuiltinOptions_ExpandDimsOptions }; return values; } @@ -634,6 +650,8 @@ inline const char **EnumNamesBuiltinOptions() { "SliceOptions", "TransposeConvOptions", "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", nullptr }; return names; @@ -848,6 +866,14 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_ExpandDimsOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1279,6 +1305,22 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_SparseToDenseOptions ? reinterpret_cast(value) : nullptr; } + TileOptionsT *AsTileOptions() { + return type == BuiltinOptions_TileOptions ? + reinterpret_cast(value) : nullptr; + } + const TileOptionsT *AsTileOptions() const { + return type == BuiltinOptions_TileOptions ? + reinterpret_cast(value) : nullptr; + } + ExpandDimsOptionsT *AsExpandDimsOptions() { + return type == BuiltinOptions_ExpandDimsOptions ? + reinterpret_cast(value) : nullptr; + } + const ExpandDimsOptionsT *AsExpandDimsOptions() const { + return type == BuiltinOptions_ExpandDimsOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -4152,6 +4194,46 @@ inline flatbuffers::Offset CreateMaximumMinimumOptions( flatbuffers::Offset CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct TileOptionsT : public flatbuffers::NativeTable { + typedef TileOptions TableType; + TileOptionsT() { + } +}; + +struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TileOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + TileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TileOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit 
TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + TileOptionsBuilder &operator=(const TileOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTileOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + TileOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct ArgMaxOptionsT : public flatbuffers::NativeTable { typedef ArgMaxOptions TableType; TensorType output_type; @@ -4564,6 +4646,46 @@ inline flatbuffers::Offset CreateTransposeConvOptions( flatbuffers::Offset CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct ExpandDimsOptionsT : public flatbuffers::NativeTable { + typedef ExpandDimsOptions TableType; + ExpandDimsOptionsT() { + } +}; + +struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ExpandDimsOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ExpandDimsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ExpandDimsOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + 
ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateExpandDimsOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ExpandDimsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct SparseToDenseOptionsT : public flatbuffers::NativeTable { typedef SparseToDenseOptions TableType; bool validate_indices; @@ -4899,6 +5021,12 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == BuiltinOptions_SparseToDenseOptions ? static_cast(builtin_options()) : nullptr; } + const TileOptions *builtin_options_as_TileOptions() const { + return builtin_options_type() == BuiltinOptions_TileOptions ? static_cast(builtin_options()) : nullptr; + } + const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { + return builtin_options_type() == BuiltinOptions_ExpandDimsOptions ? 
static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -5125,6 +5253,14 @@ template<> inline const SparseToDenseOptions *Operator::builtin_options_as inline const TileOptions *Operator::builtin_options_as() const { + return builtin_options_as_TileOptions(); +} + +template<> inline const ExpandDimsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpandDimsOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -6725,6 +6861,29 @@ inline flatbuffers::Offset CreateMaximumMinimumOptions(fl _fbb); } +inline TileOptionsT *TileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new TileOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void TileOptions::UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset TileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTileOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTileOptions( + _fbb); +} + inline ArgMaxOptionsT *ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ArgMaxOptionsT(); UnPackTo(_o, _resolver); @@ -6944,6 +7103,29 @@ inline flatbuffers::Offset CreateTransposeConvOptions(flat _stride_h); } +inline ExpandDimsOptionsT *ExpandDimsOptions::UnPack(const flatbuffers::resolver_function_t 
*_resolver) const { + auto _o = new ExpandDimsOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ExpandDimsOptions::UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ExpandDimsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateExpandDimsOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpandDimsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateExpandDimsOptions( + _fbb); +} + inline SparseToDenseOptionsT *SparseToDenseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SparseToDenseOptionsT(); UnPackTo(_o, _resolver); @@ -7356,6 +7538,14 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -7574,6 +7764,14 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -7780,6 
+7978,14 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateSparseToDenseOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(value); + return CreateTileOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(value); + return CreateExpandDimsOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -7986,6 +8192,14 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new SparseToDenseOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_TileOptions: { + value = new TileOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ExpandDimsOptions: { + value = new ExpandDimsOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -8243,6 +8457,16 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 6a6d12ed67..f07e36fc7d 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -2517,6 +2517,72 @@ def make_transpose_conv_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_tile_tests(zip_path): + """Make a set of tests to do tile.""" + test_parameters = [{ + "input_dtype": [tf.float32, tf.int32], + "input_shape": [[3, 2, 1], [2, 2, 2]], + "multiplier_dtype": [tf.int32, tf.int64], + "multiplier_shape": [[3]] + }] + + def build_graph(parameters): + """Build the tile op testing graph.""" + 
input_value = tf.placeholder( + dtype=parameters["input_dtype"], + shape=parameters["input_shape"], + name="input") + multiplier_value = tf.placeholder( + dtype=parameters["multiplier_dtype"], + shape=parameters["multiplier_shape"], + name="multiplier") + out = tf.tile(input_value, multiplier_value) + return [input_value, multiplier_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + multipliers_value = create_tensor_data(parameters["multiplier_dtype"], + parameters["multiplier_shape"]) + return [input_value, multipliers_value], sess.run( + outputs, + feed_dict={ + inputs[0]: input_value, + inputs[1]: multipliers_value + }) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_expand_dims_tests(zip_path): + """Make a set of tests to do expand_dims.""" + + test_parameters = [{ + "input_type": [tf.float32, tf.int32], + "input_shape": [[3, 4], [10, 10, 3]], + "axis_value": [0, 1, 2, -1, -2], + }] + + def build_graph(parameters): + """Build the where op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_type"], + name="input", + shape=parameters["input_shape"]) + axis_value = tf.placeholder(dtype=tf.int32, name="axis", shape=[1]) + out = tf.expand_dims(input_value, axis=axis_value) + return [input_value, axis_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_type"], + parameters["input_shape"]) + axis_value = np.array([parameters["axis_value"]], dtype=np.int32) + return [input_value, axis_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value, axis_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_sparse_to_dense_tests(zip_path): """Make a set of tests to do sparse to dense.""" @@ -2578,6 +2644,7 @@ def make_sparse_to_dense_tests(zip_path): 
make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + # Toco binary path provided by the generate rule. bin_path = None diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 8f0f2e24db..84a5410839 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -507,6 +507,22 @@ class Pad : public BuiltinOperator { + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateTileOptions(*builder); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override {} + int GetVersion(const Operator& op) const override { return 1; } +}; + class PadV2 : public BuiltinOperator { public: @@ -815,6 +831,24 @@ class SparseToDense int GetVersion(const Operator& op) const override { return 1; } }; +class ExpandDims + : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateExpandDimsOptions(*builder); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override {} + + int GetVersion(const Operator& op) const override { return 1; } +}; + class TensorFlowUnsupported : public BaseOperator { public: using BaseOperator::BaseOperator; @@ -997,6 +1031,10 @@ std::vector> BuildOperatorList() { new Cast(::tflite::BuiltinOperator_CAST, OperatorType::kCast)); ops.emplace_back( new ArgMax(::tflite::BuiltinOperator_ARG_MAX, OperatorType::kArgMax)); + ops.emplace_back( + new Tile(::tflite::BuiltinOperator_TILE, OperatorType::kTensorFlowTile)); + ops.emplace_back(new ExpandDims(::tflite::BuiltinOperator_EXPAND_DIMS, + OperatorType::kExpandDims)); ops.emplace_back(new 
TransposeConv(::tflite::BuiltinOperator_TRANSPOSE_CONV, OperatorType::kTransposeConv)); ops.emplace_back(new SparseToDense(::tflite::BuiltinOperator_SPARSE_TO_DENSE, -- GitLab From 03d67b43d3e1432ab6490be75ef49e01c032ed06 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 13:45:49 -0700 Subject: [PATCH 404/902] Add wrapper header file for SerialDeviceBatchScheduler PiperOrigin-RevId: 198919964 --- tensorflow/contrib/batching/BUILD | 8 +++++++ .../batching/serial_device_batch_scheduler.h | 21 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 tensorflow/contrib/batching/serial_device_batch_scheduler.h diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index b6dae3cc1f..b27a19b16c 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -49,6 +49,14 @@ cc_library( ], ) +cc_library( + name = "serial_device_batch_scheduler", + hdrs = ["serial_device_batch_scheduler.h"], + deps = [ + "//tensorflow/core/kernels/batching_util:serial_device_batch_scheduler", + ], +) + cc_library( name = "basic_batch_scheduler", hdrs = ["basic_batch_scheduler.h"], diff --git a/tensorflow/contrib/batching/serial_device_batch_scheduler.h b/tensorflow/contrib/batching/serial_device_batch_scheduler.h new file mode 100644 index 0000000000..bf6b708361 --- /dev/null +++ b/tensorflow/contrib/batching/serial_device_batch_scheduler.h @@ -0,0 +1,21 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ +#define TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ + +#include "tensorflow/core/kernels/batching_util/serial_device_batch_scheduler.h" + +#endif // TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ -- GitLab From b2702807daa79e3d97a05fba01e846e128dae0a5 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Fri, 1 Jun 2018 13:49:27 -0700 Subject: [PATCH 405/902] In the Swift API, deprecate `a.dot(b)` and `⊗` to `matmul(a, b)` to accurately reflect the operator's mathematical properties and make it familiar to TensorFlow users. Currently the deprecation is a warning - when we update tensorflow/swift-models, I'll start another CL to remove it completely. Previously `dot` was chosen over `matmul` because of naming convention concerns (acronyms aren't common in Swift) and that we wanted to make it short (so full names like `a.matrixMultiplied(by: b)` isn't acceptable). Beyond these concerns, `matmul` is really a word of art and thus should be preferred. The ⊗ operator often denotes outer product and Kronecker product. So it's removed, too.
PiperOrigin-RevId: 198920621 --- tensorflow/docs_src/community/swift.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/community/swift.md b/tensorflow/docs_src/community/swift.md index d1625d3b93..070f9931e0 100644 --- a/tensorflow/docs_src/community/swift.md +++ b/tensorflow/docs_src/community/swift.md @@ -21,7 +21,7 @@ import TensorFlow var x = Tensor([[1, 2], [3, 4]]) for i in 1...5 { - x += x ⊗ x + x += matmul(x, x) } print(x) -- GitLab From 829aad441d2a9a48e234cd7572d8ad9281034698 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 1 Jun 2018 13:58:11 -0700 Subject: [PATCH 406/902] [TF:XLA] Bump open source llvm revision to r333732 PiperOrigin-RevId: 198921960 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0672615d5e..e4b7f9a695 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -453,11 +453,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/80f62ff390cc9440ef48ccac94ea6f7f51da3b93.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/80f62ff390cc9440ef48ccac94ea6f7f51da3b93.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/48c1879dcedb834e95a95da8715b30897a49edbe.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/48c1879dcedb834e95a95da8715b30897a49edbe.tar.gz", ], - sha256 = "119e7d9687a20103088677d5157cf70352392a423943de3cb549f6e4638edc59", - strip_prefix = "llvm-80f62ff390cc9440ef48ccac94ea6f7f51da3b93", + sha256 = "0e0767199c169f738718461d05d3fdada80b533a6e8e2e07c9ae852356be3c0a", + strip_prefix = "llvm-48c1879dcedb834e95a95da8715b30897a49edbe", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From 37ab09a4697ebfda5ce9c8c296090e1d1ffefdda Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 1 Jun 2018 13:58:47 -0700 Subject: [PATCH 407/902] [xla] expose a ConvGeneralDilated op in the local Python client PiperOrigin-RevId: 198922037 --- tensorflow/compiler/xla/python/xla_client.py | 55 +++++++++++++++++++ .../compiler/xla/python/xla_client_test.py | 40 ++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 50b548afa5..6a4bae253b 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -1112,6 +1112,61 @@ class ComputationBuilder(object): dimension_numbers.output_spatial_dimensions.extend(range(2, 2 + nd)) return dimension_numbers + def ConvGeneralDilated(self, lhs, rhs, window_strides, padding, lhs_dilation, + rhs_dilation, dimension_numbers): + """Enqueues a ConvGeneralDilated operation onto the computation. + + Args: + lhs: LocalOp for the rank N+2 array of inputs. + rhs: LocalOp for the rank N+2 array of kernel weights. + window_strides: length-N array-like of integer kernel strides. + padding: length-N array-like of pairs of integers of (low, high) padding. + lhs_dilation: length-N array-like of integer dilation factors. + rhs_dilation: length-N array-like of integer dilation factors. + dimension_numbers: either an xla_data_pb2.ConvolutionDimensionNumbers or a + triple (lhs_spec, rhs_spec, out_spec) where each element is a string of + length N+2 identifying by position (1) batch dimensions in lhs, rhs, and + the output with the character 'N', (2) feature dimensions in lhs and the + output with the character 'C', (3) input and output feature dimensions + in rhs with the characters 'I' and 'O' respectively, and (4) spatial + dimension correspondences between lhs, rhs, and the output using any + distinct characters. 
For example, to indicate dimension numbers + consistent with the Conv operation with two spatial dimensions, one + could use ('NCHW', 'OIHW', 'NCHW'). As another example, to indicate + dimension numbers consistent with the TensorFlow Conv2D operation, one + could use ('NHWC', 'HWIO', 'NHWC'). When using the latter form of + convolution dimension specification, window strides are associated with + spatial dimension character labels according to the order in which the + labels appear in the rhs_spec string, so that window_strides[0] is + matched with the dimension corresponding to the first character + appearing in rhs_spec that is not 'I' or 'O'. + + Returns: a LocalOp representing the ConvGenralDilated operation. + """ + if not isinstance(dimension_numbers, + xla_data_pb2.ConvolutionDimensionNumbers): + lhs_spec, rhs_spec, out_spec = dimension_numbers + dimension_numbers = xla_data_pb2.ConvolutionDimensionNumbers() + + dimension_numbers.input_batch_dimension = lhs_spec.index('N') + dimension_numbers.input_feature_dimension = lhs_spec.index('C') + dimension_numbers.output_batch_dimension = out_spec.index('N') + dimension_numbers.output_feature_dimension = out_spec.index('C') + dimension_numbers.kernel_output_feature_dimension = rhs_spec.index('O') + dimension_numbers.kernel_input_feature_dimension = rhs_spec.index('I') + + dimension_numbers.kernel_spatial_dimensions.extend( + i for i, c in enumerate(rhs_spec) if c not in {'I', 'O'}) + dimension_numbers.input_spatial_dimensions.extend( + sorted((i for i, c in enumerate(lhs_spec) if c not in {'N', 'C'}), + key=lambda i: rhs_spec.index(lhs_spec[i]))) + dimension_numbers.output_spatial_dimensions.extend( + sorted((i for i, c in enumerate(out_spec) if c not in {'N', 'C'}), + key=lambda i: rhs_spec.index(out_spec[i]))) + return self._client.ConvGeneralDilated(lhs, rhs, window_strides, padding, + lhs_dilation, rhs_dilation, + dimension_numbers) + def _forward_methods_to_local_builder(): """Forward remaining 
ComputationBuilder methods to the C API. diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index e3d393bccc..375e720f9b 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -519,6 +519,46 @@ class SingleOpTest(LocalComputationTest): [40., 50., 0.]]]]) self._ExecuteAndCompareClose(c, expected=result) + def testConvGeneralDilatedF32(self): + c = self._NewComputation() + a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32") + lhs = a(1, 1, 2, 3) + rhs = a(1, 1, 1, 2) * 10 + strides = [1, 1] + pads = [(1, 0), (0, 1)] + lhs_dilation = (2, 1) + rhs_dilation = (1, 1) + dimension_numbers = ("NCHW", "OIHW", "NCHW") + c.ConvGeneralDilated(c.Constant(lhs), c.Constant(rhs), + strides, pads, lhs_dilation, rhs_dilation, + dimension_numbers) + result = np.array([[[[0., 0., 0.], + [10., 20., 0.], + [0., 0., 0.], + [40., 50., 0.]]]]) + self._ExecuteAndCompareClose(c, expected=result) + + def testConvGeneralDilatedPermutedF32(self): + c = self._NewComputation() + a = lambda *dims: np.arange(np.prod(dims)).reshape(dims).astype("float32") + lhs = a(1, 1, 2, 3) + rhs = a(1, 1, 1, 2) * 10 + strides = [1, 1] + pads = [(1, 0), (0, 1)] + lhs_dilation = (2, 1) + rhs_dilation = (1, 1) + + dimension_numbers = ("NHWC", "OIHW", "CWNH") + c.ConvGeneralDilated(c.Constant(np.transpose(lhs, (0, 2, 3, 1))), + c.Constant(rhs), + strides, pads, lhs_dilation, rhs_dilation, + dimension_numbers) + result = np.array([[[[0., 0., 0.], + [10., 20., 0.], + [0., 0., 0.], + [40., 50., 0.]]]]) + self._ExecuteAndCompareClose(c, expected=np.transpose(result, (1, 3, 0, 2))) + def testBooleanNot(self): c = self._NewComputation() arr = NumpyArrayBool([True, False, True]) -- GitLab From d1a3c24745aaf54098b7de3069d65fa92002b221 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 1 Jun 2018 14:11:57 -0700 Subject: [PATCH 408/902] Optimized implementation of dilated convolution. Added a DilatedIm2Col() function to leverage GEMM optimizations. PiperOrigin-RevId: 198924313 --- .../internal/optimized/optimized_ops.h | 187 ++++++++++-------- .../contrib/lite/kernels/internal/types.h | 8 + 2 files changed, 116 insertions(+), 79 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f7011b28fd..0ce781db59 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1776,6 +1776,100 @@ inline void ExtractPatchIntoBufferColumn( } } +template +void DilatedIm2col(const T* input_data, const Dims<4>& input_dims, + const Dims<4>& filter_dims, int stride_width, + int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + const Dims<4>& output_dims, uint8 byte_zero, + T* im2col_data) { + // For dilated convolution, the input pixels are not contiguous therefore we + // can't use the same opitimizations as Im2Col(). Though note this code would + // work fine for the non-dilated case too (though likely a bit slower). 
+ gemmlowp::ScopedProfilingLabel label("DilatedIm2col"); + TFLITE_DCHECK(dilation_width_factor != 1 || dilation_height_factor != 1); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + TFLITE_DCHECK(im2col_data); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + MatchingArraySize(output_dims, 0, filter_dims, 3); + + // Construct the MxN sized im2col matrix. + // The rows M, are sub-ordered B x H x W + Dims<4> row_dims; + row_dims.sizes[0] = output_width; + row_dims.sizes[1] = output_height; + row_dims.sizes[2] = batches; + row_dims.sizes[3] = 1; + ComputeStrides(&row_dims); + + // The columns, N, are sub-ordered Kh x Kw x Din + Dims<4> col_dims; + col_dims.sizes[0] = input_depth; + col_dims.sizes[1] = filter_width; + col_dims.sizes[2] = filter_height; + col_dims.sizes[3] = 1; + ComputeStrides(&col_dims); + + // Use dimensions M and N to construct dims for indexing directly into im2col + Dims<4> im2col_dims; + im2col_dims.sizes[0] = col_dims.strides[3]; + im2col_dims.sizes[1] = row_dims.strides[3]; + im2col_dims.sizes[2] = 1; + im2col_dims.sizes[3] = 1; + ComputeStrides(&im2col_dims); + + // Loop through the output rows (B x H x W) + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + // Each row is an output pixel. Arrange the input data into this row in + // an order we can conveniently multiply with the filter data. 
+ int row_offset = Offset(row_dims, out_x, out_y, batch, 0); + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + // Loop through all the pixels of the filter (Kh x Kw) + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + if ((in_y >= 0) && (in_y < input_height)) { + // Filter row is within the input data. + // Loop through all the filter pixels in this row. + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + int col_offset = Offset(col_dims, 0, filter_x, filter_y, 0); + T* dst = im2col_data + + Offset(im2col_dims, col_offset, row_offset, 0, 0); + if ((in_x >= 0) && (in_x < input_width)) { + // Filter pixel is within the input, copy the data. + T const* src = + input_data + Offset(input_dims, 0, in_x, in_y, batch); + memcpy(dst, src, input_depth * sizeof(T)); + } else { + // Filter pixel is outside the input, zero it out. + memset(dst, byte_zero, input_depth * sizeof(T)); + } + } + } else { + // Filter row is outside the input, zero out the entire im2col row. 
+ int col_offset = Offset(col_dims, 0, 0, filter_y, 0); + T* dst = + im2col_data + Offset(im2col_dims, col_offset, row_offset, 0, 0); + memset(dst, byte_zero, filter_width * input_depth * sizeof(T)); + } + } + } + } + } +} + template void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kheight, @@ -1816,74 +1910,6 @@ void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, kwidth, byte_zero, output_data, output_dims); } -inline void DilatedConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - gemmlowp::ScopedProfilingLabel label("DilatedConv"); - // This is a copy of the reference Conv implementation. We do not currently - // have an optimized path for dilation. - (void)im2col_data; // only used in optimized code. - (void)im2col_dims; // only used in optimized code. 
- const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); - if (bias_data) { - TFLITE_DCHECK_EQ(ArraySize(filter_dims, 3), ArraySize(bias_dims, 0)); - } - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - float total = 0.f; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height)) { - float input_value = input_data[Offset(input_dims, in_channel, - in_x, in_y, batch)]; - float filter_value = - filter_data[Offset(filter_dims, in_channel, filter_x, - filter_y, out_channel)]; - total += (input_value * filter_value); - } - } - } - } - float bias_value = 0.0f; - if (bias_data) { - bias_value = bias_data[Offset(bias_dims, out_channel, 0, 0, 0)]; - } - output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = - ActivationFunctionWithMinMax(total + bias_value, - output_activation_min, - output_activation_max); - } - } - } - } -} - inline void Conv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, @@ -1892,29 +1918,32 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, float output_activation_min, float output_activation_max, float* output_data, const Dims<4>& output_dims, float* im2col_data, const Dims<4>& im2col_dims) { - if ((dilation_width_factor != 1) || (dilation_height_factor != 1)) { - return DilatedConv(input_data, input_dims, filter_data, filter_dims, - bias_data, bias_dims, stride_width, stride_height, - dilation_width_factor, dilation_height_factor, pad_width, - pad_height, output_activation_min, output_activation_max, - output_data, output_dims, im2col_data, im2col_dims); - } - (void)im2col_data; (void)im2col_dims; gemmlowp::ScopedProfilingLabel label("Conv"); + // A float set to 0x00000000h == 0.0f + const uint8 float_zero_byte = 0x00; const float* gemm_input_data = nullptr; const Dims<4>* gemm_input_dims = nullptr; const int filter_width = ArraySize(filter_dims, 1); const int filter_height = ArraySize(filter_dims, 2); + const bool need_dilated_im2col = + dilation_width_factor != 1 || dilation_height_factor != 1; const bool need_im2col = stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1; 
- if (need_im2col) { + if (need_dilated_im2col) { + DilatedIm2col(input_data, input_dims, filter_dims, stride_width, + stride_height, dilation_width_factor, dilation_height_factor, + pad_width, pad_height, output_dims, float_zero_byte, + im2col_data); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else if (need_im2col) { TFLITE_DCHECK(im2col_data); Im2col(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_height, filter_width, 0, im2col_data, - im2col_dims); + pad_height, filter_height, filter_width, float_zero_byte, + im2col_data, im2col_dims); gemm_input_data = im2col_data; gemm_input_dims = &im2col_dims; } else { diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index fc8ed753c5..0c7fb7a76a 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -358,6 +358,14 @@ bool IsPackedWithoutStrides(const Dims& dims) { return true; } +template +void ComputeStrides(Dims* dims) { + dims->strides[0] = 1; + for (int d = 1; d < N; d++) { + dims->strides[d] = dims->strides[d - 1] * dims->sizes[d - 1]; + } +} + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TYPES_H_ -- GitLab From 5ab4e1346dba1d5bb820452883c1561d144759f7 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 1 Jun 2018 14:19:03 -0700 Subject: [PATCH 409/902] Updating release notes for r1.9. --- RELEASE.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 84d9d52868..600294478d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,60 @@ +# Release 1.9.0 + +## Major Features And Improvements +* Update tf.keras to the Keras 2.1.6 API. +* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. +* Adding support of core feature columns and losses to gradient boosted trees estimators. 
+* The Bijector API now requires 'event_ndims' passed in to the `log_det_jacobian` methods, while `event_ndims` is removed from the base class and replaced with `forward_min_event_ndims`. The signature is now `log_det_jacobian(x, event_ndims)`. The main rationale for this change is that it allows Bijectors to broadcast. +RELNOTES: If you were using layers from `tf.keras.layers` in conjunction with custom variable scopes, your layer variable names might have changed. If you were using layers from `tf.layers` in a subclassed `tf.keras.Model` class, then your variable names have changed (you can restore the prior names by importing the same layers from `tf.keras.layers` instead of `tf.layers`). + +## Breaking Changes + * If you're opening empty variable scopes, replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). + +## Bug Fixes and Other Changes +* `tf.data`: + * The `DatasetBase::DebugString()` method is now `const`. + * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. +* Eager Execution: +* `tf.keras`: + * Move Keras code out of _impl folder and remove API files. + * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. + * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. +* Accelerated Linear Algebra (XLA): +* TensorFlow Debugger (tfdbg) CLI: +* `tf.contrib`: + * Add `tf.contrib.data.choose_from_datasets()`. + * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. + * `tf.contrib.framework.zero_initializer` supports ResourceVariable. + * Adding "constrained_optimization" to tensorflow/contrib. +* Other: + * Add GCS Configuration Ops. + * Changing signature of `MakeIterator` to enable propagating error status. + * KL divergence for two Dirichlet distributions. + * More consistent GcsFileSystem behavior for certain reads past EOF.
+ * Update benchmark for tf.scan to match ranges across eager and graph modes. + * Fixed bug in `tf.reduce_prod` gradient for complex dtypes. + * Add optional `args` argument to `Dataset.from_generator()`. + * Allow the use of '.' in variables (e.g. "hparams.parse('a.b=1.0')"), which would previously raise an error. This will correspond to an attribute name with an embedded '.' symbol (e.g. 'a.b'), which can only be accessed indirectly (e.g. through getattr and setattr). To set this up the user will first need to explicitly add the variable to the hparam object (e.g. "hparams.add_hparam(name='a.b', value=0.0)"). + * Benchmark for tf.scan in graph and eager modes. + * Added complex128 support to FFT, FFT2D, FFT3D, IFFT, IFFT2D, and IFFT3D. + * Making ids unique in `nn.embedding_lookup_sparse`. This helps to reduce RPC calls for looking up the embeddings when there are repeated ids in the batch. + * Support indicator column in boosted trees. + * Prevent `tf.gradients()` from backpropagating through integer tensors. + * LinearOperator[1D,2D,3D]Circulant added to `tensorflow.linalg`. + * Conv3D, Conv3DBackpropInput, Conv3DBackpropFilter now supports arbitrary. + * Added `tf.train.Checkpoint` for reading/writing object-based checkpoints. + * `Dataset.list_files()` now produces deterministic results when `shuffle=False` or a `seed` is passed. + * Added LinearOperatorKronecker, a dense-free implementation of the Kronecker Product. + * Allow LinearOperator to broadcast. + * SavedModelBuilder will now deduplicate asset names that point to files with the same basename and the same contents. Note that this may result in new asset files included in SavedModels in cases where assets with the same name but different contents were previously overwriting each other.
+ + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +Abdullah Alrasheed, Achal Shah, Ad-530, ADiegoCAlonso, Aditya Yogi, Ag Ramesh, akindyakov, Andy Kernahan, Anya Petrova, Aurelien Geron, Ben, Ben Barsdell, Bhavani-Subramanian, braincodercn, Brett Koonce, Brian Nemsick, Brian Zier, Bryan Heden, candy.dc, cclauss, Clayne Robison, ctiijima, Dalmo Cirne, David Norman, David T.H. Kao, DosLin, ekelsen, Elson Rodriguez, Erik Smistad, Felix Abecassis, Fergal Cotter, fo40225, foo0x29a, Freedom" Koan-Sin Tan, FréDéRic Branchaud-Charron, gdh1995, Geoffrey Irving, Giuseppe, gracehoney, Guido Zuidhof, Guillaume Klein, Guozhong Zhuang, Haggai, Harald Husum, imsheridan, Ivan Zhang, Jan Zikes, Jayaram Bobba, Jesse Benson, Jesse Gumz, Jiajia Li, Jie, jinghuangintel, Jingwen, jjsjann123, Joe Yearsley, Joel Hestness, Joel Shor, josephyearsley, Junpeng Lao, Karol M. Langner, Kb Sriram, krantideep95, Krish Ravindranath, Letian Feng, Loo Rong Jie, Lukas Geiger, Maciej, Mahmoud Abuzaina, ManHyuk, Mark Ryan, mbhuiyan, Michal Turek, Mostafa Alaa, Myungsung Kwak, Nand Dalal, Nehal J Wani, Neil Tenenholtz, ngc92, Nicholas Nadeau, P.Eng., Avs, Niranjan Hasabnis, P-Hidringer, Paul Van Eck, Peng Yu, Qing Zhao, Qingying Chen, Quanlong, Rajendra Arora, Rholais Lii, rmanyari, Robin Richtsfeld, Russell Klopfer, Sagi, Sam Sendelbach, Sandeep N Gupta, Sandip Giri, Sarah Edkins, Scott Tseng, Sdalbsoo, Sergii Khomenko, Seungwoo Choi (Biggie), Seyed Majid Azimi, Shaoning Zeng, shengfuintel, Siu Kei, Muk, Smit Shilu, soonson, Stefan Schweter, Sukhwan Kim, Sunitha Kambhampati, Taehoon Lee, tamimaddari82, Tang, Wenyi, Ted Chang, u2takey, Utkarsh Upadhyay, Vadim Markovtsev, voegtlel, Wai Hon Law, wangsiyu, Wenhao Hu, wenhao.hu, William D. 
Irons, Yan Facai (颜发才), Yanbo Liang, Yihong Wang, Yilei (Dolee) Yang, Yong Tang, Yuan (Terry) Tang + # Release 1.8.0 ## Major Features And Improvements -- GitLab From 672bd9fd8c446eb2c69e4b0f13ed9b74d0a5956f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 1 Jun 2018 14:26:07 -0700 Subject: [PATCH 410/902] Updating version for 1.9.0-rc0. --- tensorflow/core/public/version.h | 4 ++-- tensorflow/docs_src/get_started/eager.md | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 18 +++++++-------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 9 ++++++-- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 12 files changed, 41 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 522a9d84fd..cb1fd09dbb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 8 +#define TF_MINOR_VERSION 9 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/get_started/eager.md b/tensorflow/docs_src/get_started/eager.md index f08ac74425..bbb25e20c6 100644 --- a/tensorflow/docs_src/get_started/eager.md +++ b/tensorflow/docs_src/get_started/eager.md @@ -1,3 +1,3 @@ # Get Started with Eager Execution -[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.8.0/samples/core/get_started/eager.ipynb) +[Colab notebook](https://colab.research.google.com/github/tensorflow/models/blob/r1.9.0/samples/core/get_started/eager.ipynb) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 1abd840ab3..2901848745 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 52a2a3f8a6..55bc0f64e7 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go 
env GOOS)-x86_64-1.9.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1256fb99c4..b3b739212e 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.8.0 + 1.9.0-rc0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.8.0 + 1.9.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.8.0 + 1.9.0-rc0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. 
Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). 3. Extract this .zip file. @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -

javac -cp libtensorflow-1.8.0.jar HelloTF.java
+
javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 3b9381625f..2ecab808c4 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -438,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -684,14 +684,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 29a867a9e3..9d01271c5a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 5ba522b436..d25e641cee 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0 on Linux: +for TensorFlow 1.9.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl
 
## Validate your installation @@ -433,6 +433,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** + + @@ -456,6 +458,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.9.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.11.0N/AN/A
tensorflow_gpu-1.9.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.11.079
tensorflow-1.8.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.8.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.7.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
+ @@ -472,6 +475,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.9.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.11.0N/AN/A
tensorflow-1.8.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.7.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
+ + diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 406d134699..57a491255e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -76,7 +76,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index a6cd44ced1..6796ad70e5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.8 +ARG TF_BRANCH=r1.9 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index e4dcce9cdd..204b5b4dba 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -85,7 +85,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.9 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d25a9e77b1..78d955c637 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0' +_VERSION = '1.9.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 441979ff0399418b7883ca6c267c08fc716ce74b Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Fri, 1 Jun 2018 14:56:17 -0700 Subject: [PATCH 411/902] [XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client. PiperOrigin-RevId: 198930874 --- .../compiler/xla/client/executable_build_options.cc | 12 ++++++++++++ .../compiler/xla/client/executable_build_options.h | 8 ++++++++ .../compiler/xla/python/local_computation_builder.i | 5 +++++ tensorflow/compiler/xla/python/xla_client.py | 1 + tensorflow/compiler/xla/service/local_service.cc | 5 +++++ 5 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index 6e3c5cb484..7dee41f6a0 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -87,6 +87,18 @@ ExecutableBuildOptions::dump_optimized_hlo_proto_to() const { return dump_optimized_hlo_proto_to_; } +ExecutableBuildOptions& +ExecutableBuildOptions::set_dump_unoptimized_hlo_proto_to( + tensorflow::StringPiece dirpath) { + dump_unoptimized_hlo_proto_to_ = dirpath.ToString(); + return *this; +} + +const tensorflow::gtl::optional& +ExecutableBuildOptions::dump_unoptimized_hlo_proto_to() const { + return dump_unoptimized_hlo_proto_to_; +} + ExecutableBuildOptions& 
ExecutableBuildOptions::set_dump_per_pass_hlo_proto_to( tensorflow::StringPiece dirpath) { dump_per_pass_hlo_proto_to_ = dirpath.ToString(); diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 393da381fb..9dc9be4423 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -65,6 +65,13 @@ class ExecutableBuildOptions { tensorflow::StringPiece dirpath); const tensorflow::gtl::optional& dump_optimized_hlo_proto_to() const; + // If set, specifies a dirpath to dump the start-of-optimization-pipeline HLO + // protobuf to (as in DebugOptions). + ExecutableBuildOptions& set_dump_unoptimized_hlo_proto_to( + tensorflow::StringPiece dirpath); + const tensorflow::gtl::optional& dump_unoptimized_hlo_proto_to() + const; + // If set, specifies a dirpath to dump the per-pass-in-pipeline HLO protobufs // to (as in DebugOptions). ExecutableBuildOptions& set_dump_per_pass_hlo_proto_to( @@ -95,6 +102,7 @@ class ExecutableBuildOptions { bool result_layout_set_ = false; tensorflow::gtl::optional generate_hlo_graph_; tensorflow::gtl::optional dump_optimized_hlo_proto_to_; + tensorflow::gtl::optional dump_unoptimized_hlo_proto_to_; tensorflow::gtl::optional dump_per_pass_hlo_proto_to_; DeviceMemoryAllocator* device_allocator_ = nullptr; std::vector disabled_hlo_passes_; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index 51412ca474..536b93c6f9 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -851,6 +851,11 @@ tensorflow::ImportNumpy(); })) { return nullptr; } + if (!HandleStringAttribute($input, "dump_unoptimized_hlo_proto_to", [&](string s) { + build_options.set_dump_unoptimized_hlo_proto_to(std::move(s)); + })) { + return nullptr; + } if 
(!HandleStringAttribute($input, "dump_per_pass_hlo_proto_to", [&](string s) { build_options.set_dump_per_pass_hlo_proto_to(std::move(s)); })) { diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 6a4bae253b..11611ac612 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -353,6 +353,7 @@ class CompileOptions(object): def __init__(self): self.generate_hlo_graph = None self.dump_optimized_hlo_proto_to = None + self.dump_unoptimized_hlo_proto_to = None self.dump_per_pass_hlo_proto_to = None self.hlo_profile = False diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 375c4a6780..1d9c9e0678 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -108,6 +108,11 @@ ExecutionOptions CreateExecutionOptions( ->set_xla_dump_optimized_hlo_proto_to( build_options.dump_optimized_hlo_proto_to().value()); } + if (build_options.dump_unoptimized_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_unoptimized_hlo_proto_to( + build_options.dump_unoptimized_hlo_proto_to().value()); + } if (build_options.dump_per_pass_hlo_proto_to().has_value()) { execution_options.mutable_debug_options() ->set_xla_dump_per_pass_hlo_proto_to( -- GitLab From af1d59aff9bf3b43dfff4d99e50d22f527201e76 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 1 Jun 2018 15:29:06 -0700 Subject: [PATCH 412/902] DepthwiseConv Optimizations PiperOrigin-RevId: 198935499 --- .../depthwiseconv_uint8_3x3_filter.h | 920 +++++++++++++++++- 1 file changed, 891 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 8cd72239e9..a7b0d805a3 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -42,6 +42,7 @@ struct DepthwiseConvParams { int64_t input_row_size; int64_t output_depth; int64_t output_row_size; + int64_t filter_row_size; int32 input_offset; int32 output_offset; int32 filter_offset; @@ -51,6 +52,8 @@ struct DepthwiseConvParams { int32 output_shift; int32 input_width; int32 input_height; + int32 stride_width; + int32 stride_height; int32 output_width; int32 output_height; }; @@ -65,17 +68,20 @@ struct DepthwiseConvParams { #define OFFSET_INPUT_ROW_SIZE 8 #define OFFSET_OUTPUT_DEPTH 16 #define OFFSET_OUTPUT_ROW_SIZE 24 -#define OFFSET_INPUT_OFFSET 32 -#define OFFSET_OUTPUT_OFFSET 36 -#define OFFSET_FILTER_OFFSET 40 -#define OFFSET_OUTPUT_MULTIPLIER 44 -#define OFFSET_OUTPUT_ACTIVATION_MIN 48 -#define OFFSET_OUTPUT_ACTIVATION_MAX 52 -#define OFFSET_OUTPUT_SHIFT 56 -#define OFFSET_INPUT_WIDTH 60 -#define OFFSET_INPUT_HEIGHT 64 -#define OFFSET_OUTPUT_WIDTH 68 -#define OFFSET_OUTPUT_HEIGHT 72 +#define OFFSET_FILTER_ROW_SIZE 32 +#define OFFSET_INPUT_OFFSET 40 +#define OFFSET_OUTPUT_OFFSET 44 +#define OFFSET_FILTER_OFFSET 48 +#define OFFSET_OUTPUT_MULTIPLIER 52 +#define OFFSET_OUTPUT_ACTIVATION_MIN 56 +#define OFFSET_OUTPUT_ACTIVATION_MAX 60 +#define OFFSET_OUTPUT_SHIFT 64 +#define OFFSET_INPUT_WIDTH 68 +#define OFFSET_INPUT_HEIGHT 72 +#define OFFSET_STRIDE_WIDTH 76 +#define OFFSET_STRIDE_HEIGHT 80 
+#define OFFSET_OUTPUT_WIDTH 84 +#define OFFSET_OUTPUT_HEIGHT 88 static_assert(offsetof(DepthwiseConvParams, input_depth) == OFFSET_INPUT_DEPTH, ""); @@ -85,6 +91,8 @@ static_assert(offsetof(DepthwiseConvParams, output_depth) == OFFSET_OUTPUT_DEPTH, ""); static_assert(offsetof(DepthwiseConvParams, output_row_size) == OFFSET_OUTPUT_ROW_SIZE, ""); +static_assert(offsetof(DepthwiseConvParams, filter_row_size) == + OFFSET_FILTER_ROW_SIZE, ""); static_assert(offsetof(DepthwiseConvParams, input_offset) == OFFSET_INPUT_OFFSET, ""); static_assert(offsetof(DepthwiseConvParams, output_offset) == @@ -103,6 +111,10 @@ static_assert(offsetof(DepthwiseConvParams, input_width) == OFFSET_INPUT_WIDTH, ""); static_assert(offsetof(DepthwiseConvParams, input_height) == OFFSET_INPUT_HEIGHT, ""); +static_assert(offsetof(DepthwiseConvParams, stride_width) == + OFFSET_STRIDE_WIDTH, ""); +static_assert(offsetof(DepthwiseConvParams, stride_height) == + OFFSET_STRIDE_HEIGHT, ""); static_assert(offsetof(DepthwiseConvParams, output_width) == OFFSET_OUTPUT_WIDTH, ""); static_assert(offsetof(DepthwiseConvParams, output_height) == @@ -114,7 +126,7 @@ struct DepthwiseConvWindow {}; template <> struct DepthwiseConvWindow<8, 1, 1> { public: - static void Run(const uint8* input_ptr, const uint8* filter_ptr, + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, int64_t input_depth, int64_t input_row_size, int32 output_window_height, int32 output_window_width, @@ -1097,7 +1109,7 @@ struct DepthwiseConvWindow<8, 1, 1> { template <> struct DepthwiseConvWindow<8, 2, 2> { - static void Run(const uint8* input_ptr, const uint8* filter_ptr, + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, int64_t input_depth, int64_t input_row_size, int32 output_window_height, int32 output_window_width, @@ -2179,6 +2191,715 @@ struct DepthwiseConvWindow<8, 2, 2> { } }; +enum class EdgeType { 
kCorner, kHorizontal, kVertical, kCenter }; + +template +struct DepthwiseConvPartial {}; + +template <> +struct DepthwiseConvPartial { + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, + const DepthwiseConvParams* params_ptr) { +#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1" +#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2" + asm volatile( + // Performs depthwise convolutions for an input window of size 1x1 and + // padding of 1 across the full depth. Expects |input_ptr| and + // |filter_ptr| to be pointing to the 1x1 input and filter values. + "ld1 {v8.8b}, [%[input_ptr]], #8\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "ldr x11, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + "ldr w10, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "dup v26.8h, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w10\n" + "ld1 {v0.8b}, [%[filter_ptr]], #8\n" + "cmp x11, #16\n" + "ldr w10, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "dup v28.4s, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "neg w10, w10\n" + "dup v29.4s, w10\n" + "ldr w10, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v30.4s, w9\n" + "ldr w9, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + "dup v31.4s, w10\n" + "dup v25.8h, w9\n" + + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v0.8h, v25.8h, v0.8b\n" + + "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_DEPTH_8_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "subs x11, x11, #8\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "ld1 {v8.8b}, [%[input_ptr]], #8\n" + "cmp x11, #16\n" + "ld1 {v0.8b}, [%[filter_ptr]], #8\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + 
"sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]], #8\n" + "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v0.8h, v25.8h, v0.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + + "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n" + + DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]]\n" + : + // Outputs. + [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), [bias_ptr] "+r"(bias_ptr) + : + // Inputs. + [params_ptr] "r"(params_ptr) + : + // Clobbers. + "cc", "memory", + // We use these NEON registers. + "v0", "v8", "v16", "v17", "v18", "v19", "v25", "v26", "v27", "v28", + "v29", "v30", "v31", + // We use these general-purpose registers. 
+ "x9", "x10", "x11"); +#undef DEPTHWISECONV_LABEL_DEPTH_8_LOOP +#undef DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP + } +}; + +template <> +struct DepthwiseConvPartial { + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, + const DepthwiseConvParams* params_ptr) { +#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1" +#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2" + asm volatile( + // Performs depthwise convolutions for an input window of size 2x2 and + // padding of 1 across the full depth. Expects |input_ptr| and + // |filter_ptr| to be pointing to the beginning of the 2x2 input and + // filter values. + + // Load input and filter values. + "ldr x15, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + "ldr x9, [%[params_ptr], #" STR(OFFSET_INPUT_ROW_SIZE) "]\n" + "cmp x15, #16\n" + "add x12, %[input_ptr], x15\n" + "add x13, %[input_ptr], x9\n" + "ld1 {v8.8b}, [%[input_ptr]], #8\n" + "add x14, x13, x15\n" + "ld1 {v9.8b}, [x12], #8\n" + "ldr x6, [%[params_ptr], #" STR(OFFSET_FILTER_ROW_SIZE) "]\n" + + "add x9, %[filter_ptr], x15\n" + "ld1 {v10.8b}, [x13], #8\n" + "add x10, %[filter_ptr], x6\n" + "ld1 {v11.8b}, [x14], #8\n" + "ld1 {v0.8b}, [%[filter_ptr]], #8\n" + "add x11, x10, x15\n" + "ld1 {v1.8b}, [x9], #8\n" + "ld1 {v2.8b}, [x10], #8\n" + "ld1 {v3.8b}, [x11], #8\n" + + // Load constants. 
+ "ldr w6, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "ldr w7, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "dup v26.8h, w6\n" + "ldr w6, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w7\n" + "ldr w7, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "dup v28.4s, w6\n" + "ldr w6, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "neg w7, w7\n" + "dup v29.4s, w7\n" + "ldr w7, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v30.4s, w6\n" + "ldr w6, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + "dup v31.4s, w7\n" + "dup v25.8h, w6\n" + + // Add input and filter offsets. + "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + + "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_DEPTH_8_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "subs x15, x15, #8\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "ld1 {v8.8b}, [%[input_ptr]], #8\n" + "cmp x15, #16\n" + "ld1 {v0.8b}, [%[filter_ptr]], #8\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "ld1 {v9.8b}, [x12], #8\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "ld1 {v1.8b}, [x9], #8\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "ld1 {v10.8b}, [x13], #8\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "ld1 {v2.8b}, [x10], #8\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + "ld1 {v11.8b}, [x14], #8\n" + "ld1 {v3.8b}, [x11], #8\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, 
v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]], #8\n" + "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + + "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n" + + DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]]\n" + : + // Outputs. + [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), [bias_ptr] "+r"(bias_ptr) + : + // Inputs. + [params_ptr] "r"(params_ptr) + : + // Clobbers. 
+ "cc", "memory", + // We use these NEON registers. + "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11", "v16", "v17", "v18", + "v19", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + // We use these general-purpose registers. + "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15"); +#undef DEPTHWISECONV_LABEL_DEPTH_8_LOOP +#undef DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP + } +}; + +template <> +struct DepthwiseConvPartial { + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, + const DepthwiseConvParams* params_ptr) { +#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1" +#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2" + asm volatile( + // Performs depthwise convolutions for an input window of size 2x3 and + // padding of 1 across the full depth. Expects |input_ptr| and + // |filter_ptr| to be pointing to the beginning of the 2x3 input and + // filter values. + + // Load input and filter values. + "ldr x7, [%[params_ptr], #" STR(OFFSET_INPUT_DEPTH) "]\n" + "mov x12, %[input_ptr]\n" + "ldr x11, [%[params_ptr], #" STR(OFFSET_INPUT_ROW_SIZE) "]\n" + "mov x9, %[filter_ptr]\n" + "ldr x14, [%[params_ptr], #" STR(OFFSET_FILTER_ROW_SIZE) "]\n" + "add x13, x12, x11\n" + "ldr x15, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + + "ld1 {v8.8b}, [x12], x7\n" + "add x10, x9, x14\n" + "ld1 {v9.8b}, [x12], x7\n" + "cmp x15, #16\n" + "ld1 {v10.8b}, [x12]\n" + "add %[input_ptr], %[input_ptr], #8\n" + "ld1 {v11.8b}, [x13], x7\n" + "add %[filter_ptr], %[filter_ptr], #8\n" + "ld1 {v12.8b}, [x13], x7\n" + "ld1 {v13.8b}, [x13]\n" + + "ld1 {v0.8b}, [x9], x7\n" + "ld1 {v1.8b}, [x9], x7\n" + "ld1 {v2.8b}, [x9]\n" + "ld1 {v3.8b}, [x10], x7\n" + "ld1 {v4.8b}, [x10], x7\n" + "ld1 {v5.8b}, [x10]\n" + + // Load constants. 
+ "ldr w12, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "dup v26.8h, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w13\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "dup v28.4s, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "neg w13, w13\n" + "dup v29.4s, w13\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v30.4s, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + "dup v31.4s, w13\n" + "dup v25.8h, w12\n" + + // Add input and filter offsets. + "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + "uaddw v4.8h, v25.8h, v4.8b\n" + "uaddw v5.8h, v25.8h, v5.8b\n" + + "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_DEPTH_8_LOOP ":\n" + "mov x12, %[input_ptr]\n" + "subs x15, x15, #8\n" + "add x13, x12, x11\n" + "cmp x15, #16\n" + "add %[input_ptr], %[input_ptr], #8\n" + + "smlal v16.4s, v0.4h, v8.4h\n" + "mov x9, %[filter_ptr]\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "ld1 {v8.8b}, [x12], x7\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "add x10, x9, x14\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "ld1 {v9.8b}, [x12], x7\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "add %[filter_ptr], %[filter_ptr], #8\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "ld1 {v10.8b}, [x12]\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "ld1 {v0.8b}, [x9], x7\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + "ld1 {v11.8b}, [x13], x7\n" + "smlal v16.4s, v4.4h, v12.4h\n" + "ld1 {v1.8b}, [x9], x7\n" + "smlal2 v17.4s, 
v4.8h, v12.8h\n" + "ld1 {v12.8b}, [x13], x7\n" + "smlal v16.4s, v5.4h, v13.4h\n" + "ld1 {v2.8b}, [x9]\n" + "smlal2 v17.4s, v5.8h, v13.8h\n" + "ld1 {v13.8b}, [x13]\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "ld1 {v3.8b}, [x10], x7\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "ld1 {v4.8b}, [x10], x7\n" + "and v18.16b, v16.16b, v29.16b\n" + "ld1 {v5.8b}, [x10]\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "uaddw v8.8h, v26.8h, v8.8b\n" + "st1 {v16.8b}, [%[output_ptr]], #8\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v4.8h, v25.8h, v4.8b\n" + "uaddw v5.8h, v25.8h, v5.8b\n" + + "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n" + + DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + "smlal v16.4s, v4.4h, v12.4h\n" + "smlal2 v17.4s, v4.8h, v12.8h\n" + "smlal v16.4s, v5.4h, v13.4h\n" + "smlal2 v17.4s, v5.8h, v13.8h\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + 
"and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]]\n" + : + // Outputs. + [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), [bias_ptr] "+r"(bias_ptr) + : + // Inputs. + [params_ptr] "r"(params_ptr) + : + // Clobbers. + "cc", "memory", + // We use these NEON registers. + "v0", "v1", "v2", "v3", "v4", "v5", "v8", "v9", "v10", "v11", "v12", + "v13", "v16", "v17", "v18", "v19", "v25", "v26", "v27", "v28", "v29", + "v30", "v31", + // We use these general-purpose registers. + "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15"); +#undef DEPTHWISECONV_LABEL_DEPTH_8_LOOP +#undef DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP + } +}; + +template <> +struct DepthwiseConvPartial { + static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, + const int32* bias_ptr, uint8* output_ptr, + const DepthwiseConvParams* params_ptr) { +#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1" +#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2" + asm volatile( + // Performs depthwise convolutions for an input window of size 3x2 and + // padding of 1 across the full depth. Expects |input_ptr| and + // |filter_ptr| to be pointing to the beginning of the 3x2 input and + // filter values. + + // Load input and filter values. 
+ "ldr x6, [%[params_ptr], #" STR(OFFSET_INPUT_DEPTH) "]\n" + "mov x12, %[input_ptr]\n" + "ldr x11, [%[params_ptr], #" STR(OFFSET_INPUT_ROW_SIZE) "]\n" + "mov x7, %[filter_ptr]\n" + "ldr x5, [%[params_ptr], #" STR(OFFSET_FILTER_ROW_SIZE) "]\n" + "add x13, x12, x11\n" + "ldr x15, [%[params_ptr], #" STR(OFFSET_OUTPUT_DEPTH) "]\n" + "add x14, x13, x11\n" + + "ld1 {v8.8b}, [x12], x6\n" + "add x9, x7, x5\n" + "ld1 {v9.8b}, [x12]\n" + "cmp x15, #16\n" + "add x10, x9, x5\n" + "ld1 {v10.8b}, [x13], x6\n" + "add %[input_ptr], %[input_ptr], #8\n" + "ld1 {v11.8b}, [x13]\n" + "add %[filter_ptr], %[filter_ptr], #8\n" + "ld1 {v12.8b}, [x14], x6\n" + "ld1 {v13.8b}, [x14]\n" + + "ld1 {v0.8b}, [x7], x6\n" + "ld1 {v1.8b}, [x7]\n" + "ld1 {v2.8b}, [x9], x6\n" + "ld1 {v3.8b}, [x9]\n" + "ld1 {v4.8b}, [x10], x6\n" + "ld1 {v5.8b}, [x10]\n" + + // Load constants. + "ldr w12, [%[params_ptr], #" STR(OFFSET_INPUT_OFFSET) "]\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_MULTIPLIER) "]\n" + "dup v26.8h, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_OFFSET) "]\n" + "dup v27.4s, w13\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_SHIFT) "]\n" + "dup v28.4s, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MIN) "]\n" + "neg w13, w13\n" + "dup v29.4s, w13\n" + "ldr w13, [%[params_ptr], #" STR(OFFSET_OUTPUT_ACTIVATION_MAX) "]\n" + "dup v30.4s, w12\n" + "ldr w12, [%[params_ptr], #" STR(OFFSET_FILTER_OFFSET) "]\n" + "dup v31.4s, w13\n" + "dup v25.8h, w12\n" + + // Add input and filter offsets. 
+ "uaddw v8.8h, v26.8h, v8.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + "uaddw v4.8h, v25.8h, v4.8b\n" + "uaddw v5.8h, v25.8h, v5.8b\n" + + "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n" + + //"loop_%=:\n" + DEPTHWISECONV_LABEL_DEPTH_8_LOOP ":\n" + "mov x12, %[input_ptr]\n" + "subs x15, x15, #8\n" + "add x13, x12, x11\n" + "cmp x15, #16\n" + "add x14, x13, x11\n" + "add %[input_ptr], %[input_ptr], #8\n" + + "smlal v16.4s, v0.4h, v8.4h\n" + "mov x7, %[filter_ptr]\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "ld1 {v8.8b}, [x12], x6\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "add x9, x7, x5\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "add x10, x9, x5\n" + "ld1 {v9.8b}, [x12]\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "add %[filter_ptr], %[filter_ptr], #8\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "ld1 {v10.8b}, [x13], x6\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "ld1 {v0.8b}, [x7], x6\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + "ld1 {v11.8b}, [x13]\n" + "smlal v16.4s, v4.4h, v12.4h\n" + "ld1 {v1.8b}, [x7]\n" + "smlal2 v17.4s, v4.8h, v12.8h\n" + "ld1 {v12.8b}, [x14], x6\n" + "smlal v16.4s, v5.4h, v13.4h\n" + "ld1 {v2.8b}, [x9], x6\n" + "smlal2 v17.4s, v5.8h, v13.8h\n" + "ld1 {v13.8b}, [x14]\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "ld1 {v3.8b}, [x9]\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "ld1 {v4.8b}, [x10], x6\n" + "and v18.16b, v16.16b, v29.16b\n" + "ld1 {v5.8b}, [x10]\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + 
"add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "uaddw v8.8h, v26.8h, v8.8b\n" + "st1 {v16.8b}, [%[output_ptr]], #8\n" + "uaddw v9.8h, v26.8h, v9.8b\n" + "uaddw v10.8h, v26.8h, v10.8b\n" + "uaddw v11.8h, v26.8h, v11.8b\n" + "uaddw v12.8h, v26.8h, v12.8b\n" + "uaddw v13.8h, v26.8h, v13.8b\n" + + "uaddw v0.8h, v25.8h, v0.8b\n" + "uaddw v1.8h, v25.8h, v1.8b\n" + "uaddw v2.8h, v25.8h, v2.8b\n" + "ld1 {v16.4s}, [%[bias_ptr]], #16\n" + "uaddw v3.8h, v25.8h, v3.8b\n" + "ld1 {v17.4s}, [%[bias_ptr]], #16\n" + "uaddw v4.8h, v25.8h, v4.8b\n" + "uaddw v5.8h, v25.8h, v5.8b\n" + + "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n" + + DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP ":\n" + "smlal v16.4s, v0.4h, v8.4h\n" + "smlal2 v17.4s, v0.8h, v8.8h\n" + "smlal v16.4s, v1.4h, v9.4h\n" + "smlal2 v17.4s, v1.8h, v9.8h\n" + "smlal v16.4s, v2.4h, v10.4h\n" + "smlal2 v17.4s, v2.8h, v10.8h\n" + "smlal v16.4s, v3.4h, v11.4h\n" + "smlal2 v17.4s, v3.8h, v11.8h\n" + "smlal v16.4s, v4.4h, v12.4h\n" + "smlal2 v17.4s, v4.8h, v12.8h\n" + "smlal v16.4s, v5.4h, v13.4h\n" + "smlal2 v17.4s, v5.8h, v13.8h\n" + + "sqrdmulh v16.4s, v16.4s, v27.4s\n" + "sqrdmulh v17.4s, v17.4s, v27.4s\n" + "and v18.16b, v16.16b, v29.16b\n" + "and v19.16b, v17.16b, v29.16b\n" + "sshr v18.4s, v18.4s, #31\n" + "sshr v19.4s, v19.4s, #31\n" + "sqadd v16.4s, v16.4s, v18.4s\n" + "sqadd v17.4s, v17.4s, v19.4s\n" + "srshl v16.4s, v16.4s, v29.4s\n" + "srshl v17.4s, v17.4s, v29.4s\n" + "add v16.4s, v16.4s, v28.4s\n" + "add v17.4s, v17.4s, v28.4s\n" + "smax v16.4s, v16.4s, v30.4s\n" + "smax v17.4s, v17.4s, v30.4s\n" + "smin v16.4s, v16.4s, v31.4s\n" + "smin v17.4s, v17.4s, v31.4s\n" + "sqxtn v16.4h, v16.4s\n" + "sqxtn2 v16.8h, v17.4s\n" + "sqxtun v16.8b, v16.8h\n" + "st1 {v16.8b}, [%[output_ptr]]\n" + : + // Outputs. 
+ [filter_ptr] "+r"(filter_ptr), [input_ptr] "+r"(input_ptr), + [output_ptr] "+r"(output_ptr), [bias_ptr] "+r"(bias_ptr) + : + // Inputs. + [params_ptr] "r"(params_ptr) + : + // Clobbers. + "cc", "memory", + // We use these NEON registers. + "v0", "v1", "v2", "v3", "v4", "v5", "v8", "v9", "v10", "v11", "v12", + "v13", "v16", "v17", "v18", "v19", "v25", "v26", "v27", "v28", "v29", + "v30", "v31", + // We use these general-purpose registers. + "x5", "x6", "x7", "x9", "x10", "x11", "x12", "x13", "x14", "x15"); +#undef DEPTHWISECONV_LABEL_DEPTH_8_LOOP +#undef DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP + } +}; + #undef OFFSET_INPUT_DEPTH #undef OFFSET_INPUT_ROW_SIZE #undef OFFSET_OUTPUT_DEPTH @@ -2266,7 +2987,7 @@ template struct DepthwiseConvMultiRow { using ConvKernel = DepthwiseConvThroughDepth; - static inline void Run(const uint8* input_data, int32 start_x, int32 start_y, + static inline void Run(const uint8* input_data, int32 start_x, int32 end_x, const uint8* filter_data, const int32* bias_data, uint8* output_data, const DepthwiseConvParams& params, const ShuffleParams& shuffle_params, @@ -2286,7 +3007,7 @@ struct DepthwiseConvMultiRow { // preshuffle the input data to maximize locality. if (params.output_depth > 64 || (params.output_depth <= 64 && params.input_width > 150)) { - for (; out_x <= (params.output_width - shuffle_params.output_width); + for (; out_x <= (end_x - shuffle_params.output_width); out_x += shuffle_params.output_width) { const uint8* input_ptr = input_data; const int32* bias_ptr = bias_data; @@ -2344,7 +3065,7 @@ struct DepthwiseConvMultiRow { } } - const int32 output_leftover_width = params.output_width - out_x; + const int32 output_leftover_width = end_x - out_x; if (output_leftover_width > 0) { ConvKernel::Run(input_data, filter_data, bias_data, output_data, 0, params.output_depth, params.input_depth, @@ -2354,6 +3075,105 @@ struct DepthwiseConvMultiRow { } }; +// Processes the borders of the input for pad_width and pad_height = 1. 
+// Calls 4 asm kernels: +// * 1x1 input shape. +// * Corner edges. +// * Horizontal edges. +// * Vertical edges. +inline void DepthwiseConvHandlePadding(const uint8* input_data, + const uint8* filter_data, const int32* bias_data, uint8* output_data, + const DepthwiseConvParams& params) { + if (params.input_width == 1 && params.input_height == 1) { + const uint8* filter_ptr = filter_data + params.filter_row_size + + params.output_depth; + DepthwiseConvPartial::Run(input_data, filter_ptr, + bias_data, output_data, &params); + return; + } + + const int32 out_x_start_corner = 0; + const int32 out_x_end_corner = params.output_width - 1; + const int32 out_y_start_corner = 0; + const int32 out_y_end_corner = params.output_height - 1; + + // Handle top row. + const uint8* input_ptr = input_data; + const uint8* filter_ptr = filter_data + params.filter_row_size + + params.output_depth; + uint8* output_ptr = output_data; + + DepthwiseConvPartial::Run(input_ptr, filter_ptr, + bias_data, output_ptr, &params); + + input_ptr += (params.stride_width - 1) * params.input_depth; + filter_ptr = filter_data + params.filter_row_size; + output_ptr += params.output_depth; + + for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner; + out_x++) { + DepthwiseConvPartial::Run( + input_ptr, filter_ptr, bias_data, output_ptr, &params); + input_ptr += params.stride_width * params.input_depth; + output_ptr += params.output_depth; + } + + DepthwiseConvPartial::Run(input_ptr, filter_ptr, + bias_data, output_ptr, &params); + + // Handle left side. 
+ input_ptr = input_data + (params.stride_width - 1) * params.input_row_size; + filter_ptr = filter_data + params.input_depth; + output_ptr = output_data + params.output_row_size; + + for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner; + out_y++) { + DepthwiseConvPartial::Run( + input_ptr, filter_ptr, bias_data, output_ptr, &params); + input_ptr += params.stride_width * params.input_row_size; + output_ptr += params.output_row_size; + } + + // Handle right side. + input_ptr = input_data + (params.input_width - 2) * params.input_depth + + (params.stride_width - 1) * params.input_row_size; + filter_ptr = filter_data; + output_ptr = output_data + params.output_row_size + + (params.output_width - 1) * params.output_depth; + + for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner; + out_y++) { + DepthwiseConvPartial::Run( + input_ptr, filter_ptr, bias_data, output_ptr, &params); + input_ptr += params.stride_width * params.input_row_size; + output_ptr += params.output_row_size; + } + + // Handle bottom row. + input_ptr = input_data + (params.input_height - 2) * params.input_row_size; + filter_ptr = filter_data + params.output_depth; + output_ptr = output_data + + (params.output_height - 1) * params.output_row_size; + + DepthwiseConvPartial::Run(input_ptr, filter_ptr, + bias_data, output_ptr, &params); + + input_ptr += (params.stride_width == 1) ? 
0 : params.input_depth; + filter_ptr = filter_data; + output_ptr += params.output_depth; + + for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner; + out_x++) { + DepthwiseConvPartial::Run( + input_ptr, filter_ptr, bias_data, output_ptr, &params); + input_ptr += params.stride_width * params.input_depth; + output_ptr += params.output_depth; + } + + DepthwiseConvPartial::Run(input_ptr, filter_ptr, + bias_data, output_ptr, &params); +} + inline bool Fast3x3FilterKernelSupported( const Dims<4>& input_dims, const Dims<4>& filter_dims, int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height, @@ -2370,7 +3190,8 @@ inline bool Fast3x3FilterKernelSupported( filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && (stride_width == 1 || stride_width == 2) && (stride_height == 1 || stride_height == 2) && - (stride_width == stride_height) && pad_width == 0 && pad_height == 0 && + (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) && + (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) && (input_depth % 8) == 0 && (output_shift > 0); if (!supported) { @@ -2390,8 +3211,26 @@ inline bool Fast3x3FilterKernelSupported( const int32 in_y_end = in_y_origin + filter_height; // Supported only if filter on the right and bottom boundary lies completely - // within the input. - return in_x_end <= input_width && in_y_end <= input_height; + // within the input if padding is zero. + if (pad_width == 0 && pad_height == 0) { + return in_x_end <= input_width && in_y_end <= input_height; + } + + // Else if padding is 1, supported if bottom right filter lies +1 past input + // width and height. + supported = in_x_end <= (input_width + 1) && in_y_end <= (input_height + 1); + + if (!supported) { + return false; + } + + // Shapes with width 1 and height > 1, and vice versa are not supported yet. 
+ if (input_width == 1) { + supported = (input_width == input_height); + } else if (input_height == 1) { + supported = (input_width == input_height); + } + return supported; } inline void DepthwiseConv3x3Filter( @@ -2409,6 +3248,8 @@ inline void DepthwiseConv3x3Filter( params.input_height = ArraySize(input_dims, 2); params.input_row_size = params.input_depth * params.input_width; params.input_offset = input_offset; + params.stride_width = stride_width; + params.stride_height = stride_height; params.output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); params.output_width = ArraySize(output_dims, 1); params.output_height = ArraySize(output_dims, 2); @@ -2422,6 +3263,7 @@ inline void DepthwiseConv3x3Filter( const int32 filter_height = ArraySize(filter_dims, 2); const int32 filter_width = ArraySize(filter_dims, 1); + params.filter_row_size = params.output_depth * filter_width; // Algorithm assumes below constraints. It is optimized for depth // multiplier of 1, 3x3 filter, no padding and strides 1 and 2. 
@@ -2432,8 +3274,9 @@ inline void DepthwiseConv3x3Filter( TFLITE_DCHECK(stride_height == 1 || stride_height == 2); TFLITE_DCHECK(stride_width == 1 || stride_width == 2); TFLITE_DCHECK(stride_width == stride_height); - TFLITE_DCHECK(pad_height == 0); - TFLITE_DCHECK(pad_width == 0); + TFLITE_DCHECK(pad_height == 0 || pad_height == 1); + TFLITE_DCHECK(pad_width == 0 || pad_width == 1); + TFLITE_DCHECK(pad_width == pad_height); const int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int64_t input_batch_size = params.input_row_size * params.input_height; @@ -2471,7 +3314,26 @@ inline void DepthwiseConv3x3Filter( const uint8* input_ptr = input_data + b * input_batch_size; uint8* output_ptr = output_data + b * output_batch_size; + int32 out_x = 0; int32 out_y = 0; + int32 end_x = params.output_width; + int32 end_y = params.output_height; + + if (pad_width == 1 && pad_height == 1) { + DepthwiseConvHandlePadding(input_ptr, filter_data, bias_data, output_ptr, + params); + + // Update extents now that the edges have been handled. + out_x = 1; + end_x = params.output_width - 1; + out_y = 1; + end_y = params.output_height - 1; + const int in_x = (out_x * stride_width) - pad_width; + const int in_y = (out_y * stride_height) - pad_height; + input_ptr += in_y * params.input_row_size + in_x * params.input_depth; + output_ptr += out_y * params.output_row_size + + out_x * params.output_depth; + } // Shuffling shapes that maximize width over the shuffle workspace size // perform better since the inputs are closer together, minimizing @@ -2486,8 +3348,8 @@ inline void DepthwiseConv3x3Filter( // Handle 8 rows at a time. 
if (params.input_width < four_row_shuffle_params.input_width) { - for (; out_y <= params.output_height - 8; out_y += 8) { - conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + for (; out_y <= end_y - 8; out_y += 8) { + conv_multirow_func(input_ptr, out_x, end_x, filter_data, bias_data, output_ptr, params, eight_row_shuffle_params, shuffle_workspace); input_ptr += 8 * stride_height * params.input_row_size; @@ -2497,8 +3359,8 @@ inline void DepthwiseConv3x3Filter( // Handle 4 rows at a time. if (params.input_width < two_row_shuffle_params.input_width) { - for (; out_y <= params.output_height - 4; out_y += 4) { - conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + for (; out_y <= end_y - 4; out_y += 4) { + conv_multirow_func(input_ptr, out_x, end_x, filter_data, bias_data, output_ptr, params, four_row_shuffle_params, shuffle_workspace); input_ptr += 4 * stride_height * params.input_row_size; @@ -2507,8 +3369,8 @@ inline void DepthwiseConv3x3Filter( } // Handle 2 rows at a time. - for (; out_y <= params.output_height - 2; out_y += 2) { - conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + for (; out_y <= end_y - 2; out_y += 2) { + conv_multirow_func(input_ptr, out_x, end_x, filter_data, bias_data, output_ptr, params, two_row_shuffle_params, shuffle_workspace); input_ptr += 2 * stride_height * params.input_row_size; @@ -2516,8 +3378,8 @@ inline void DepthwiseConv3x3Filter( } // Handle one row at a time. 
- for (; out_y < params.output_height; out_y++) { - conv_multirow_func(input_ptr, 0, out_y, filter_data, bias_data, + for (; out_y < end_y; out_y++) { + conv_multirow_func(input_ptr, out_x, end_x, filter_data, bias_data, output_ptr, params, one_row_shuffle_params, shuffle_workspace); input_ptr += stride_height * params.input_row_size; -- GitLab From 5e0b2f2b0d0d938152334ae1ef1c9b25d229e280 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 1 Jun 2018 15:32:16 -0700 Subject: [PATCH 413/902] [XLA] Move xla/tools/parser/* into xla/service. Now that we're using the parser inside of xla/service, it's awkward for it to live inside of xla/tools, because everything else in there is a standalone tool. We've already had one person be confused by this. PiperOrigin-RevId: 198935921 --- tensorflow/compiler/xla/service/BUILD | 95 +++++-- .../xla/service/buffer_assignment_test.cc | 4 +- tensorflow/compiler/xla/service/cpu/BUILD | 6 +- .../cpu/cpu_eigen_tensor_alignment_test.cc | 6 +- .../cpu/cpu_instruction_fusion_test.cc | 10 +- .../xla/service/cpu/ir_emission_utils_test.cc | 4 +- .../compiler/xla/service/cpu/tests/BUILD | 4 +- .../cpu/tests/cpu_literal_caching_test.cc | 6 +- .../xla/service/cpu/tests/cpu_outfeed_test.cc | 4 +- .../xla/service/elemental_ir_emitter_test.cc | 4 +- .../README.md => service/g3doc/hlo_parser.md} | 0 .../xla/service/gather_expander_test.cc | 6 +- tensorflow/compiler/xla/service/gpu/BUILD | 4 +- .../xla/service/gpu/fusion_merger_test.cc | 12 +- .../service/gpu/instruction_fusion_test.cc | 32 +-- .../xla/service/gpu/while_transformer.cc | 4 +- .../compiler/xla/service/hlo_cse_test.cc | 4 +- .../compiler/xla/service/hlo_domain_test.cc | 4 +- .../xla/service/hlo_execution_profile_test.cc | 4 +- .../xla/service/hlo_instruction_test.cc | 4 +- .../{tools/parser => service}/hlo_lexer.cc | 26 +- .../xla/{tools/parser => service}/hlo_lexer.h | 17 +- .../xla/service/hlo_liveness_analysis_test.cc | 22 +- .../compiler/xla/service/hlo_matchers.h | 4 +- 
.../compiler/xla/service/hlo_matchers_test.cc | 3 +- .../xla/service/hlo_module_dce_test.cc | 14 +- .../compiler/xla/service/hlo_ordering_test.cc | 6 +- .../{tools/parser => service}/hlo_parser.cc | 252 ++++++++++-------- .../{tools/parser => service}/hlo_parser.h | 24 +- .../parser => service}/hlo_parser_test.cc | 90 +++---- tensorflow/compiler/xla/service/hlo_runner.cc | 6 +- .../xla/service/hlo_scheduling_test.cc | 4 +- .../compiler/xla/service/hlo_sharding_test.cc | 6 +- .../xla/{tools/parser => service}/hlo_token.h | 11 +- .../xla/service/instruction_fusion_test.cc | 20 +- .../xla/service/layout_assignment_test.cc | 6 +- .../xla/service/pattern_matcher_test.cc | 6 +- .../xla/service/transpose_folding_test.cc | 12 +- .../compiler/xla/service/tuple_util_test.cc | 4 +- .../while_loop_constant_sinking_test.cc | 10 +- .../while_loop_invariant_code_motion_test.cc | 2 +- .../compiler/xla/service/while_util_test.cc | 8 +- tensorflow/compiler/xla/tests/BUILD | 10 +- .../xla/tests/cross_replica_sum_test.cc | 11 +- .../xla/tests/gather_operation_test.cc | 4 +- .../compiler/xla/tests/hlo_test_base.cc | 2 +- .../xla/tests/hlo_verified_test_base.cc | 4 +- .../compiler/xla/tests/reduce_hlo_test.cc | 4 +- tensorflow/compiler/xla/tools/parser/BUILD | 73 ----- 49 files changed, 442 insertions(+), 436 deletions(-) rename tensorflow/compiler/xla/{tools/parser/README.md => service/g3doc/hlo_parser.md} (100%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_lexer.cc (95%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_lexer.h (90%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_parser.cc (92%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_parser.h (70%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_parser_test.cc (94%) rename tensorflow/compiler/xla/{tools/parser => service}/hlo_token.h (84%) delete mode 100644 tensorflow/compiler/xla/tools/parser/BUILD diff --git a/tensorflow/compiler/xla/service/BUILD 
b/tensorflow/compiler/xla/service/BUILD index 2b14b63ea8..0102e4f003 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -349,8 +349,8 @@ tf_cc_test( ":hlo", ":pattern_matcher", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) @@ -388,8 +388,8 @@ cc_library( deps = [ ":hlo", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -399,6 +399,7 @@ tf_cc_test( srcs = ["hlo_matchers_test.cc"], deps = [ ":hlo_matchers", + ":hlo_parser", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", ], @@ -420,6 +421,7 @@ tf_cc_test( srcs = ["hlo_instruction_test.cc"], deps = [ ":hlo", + ":hlo_parser", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:shape_util", @@ -429,7 +431,6 @@ tf_cc_test( "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -444,9 +445,9 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -989,9 +990,9 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", 
"//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -1027,9 +1028,9 @@ tf_cc_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -1130,9 +1131,9 @@ tf_cc_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -1165,9 +1166,9 @@ tf_cc_test( deps = [ ":hlo_matchers", ":instruction_fusion", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -1339,9 +1340,9 @@ tf_cc_test( deps = [ ":gather_expander", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:test_macros_header", "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -1691,9 +1692,9 @@ tf_cc_test( ":cpu_plugin", ":hlo_cost_analysis", ":hlo_execution_profile", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -1874,9 +1875,9 @@ tf_cc_test( "//tensorflow/compiler/xla:test", 
"//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -2211,11 +2212,11 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -2237,9 +2238,9 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -2310,10 +2311,10 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -2415,12 +2416,12 @@ tf_cc_test( ":hlo", ":hlo_domain_isolator", ":hlo_domain_remover", + ":hlo_parser", ":hlo_sharding_metadata", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", "//tensorflow/compiler/xla/tests:hlo_test_base", 
"//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) @@ -2506,10 +2507,10 @@ xla_test( "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -2655,10 +2656,10 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service/gpu:ir_emission_utils", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -2795,7 +2796,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:backend", "//tensorflow/compiler/xla/service:compiler", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", @@ -2831,8 +2832,8 @@ tf_cc_test( ":tuple_util", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -2857,8 +2858,8 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", 
"//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -2884,8 +2885,8 @@ tf_cc_test( ":hlo_matchers", ":while_loop_invariant_code_motion", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) @@ -2911,8 +2912,8 @@ tf_cc_test( ":hlo_matchers", ":while_loop_constant_sinking", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) @@ -2965,9 +2966,57 @@ tf_cc_test( ":hlo_matchers", ":indexed_array_analysis", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", "//tensorflow/compiler/xla/tests:test_utils", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:test", ], ) + +cc_library( + name = "hlo_parser", + srcs = ["hlo_parser.cc"], + hdrs = ["hlo_parser.h"], + deps = [ + ":hlo", + ":hlo_lexer", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + +tf_cc_test( + name = "hlo_parser_test", + size = "small", + srcs = ["hlo_parser_test.cc"], + deps = [ + ":hlo_parser", + "//tensorflow/compiler/xla:window_util", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", # fixdeps: keep + ], +) + +cc_library( + name = "hlo_lexer", + srcs = ["hlo_lexer.cc"], + hdrs = [ + "hlo_lexer.h", + "hlo_token.h", + ], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + 
"//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + "//tensorflow/core:regexp_internal", + ], +) diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index bdcea92882..7e86c33687 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -32,12 +32,12 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/hlo_scheduling.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/macros.h" @@ -1793,7 +1793,7 @@ ENTRY %test_module { })"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(module_str)); + ParseHloString(module_str)); // Run CopyInsertion and check if the graph constructed above doesn't need // any copies inserted for BufferAssignment to run. 
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index a15e41fee0..f10d71fdba 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -633,10 +633,10 @@ tf_cc_test( deps = [ ":cpu_instruction_fusion", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) @@ -690,9 +690,9 @@ tf_cc_test( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -942,7 +942,7 @@ tf_cc_test( ":ir_emission_utils", ":target_machine_features_fake", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_eigen_tensor_alignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_eigen_tensor_alignment_test.cc index d12fa6bb9a..8727c72b6e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_eigen_tensor_alignment_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_eigen_tensor_alignment_test.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/cpu/target_machine_features_fake.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace cpu { @@ -40,7 +40,7 @@ ENTRY DotOperation { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloInstruction* dot = module->entry_computation()->root_instruction(); @@ -71,7 +71,7 @@ ENTRY ConvOperation { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloInstruction* conv = module->entry_computation()->root_instruction(); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc index 46fe060817..97e10a89a2 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc @@ -19,9 +19,9 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/gtl/array_slice.h" namespace op = xla::testing::opcode_matchers; @@ -172,7 +172,7 @@ ENTRY DotOperationFusion_TransposeFusion { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* computation = module->entry_computation(); TransposeFolding transpose_folding( @@ -202,7 +202,7 @@ ENTRY DotOperationFusion_TransposeFusion { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* computation = module->entry_computation(); TransposeFolding transpose_folding( @@ -233,7 +233,7 @@ ENTRY DotOperationFusion_TransposeFusion { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* computation = module->entry_computation(); TransposeFolding transpose_folding( @@ -775,7 +775,7 @@ TEST_P(GatherLoopFusionTest, GatherLoopFusion) { string hlo_string = tensorflow::strings::StrCat( "HloModule ", spec.test_name, "\n\n", spec.hlo_computation_text); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); RunFusionAndCheckOpcodesWereFused( module.get(), diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc index abb2471e6a..530ebce854 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/cpu/target_machine_features_fake.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace { @@ -35,7 +35,7 @@ ENTRY Conv { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* entry_computation = module->entry_computation(); diff --git a/tensorflow/compiler/xla/service/cpu/tests/BUILD b/tensorflow/compiler/xla/service/cpu/tests/BUILD index 67f776e7b5..66ae5ef0f6 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/cpu/tests/BUILD @@ -152,9 +152,9 @@ tf_cc_test( srcs = ["cpu_literal_caching_test.cc"], deps = [ "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service/cpu:cpu_compiler", "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", @@ -166,9 +166,9 @@ tf_cc_test( srcs = ["cpu_outfeed_test.cc"], deps = [ "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service/cpu:cpu_compiler", "//tensorflow/compiler/xla/service/cpu/tests:cpu_codegen_test", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc index 3cb25c5c19..27044b1d62 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc @@ -15,7 
+15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" #include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" namespace xla { namespace cpu { @@ -60,7 +60,7 @@ CHECK-NOT: private constant [12 x float] )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text)); + ParseHloString(hlo_text)); CpuAotCompilationOptions options{ /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"", @@ -105,7 +105,7 @@ CHECK-NOT: private constant [2 x float] )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text)); + ParseHloString(hlo_text)); CpuAotCompilationOptions options{ /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"", diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc index 1a948fb4fe..1ee279290b 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" #include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" namespace xla { namespace cpu { @@ -41,7 +41,7 @@ CHECK: private constant [12 x float] )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text)); + ParseHloString(hlo_text)); CpuAotCompilationOptions options{ /*triple=*/"x86_64-pc-linux", /*cpu_name=*/"", /*features=*/"", diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc index b43dc0c65d..8980d43033 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc @@ -14,12 +14,12 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/execution_options_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace { @@ -33,7 +33,7 @@ class ElementalIrEmitterExecutionTest : public HloTestBase { HloModuleConfig config; config.set_debug_options(GetDebugOptionsForTest()); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text, config)); + ParseHloString(hlo_text, config)); EXPECT_TRUE(RunAndCompareNoHloPasses(std::move(module), args, nullopt)); } }; diff --git a/tensorflow/compiler/xla/tools/parser/README.md b/tensorflow/compiler/xla/service/g3doc/hlo_parser.md similarity index 100% rename from 
tensorflow/compiler/xla/tools/parser/README.md rename to tensorflow/compiler/xla/service/g3doc/hlo_parser.md diff --git a/tensorflow/compiler/xla/service/gather_expander_test.cc b/tensorflow/compiler/xla/service/gather_expander_test.cc index 1c72ca0665..020ffcd106 100644 --- a/tensorflow/compiler/xla/service/gather_expander_test.cc +++ b/tensorflow/compiler/xla/service/gather_expander_test.cc @@ -14,9 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/gather_expander.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/test_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace { @@ -36,7 +36,7 @@ ENTRY main { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text)); + ParseHloString(hlo_text)); Status status = GatherExpander{}.Run(module.get()).status(); EXPECT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); @@ -63,7 +63,7 @@ ENTRY main { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text)); + ParseHloString(hlo_text)); TF_ASSERT_OK_AND_ASSIGN(bool changed, GatherExpander{}.Run(module.get())); ASSERT_TRUE(changed); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 68297ad4ae..6bd9d4c31d 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -416,9 +416,9 @@ tf_cc_test( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -460,9 +460,9 @@ tf_cc_test( ":instruction_fusion", 
"//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index 2217776c7d..b22bb1d39b 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -17,9 +17,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace gpu { @@ -40,7 +40,7 @@ class FusionMergerTest : public HloTestBase {}; // Tuple // TEST_F(FusionMergerTest, MergeSharedFusionInstruction) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule MergeSharedFusionInstruction comp.3 { @@ -104,7 +104,7 @@ ENTRY MergeSharedFusionInstruction.Computation0 { // // Fusion2 is not merged because it exceeds the threshold flops-to-bytes ratio. TEST_F(FusionMergerTest, FlopsToBytesRatioThresholdExceeded) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule FlopsToBytesRatioThresholdExceeded comp.2 { @@ -162,7 +162,7 @@ ENTRY FlopsToBytesRatioThresholdExceeded.Computation1 { // is merged into Fusion0 and Fusion1) would exceed the bytes transferred // threshold. 
TEST_F(FusionMergerTest, BytesTransferredThresholdExeceeded) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule BytesTransferredThresholdExeceeded comp.2 { @@ -210,7 +210,7 @@ ENTRY BytesTransferredThresholdExeceeded.Computation2 { // Fusion2 is reduced for this test which makes the merge operation into its // operand below the bytes transferred threshold. TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule BytesTransferredThresholdNotExeceeded comp.2 { @@ -253,7 +253,7 @@ ENTRY BytesTransferredThresholdNotExeceeded.Computation2 { // Check that we're willing to merge f1_computation into f2_computation, even // though f2 is an input fusion node. TEST_F(FusionMergerTest, WillMergeIntoInputFusion) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule m f1_computation { diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index ec60f3a167..426b1d235c 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -17,9 +17,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" namespace op = xla::testing::opcode_matchers; @@ -143,7 +143,7 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfDotUnfused) { // Tests that broadcasts fused into a fusion with a reduce root. 
TEST_F(InstructionFusionTest, BroadcastIntoReduce) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module add { @@ -172,7 +172,7 @@ TEST_F(InstructionFusionTest, BroadcastIntoReduce) { } TEST_F(InstructionFusionTest, BitcastIntoAdd) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY BroadcastIntoAdd { @@ -194,7 +194,7 @@ TEST_F(InstructionFusionTest, BitcastIntoAdd) { } TEST_F(InstructionFusionTest, AddIntoBitcast) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY BroadcastIntoAdd { @@ -216,7 +216,7 @@ TEST_F(InstructionFusionTest, AddIntoBitcast) { } TEST_F(InstructionFusionTest, DontFuseGTE) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY DontFuseGTE { p0 = (f32[10], f32[10]) parameter(0) @@ -232,7 +232,7 @@ TEST_F(InstructionFusionTest, DontFuseGTE) { } TEST_F(InstructionFusionTest, DotOutputFusion) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { alpha = f32[] constant(3) @@ -261,7 +261,7 @@ TEST_F(InstructionFusionTest, DotOutputFusion) { // Compute sum(1/p0), where p0 has type f32, twice. Check that the division is // duplicated and fused into both reduces. TEST_F(InstructionFusionTest, FloatingPointDivIsCheap) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module Add { lhs = f32[] parameter(0) @@ -292,7 +292,7 @@ TEST_F(InstructionFusionTest, FloatingPointDivIsCheap) { // is *not* duplicated and fused into both reduces, because we say that integer // division is not cheap. 
TEST_F(InstructionFusionTest, IntegerDivIsNotCheap) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module Add { lhs = s32[] parameter(0) @@ -317,7 +317,7 @@ TEST_F(InstructionFusionTest, IntegerDivIsNotCheap) { } TEST_F(InstructionFusionTest, DotOutputFusionImpossible) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY NoOutputFusion { alpha = f32[] constant(3) @@ -371,7 +371,7 @@ static StatusOr FindHloInstruction( TEST_F(InstructionFusionTest, MultiOutputFusion) { // sub --> add --> tuple // \---------------/ - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -403,7 +403,7 @@ TEST_F(InstructionFusionTest, MultiOutputFusion) { TEST_F(InstructionFusionTest, MultiOutputFusionExpensiveOp) { // tanh --> add --> tuple // \---------------/ - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -424,7 +424,7 @@ TEST_F(InstructionFusionTest, MultiOutputFusionExpensiveOp) { TEST_F(InstructionFusionTest, MultiOutputFusion2) { // sub --> add1 --\--------\ // \----------> add2 --> tuple - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -457,7 +457,7 @@ TEST_F(InstructionFusionTest, MultiOutputFusion2) { TEST_F(InstructionFusionTest, MultiOutputFusion3) { // sub --> add1 ----\--------\ // \ --> add2 --> add3 --> tuple - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -492,7 +492,7 @@ TEST_F(InstructionFusionTest, MultiOutputFusion3) { TEST_F(InstructionFusionTest, NoCyclesDueToMultiOutputFusion) { // sub --> mul ---\ // \--> call --> add --> tuple - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( 
HloModule test_module ENTRY OutputFusion { c = f32[] constant(42) @@ -527,7 +527,7 @@ TEST_F(InstructionFusionTest, NoCyclesDueToMultiOutputFusion) { TEST_F(InstructionFusionTest, NoMultiOutputFusionWithIncompatibleShapes) { // sub[2,3] --> add[4,3] --> tuple([2,3], [4,3]) // \-------------------------/ - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[2,3]{1,0} parameter(0) @@ -548,7 +548,7 @@ TEST_F(InstructionFusionTest, NoMultiOutputFusionWithIncompatibleShapes) { } TEST_F(InstructionFusionTest, FuseIntoInputFusionInstruction) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module add_computation { diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc index ad55728c45..7749201cbc 100644 --- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc +++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc @@ -457,8 +457,8 @@ class WhileBodyComputationMatcher : public MatcherBase { return InvalidArgument("Unexpected tuple index instruction : %s", inst->name().c_str()); } else if (tag == "loop_increment") { - // Parse the constant which represents the loop induction variable - // increment value. + // ParseHloString the constant which represents the loop induction + // variable increment value. TF_RETURN_IF_ERROR(ParseConstInteger(inst, &loop_increment_)); } else if (tag == "param0" && inst != computation_->parameter_instruction(0)) { diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index e8c5ca347b..16db374566 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -32,10 +32,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_utils.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/types.h" @@ -486,7 +486,7 @@ TEST_F(HloCseTest, DoNotCombineCallsToImpureFunctions) { } TEST_F(HloCseTest, CompareComputations) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule m add_computation { diff --git a/tensorflow/compiler/xla/service/hlo_domain_test.cc b/tensorflow/compiler/xla/service/hlo_domain_test.cc index f29aac29c0..5553ddb153 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_test.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_test.cc @@ -17,10 +17,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_domain_isolator.h" #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h" #include "tensorflow/compiler/xla/service/hlo_domain_remover.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/core/status_test_util.h" namespace xla { @@ -68,7 +68,7 @@ class HloDomainTest : public HloTestBase { tensorflow::StringPiece hlo_string) { HloModuleConfig config; config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); - return tools::Parse(hlo_string, config); + return ParseHloString(hlo_string, config); } }; diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc index 4900c813fd..eba80c0f19 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile_test.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/strings/strcat.h" namespace xla { @@ -29,7 +29,7 @@ using ::testing::ContainsRegex; class HloExecutionProfileTest : public HloTestBase {}; TEST_F(HloExecutionProfileTest, Basic) { - auto hlo_module = tools::Parse(R"( + auto hlo_module = ParseHloString(R"( HloModule test_module ENTRY entry_computation { lhs = f32[30,30]{1,0} parameter(0) diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index a1a8814384..313033ddad 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -24,11 +24,11 @@ limitations under the License. #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/window_util.h" @@ -1533,7 +1533,7 @@ ENTRY entry (param: s32[]) -> s32[] { // Check that deep clones really deep clones every instruction and // computations, without leaving dangling pointers to the old module. 
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); std::unique_ptr clone = module->Clone(); for (HloComputation* computation : clone->computations()) { EXPECT_EQ(computation->parent(), clone.get()); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc similarity index 95% rename from tensorflow/compiler/xla/tools/parser/hlo_lexer.cc rename to tensorflow/compiler/xla/service/hlo_lexer.cc index 350db12653..f0d9fdbc8f 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.cc +++ b/tensorflow/compiler/xla/service/hlo_lexer.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/tools/parser/hlo_lexer.h" +#include "tensorflow/compiler/xla/service/hlo_lexer.h" #include @@ -26,9 +26,8 @@ limitations under the License. 
#include "tensorflow/core/platform/regexp.h" namespace xla { -namespace tools { -using tensorflow::StringPiece; +using ::tensorflow::StringPiece; namespace { @@ -67,12 +66,12 @@ bool HloLexer::CanDereference(const char* ptr) const { return ptr < buf_.end() && ptr >= buf_.begin(); } -StringPiece HloLexer::StringPieceFromPointers(const char* begin, - const char* end) const { +tensorflow::StringPiece HloLexer::StringPieceFromPointers( + const char* begin, const char* end) const { CHECK(begin <= end); CHECK(begin == buf_.end() || CanDereference(begin)); CHECK(end == buf_.end() || CanDereference(end)); - return StringPiece(begin, end - begin); + return tensorflow::StringPiece(begin, end - begin); } tensorflow::RegexpStringPiece HloLexer::RegexpStringPieceFromPointers( @@ -197,7 +196,8 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kAttributeName; } - StringPiece identifier = StringPieceFromPointers(token_start_, current_ptr_); + tensorflow::StringPiece identifier = + StringPieceFromPointers(token_start_, current_ptr_); // See if this is a keyword. #define KEYWORD(STR) \ @@ -332,23 +332,24 @@ std::pair HloLexer::GetLineAndColumn(LocTy location) const { line_no_cache_.last_query = ptr; line_no_cache_.line_no_of_query = line_no; size_t line_offset = StringPieceFromPointers(start, ptr).rfind('\n'); - if (line_offset == StringPiece::npos) { + if (line_offset == tensorflow::StringPiece::npos) { line_offset = 0; } return {line_no, ptr - start - line_offset}; } -StringPiece HloLexer::GetLine(LocTy loc) const { +tensorflow::StringPiece HloLexer::GetLine(LocTy loc) const { if (!CanDereference(loc)) { return "LINE OUT OF RANGE"; } size_t line_start = StringPieceFromPointers(buf_.begin(), loc + 1).rfind('\n'); - const char* start = line_start == StringPiece::npos + const char* start = line_start == tensorflow::StringPiece::npos ? 
buf_.begin() : buf_.begin() + line_start + 1; size_t line_end = StringPieceFromPointers(loc, buf_.end()).find('\n'); - const char* end = line_end == StringPiece::npos ? buf_.end() : loc + line_end; + const char* end = + line_end == tensorflow::StringPiece::npos ? buf_.end() : loc + line_end; return StringPieceFromPointers(start, end); } @@ -370,7 +371,7 @@ TokKind HloLexer::LexString() { static LazyRE2 escaping_pattern = {R"("([^"\\]|\\.)*")"}; if (RE2::Consume(&consumable, *escaping_pattern)) { current_ptr_ = consumable.begin(); - StringPiece raw = + tensorflow::StringPiece raw = StringPieceFromPointers(token_start_ + 1, current_ptr_ - 1); string error; if (!tensorflow::str_util::CUnescape(raw, &str_val_, &error)) { @@ -453,5 +454,4 @@ string TokKindToString(TokKind kind) { } } -} // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h b/tensorflow/compiler/xla/service/hlo_lexer.h similarity index 90% rename from tensorflow/compiler/xla/tools/parser/hlo_lexer.h rename to tensorflow/compiler/xla/service/hlo_lexer.h index 27880b9b8a..ceb674f25e 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_lexer.h +++ b/tensorflow/compiler/xla/service/hlo_lexer.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_LEXER_H_ -#define TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_LEXER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ #include -#include "tensorflow/compiler/xla/tools/parser/hlo_token.h" +#include "tensorflow/compiler/xla/service/hlo_token.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -27,9 +27,11 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" namespace xla { -namespace tools { // Lexer for the HloModule::ToString() format text. +// +// This class is meant to be used by hlo_parser.cc. You shouldn't need to use +// it directly. class HloLexer { public: explicit HloLexer(tensorflow::StringPiece buf) : buf_(buf) { @@ -57,7 +59,7 @@ class HloLexer { CHECK(GetKind() == TokKind::kShape); return shape_val_; } - int64 GetInt64Val() const { + tensorflow::int64 GetInt64Val() const { CHECK(GetKind() == TokKind::kInt); return int64_val_; } @@ -114,7 +116,7 @@ class HloLexer { TokKind current_kind_; string str_val_; Shape shape_val_; - int64 int64_val_; + tensorflow::int64 int64_val_; double decimal_val_; struct LineNoCacheTy { @@ -125,7 +127,6 @@ class HloLexer { mutable LineNoCacheTy line_no_cache_{nullptr, 0}; }; -} // namespace tools } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_LEXER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_LEXER_H_ diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc index 8e2e2c7627..0275294a1a 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc @@ -18,12 +18,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -59,7 +59,7 @@ class HloLivenessAnalysisTest : public HloTestBase { // Test that add instruction at entry root is live at all output shape indices. TEST_F(HloLivenessAnalysisTest, AddAtEntryRoot) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -75,7 +75,7 @@ TEST_F(HloLivenessAnalysisTest, AddAtEntryRoot) { // Test that a dead add instruction is marked as dead by analysis. TEST_F(HloLivenessAnalysisTest, DeadAdd) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -94,7 +94,7 @@ TEST_F(HloLivenessAnalysisTest, DeadAdd) { // Test that all output shape indices of entry root tuple (and defining // instruction in its output) are marked live. TEST_F(HloLivenessAnalysisTest, TupleAtEntryRoot) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -113,7 +113,7 @@ TEST_F(HloLivenessAnalysisTest, TupleAtEntryRoot) { // Tests that all outputs of nested tuple and entry root (and defining // instruction values appearing in its output) are marked live. 
TEST_F(HloLivenessAnalysisTest, NestedTupleAtEntryRoot) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(1) @@ -140,7 +140,7 @@ TEST_F(HloLivenessAnalysisTest, NestedTupleAtEntryRoot) { // Tests that GTE at entry root of Tuple instruction only propgates liveness // to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfTuple) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -162,7 +162,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfTuple) { // Tests that GTE at entry root of nested Tuple instruction only propgates // liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -199,7 +199,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfNestedTuple) { // Tests that GTE of GTE (at entry root) of nested Tuple instruction only // propgates liveness to the live elements in tuple. TEST_F(HloLivenessAnalysisTest, GteOfGteOfNestedTuple) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleModule ENTRY SimpleComputation { constant.1 = s32[] constant(0) @@ -240,7 +240,7 @@ TEST_F(HloLivenessAnalysisTest, GteOfGteOfNestedTuple) { // Test that live/dead while tuple elements are marked live/dead correctly. TEST_F(HloLivenessAnalysisTest, WhileWithDeadTupleElement) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[3]{0}) parameter(0) @@ -291,7 +291,7 @@ TEST_F(HloLivenessAnalysisTest, WhileWithDeadTupleElement) { // Tests that a tuple element live in while.cond computation, propagates // liveness to while.body.root/while.result/while.operand (where it is unused). 
TEST_F(HloLivenessAnalysisTest, WhileCondPropagatesLiveness) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[3]{0}) parameter(0) @@ -345,7 +345,7 @@ TEST_F(HloLivenessAnalysisTest, WhileCondPropagatesLiveness) { // Tests that a use of while.result{0} propagates liveness to // while.body.param{1} to while.body.root{1}, and then to while.body.param{2}. TEST_F(HloLivenessAnalysisTest, WhileWithLiveTupleElements) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[], s32[]) parameter(0) diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index dfefad3634..c570b420c2 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MATCHERS_H_ #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/gtl/optional.h" namespace xla { @@ -329,7 +329,7 @@ inline ::testing::Matcher Sharding( inline ::testing::Matcher Sharding( tensorflow::StringPiece sharding) { return ::testing::MakeMatcher(new ::xla::testing::HloShardingMatcher( - xla::tools::ParseSharding(sharding).ValueOrDie())); + ParseSharding(sharding).ValueOrDie())); } // Verifies that no HloSharding is set for an HLO instruction. 
inline ::testing::Matcher NoSharding() { diff --git a/tensorflow/compiler/xla/service/hlo_matchers_test.cc b/tensorflow/compiler/xla/service/hlo_matchers_test.cc index 1d10e3c4fe..9a3010cf1f 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers_test.cc +++ b/tensorflow/compiler/xla/service/hlo_matchers_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" namespace op = xla::testing::opcode_matchers; @@ -194,7 +195,7 @@ ENTRY DotOperationFusion_TransposeFusion { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloInstruction* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::Dot(op::Parameter(0), op::Parameter(1), diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc index 53b7d0ed39..363862e490 100644 --- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc +++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_utils.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/types.h" @@ -73,7 +73,7 @@ class HloModuleDceTest : public HloTestBase { // Tests that a while with all outputs live is unmodified. TEST_F(HloModuleDceTest, WhileWithLiveOutputs) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[3]{0}) parameter(0) @@ -110,7 +110,7 @@ TEST_F(HloModuleDceTest, WhileWithLiveOutputs) { // Tests a while loop with one unused output (which is used in the while loop // body by an instruction with side-effects: rng) is unmodified. TEST_F(HloModuleDceTest, WhileWithUnusedSideEffectingTupleElement) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], f32[]) parameter(0) @@ -150,7 +150,7 @@ TEST_F(HloModuleDceTest, WhileWithUnusedSideEffectingTupleElement) { // Tests that a while loop with one dead tuple element at {1} has its while // loop body modified to make that tuple element pass-through the while body. 
TEST_F(HloModuleDceTest, OneWhileWithDeadTupleElement) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[3]{0}) parameter(0) @@ -193,7 +193,7 @@ TEST_F(HloModuleDceTest, OneWhileWithDeadTupleElement) { // dead in while.body{1} and at while.result{1}) propgates liveness of this // tuple element to while.body{1} and at while.result{1}. TEST_F(HloModuleDceTest, OneWhileWithTupleElementUsedByCond) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body { loop_var.1 = (s32[], s32[]) parameter(0) @@ -235,7 +235,7 @@ TEST_F(HloModuleDceTest, OneWhileWithTupleElementUsedByCond) { // Tests that HloModuleDCE can remove a dead tuple element at index {1} between // two dependent while loops. TEST_F(HloModuleDceTest, TwoWhilesWithDeadTupleElement) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body0 { loop_var.1 = (s32[], s32[3]{0}) parameter(0) @@ -303,7 +303,7 @@ TEST_F(HloModuleDceTest, TwoWhilesWithDeadTupleElement) { // Tests that HloModuleDCE can remove a dead tuple element at while.1{0} and // while.2{1}, between two dependent while loops. TEST_F(HloModuleDceTest, TwoWhilesWithDeadTupleElementSwizzled) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule SimpleLoop SimpleLoop.body0 { loop_var.1 = (s32[3]{0}, s32[]) parameter(0) diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index 37a7fbad97..cfe5dace05 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -22,10 +22,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/hlo_scheduling.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -310,7 +310,7 @@ ENTRY while.v11 { })"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(module_str)); + ParseHloString(module_str)); DependencyHloOrdering ordering(module.get()); ordering.ToString(); // Shouldn't crash. } @@ -347,7 +347,7 @@ ENTRY root { })"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(module_str)); + ParseHloString(module_str)); TF_ASSERT_OK_AND_ASSIGN(auto dataflow, HloDataflowAnalysis::Run(*module, /*ssa_form=*/true)); DependencyHloOrdering ordering(module.get()); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc similarity index 92% rename from tensorflow/compiler/xla/tools/parser/hlo_parser.cc rename to tensorflow/compiler/xla/service/hlo_parser.cc index ef10ca4bff..cefc6ff915 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" @@ -24,18 +24,17 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/stringprintf.h" namespace xla { -namespace tools { namespace { -using tensorflow::StringPiece; -using tensorflow::gtl::optional; -using tensorflow::str_util::Join; -using tensorflow::str_util::Split; -using tensorflow::str_util::SplitAndParseAsInts; -using tensorflow::strings::Printf; -using tensorflow::strings::StrAppend; -using tensorflow::strings::StrCat; +using ::tensorflow::StringPiece; +using ::tensorflow::gtl::optional; +using ::tensorflow::str_util::Join; +using ::tensorflow::str_util::Split; +using ::tensorflow::str_util::SplitAndParseAsInts; +using ::tensorflow::strings::Printf; +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; const double kF16max = 65504; @@ -83,11 +82,15 @@ class HloParser { // Sets the sub-value of literal at the given index to the given value. The // literal's shape must have the default layout. - bool SetValueInLiteral(int64 value, int64 linear_index, Literal* literal); - bool SetValueInLiteral(double value, int64 linear_index, Literal* literal); - bool SetValueInLiteral(bool value, int64 linear_index, Literal* literal); + bool SetValueInLiteral(tensorflow::int64 value, + tensorflow::int64 linear_index, Literal* literal); + bool SetValueInLiteral(double value, tensorflow::int64 linear_index, + Literal* literal); + bool SetValueInLiteral(bool value, tensorflow::int64 linear_index, + Literal* literal); template - bool SetValueInLiteralHelper(ParsedElemT value, int64 linear_index, + bool SetValueInLiteralHelper(ParsedElemT value, + tensorflow::int64 linear_index, Literal* literal); bool ParseOperands(std::vector* operands); @@ -99,9 +102,9 @@ class HloParser { // Describes the start, limit, and stride on every dimension of the operand // being sliced. struct SliceRanges { - std::vector starts; - std::vector limits; - std::vector strides; + std::vector starts; + std::vector limits; + std::vector strides; }; // Types of attributes. 
@@ -179,13 +182,14 @@ class HloParser { bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. - bool ParseDxD(const string& name, std::vector* result); + bool ParseDxD(const string& name, std::vector* result); // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. - bool ParseWindowPad(std::vector>* pad); + bool ParseWindowPad(std::vector>* pad); bool ParseSliceRanges(SliceRanges* result); bool ParseInt64List(const TokKind start, const TokKind end, - const TokKind delim, std::vector* result); + const TokKind delim, + std::vector* result); bool ParseParamListToShape(Shape* shape, LocTy* shape_loc); bool ParseParamList(); @@ -197,7 +201,7 @@ class HloParser { bool ParseFftType(FftType* result); bool ParseFusionKind(HloInstruction::FusionKind* result); bool ParseRandomDistribution(RandomDistribution* result); - bool ParseInt64(int64* result); + bool ParseInt64(tensorflow::int64* result); bool ParseDouble(double* result); bool ParseBool(bool* result); bool ParseToken(TokKind kind, const string& msg); @@ -455,7 +459,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction* instruction; switch (opcode) { case HloOpcode::kParameter: { - int64 parameter_number; + tensorflow::int64 parameter_number; if (!ParseToken(TokKind::kLparen, "expects '(' before parameter number") || !ParseInt64(¶meter_number) || @@ -611,7 +615,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kRecv: { - optional channel_id; + optional channel_id; attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/0) || !ParseAttributes(attrs)) { @@ -622,7 +626,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kRecvDone: { - optional channel_id; + optional channel_id; attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if 
(!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { @@ -636,7 +640,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kSend: { - optional channel_id; + optional channel_id; attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { @@ -647,7 +651,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kSendDone: { - optional channel_id; + optional channel_id; attrs["channel_id"] = {/*required=*/true, AttrTy::kInt64, &channel_id}; if (!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { @@ -661,7 +665,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kGetTupleElement: { - optional index; + optional index; attrs["index"] = {/*required=*/true, AttrTy::kInt64, &index}; if (!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { @@ -719,7 +723,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } case HloOpcode::kFft: { optional fft_type; - optional> fft_length; + optional> fft_length; attrs["fft_type"] = {/*required=*/true, AttrTy::kFftType, &fft_type}; attrs["fft_length"] = {/*required=*/true, AttrTy::kBracedInt64List, &fft_length}; @@ -732,7 +736,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kBroadcast: { - optional> broadcast_dimensions; + optional> broadcast_dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &broadcast_dimensions}; if (!ParseOperands(&operands, /*expected_size=*/1) || @@ -744,7 +748,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kConcatenate: { - optional> dimensions; + optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; if (!ParseOperands(&operands) || 
!ParseAttributes(attrs) || @@ -770,7 +774,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional reduce_computation; attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, &reduce_computation}; - optional> dimensions_to_reduce; + optional> dimensions_to_reduce; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions_to_reduce}; if (!ParseOperands(&operands, /*expected_size=*/2) || @@ -783,7 +787,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kReverse: { - optional> dimensions; + optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; if (!ParseOperands(&operands, /*expected_size=*/1) || @@ -827,7 +831,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kDynamicSlice: { - optional> dynamic_slice_sizes; + optional> dynamic_slice_sizes; attrs["dynamic_slice_sizes"] = { /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes}; if (!ParseOperands(&operands, /*expected_size=*/2) || @@ -851,7 +855,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kTranspose: { - optional> dimensions; + optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; if (!ParseOperands(&operands, /*expected_size=*/1) || @@ -865,7 +869,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kBatchNormTraining: { optional epsilon; attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; - optional feature_index; + optional feature_index; attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, &feature_index}; if (!ParseOperands(&operands, /*expected_size=*/3) || @@ -881,7 +885,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kBatchNormInference: { optional epsilon; attrs["epsilon"] = {/*required=*/true, 
AttrTy::kFloat, &epsilon}; - optional feature_index; + optional feature_index; attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, &feature_index}; if (!ParseOperands(&operands, /*expected_size=*/5) || @@ -898,7 +902,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kBatchNormGrad: { optional epsilon; attrs["epsilon"] = {/*required=*/true, AttrTy::kFloat, &epsilon}; - optional feature_index; + optional feature_index; attrs["feature_index"] = {/*required=*/true, AttrTy::kInt64, &feature_index}; if (!ParseOperands(&operands, /*expected_size=*/5) || @@ -969,8 +973,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kReducePrecision: { - optional exponent_bits; - optional mantissa_bits; + optional exponent_bits; + optional mantissa_bits; attrs["exponent_bits"] = {/*required=*/true, AttrTy::kInt64, &exponent_bits}; attrs["mantissa_bits"] = {/*required=*/true, AttrTy::kInt64, @@ -1015,7 +1019,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } case HloOpcode::kHostCompute: { optional channel_name; - optional cost_estimate_ns; + optional cost_estimate_ns; attrs["channel_name"] = {/*required=*/true, AttrTy::kString, &channel_name}; attrs["cost_estimate_ns"] = {/*required=*/true, AttrTy::kInt64, @@ -1028,16 +1032,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kDot: { - optional> lhs_contracting_dims; + optional> lhs_contracting_dims; attrs["lhs_contracting_dims"] = { /*required=*/false, AttrTy::kBracedInt64List, &lhs_contracting_dims}; - optional> rhs_contracting_dims; + optional> rhs_contracting_dims; attrs["rhs_contracting_dims"] = { /*required=*/false, AttrTy::kBracedInt64List, &rhs_contracting_dims}; - optional> lhs_batch_dims; + optional> lhs_batch_dims; attrs["lhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List, &lhs_batch_dims}; - optional> rhs_batch_dims; + optional> rhs_batch_dims; 
attrs["rhs_batch_dims"] = {/*required=*/false, AttrTy::kBracedInt64List, &rhs_batch_dims}; @@ -1069,20 +1073,20 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, break; } case HloOpcode::kGather: { - optional> output_window_dims; + optional> output_window_dims; attrs["output_window_dims"] = { /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; - optional> elided_window_dims; + optional> elided_window_dims; attrs["elided_window_dims"] = { /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; - optional> gather_dims_to_operand_dims; + optional> gather_dims_to_operand_dims; attrs["gather_dims_to_operand_dims"] = {/*required=*/true, AttrTy::kBracedInt64List, &gather_dims_to_operand_dims}; - optional index_vector_dim; + optional index_vector_dim; attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64, &index_vector_dim}; - optional> window_bounds; + optional> window_bounds; attrs["window_bounds"] = {/*required=*/true, AttrTy::kBracedInt64List, &window_bounds}; @@ -1178,8 +1182,8 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, LocTy loc = lexer_.GetLoc(); bool maximal = false; bool replicated = false; - std::vector devices; - std::vector tile_assignment_dimensions; + std::vector devices; + std::vector tile_assignment_dimensions; Shape tile_shape; while (lexer_.GetKind() != TokKind::kRbrace) { switch (lexer_.GetKind()) { @@ -1206,7 +1210,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, } do { - int64 dim; + tensorflow::int64 dim; if (!ParseInt64(&dim)) { return false; } @@ -1218,7 +1222,7 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, return false; } do { - int64 device; + tensorflow::int64 device; if (!ParseInt64(&device)) { return false; } @@ -1277,10 +1281,10 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, } sharding->set_type(OpSharding::Type::OpSharding_Type_OTHER); *sharding->mutable_tile_shape() = tile_shape; - for (int64 dim : tile_assignment_dimensions) { 
+ for (tensorflow::int64 dim : tile_assignment_dimensions) { sharding->add_tile_assignment_dimensions(dim); } - for (int64 device : devices) { + for (tensorflow::int64 device : devices) { sharding->add_tile_assignment_devices(device); } } @@ -1315,40 +1319,50 @@ bool HloParser::ParseInstructionNames( "expects '}' at the end of instruction name list"); } -bool HloParser::SetValueInLiteral(int64 value, int64 linear_index, +bool HloParser::SetValueInLiteral(tensorflow::int64 value, + tensorflow::int64 linear_index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { case S8: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case S16: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case S32: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case S64: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case U8: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case U16: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case U32: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case U64: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); default: LOG(FATAL) << "unknown integral primitive type " << PrimitiveType_Name(shape.element_type()); } } -bool HloParser::SetValueInLiteral(double value, int64 linear_index, +bool HloParser::SetValueInLiteral(double value, tensorflow::int64 linear_index, Literal* 
literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { case F16: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, literal); case BF16: - return SetValueInLiteralHelper(value, linear_index, literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case F32: return SetValueInLiteralHelper(value, linear_index, literal); case F64: @@ -1359,7 +1373,7 @@ bool HloParser::SetValueInLiteral(double value, int64 linear_index, } } -bool HloParser::SetValueInLiteral(bool value, int64 linear_index, +bool HloParser::SetValueInLiteral(bool value, tensorflow::int64 linear_index, Literal* literal) { const Shape& shape = literal->shape(); switch (shape.element_type()) { @@ -1372,7 +1386,8 @@ bool HloParser::SetValueInLiteral(bool value, int64 linear_index, } template -bool HloParser::SetValueInLiteralHelper(ParsedElemT value, int64 linear_index, +bool HloParser::SetValueInLiteralHelper(ParsedElemT value, + tensorflow::int64 linear_index, Literal* literal) { // Check that linear_index is in range. if (linear_index >= ShapeUtil::ElementsIn(literal->shape())) { @@ -1484,7 +1499,7 @@ bool HloParser::ParseNonTupleLiteral(std::unique_ptr* literal, bool HloParser::ParseDenseLiteral(std::unique_ptr* literal, const Shape& shape) { - const int64 rank = ShapeUtil::Rank(shape); + const tensorflow::int64 rank = ShapeUtil::Rank(shape); if (rank > 1 && !EatShapeAndCheckCompatible(shape)) { return false; } @@ -1492,8 +1507,8 @@ bool HloParser::ParseDenseLiteral(std::unique_ptr* literal, // Create a literal with the given shape in default layout. *literal = Literal::CreateFromDimensions(shape.element_type(), AsInt64Slice(shape.dimensions())); - int64 nest_level = 0; - int64 linear_index = 0; + tensorflow::int64 nest_level = 0; + tensorflow::int64 linear_index = 0; // elems_seen_per_dim[i] is how many elements or sub-arrays we have seen for // the dimension i. 
For example, to parse f32[2,3] {{1, 2, 3}, {4, 5, 6}}, // when we are parsing the 2nd '{' (right before '1'), we are seeing a @@ -1501,14 +1516,14 @@ bool HloParser::ParseDenseLiteral(std::unique_ptr* literal, // the first '}' (right after '3'), it means the sub-array ends, and the // sub-array is supposed to contain exactly 3 elements, so check if // elems_seen_per_dim[1] is 3. - std::vector elems_seen_per_dim(rank); + std::vector elems_seen_per_dim(rank); auto get_index_str = [&elems_seen_per_dim](int dim) -> string { - std::vector elems_seen_until_dim(elems_seen_per_dim.begin(), - elems_seen_per_dim.begin() + dim); + std::vector elems_seen_until_dim( + elems_seen_per_dim.begin(), elems_seen_per_dim.begin() + dim); return StrCat("[", Join(elems_seen_until_dim, ",", - [](string* out, const int64& num_elems) { - tensorflow::strings::StrAppend(out, num_elems - 1); + [](string* out, const tensorflow::int64& num_elems) { + StrAppend(out, num_elems - 1); }), "]"); }; @@ -1584,7 +1599,7 @@ bool HloParser::ParseDenseLiteral(std::unique_ptr* literal, lexer_.Lex(); } else if (primitive_util::IsIntegralType(shape.element_type())) { LocTy loc = lexer_.GetLoc(); - int64 value; + tensorflow::int64 value; if (!ParseInt64(&value)) { return Error(loc, StrCat("expects integer for primitive type: ", PrimitiveType_Name(shape.element_type()))); @@ -1624,29 +1639,29 @@ bool HloParser::ParseSparseLiteral(std::unique_ptr* literal, switch (shape.element_type()) { case PRED: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case S8: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case S16: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case S32: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case S64: - return ParseSparseLiteralHelper(literal, shape); + return 
ParseSparseLiteralHelper(literal, shape); case U8: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case U16: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case U32: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case U64: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case F16: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case F32: return ParseSparseLiteralHelper(literal, shape); case BF16: - return ParseSparseLiteralHelper(literal, shape); + return ParseSparseLiteralHelper(literal, shape); case F64: return ParseSparseLiteralHelper(literal, shape); default: @@ -1659,9 +1674,9 @@ bool HloParser::ParseSparseLiteral(std::unique_ptr* literal, template bool HloParser::ParseSparseLiteralHelper(std::unique_ptr* literal, const Shape& shape) { - std::vector index; + std::vector index; - int64 rank = ShapeUtil::Rank(shape); + tensorflow::int64 rank = ShapeUtil::Rank(shape); *literal = MakeUnique(shape); @@ -1679,7 +1694,7 @@ bool HloParser::ParseSparseLiteralHelper(std::unique_ptr* literal, LocTy index_loc = lexer_.GetLoc(); index.clear(); if (lexer_.GetKind() == TokKind::kInt) { - int64 single_index = lexer_.GetInt64Val(); + tensorflow::int64 single_index = lexer_.GetInt64Val(); lexer_.Lex(); if (rank != 1) { return Error( @@ -1712,7 +1727,7 @@ bool HloParser::ParseSparseLiteralHelper(std::unique_ptr* literal, value = static_cast(lexer_.GetKind() == TokKind::kw_true); lexer_.Lex(); } else if (primitive_util::IsIntegralType(shape.element_type())) { - int64 value_s64; + tensorflow::int64 value_s64; if (!ParseInt64(&value_s64)) { return Error(value_loc, StrCat("expects integer for primitive type: ", @@ -1885,23 +1900,24 @@ bool HloParser::ParseAttributeHelper( LocTy attr_loc = lexer_.GetLoc(); switch 
(attr_type) { case AttrTy::kInt64: { - int64 result; + tensorflow::int64 result; if (!ParseInt64(&result)) { return false; } - static_cast*>(attr_out_ptr)->emplace(result); + static_cast*>(attr_out_ptr) + ->emplace(result); return true; } case AttrTy::kInt32: { - int64 result; + tensorflow::int64 result; if (!ParseInt64(&result)) { return false; } - if (result != static_cast(result)) { + if (result != static_cast(result)) { return Error(attr_loc, "value out of range for int32"); } - static_cast*>(attr_out_ptr) - ->emplace(static_cast(result)); + static_cast*>(attr_out_ptr) + ->emplace(static_cast(result)); return true; } case AttrTy::kFloat: { @@ -1977,12 +1993,12 @@ bool HloParser::ParseAttributeHelper( return true; } case AttrTy::kBracedInt64List: { - std::vector result; + std::vector result; if (!ParseInt64List(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, &result)) { return false; } - static_cast>*>(attr_out_ptr) + static_cast>*>(attr_out_ptr) ->emplace(result); return true; } @@ -2157,7 +2173,7 @@ bool HloParser::ParseConvolutionDimensionNumbers( << str; } - const int64 rank = lhs_rhs_out[0].length(); + const tensorflow::int64 rank = lhs_rhs_out[0].length(); if (rank != lhs_rhs_out[1].length() || rank != lhs_rhs_out[2].length()) { return TokenError( "convolution lhs, rhs, and output must have the same rank"); @@ -2271,7 +2287,7 @@ bool HloParser::ParseSliceRanges(SliceRanges* result) { if (!ParseToken(TokKind::kLbrace, "expects '{' to start ranges")) { return false; } - std::vector> ranges; + std::vector> ranges; if (lexer_.GetKind() == TokKind::kRbrace) { // empty return ParseToken(TokKind::kRbrace, "expects '}' to end ranges"); @@ -2305,7 +2321,7 @@ bool HloParser::ParseSliceRanges(SliceRanges* result) { // ::= int64_val (delim int64_val)* bool HloParser::ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, - std::vector* result) { + std::vector* result) { if (!ParseToken(start, StrCat("expects an int64 list starting with ", 
TokKindToString(start)))) { return false; @@ -2314,7 +2330,7 @@ bool HloParser::ParseInt64List(const TokKind start, const TokKind end, // empty } else { do { - int64 i; + tensorflow::int64 i; if (!ParseInt64(&i)) { return false; } @@ -2431,7 +2447,8 @@ bool HloParser::ParseString(string* result) { return true; } -bool HloParser::ParseDxD(const string& name, std::vector* result) { +bool HloParser::ParseDxD(const string& name, + std::vector* result) { LocTy loc = lexer_.GetLoc(); if (!result->empty()) { return Error(loc, @@ -2439,7 +2456,7 @@ bool HloParser::ParseDxD(const string& name, std::vector* result) { } // 1D if (lexer_.GetKind() == TokKind::kInt) { - int64 number; + tensorflow::int64 number; if (!ParseInt64(&number)) { return Error(loc, Printf("expects sub-attribute '%s=i'", name.c_str())); } @@ -2459,7 +2476,8 @@ bool HloParser::ParseDxD(const string& name, std::vector* result) { return TokenError("expects token type kInt or kDxD"); } -bool HloParser::ParseWindowPad(std::vector>* pad) { +bool HloParser::ParseWindowPad( + std::vector>* pad) { LocTy loc = lexer_.GetLoc(); if (!pad->empty()) { return Error(loc, "sub-attribute 'pad=' already exists"); @@ -2470,7 +2488,7 @@ bool HloParser::ParseWindowPad(std::vector>* pad) { string str = lexer_.GetStrVal(); std::vector padding_str = Split(str, 'x'); for (int i = 0; i < padding_str.size(); i++) { - std::vector low_high; + std::vector low_high; if (!SplitAndParseAsInts(padding_str[i], '_', &low_high) || low_high.size() != 2) { return Error(loc, @@ -2494,7 +2512,7 @@ bool HloParser::ParsePaddingConfig(PaddingConfig* padding) { string str = lexer_.GetStrVal(); std::vector padding_str = Split(str, 'x'); for (const auto& padding_dim_str : padding_str) { - std::vector padding_dim; + std::vector padding_dim; if (!SplitAndParseAsInts(padding_dim_str, '_', &padding_dim) || (padding_dim.size() != 2 && padding_dim.size() != 3)) { return Error(loc, @@ -2516,7 +2534,7 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) { 
optional op_type; optional op_name; optional source_file; - optional source_line; + optional source_line; attrs["op_type"] = {/*required=*/false, AttrTy::kString, &op_type}; attrs["op_name"] = {/*required=*/false, AttrTy::kString, &op_name}; attrs["source_file"] = {/*required=*/false, AttrTy::kString, &source_file}; @@ -2603,7 +2621,7 @@ bool HloParser::ParseRandomDistribution(RandomDistribution* result) { return true; } -bool HloParser::ParseInt64(int64* result) { +bool HloParser::ParseInt64(tensorflow::int64* result) { VLOG(1) << "ParseInt64"; if (lexer_.GetKind() != TokKind::kInt) { return TokenError("expects integer"); @@ -2726,8 +2744,8 @@ HloParser::ParseConvolutionDimensionNumbersOnly() { } // namespace -StatusOr> Parse(StringPiece str, - const HloModuleConfig& config) { +StatusOr> ParseHloString( + tensorflow::StringPiece str, const HloModuleConfig& config) { HloParser parser(str, config); if (!parser.Run()) { return InvalidArgument("Syntax error:\n%s", parser.GetError().c_str()); @@ -2735,9 +2753,10 @@ StatusOr> Parse(StringPiece str, return parser.ConsumeHloModule(); } -StatusOr> Parse(StringPiece str) { +StatusOr> ParseHloString( + tensorflow::StringPiece str) { HloModuleConfig config; - return Parse(str, config); + return ParseHloString(str, config); } StatusOr ParseSharding(tensorflow::StringPiece str) { @@ -2759,5 +2778,4 @@ StatusOr ParseConvolutionDimensionNumbers( return parser.ParseConvolutionDimensionNumbersOnly(); } -} // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h similarity index 70% rename from tensorflow/compiler/xla/tools/parser/hlo_parser.h rename to tensorflow/compiler/xla/service/hlo_parser.h index 902c45cebc..3f3a51215e 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.h +++ b/tensorflow/compiler/xla/service/hlo_parser.h @@ -13,28 +13,31 @@ See the License for the specific language governing permissions and limitations under 
the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_PARSER_H_ -#define TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_PARSER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_ #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_lexer.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_lexer.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { -namespace tools { + +// For details about the syntax accepted by this parser, see +// g3doc/hlo_parser.md. // The api of the hlo parser. Given a string in the HloModule::ToString() // format, parses the string and creates a HloModule with the given config. -StatusOr> Parse(tensorflow::StringPiece str, - const HloModuleConfig& config); +StatusOr> ParseHloString( + tensorflow::StringPiece str, const HloModuleConfig& config); // The api of the hlo parser. Given a string in the HloModule::ToString() // format, parses the string and creates a HloModule with default config. -StatusOr> Parse(tensorflow::StringPiece str); +StatusOr> ParseHloString( + tensorflow::StringPiece str); // Parses the result of HloSharding::ToString(), e.g. "{replicated}". StatusOr ParseSharding(tensorflow::StringPiece str); @@ -47,7 +50,10 @@ StatusOr ParseWindow(tensorflow::StringPiece str); StatusOr ParseConvolutionDimensionNumbers( tensorflow::StringPiece str); -} // namespace tools +// ParseHloString sharding from str. str is supposed to contain the body of the +// sharding, i.e. just the rhs of the "sharding={...}" attribute string. 
+StatusOr ParseSharding(tensorflow::StringPiece str); + } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_PARSER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_ diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc similarity index 94% rename from tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc rename to tensorflow/compiler/xla/service/hlo_parser_test.cc index 3c5957b96a..9a18b4f845 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include #include "tensorflow/compiler/xla/window_util.h" @@ -23,10 +23,10 @@ limitations under the License. #include "tensorflow/core/platform/test.h" namespace xla { -namespace tools { + namespace { -using tensorflow::StringPiece; +using ::tensorflow::StringPiece; struct TestData { string test_name; @@ -901,12 +901,12 @@ class HloParserTest : public ::testing::Test, << "'" << s << "' does not contain '" << expected << "'"; } - // Expects "ToString(Parse(string)) == string", that is, parses the string, - // asserts that it succeeded, stringifies the parsed module, and checks that - // the it equals the original string. + // Expects "ToString(ParseHloString(string)) == string", that is, parses the + // string, asserts that it succeeded, stringifies the parsed module, and + // checks that the it equals the original string. 
void ExpectEqual() { const string& original = GetParam().module_string; - auto result = Parse(original); + auto result = ParseHloString(original); TF_ASSERT_OK(result.status()); EXPECT_EQ(original, result.ValueOrDie()->ToString( HloPrintOptions().set_print_large_constants(true))); @@ -917,7 +917,7 @@ class HloParserShortTest : public HloParserTest { protected: void ExpectEqualShort() { const string& original = GetParam().module_string; - auto result = Parse(original); + auto result = ParseHloString(original); TF_ASSERT_OK(result.status()); EXPECT_EQ(original, result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable())); @@ -938,13 +938,13 @@ INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest, TEST_F(HloParserTest, Empty) { const string original = ""; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } TEST_F(HloParserTest, Garbage) { const string original = "HloModule thi$ str1ng makes# N0 sen$e @all!*&^%$"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } @@ -958,7 +958,7 @@ ENTRY %blabla (x: f32[], y: f32[]) -> f32[] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } @@ -970,7 +970,7 @@ ENTRY %blabla (x: g32[]) -> g32[] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } @@ -983,7 +983,7 @@ ENTRY %blabla (x: f32[]) -> pred[] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } @@ -994,7 +994,7 @@ ENTRY %blabla (x: f32[]) -> pred[] { %eq = pred[]{} equal-to(f32[]{} %x, f32[]{} %y) } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); } @@ -1009,7 +1009,7 @@ ENTRY %SelectScalarS32True.v4 () -> s32[] { } )"; - 
auto result = Parse(original); + auto result = ParseHloString(original); TF_EXPECT_OK(result.status()); // Constant instructions have no name. The string will be parsed successfully // but the constant names will not be exactly the same. @@ -1020,7 +1020,7 @@ TEST_F(HloParserTest, ConfigurationField) { ENTRY %configuration_test() -> s32[] { %constant = s32[] constant(42), backend_config="foo bar" })"; - auto result = Parse(original); + auto result = ParseHloString(original); TF_ASSERT_OK(result.status()); EXPECT_EQ("foo bar", result.ValueOrDie() ->entry_computation() @@ -1036,7 +1036,7 @@ ENTRY %some_2 () -> f32[2] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); ExpectHasSubstr(result.status().error_message(), "expects nested array in rank 1, but sees larger"); @@ -1050,7 +1050,7 @@ ENTRY %some_2x3 () -> f32[2,3] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); ExpectHasSubstr(result.status().error_message(), "expects nested array in rank 2, but sees 1"); @@ -1064,7 +1064,7 @@ ENTRY %some_2x3x2 () -> f32[2,3,2] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); ExpectHasSubstr(result.status().error_message(), "expects 3 elements in the [0]th element"); @@ -1079,7 +1079,7 @@ ENTRY %ConstantF16Overflow.v4 () -> f16[] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); EXPECT_NE(Status::OK(), result.status()); ExpectHasSubstr(result.status().error_message(), "is out of range for literal's primitive type F16"); @@ -1093,7 +1093,7 @@ ENTRY %ConstantWithExp.v4 () -> f32[] { } )"; - auto result = Parse(original); + auto result = ParseHloString(original); TF_EXPECT_OK(result.status()); // The string will be parsed successfully but the output strings are not // exactly the same, because "3e2" is parsed into value 300 and 
will be @@ -1111,7 +1111,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 } )"; - TF_EXPECT_OK(Parse(original).status()); + TF_EXPECT_OK(ParseHloString(original).status()); } TEST_F(HloParserTest, InvalidDimLabels) { @@ -1127,17 +1127,18 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 )"; + ExpectHasSubstr(ParseHloString(tensorflow::strings::StrCat( + prefix, ",dim_labels=00_01_10", suffix)) + .status() + .error_message(), + "expects dim labels pattern"); + ExpectHasSubstr( - Parse(tensorflow::strings::StrCat(prefix, ",dim_labels=00_01_10", suffix)) + ParseHloString(tensorflow::strings::StrCat( + prefix, ",dim_labels=010_1100->010", suffix)) .status() .error_message(), - "expects dim labels pattern"); - - ExpectHasSubstr(Parse(tensorflow::strings::StrCat( - prefix, ",dim_labels=010_1100->010", suffix)) - .status() - .error_message(), - "must have the same rank"); + "must have the same rank"); } TEST_F(HloParserTest, UnexpectedAttribute) { @@ -1152,7 +1153,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { } )"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "unexpected attribute \"calls\""); } @@ -1168,7 +1169,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { } )"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "attribute channel_id is expected but not seen"); } @@ -1184,7 +1185,7 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] { } )"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "'done' is not defined"); } @@ -1197,7 +1198,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] { } )"; - TF_EXPECT_OK(Parse(original).status()); + TF_EXPECT_OK(ParseHloString(original).status()); } TEST_F(HloParserTest, PaddingConfigIsNotWindowPad) { @@ 
-1211,7 +1212,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,1], filter: f32[1,1,1]) -> f32[1,2 } )"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "expects padding_low and padding_high separated by '_'"); } @@ -1223,7 +1224,7 @@ ENTRY %test_comma.v4 () -> f32[] { } )"; - TF_EXPECT_OK(Parse(original).status()); + TF_EXPECT_OK(ParseHloString(original).status()); } TEST_F(HloParserTest, ComputationShapeDoesNotMatchRootShape) { @@ -1233,7 +1234,7 @@ ENTRY %CustomCall () -> f32[1] { %constant = f32[1]{0} constant({12345}) ROOT %foo = f32[1,2,3]{0,2,1} custom-call(f32[1]{0} %constant), custom_call_target="foo\"bar" })"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "Shape of computation CustomCall, f32[1], is not compatible " "with that of its root instruction foo, f32[1,2,3]"); } @@ -1252,7 +1253,7 @@ ENTRY %Reduce (input: f32[8,16,256]) -> f32[8,16] { ROOT reduce = f32[8,16]{0,1} reduce(input, constant), dimensions={2}, to_apply=add_F32.v3 })"; - auto module = Parse(original); + auto module = ParseHloString(original); TF_ASSERT_OK(module.status()); auto program_layout = module.ValueOrDie()->host_entry_computation_layout(); ASSERT_EQ(program_layout.parameter_count(), 1); @@ -1275,7 +1276,7 @@ c1 { c2 { const2 = f32[1]{0} constant({67890}) })"; - auto module = Parse(original); + auto module = ParseHloString(original); TF_ASSERT_OK(module.status()); EXPECT_EQ(module.ValueOrDie()->entry_computation()->name(), "c2"); } @@ -1286,7 +1287,7 @@ ENTRY consts { first = f32[1]{0} constant({12345}) last = f32[1]{0} constant({67890}) })"; - auto module = Parse(original); + auto module = ParseHloString(original); TF_ASSERT_OK(module.status()); EXPECT_EQ( module.ValueOrDie()->entry_computation()->root_instruction()->name(), @@ -1301,7 +1302,7 @@ ENTRY c1 { ENTRY c2 { const2 = f32[1]{0} constant({67890}) })"; - 
ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "expects only one ENTRY"); } @@ -1311,7 +1312,7 @@ ENTRY consts { ROOT const1 = f32[1]{0} constant({12345}) ROOT const2 = f32[1]{0} constant({12345}) })"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), "one computation should have only one ROOT"); } @@ -1323,7 +1324,7 @@ comp { comp { const2 = f32[1]{0} constant({67890}) })"; - ExpectHasSubstr(Parse(original).status().error_message(), + ExpectHasSubstr(ParseHloString(original).status().error_message(), R"(was parsing 2:1: error: computation previously defined here comp { ^)"); @@ -1346,7 +1347,7 @@ ENTRY entry { ROOT call1 = s32[] call(param), to_apply=tcallb })"; ExpectHasSubstr( - Parse(original).status().error_message(), + ParseHloString(original).status().error_message(), "was parsing 8:39: error: instruction does not exist: aparam"); } @@ -1371,5 +1372,4 @@ TEST_F(HloParserTest, ParseConvolutionDimensionNumbers) { } } // namespace -} // namespace tools } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 31e13da0c0..e1f9d8efd4 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -22,9 +22,9 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -36,7 +36,7 @@ HloRunner::CreateModuleFromString(const tensorflow::StringPiece hlo_string, const DebugOptions& debug_options) { HloModuleConfig config; config.set_debug_options(debug_options); - return tools::Parse(hlo_string, config); + return ParseHloString(hlo_string, config); } namespace { @@ -80,7 +80,7 @@ HloRunner::ReadModuleFromHloTextFile(const std::string& filename, filename, &hlo_string)); HloModuleConfig config; config.set_debug_options(debug_options); - return tools::Parse(hlo_string, config); + return ParseHloString(hlo_string, config); } HloRunner::HloRunner(se::Platform* platform) { diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index 0bc930f9ea..db7ef6f0d4 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -22,9 +22,9 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -158,7 +158,7 @@ ENTRY root { })"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(module_str)); + ParseHloString(module_str)); auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 94d1a3226b..ee7133689b 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" namespace xla { @@ -311,10 +311,10 @@ TEST_F(HloShardingTest, OstreamTest) { EXPECT_EQ(oss.str(), "{f32[3,5,7,11] devices=[1,1,2,2]0,1,2,3}"); } -TEST_F(HloShardingTest, Parse) { +TEST_F(HloShardingTest, ParseHloString) { auto check = [](const HloSharding& sharding) { TF_ASSERT_OK_AND_ASSIGN(auto parsed_sharding, - tools::ParseSharding(sharding.ToString())); + ParseSharding(sharding.ToString())); EXPECT_EQ(sharding, parsed_sharding); }; check(HloSharding::Replicate()); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_token.h b/tensorflow/compiler/xla/service/hlo_token.h similarity index 84% rename from tensorflow/compiler/xla/tools/parser/hlo_token.h rename to tensorflow/compiler/xla/service/hlo_token.h index 7928bee5c2..533429608b 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_token.h +++ b/tensorflow/compiler/xla/service/hlo_token.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_TOKEN_H_ -#define TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_TOKEN_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_ #include @@ -22,9 +22,11 @@ limitations under the License. #include "tensorflow/core/platform/types.h" namespace xla { -namespace tools { // Defines different kinds of tokens in a hlo module string. 
+// +// You shouldn't need to use this directly unless you're using HloLexer +// directly, and you probably don't need to do that. Use hlo_parser instead. enum class TokKind { // Markers kEof, @@ -72,7 +74,6 @@ enum class TokKind { string TokKindToString(TokKind kind); -} // namespace tools } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_TOOLS_PARSER_HLO_TOKEN_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_ diff --git a/tensorflow/compiler/xla/service/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/instruction_fusion_test.cc index df109df787..21db233899 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion_test.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/instruction_fusion.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { @@ -47,7 +47,7 @@ class InstructionFusionForTesting : public InstructionFusion { }; TEST_F(InstructionFusionTest, FuseInstructions) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY entry_computation { p0 = f32[4,3]{1,0} parameter(0) @@ -67,7 +67,7 @@ TEST_F(InstructionFusionTest, FuseInstructions) { } TEST_F(InstructionFusionTest, FuseIntoFusionInstruction) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module fused_computation { p1 = f32[4,3] parameter(0) @@ -90,7 +90,7 @@ TEST_F(InstructionFusionTest, FuseIntoFusionInstruction) { } TEST_F(InstructionFusionTest, FuseInstructionsIntoMultiOutput) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY entry_computation { p0 = f32[4,3]{1,0} parameter(0) @@ -195,7 +195,7 @@ static int Count(const HloModule& module, 
HloOpcode op) { } TEST_F(InstructionFusionTest, FuseCheapNonDuplicatableOps) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -220,7 +220,7 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) { // // p0 -> add -------------------------> sub // \-> abs1 -> rng -> abs2 -/ - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -251,7 +251,7 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) { // p0 -> add -------------------------> sub // \-> abs1 -> log -> abs2 -/ // \-> send - module = tools::Parse(R"( + module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -282,7 +282,7 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) { // \ \-> add2 -/ // \-> log -/ // \-> send - module = tools::Parse(R"( + module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -314,7 +314,7 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) { // \------> sub1 // log -/ // \-> send - module = tools::Parse(R"( + module = ParseHloString(R"( HloModule test_module ENTRY OutputFusion { p0 = f32[4,3]{1,0} parameter(0) @@ -390,7 +390,7 @@ TEST_F(InstructionFusionTest, AllowEffectiveUnaryDuplication) { TEST_F(InstructionFusionTest, WideningConvertsAreAlwaysDuplicableIntoConsumers) { - auto module = tools::Parse(R"( + auto module = ParseHloString(R"( HloModule test_module ENTRY Test { p0 = f16[100] parameter(0) diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 7508013199..bf0448a676 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -29,13 
+29,13 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_layout.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_utils.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -651,7 +651,7 @@ TEST_F(LayoutAssignmentTest, TransposeWithinFusionDoesNotCrash) { } )"; - auto module = tools::Parse(module_str).ValueOrDie(); + auto module = ParseHloString(module_str).ValueOrDie(); module = backend() @@ -691,7 +691,7 @@ TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { } )"; - auto module = tools::Parse(module_str).ValueOrDie(); + auto module = ParseHloString(module_str).ValueOrDie(); ComputationLayout computation_layout( module->entry_computation()->ComputeProgramShape()); Shape param_shape = ShapeUtil::MakeTupleShape( diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc index 204e8c9920..fef3c132b0 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc +++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/pattern_matcher.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/core/platform/test.h" namespace xla { @@ -29,7 +29,7 @@ TEST(PatternMatcherTest, AddOp) { ROOT %two_plus_two = f32[] add(f32[] %two, f32[] %two) } )"; - TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, tools::Parse(kModuleStr)); + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); const HloInstruction* matched_inst; HloInstruction* matched_operand; @@ -182,7 +182,7 @@ TEST(PatternMatcherTest, FusionKind) { p0 = f32[] parameter(0) ROOT fusion = f32[] fusion(p0), kind=kLoop, calls=fused_computation })"; - TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, tools::Parse(kModuleStr)); + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); auto* root = hlo_module->entry_computation()->root_instruction(); EXPECT_TRUE(Match( diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc index f73f1227aa..3139801ea3 100644 --- a/tensorflow/compiler/xla/service/transpose_folding_test.cc +++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc @@ -27,12 +27,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/logging.h" @@ -69,7 +69,7 @@ ENTRY entry_computation { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); FoldTranspose(module.get()); @@ -91,7 +91,7 @@ ENTRY entry_computation { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TransposeFolding transpose_folding( [](const HloInstruction& dot, @@ -119,7 +119,7 @@ ENTRY entry_computation { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TransposeFolding transpose_folding( [](const HloInstruction& dot, @@ -147,7 +147,7 @@ ENTRY entry_computation { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); FoldTranspose(module.get()); @@ -205,7 +205,7 @@ ENTRY entry_computation { } )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); FoldTranspose(module.get()); const HloComputation* callee = module->GetComputationWithName("callee"); diff --git a/tensorflow/compiler/xla/service/tuple_util_test.cc b/tensorflow/compiler/xla/service/tuple_util_test.cc index 754fd8ef16..d33d5bb8f3 100644 --- a/tensorflow/compiler/xla/service/tuple_util_test.cc +++ b/tensorflow/compiler/xla/service/tuple_util_test.cc @@ 
-16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/tuple_util.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace { @@ -37,7 +37,7 @@ ENTRY entry { )"; TF_ASSIGN_OR_RETURN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); *entry_computation = module->entry_computation(); *param0 = (*entry_computation)->parameter_instruction(0); diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc index 0d2288d8ea..393e758038 100644 --- a/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/core/status_test_util.h" namespace xla { @@ -55,7 +55,7 @@ ENTRY entry { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TF_ASSERT_OK_AND_ASSIGN(bool changed, WhileLoopConstantSinking{}.Run(module.get())); @@ -95,7 +95,7 @@ ENTRY entry { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TF_ASSERT_OK_AND_ASSIGN(bool changed, WhileLoopConstantSinking{}.Run(module.get())); @@ -136,7 +136,7 @@ ENTRY entry { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TF_ASSERT_OK_AND_ASSIGN(bool changed, 
WhileLoopConstantSinking{}.Run(module.get())); @@ -184,7 +184,7 @@ ENTRY entry { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); TF_ASSERT_OK_AND_ASSIGN(bool changed, WhileLoopConstantSinking{}.Run(module.get())); diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc index e1ec12192f..8831c513ee 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion_test.cc @@ -16,9 +16,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/core/status_test_util.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/while_util_test.cc b/tensorflow/compiler/xla/service/while_util_test.cc index bcc545c61d..d79d329721 100644 --- a/tensorflow/compiler/xla/service/while_util_test.cc +++ b/tensorflow/compiler/xla/service/while_util_test.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/while_util.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/util.h" namespace xla { @@ -50,7 +50,7 @@ ENTRY entry { )"; TF_ASSIGN_OR_RETURN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); *entry_computation = module->entry_computation(); *param0 = (*entry_computation)->parameter_instruction(0); @@ -151,7 +151,7 @@ ENTRY main { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* while_body = module->GetComputationWithName("body"); @@ -190,7 +190,7 @@ ENTRY main { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_string)); + ParseHloString(hlo_string)); HloComputation* main = module->GetComputationWithName("main"); HloInstruction* while_instr = main->root_instruction(); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index a62d49e9c7..7f6bbe6f87 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -117,11 +117,11 @@ cc_library( "//tensorflow/compiler/xla/service:backend", "//tensorflow/compiler/xla/service:computation_layout", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:hlo_verifier", "//tensorflow/compiler/xla/service:interpreter_plugin", # reference backend "//tensorflow/compiler/xla/service:platform_util", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core:test", @@ -138,8 +138,8 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", 
"//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/service:hlo_verifier", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -697,8 +697,8 @@ xla_test( "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) @@ -1195,9 +1195,9 @@ xla_test( ], deps = [ ":client_library_test_base", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], @@ -1520,11 +1520,11 @@ xla_test( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/client/xla_client:xla_computation", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc b/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc index b159887765..c960b3c15f 100644 --- a/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc +++ b/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc @@ -14,12 +14,12 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace { @@ -36,7 +36,8 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, OneOperand) { p = f32[3] parameter(0) ROOT crs = f32[3] cross-replica-sum(p) })"; - auto module = tools::Parse(module_str, GetModuleConfigForTest()).ValueOrDie(); + auto module = + ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie(); auto literal = Literal::CreateR1({1, 2, 3}); EXPECT_EQ(*literal, *ExecuteAndTransfer(std::move(module), {literal.get()})); } @@ -49,7 +50,8 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, MultipleOperands) { p1 = f32[2] parameter(1) ROOT crs = (f32[3], f32[2]) cross-replica-sum(p0, p1) })"; - auto module = tools::Parse(module_str, GetModuleConfigForTest()).ValueOrDie(); + auto module = + ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie(); auto literal0 = Literal::CreateR1({1, 2, 3}); auto literal1 = Literal::CreateR1({10, 20}); EXPECT_EQ( @@ -68,7 +70,8 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, ConstantOperand) { p1 = f32[2] constant({10, 20}) ROOT crs = (f32[3], f32[2]) cross-replica-sum(p0, p1) })"; - auto module = tools::Parse(module_str, GetModuleConfigForTest()).ValueOrDie(); + auto module = + ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie(); auto literal0 = Literal::CreateR1({1, 2, 3}); auto literal1 = Literal::CreateR1({10, 20}); EXPECT_EQ(*Literal::MakeTuple({literal0.get(), literal1.get()}), diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc 
b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 4854c649c1..143ffbdeb4 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -14,12 +14,12 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/execution_options_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" // NB! TODO(b/74360564): These tests do not test out of bounds behavior since // that hasn't been specced yet. @@ -41,7 +41,7 @@ class GatherOperationTest : public HloTestBase { HloModuleConfig config; config.set_debug_options(GetDebugOptionsForTest()); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, - tools::Parse(hlo_text, config)); + ParseHloString(hlo_text, config)); EXPECT_TRUE(RunAndCompare(std::move(module), args, nullopt)); } }; diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 36e19e6507..08ed826c80 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -23,11 +23,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_utils.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/gtl/array_slice.h" diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index da4cf4ae0c..c8a05c2e9e 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -15,10 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -67,7 +67,7 @@ HloModule& HloVerifiedTestBase::module() { void HloVerifiedTestBase::ParseAndVerifyModule( tensorflow::StringPiece hlo_text) { CHECK(!module_) << "Called ParseModule when test already has a module."; - TF_ASSERT_OK_AND_ASSIGN(module_, tools::Parse(hlo_text)); + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); VerifyModule(); } } // namespace xla diff --git a/tensorflow/compiler/xla/tests/reduce_hlo_test.cc b/tensorflow/compiler/xla/tests/reduce_hlo_test.cc index c0a2c0ca4c..9052b188ed 100644 --- 
a/tensorflow/compiler/xla/tests/reduce_hlo_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_hlo_test.cc @@ -15,9 +15,9 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" -#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/test.h" @@ -73,7 +73,7 @@ ENTRY reduce.1 { } )"; - return tools::Parse(hlo_string); + return ParseHloString(hlo_string); } // TODO(b/72454718): XLA:GPU does not support executing code compiled without diff --git a/tensorflow/compiler/xla/tools/parser/BUILD b/tensorflow/compiler/xla/tools/parser/BUILD deleted file mode 100644 index 76f35afd53..0000000000 --- a/tensorflow/compiler/xla/tools/parser/BUILD +++ /dev/null @@ -1,73 +0,0 @@ -# Build file for the Hlo parser. - -licenses(["notice"]) # Apache 2.0 - -package( - default_visibility = [":friends"], -) - -package_group( - name = "friends", - includes = [ - "//tensorflow/compiler/xla:friends", - ], -) - -# Filegroup used to collect source files for dependency checking. 
-filegroup( - name = "c_srcs", - data = glob([ - "**/*.cc", - "**/*.h", - ]), -) - -load("//tensorflow:tensorflow.bzl", "tf_cc_test") - -cc_library( - name = "hlo_lexer", - srcs = ["hlo_lexer.cc"], - hdrs = [ - "hlo_lexer.h", - "hlo_token.h", - ], - deps = [ - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/core:lib", - "//tensorflow/core:regexp_internal", - ], -) - -cc_library( - name = "hlo_parser", - srcs = ["hlo_parser.cc"], - hdrs = ["hlo_parser.h"], - deps = [ - ":hlo_lexer", - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ], -) - -tf_cc_test( - name = "hlo_parser_test", - size = "small", - srcs = ["hlo_parser_test.cc"], - deps = [ - ":hlo_parser", - "//tensorflow/compiler/xla:window_util", - "//tensorflow/core:lib", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) -- GitLab From 2d71691dad337c4e7a6b5dbf18fd0ab0e6bd7cf6 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Fri, 1 Jun 2018 15:36:29 -0700 Subject: [PATCH 414/902] Swift for TensorFlow lives in GitHub, for now. Update ecosystem page and dropdown menu. Remove community/swift page and add redirect. 
PiperOrigin-RevId: 198936463 --- tensorflow/docs_src/community/leftnav_files | 1 - tensorflow/docs_src/community/swift.md | 60 --------------------- 2 files changed, 61 deletions(-) delete mode 100644 tensorflow/docs_src/community/swift.md diff --git a/tensorflow/docs_src/community/leftnav_files b/tensorflow/docs_src/community/leftnav_files index 2bae60d9dd..0bd1f14de9 100644 --- a/tensorflow/docs_src/community/leftnav_files +++ b/tensorflow/docs_src/community/leftnav_files @@ -6,4 +6,3 @@ groups.md documentation.md style_guide.md benchmarks.md -swift.md diff --git a/tensorflow/docs_src/community/swift.md b/tensorflow/docs_src/community/swift.md deleted file mode 100644 index 070f9931e0..0000000000 --- a/tensorflow/docs_src/community/swift.md +++ /dev/null @@ -1,60 +0,0 @@ -

- -

- -# Swift for TensorFlow - -Welcome to the Swift for TensorFlow development community! - -Swift for TensorFlow is a new way to develop machine learning models. It -gives you the power of -[TensorFlow](https://www.tensorflow.org) directly -integrated into the [Swift programming language](https://swift.org/about). -With Swift, you can write the following imperative code, and Swift -automatically turns it into **a single TensorFlow Graph** and runs it -with the full performance of TensorFlow Sessions on CPU, GPU and -[TPU](https://cloud.google.com/tpu/docs/tpus). - -```swift -import TensorFlow - -var x = Tensor([[1, 2], [3, 4]]) - -for i in 1...5 { - x += matmul(x, x) -} - -print(x) -``` - -Swift combines the flexibility of -[Eager Execution](https://www.tensorflow.org/programmers_guide/eager) with the -high performance of [Graphs and Sessions](https://www.tensorflow.org/programmers_guide/graphs). -Behind the scenes, Swift analyzes your Tensor code and automatically builds -graphs for you. Swift also catches type errors and shape mismatches before -running your code, and has [Automatic Differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) -built right in. We believe that machine learning tools are so important that -they deserve **a first-class language and a compiler**. - -Note: Swift for TensorFlow is an early stage research project. It has been -released to enable open source development and is not yet ready for general use -by machine learning developers. - -## Open Source - -We have released Swift for TensorFlow as an open-source project on GitHub! - -Our [documentation repository](https://github.com/tensorflow/swift) contains a -[project overview](https://github.com/tensorflow/swift/blob/master/docs/DesignOverview.md) -and [technical papers](https://github.com/tensorflow/swift/tree/master/docs) -explaining specific areas in depth. 
There are also instructions for [installing -pre-built packages](https://github.com/tensorflow/swift/blob/master/Installation.md) -(for macOS and Ubuntu) as well as a simple -[usage tutorial](https://github.com/tensorflow/swift/blob/master/Usage.md). - -Moving forward, we will use an open design model and all discussions will be -public. - -[Sign up here to join the community Google -group](https://groups.google.com/a/tensorflow.org/d/forum/swift), which we will -use for announcements and general discussion. -- GitLab From 25486ef05d59265b769684589b738636b3207cc7 Mon Sep 17 00:00:00 2001 From: Vinu Rajashekhar Date: Fri, 1 Jun 2018 15:44:29 -0700 Subject: [PATCH 415/902] Adds a batch-op implemented using TF functions. o This has a couple of important advantages over the current implementation: 1. The existing batch-op waits for the batch to be created and then forwards the tensors to the rest of the graph, which causes a lot of batches to be created, because there is no way for the op to know if the other batches are being queued up. A mitigation, which we have seen working in practice, is to actually wait for the graph to finish processing the batch. So there is a sort of flow-control happening, and meanwhile the batches get coalesced, which improves latency and throughput as well. Using functions makes this kind of approach easier. 2. The existing op passes empty tensors around the graph to make the TF executor happy, which has sometimes not worked well with some Ops (like Reshape). Using functions also means that we don't need to rely on this mechanism.
PiperOrigin-RevId: 198937594 --- .../batching/python/ops/batch_ops_test.py | 87 ++++ .../base_api/api_def_BatchFunction.pbtxt | 128 ++++++ tensorflow/core/kernels/batch_kernels.cc | 390 +++++++++++++++--- tensorflow/core/ops/batch_ops.cc | 20 + 4 files changed, 564 insertions(+), 61 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt diff --git a/tensorflow/contrib/batching/python/ops/batch_ops_test.py b/tensorflow/contrib/batching/python/ops/batch_ops_test.py index e22f978dde..68e8a88ca0 100644 --- a/tensorflow/contrib/batching/python/ops/batch_ops_test.py +++ b/tensorflow/contrib/batching/python/ops/batch_ops_test.py @@ -23,7 +23,9 @@ import time from tensorflow.contrib.batching.python.ops import batch_ops from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_batch_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import script_ops from tensorflow.python.platform import test @@ -205,6 +207,91 @@ class BatchOpsTest(test.TestCase): self.assertEqual(thread_results[0], [2]) self.assertEqual(main_results[0], [3]) + def testBatchFunctionOp(self): + """Tests that the batch_func works.""" + with self.test_session() as sess: + + @function.Defun(dtypes.int32) + def computation(in_t): + return in_t + 1 + + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) + result = gen_batch_ops.batch_function( + [inp], + num_batch_threads=1, + max_batch_size=10, + batch_timeout_micros=100000, + Tout=[dtypes.int32], + f=computation, + captured_tensors=computation.captured_inputs) + thread_results = [] + + def worker(): + thread_results.extend(sess.run([result], feed_dict={inp: [1]})) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + main_results = sess.run([result], feed_dict={inp: [2]}) + worker_thread.join() + self.assertEqual(thread_results[0], [2]) + 
self.assertEqual(main_results[0], [3]) + + def testBatchFunctionOpWithCapturedInput(self): + """Tests that batch_func with timeout.""" + with self.test_session() as sess: + captured_inp0 = array_ops.placeholder_with_default(2, shape=[]) + captured_inp1 = array_ops.placeholder_with_default(1, shape=[]) + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) + + @function.Defun(dtypes.int32) + def computation(inp): + return inp + captured_inp0 - captured_inp1 + + result = gen_batch_ops.batch_function( + num_batch_threads=1, + max_batch_size=10, + batch_timeout_micros=100000, # 100ms + allowed_batch_sizes=[3, 10], + batching_queue="", + f=computation, + in_tensors=[inp], + captured_tensors=computation.captured_inputs, + Tout=[o.type for o in computation.definition.signature.output_arg]) + + thread_results = [] + + def worker(): + thread_results.extend(sess.run([result], feed_dict={inp: [1]})) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + main_results = sess.run([result], feed_dict={inp: [2]}) + worker_thread.join() + self.assertEqual(thread_results[0], [2]) + self.assertEqual(main_results[0], [3]) + + def testBasicUnbatchDecoratedWithReshape(self): + """Tests that the batch_function decorator works.""" + with self.test_session() as sess: + + @batch_ops.batch_function(1, 10, 100000) + def computation(in_t): + return array_ops.reshape(in_t, [-1]) + 1 + + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1, 1]) + result = computation(inp) + thread_results = [] + + def worker(): + thread_results.extend(sess.run([result], feed_dict={inp: [[1]]})) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + main_results = sess.run([result], feed_dict={inp: [[2]]}) + worker_thread.join() + self.assertEqual(thread_results[0], [2]) + self.assertEqual(main_results[0], [3]) + def testUnbatchTimeout(self): """Tests that the unbatch timeout works.""" with self.test_session() as sess: diff --git 
a/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt b/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt new file mode 100644 index 0000000000..09eff6177b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt @@ -0,0 +1,128 @@ +op { + graph_op_name: "BatchFunction" + in_arg { + name: "in_tensors" + description: < Status Concat(OpKernelContext* context, const gtl::ArraySlice& inputs, - int output_index) { + Tensor* output) { const int input_dims = inputs[0].dims(); const TensorShape& input_shape = inputs[0].shape(); @@ -76,9 +78,8 @@ Status Concat(OpKernelContext* context, const gtl::ArraySlice& inputs, TensorShape output_shape(input_shape); output_shape.set_dim(0, output_dim0); - Tensor* output = nullptr; TF_RETURN_IF_ERROR( - context->allocate_output(output_index, output_shape, &output)); + context->allocate_temp(DataTypeToEnum::value, output_shape, output)); if (output->NumElements() > 0) { auto output_flat = output->shaped({1, output->NumElements()}); #if GOOGLE_CUDA @@ -209,6 +210,7 @@ class BatchResource : public ResourceBase { static Status Create(int32 num_batch_threads, int32 max_batch_size, int32 batch_timeout_micros, int32 max_enqueued_batches, const std::vector& allowed_batch_sizes, + FunctionLibraryRuntime::Handle fhandle, std::unique_ptr* resource) { std::unique_ptr new_resource(new BatchResource); @@ -225,6 +227,8 @@ class BatchResource : public ResourceBase { new_resource->allowed_batch_sizes_ = allowed_batch_sizes; + new_resource->fhandle_ = fhandle; + *resource = std::move(new_resource); return Status::OK(); } @@ -254,6 +258,14 @@ class BatchResource : public ResourceBase { } batch_components->inputs.push_back(tensor); } + OpInputList captured_tensors; + const auto captured_status = + context->input_list("captured_tensors", &captured_tensors); + if (captured_status.ok()) { + for (const Tensor& captured_tensor : captured_tensors) { + batch_components->captured_inputs.push_back(captured_tensor); + } + } 
batch_components->context = context; batch_components->done_callback = std::move(done_callback); @@ -272,6 +284,7 @@ class BatchResource : public ResourceBase { int64 guid; std::vector inputs; + std::vector captured_inputs; OpKernelContext* context; AsyncOpKernel::DoneCallback done_callback; @@ -314,50 +327,32 @@ class BatchResource : public ResourceBase { return batch_size; } - // Processes a batch of one or more BatchTask entries. - void ProcessBatch(std::unique_ptr batch) const { - if (batch->empty()) { - return; + Status ConcatInputTensors(const Batch& batch, OpKernelContext* context, + std::vector* concatenated_tensors) const { + if (batch.num_tasks() == 0) { + return errors::InvalidArgument("Empty batch."); } - const int padded_batch_size = RoundToLowestAllowedBatchSize(batch->size()); - const int padding_amount = padded_batch_size - batch->size(); - OpKernelContext* last_task_context = - batch->task(batch->num_tasks() - 1).context; - AsyncOpKernel::DoneCallback last_task_callback = - batch->task(batch->num_tasks() - 1).done_callback; - - OP_REQUIRES_OK_ASYNC(last_task_context, ValidateBatch(*batch), - last_task_callback); + const int padded_batch_size = RoundToLowestAllowedBatchSize(batch.size()); + const int padding_amount = padded_batch_size - batch.size(); // All tasks should have the same number of input edges. - const int num_input_edges = batch->task(0).inputs.size(); - - // Process each input edge one at a time (the typical case has just one). - for (int i = 0; i < num_input_edges; ++i) { - // Emit batch->num_tasks() - 1 empty output tensors. 
- for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { - const BatchTask& task = batch->task(task_idx); - TensorShape output_shape(task.inputs.at(i).shape()); - output_shape.set_dim(0, 0); - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - task.context, - task.context->allocate_output(i, output_shape, &output), - task.done_callback); - } + const int num_inputs = batch.task(0).inputs.size(); + concatenated_tensors->reserve(num_inputs); + // Process each input one at a time (the typical case has just one). + for (int i = 0; i < num_inputs; ++i) { // Concatenate the tasks ith input tensors into a big output tensor. std::vector to_concatenate; - to_concatenate.reserve(batch->num_tasks()); - for (int task_idx = 0; task_idx < batch->num_tasks(); ++task_idx) { - to_concatenate.push_back(batch->task(task_idx).inputs.at(i)); + to_concatenate.reserve(batch.num_tasks()); + for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { + to_concatenate.push_back(batch.task(task_idx).inputs.at(i)); } // Add padding as needed. Use the first row of the first task's tensor as // the data for padding. 
if (padding_amount > 0) { - const Tensor& padding_source = batch->task(0).inputs.at(i); + const Tensor& padding_source = batch.task(0).inputs.at(i); Tensor padding; if (padding_source.shape().dim_size(0) == 1) { padding = padding_source; @@ -367,10 +362,10 @@ class BatchResource : public ResourceBase { Status slice_status; std::vector slices; switch (type) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - slice_status = SplitCPU(last_task_context, padding_source, \ - slice_sizes, &slices); \ +#define CASE(type) \ + case DataTypeToEnum::value: \ + slice_status = \ + SplitCPU(context, padding_source, slice_sizes, &slices); \ break; TF_CALL_ALL_TYPES(CASE); #undef CASE @@ -379,8 +374,7 @@ class BatchResource : public ResourceBase { errors::InvalidArgument("Unsupported data type: ", type); break; } - OP_REQUIRES_OK_ASYNC(last_task_context, slice_status, - last_task_callback); + TF_RETURN_IF_ERROR(slice_status); padding = slices.at(0); } for (int i = 0; i < padding_amount; ++i) { @@ -390,10 +384,12 @@ class BatchResource : public ResourceBase { const DataType type = to_concatenate[0].dtype(); Status concat_status; + Tensor concatenated_tensor; switch (type) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - concat_status = Concat(last_task_context, to_concatenate, i); \ +#define CASE(type) \ + case DataTypeToEnum::value: \ + concat_status = \ + Concat(context, to_concatenate, &concatenated_tensor); \ break; TF_CALL_ALL_TYPES(CASE); #undef CASE @@ -402,10 +398,190 @@ class BatchResource : public ResourceBase { errors::InvalidArgument("Unsupported data type: ", type); break; } - OP_REQUIRES_OK_ASYNC(last_task_context, concat_status, - last_task_callback); + TF_RETURN_IF_ERROR(concat_status); + concatenated_tensors->push_back(concatenated_tensor); } + return Status::OK(); + } + + Status SplitOutputTensors(const std::vector& combined_outputs, + Batch* batch) const { + DCHECK_GE(batch->num_tasks(), 1); + if (batch->num_tasks() < 1) { + return 
errors::Internal("Batch size expected to be positive; was ", + batch->num_tasks()); + } + + std::vector task_sizes_plus_optional_padding; + task_sizes_plus_optional_padding.reserve(batch->num_tasks()); + for (int i = 0; i < batch->num_tasks(); ++i) { + task_sizes_plus_optional_padding.push_back(batch->task(i).size()); + } + const int padding_size = + RoundToLowestAllowedBatchSize(batch->size()) - batch->size(); + if (padding_size > 0) { + task_sizes_plus_optional_padding.push_back(padding_size); + } + + // For each output tensor name, a divided-up tensor with one entry per task. + std::map> split_tensors; + + DCHECK_EQ(batch->task(0).context->num_outputs(), combined_outputs.size()); + if (combined_outputs.size() != batch->task(0).context->num_outputs()) { + return errors::Internal("Wrong number of batched output tensors"); + } + + // Generate 'split_tensors' and populate the context outputs. + for (int i = 0; i < combined_outputs.size(); ++i) { + const Tensor& output_tensor = combined_outputs[i]; + if (output_tensor.shape().dims() == 0) { + return errors::FailedPrecondition( + "Batched output tensor has 0 dimensions"); + } + if (output_tensor.shape().dim_size(0) != batch->size() + padding_size) { + return errors::FailedPrecondition( + "Batched output tensor's 0th dimension does not equal the sum of " + "the 0th dimension sizes of the input tensors"); + } + + std::vector split_tensor; + const Status split_status = tensor::Split( + output_tensor, task_sizes_plus_optional_padding, &split_tensor); + DCHECK(split_status.ok()) << split_status.ToString(); + if (!split_status.ok()) { + return errors::Internal("Tensor split operation failed: ", + split_status.ToString()); + } + DCHECK_EQ(split_tensor.size(), task_sizes_plus_optional_padding.size()); + if (split_tensor.size() != task_sizes_plus_optional_padding.size()) { + return errors::Internal( + "Tensor split operation did not work as expected; got ", + split_tensor.size(), " splits; expected ", + 
task_sizes_plus_optional_padding.size()); + } + + for (int j = 0; j < batch->num_tasks(); ++j) { + BatchTask& task = *(batch->mutable_task(j)); + task.context->set_output(i, split_tensor.at(j)); + } // (Ignore a possible final split_tensors entry containing the + // padding.) + } + + return Status::OK(); + } + + void ProcessFuncBatch(std::unique_ptr batch) const { + if (batch->empty()) { + return; + } + + OpKernelContext* last_task_context = + batch->task(batch->num_tasks() - 1).context; + + // Regardless of the outcome, we need to propagate the status to the + // individual tasks and signal that they are done. We use MakeCleanup() to + // ensure that this happens no matter how we exit the method below. + Status status; + bool cleanup_done = false; + auto cleanup_fn = [&cleanup_done, &batch](const Status& status) { + if (cleanup_done) { + return; + } + for (int i = 0; i < batch->num_tasks(); ++i) { + batch->mutable_task(i)->context->SetStatus(status); + batch->mutable_task(i)->done_callback(); + } + cleanup_done = true; + }; + auto finally = + gtl::MakeCleanup([&cleanup_fn, &status] { cleanup_fn(status); }); + + status = ValidateBatch(*batch); + if (!status.ok()) { + return; + } + + std::vector concatenated_tensors; + status = + ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); + if (!status.ok()) { + return; + } + FunctionLibraryRuntime::Options opts; + opts.step_id = last_task_context->step_id(); + opts.step_container = last_task_context->step_container(); + opts.cancellation_manager = last_task_context->cancellation_manager(); + opts.stats_collector = last_task_context->stats_collector(); + opts.rendezvous = last_task_context->rendezvous(); + opts.runner = last_task_context->runner(); + + auto* flib = last_task_context->function_library(); + std::vector combined_outputs; + Notification done; + std::vector args(concatenated_tensors.begin(), + concatenated_tensors.end()); + const auto& captured_inputs = + batch->task(batch->num_tasks() - 
1).captured_inputs; + args.insert(args.end(), captured_inputs.begin(), captured_inputs.end()); + flib->Run(opts, fhandle_, args, &combined_outputs, + [&](const Status& run_status) { + if (!run_status.ok()) { + return; + } + const auto split_status = + SplitOutputTensors(combined_outputs, batch.get()); + // We do the cleanup here as an optimization, so that it runs in + // the underlying TF inter-op threadpool. Running it in the + // threadpool, let's the ensuing ops be scheduled faster, + // because the executor will add them to the front of the + // threadpool's task queue rather than the end. + cleanup_fn(split_status); + done.Notify(); + }); + // By waiting for the notification we are ensuring that this thread isn't + // used for processing other batches, which gives the batches time to + // coalesce upstream. So overall the number of batches going through the + // devices goes down, improving latency and throughput in most cases. + done.WaitForNotification(); + } + + // Processes a batch of one or more BatchTask entries. + void ProcessBatch(std::unique_ptr batch) const { + if (batch->empty()) { + return; + } + + OpKernelContext* last_task_context = + batch->task(batch->num_tasks() - 1).context; + AsyncOpKernel::DoneCallback last_task_callback = + batch->task(batch->num_tasks() - 1).done_callback; + + OP_REQUIRES_OK_ASYNC(last_task_context, ValidateBatch(*batch), + last_task_callback); + + // All tasks should have the same number of input edges. + const int num_input_edges = batch->task(0).inputs.size(); + std::vector concatenated_tensors; + const Status concat_status = + ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); + OP_REQUIRES_OK_ASYNC(last_task_context, concat_status, last_task_callback); + // Process each input edge one at a time (the typical case has just one). + for (int i = 0; i < num_input_edges; ++i) { + last_task_context->set_output(i, concatenated_tensors.at(i)); + + // Emit batch->num_tasks() - 1 empty output tensors. 
+ for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { + const BatchTask& task = batch->task(task_idx); + TensorShape output_shape(task.inputs.at(i).shape()); + output_shape.set_dim(0, 0); + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + task.context, + task.context->allocate_output(i, output_shape, &output), + task.done_callback); + } + } // Emit batch->num_tasks() - 1 empty index tensors. for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { const BatchTask& task = batch->task(task_idx); @@ -463,7 +639,7 @@ class BatchResource : public ResourceBase { return Status::OK(); } - // Looks up the batcher queue for 'queue_name'. If it didn't previously exist, + // Looks up the batcher queue for 'queue_name'. If it did't previously exist, // creates it. Status LookupOrCreateBatcherQueue(const string& queue_name, BatcherQueue** queue) { @@ -477,7 +653,11 @@ class BatchResource : public ResourceBase { std::unique_ptr new_queue; auto process_batch_callback = [this](std::unique_ptr batch) { - ProcessBatch(std::move(batch)); + if (fhandle_ == kInvalidHandle) { + ProcessBatch(std::move(batch)); + } else { + ProcessFuncBatch(std::move(batch)); + } }; TF_RETURN_IF_ERROR(batcher_->AddQueue(batcher_queue_options_, process_batch_callback, &new_queue)); @@ -498,8 +678,99 @@ class BatchResource : public ResourceBase { GUARDED_BY(batcher_queues_mu_); std::vector allowed_batch_sizes_; + FunctionLibraryRuntime::Handle fhandle_; }; +class BatchFunctionKernel : public AsyncOpKernel { + public: + explicit BatchFunctionKernel(OpKernelConstruction* c) : AsyncOpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("container", &container_)); + OP_REQUIRES_OK(c, c->GetAttr("shared_name", &shared_name_)); + // If shared_name is not supplied, use name instead (prevent collisions by + // default). 
+ if (shared_name_.empty()) { + shared_name_ = name(); + } + OP_REQUIRES_OK(c, c->GetAttr("batching_queue", &batcher_queue_)); + OP_REQUIRES_OK(c, c->GetAttr("num_batch_threads", &num_batch_threads_)); + OP_REQUIRES_OK(c, c->GetAttr("max_batch_size", &max_batch_size_)); + OP_REQUIRES_OK(c, + c->GetAttr("batch_timeout_micros", &batch_timeout_micros_)); + OP_REQUIRES_OK(c, + c->GetAttr("max_enqueued_batches", &max_enqueued_batches_)); + OP_REQUIRES_OK(c, c->GetAttr("allowed_batch_sizes", &allowed_batch_sizes_)); + OP_REQUIRES_OK(c, ValidateAllowedBatchSizes()); + + auto lib = c->function_library(); + OP_REQUIRES(c, lib != nullptr, errors::Internal("No function library")); + NameAttrList func; + OP_REQUIRES_OK(c, c->GetAttr("f", &func)); + OP_REQUIRES_OK( + c, lib->Instantiate(func.name(), AttrSlice(&func.attr()), &fhandle_)); + } + + bool IsExpensive() override { return false; } + + void ComputeAsync(OpKernelContext* c, DoneCallback done) final { + BatchResource* br; + std::function creator = [this, + c](BatchResource** r) { + std::unique_ptr new_resource; + TF_RETURN_IF_ERROR( + BatchResource::Create(num_batch_threads_, max_batch_size_, + batch_timeout_micros_, max_enqueued_batches_, + allowed_batch_sizes_, fhandle_, &new_resource)); + *r = new_resource.release(); + return Status::OK(); + }; + OP_REQUIRES_OK_ASYNC(c, + c->resource_manager()->LookupOrCreate( + container_, shared_name_, &br, creator), + done); + const Status status = + br->RegisterInput(random::New64(), c, batcher_queue_, done); + br->Unref(); + OP_REQUIRES_OK_ASYNC(c, status, done); + // Assume br calls done, so nothing to do here. + } + + // Validates 'allowed_batch_sizes_'. The entries must increase monotonically, + // and the last one must equal 'max_batch_size_'. 
+ Status ValidateAllowedBatchSizes() const { + if (allowed_batch_sizes_.empty()) { + return Status::OK(); + } + int32 last_size = 0; + for (size_t i = 0; i < allowed_batch_sizes_.size(); ++i) { + const int32 size = allowed_batch_sizes_.at(i); + if (i > 0 && size <= last_size) { + return errors::InvalidArgument( + "allowed_batch_sizes entries must be monotonically increasing"); + } + if (i == allowed_batch_sizes_.size() - 1 && size != max_batch_size_) { + return errors::InvalidArgument( + "final entry in allowed_batch_sizes must equal max_batch_size"); + } + last_size = size; + } + return Status::OK(); + } + + private: + string container_; + string shared_name_; + string batcher_queue_; + int32 num_batch_threads_; + int32 max_batch_size_; + int32 batch_timeout_micros_; + int32 max_enqueued_batches_; + std::vector allowed_batch_sizes_; + FunctionLibraryRuntime::Handle fhandle_; +}; + +REGISTER_KERNEL_BUILDER(Name("BatchFunction").Device(DEVICE_CPU), + BatchFunctionKernel); + class BatchKernel : public AsyncOpKernel { public: explicit BatchKernel(OpKernelConstruction* c) : AsyncOpKernel(c) { @@ -528,7 +799,8 @@ class BatchKernel : public AsyncOpKernel { std::unique_ptr new_resource; TF_RETURN_IF_ERROR(BatchResource::Create( num_batch_threads_, max_batch_size_, batch_timeout_micros_, - max_enqueued_batches_, allowed_batch_sizes_, &new_resource)); + max_enqueued_batches_, allowed_batch_sizes_, kInvalidHandle, + &new_resource)); *r = new_resource.release(); return Status::OK(); }; @@ -539,9 +811,7 @@ class BatchKernel : public AsyncOpKernel { const Status status = br->RegisterInput(random::New64(), c, batcher_queue_, done); br->Unref(); - if (!status.ok()) { - OP_REQUIRES_OK_ASYNC(c, status, done); - } + OP_REQUIRES_OK_ASYNC(c, status, done); // Assume br calls done, so nothing to do here. 
} @@ -800,9 +1070,7 @@ class UnbatchKernel : public AsyncOpKernel { done); auto status = ubr->Compute(c, done); ubr->Unref(); - if (!status.ok()) { - OP_REQUIRES_OK_ASYNC(c, status, done); - } + OP_REQUIRES_OK_ASYNC(c, status, done); // Assume ubr calls done, so nothing to do here. } @@ -840,10 +1108,12 @@ class UnbatchGradResource : public ResourceBase { } const DataType type = tensors[0].dtype(); + Tensor concatenated_tensor; switch (type) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - TF_RETURN_IF_ERROR(Concat(context, tensors, 0)); \ +#define CASE(type) \ + case DataTypeToEnum::value: \ + TF_RETURN_IF_ERROR(Concat(context, tensors, &concatenated_tensor)); \ + context->set_output(0, concatenated_tensor); \ break; TF_CALL_ALL_TYPES(CASE); #undef CASE @@ -986,9 +1256,7 @@ class UnbatchGradKernel : public AsyncOpKernel { done); Status status = ubr->Compute(c, done); ubr->Unref(); - if (!status.ok()) { - OP_REQUIRES_OK_ASYNC(c, status, done); - } + OP_REQUIRES_OK_ASYNC(c, status, done); // Assume ubr calls done, so nothing to do here. } diff --git a/tensorflow/core/ops/batch_ops.cc b/tensorflow/core/ops/batch_ops.cc index 0a62965eed..ba7faeb5e8 100644 --- a/tensorflow/core/ops/batch_ops.cc +++ b/tensorflow/core/ops/batch_ops.cc @@ -19,6 +19,26 @@ limitations under the License. namespace tensorflow { +REGISTER_OP("BatchFunction") + .Input("in_tensors: Tin") + .Input("captured_tensors: Tcaptured") + .Output("out_tensors: Tout") + .Attr("f: func") + .Attr("num_batch_threads: int") + .Attr("max_batch_size: int") + .Attr("batch_timeout_micros: int") + .Attr("max_enqueued_batches: int = 10") + .Attr("allowed_batch_sizes: list(int) = []") + .Attr("container: string = ''") + .Attr("shared_name: string = ''") + .Attr("batching_queue: string = ''") + .Attr("Tin: list(type)") + .Attr("Tcaptured: list(type) >= 0") + .Attr("Tout: list(type)") + // TODO(apassos): Fix this shape inference function. It requires shape + // inference of function calls. 
+ .SetShapeFn(shape_inference::UnknownShape); + REGISTER_OP("Batch") .Input("in_tensors: T") .Output("batched_tensors: T") -- GitLab From fd9a647d0e79b562b99ab6d1ee4d28c2d9db8a95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 16:09:57 -0700 Subject: [PATCH 416/902] Update ops-related pbtxt files. PiperOrigin-RevId: 198941362 --- .../core/ops/compat/ops_history.v1.pbtxt | 84 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 84 +++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 1920d0a592..43dafec6f5 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -8762,6 +8762,90 @@ op { version: 15 } } +op { + name: "BatchFunction" + input_arg { + name: "in_tensors" + type_list_attr: "Tin" + } + input_arg { + name: "captured_tensors" + type_list_attr: "Tcaptured" + } + output_arg { + name: "out_tensors" + type_list_attr: "Tout" + } + attr { + name: "f" + type: "func" + } + attr { + name: "num_batch_threads" + type: "int" + } + attr { + name: "max_batch_size" + type: "int" + } + attr { + name: "batch_timeout_micros" + type: "int" + } + attr { + name: "max_enqueued_batches" + type: "int" + default_value { + i: 10 + } + } + attr { + name: "allowed_batch_sizes" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "batching_queue" + type: "string" + default_value { + s: "" + } + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Tcaptured" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + minimum: 1 + } +} op { name: "BatchIFFT" input_arg { diff --git 
a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d929a5fc87..8c7333e7a4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -3049,6 +3049,90 @@ op { explanation: "Use FFT3D" } } +op { + name: "BatchFunction" + input_arg { + name: "in_tensors" + type_list_attr: "Tin" + } + input_arg { + name: "captured_tensors" + type_list_attr: "Tcaptured" + } + output_arg { + name: "out_tensors" + type_list_attr: "Tout" + } + attr { + name: "f" + type: "func" + } + attr { + name: "num_batch_threads" + type: "int" + } + attr { + name: "max_batch_size" + type: "int" + } + attr { + name: "batch_timeout_micros" + type: "int" + } + attr { + name: "max_enqueued_batches" + type: "int" + default_value { + i: 10 + } + } + attr { + name: "allowed_batch_sizes" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "batching_queue" + type: "string" + default_value { + s: "" + } + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Tcaptured" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + minimum: 1 + } +} op { name: "BatchIFFT" input_arg { -- GitLab From 73ec24e8b75ba4f73a06756502d8bf86b2a6828b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 16:22:47 -0700 Subject: [PATCH 417/902] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 198942995 --- tensorflow/go/op/wrappers.go | 94 ++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 9b66850a6c..c9817e4d61 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2724,6 +2724,53 @@ func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { return op.Output(0) } +// Returns a batched diagonal tensor with a given batched diagonal values. +// +// Given a `diagonal`, this operation returns a tensor with the `diagonal` and +// everything else padded with zeros. The diagonal is computed as follows: +// +// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a +// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: +// +// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. +// +// For example: +// +// ``` +// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] +// +// and diagonal.shape = (2, 4) +// +// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] +// [0, 2, 0, 0] +// [0, 0, 3, 0] +// [0, 0, 0, 4]], +// [[5, 0, 0, 0] +// [0, 6, 0, 0] +// [0, 0, 7, 0] +// [0, 0, 0, 8]]] +// +// which has shape (2, 4, 4) +// ``` +// +// Arguments: +// diagonal: Rank `k`, where `k >= 1`. +// +// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. +func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatrixDiag", + Input: []tf.Input{ + diagonal, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a sequence of numbers. // // This operation creates a sequence of numbers that begins at `start` and @@ -5198,53 +5245,6 @@ func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// Returns a batched diagonal tensor with a given batched diagonal values. 
-// -// Given a `diagonal`, this operation returns a tensor with the `diagonal` and -// everything else padded with zeros. The diagonal is computed as follows: -// -// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a -// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: -// -// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. -// -// For example: -// -// ``` -// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] -// -// and diagonal.shape = (2, 4) -// -// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]], -// [[5, 0, 0, 0] -// [0, 6, 0, 0] -// [0, 0, 7, 0] -// [0, 0, 0, 8]]] -// -// which has shape (2, 4, 4) -// ``` -// -// Arguments: -// diagonal: Rank `k`, where `k >= 1`. -// -// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. -func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiag", - Input: []tf.Input{ - diagonal, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes the inverse permutation of a tensor. // // This operation computes the inverse of an index permutation. It takes a 1-D -- GitLab From b31498a054d55ce328a2820fd403af764c482500 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Fri, 1 Jun 2018 16:27:45 -0700 Subject: [PATCH 418/902] Support 5-inputs LSTM kernel in TFLite (float only). 
PiperOrigin-RevId: 198943559 --- tensorflow/contrib/lite/builtin_op_data.h | 10 + tensorflow/contrib/lite/kernels/lstm.cc | 190 +++++++++++++++++- tensorflow/contrib/lite/kernels/register.cc | 3 +- tensorflow/contrib/lite/model.cc | 8 + tensorflow/contrib/lite/schema/schema.fbs | 12 ++ .../contrib/lite/schema/schema_generated.h | 52 ++++- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 13 ++ .../contrib/lite/testing/tflite_driver.cc | 25 ++- tensorflow/contrib/lite/toco/args.h | 1 + .../identify_lstm_merge_inputs.cc | 8 +- .../identify_lstm_split_inputs.cc | 8 +- tensorflow/contrib/lite/toco/model.h | 10 +- .../contrib/lite/toco/tflite/operator.cc | 31 ++- .../contrib/lite/toco/toco_cmdline_flags.cc | 6 + tensorflow/contrib/lite/toco/toco_flags.proto | 6 +- tensorflow/contrib/lite/toco/toco_tooling.cc | 2 +- 17 files changed, 355 insertions(+), 31 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h index 52ab9ee640..c1cc4476fb 100644 --- a/tensorflow/contrib/lite/builtin_op_data.h +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -148,10 +148,20 @@ typedef struct { float beta; } TfLiteLocalResponseNormParams; +typedef enum { + kTfLiteLSTMFullKernel = 0, + kTfLiteLSTMBasicKernel +} TfLiteLSTMKernelType; + typedef struct { + // Parameters for LSTM version 1. TfLiteFusedActivation activation; float cell_clip; float proj_clip; + + // Parameters for LSTM version 2. + // kTfLiteLSTMBasicKernel is only supported in version 2 or above. + TfLiteLSTMKernelType kernel_type; } TfLiteLSTMParams; typedef struct { diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 990b3da055..9aae3e571b 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -25,6 +25,8 @@ limitations under the License. 
#include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -34,6 +36,17 @@ namespace ops { namespace builtin { namespace lstm { +struct OpData { + // Which kernel type to use. Full kernel (18-inputs) or basic kernel + // (5-inputs). + TfLiteLSTMKernelType kernel_type; + // Only used by full kernel. + int scratch_tensor_index; +}; + +// For full inputs kernel (18-inputs). +namespace full { + // Input Tensors of size {n_batch, n_input} constexpr int kInputTensor = 0; @@ -71,13 +84,10 @@ constexpr int kCellStateTensor = 1; constexpr int kOutputTensor = 2; void* Init(TfLiteContext* context, const char* buffer, size_t length) { - auto* scratch_tensor_index = new int; - context->AddTensors(context, 1, scratch_tensor_index); - return scratch_tensor_index; -} - -void Free(TfLiteContext* context, void* buffer) { - delete reinterpret_cast(buffer); + auto* op_data = new OpData; + op_data->kernel_type = kTfLiteLSTMFullKernel; + context->AddTensors(context, 1, &op_data->scratch_tensor_index); + return op_data; } // Check that input tensor dimensions matches with each other. @@ -233,7 +243,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // Allocate a temporary scratch tensor. Also check that the sizes of the input // tensors match each other. TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - int* scratch_tensor_index = reinterpret_cast(node->user_data); + OpData* op_data = reinterpret_cast(node->user_data); // Check we have all the inputs and outputs we need. 
TF_LITE_ENSURE_EQ(context, node->inputs->size, 18); @@ -289,7 +299,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Create a scratch buffer tensor. TfLiteIntArrayFree(node->temporaries); node->temporaries = TfLiteIntArrayCreate(1); - node->temporaries->data[0] = *scratch_tensor_index; + node->temporaries->data[0] = op_data->scratch_tensor_index; TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/0); scratch_buffer->type = input->type; scratch_buffer->allocation_type = kTfLiteArenaRw; @@ -447,6 +457,168 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +} // namespace full + +// For basic kernel (5-inputs). +namespace basic { + +enum InputTensor { + kInputData = 0, + kInputPrevActivation = 1, + kInputWeights = 2, + kInputBiases = 3, + kInputPrevState = 4, + kInputNum = 5, +}; + +enum OutputTensor { + kOutputActivation = 0, + kOutputState = 1, + kOutputConcatTemp = 2, + kOutputActivationTemp = 3, + kOutputNum = 4, +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* op_data = new OpData; + op_data->kernel_type = kTfLiteLSTMBasicKernel; + // `scratch_tensor_index` is unused in this kernel. + op_data->scratch_tensor_index = -1; + return op_data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, node->inputs->size == kInputNum); + TF_LITE_ENSURE(context, node->outputs->size == kOutputNum); + + // Only Float32 is supportted currently. + // TODO(ycling): Implement quantize uint8 support. 
+ for (int index = 0; index < node->inputs->size; ++index) { + TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; + TF_LITE_ENSURE_EQ(context, tensor->type, kTfLiteFloat32); + } + + const TfLiteTensor* input = GetInput(context, node, kInputData); + const TfLiteTensor* prev_activation = + GetInput(context, node, kInputPrevActivation); + const TfLiteTensor* weights = GetInput(context, node, kInputWeights); + const TfLiteTensor* bias = GetInput(context, node, kInputBiases); + const TfLiteTensor* prev_state = GetInput(context, node, kInputPrevState); + + TF_LITE_ENSURE_EQ(context, input->dims->size, 2); + const int num_batches = input->dims->data[0]; + + TF_LITE_ENSURE_EQ(context, prev_activation->dims->size, 2); + TF_LITE_ENSURE_EQ(context, prev_activation->dims->data[0], num_batches); + + TF_LITE_ENSURE_EQ(context, weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, bias->dims->size, 1); + + TF_LITE_ENSURE_EQ(context, prev_state->dims->size, 2); + TF_LITE_ENSURE_EQ(context, prev_state->dims->data[0], num_batches); + + TfLiteTensor* activation_out = GetOutput(context, node, kOutputActivation); + TfLiteTensor* state_out = GetOutput(context, node, kOutputState); + TfLiteTensor* concat_temp = GetOutput(context, node, kOutputConcatTemp); + TfLiteTensor* activation_temp = + GetOutput(context, node, kOutputActivationTemp); + + TF_LITE_ENSURE_OK(context, context->ResizeTensor( + context, activation_out, + TfLiteIntArrayCopy(prev_activation->dims))); + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, state_out, + TfLiteIntArrayCopy(prev_state->dims))); + TfLiteIntArray* concat_temp_size = TfLiteIntArrayCreate(2); + concat_temp_size->data[0] = num_batches; + concat_temp_size->data[1] = weights->dims->data[1]; + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, concat_temp, concat_temp_size)); + TfLiteIntArray* activation_temp_size = TfLiteIntArrayCreate(2); + activation_temp_size->data[0] = num_batches; + activation_temp_size->data[1] 
= weights->dims->data[0]; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, activation_temp, + activation_temp_size)); + + // Set the state tensors as persistent. + for (auto index : {kInputPrevActivation, kInputPrevState}) { + TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; + tensor->allocation_type = kTfLiteArenaRwPersistent; + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputData); + const TfLiteTensor* prev_activation = + GetInput(context, node, kInputPrevActivation); + const TfLiteTensor* weights = GetInput(context, node, kInputWeights); + const TfLiteTensor* bias = GetInput(context, node, kInputBiases); + const TfLiteTensor* prev_state = GetInput(context, node, kInputPrevState); + + TfLiteTensor* activation_out = GetOutput(context, node, kOutputActivation); + TfLiteTensor* state_out = GetOutput(context, node, kOutputState); + TfLiteTensor* concat_temp = GetOutput(context, node, kOutputConcatTemp); + TfLiteTensor* activation_temp = + GetOutput(context, node, kOutputActivationTemp); + + optimized_ops::LstmCell( + // Inputs. + GetTensorData(input), GetTensorDims(input), + GetTensorData(prev_activation), GetTensorDims(prev_activation), + GetTensorData(weights), GetTensorDims(weights), + GetTensorData(bias), GetTensorDims(bias), + GetTensorData(prev_state), GetTensorDims(prev_state), + // Outputs. + GetTensorData(state_out), GetTensorDims(state_out), + GetTensorData(activation_out), GetTensorDims(activation_out), + GetTensorData(concat_temp), GetTensorDims(concat_temp), + GetTensorData(activation_temp), GetTensorDims(activation_temp)); + + // TODO(ycling): Investigate if this copy can be avoided with the 5-inputs + // LSTM kernel. 
+ memcpy(prev_activation->data.raw, activation_out->data.raw, + activation_out->bytes); + memcpy(prev_state->data.raw, state_out->data.raw, state_out->bytes); + + return kTfLiteOk; +} + +} // namespace basic + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + const auto* params = reinterpret_cast(buffer); + switch (params->kernel_type) { + case kTfLiteLSTMFullKernel: + return full::Init(context, buffer, length); + case kTfLiteLSTMBasicKernel: + return basic::Init(context, buffer, length); + } +} +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + const auto* op_data = reinterpret_cast(node->user_data); + switch (op_data->kernel_type) { + case kTfLiteLSTMFullKernel: + return full::Prepare(context, node); + case kTfLiteLSTMBasicKernel: + return basic::Prepare(context, node); + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const auto* op_data = reinterpret_cast(node->user_data); + switch (op_data->kernel_type) { + case kTfLiteLSTMFullKernel: + return full::Eval(context, node); + case kTfLiteLSTMBasicKernel: + return basic::Eval(context, node); + } +} + } // namespace lstm TfLiteRegistration* Register_LSTM() { diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index c7d72738d6..184b02dcec 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -126,7 +126,8 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION()); AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, Register_LOCAL_RESPONSE_NORMALIZATION()); - AddBuiltin(BuiltinOperator_LSTM, Register_LSTM()); + AddBuiltin(BuiltinOperator_LSTM, Register_LSTM(), /* min_version */ 1, + /* max_version */ 2); AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, 
Register_BIDIRECTIONAL_SEQUENCE_LSTM()); AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index ca115a1c59..8d8d74adfb 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -558,6 +558,14 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, parse_activation(lstm_params->fused_activation_function()); params->cell_clip = lstm_params->cell_clip(); params->proj_clip = lstm_params->proj_clip(); + switch (lstm_params->kernel_type()) { + case LSTMKernelType_FULL: + params->kernel_type = kTfLiteLSTMFullKernel; + break; + case LSTMKernelType_BASIC: + params->kernel_type = kTfLiteLSTMBasicKernel; + break; + } } *builtin_data = reinterpret_cast(params); break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 7d76134e3d..7dbb36c864 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -315,11 +315,23 @@ table LocalResponseNormalizationOptions { beta:float; } +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} + // An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell table LSTMOptions { + // Parameters for LSTM version 1 or above. fused_activation_function:ActivationFunctionType; cell_clip: float; // Optional, 0.0 means no clipping proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above. 
+ kernel_type: LSTMKernelType = FULL; } table ResizeBilinearOptions { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 0a60fcd3d0..b1beb39b28 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -1428,6 +1428,35 @@ inline const char *EnumNameLSHProjectionType(LSHProjectionType e) { return EnumNamesLSHProjectionType()[index]; } +enum LSTMKernelType { + LSTMKernelType_FULL = 0, + LSTMKernelType_BASIC = 1, + LSTMKernelType_MIN = LSTMKernelType_FULL, + LSTMKernelType_MAX = LSTMKernelType_BASIC +}; + +inline LSTMKernelType (&EnumValuesLSTMKernelType())[2] { + static LSTMKernelType values[] = { + LSTMKernelType_FULL, + LSTMKernelType_BASIC + }; + return values; +} + +inline const char **EnumNamesLSTMKernelType() { + static const char *names[] = { + "FULL", + "BASIC", + nullptr + }; + return names; +} + +inline const char *EnumNameLSTMKernelType(LSTMKernelType e) { + const size_t index = static_cast(e); + return EnumNamesLSTMKernelType()[index]; +} + enum CombinerType { CombinerType_SUM = 0, CombinerType_MEAN = 1, @@ -2865,10 +2894,12 @@ struct LSTMOptionsT : public flatbuffers::NativeTable { ActivationFunctionType fused_activation_function; float cell_clip; float proj_clip; + LSTMKernelType kernel_type; LSTMOptionsT() : fused_activation_function(ActivationFunctionType_NONE), cell_clip(0.0f), - proj_clip(0.0f) { + proj_clip(0.0f), + kernel_type(LSTMKernelType_FULL) { } }; @@ -2877,7 +2908,8 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { enum { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, - VT_PROJ_CLIP = 8 + VT_PROJ_CLIP = 8, + VT_KERNEL_TYPE = 10 }; ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); @@ -2888,11 +2920,15 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { float proj_clip() const { 
return GetField(VT_PROJ_CLIP, 0.0f); } + LSTMKernelType kernel_type() const { + return static_cast(GetField(VT_KERNEL_TYPE, 0)); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && VerifyField(verifier, VT_CELL_CLIP) && VerifyField(verifier, VT_PROJ_CLIP) && + VerifyField(verifier, VT_KERNEL_TYPE) && verifier.EndTable(); } LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -2912,6 +2948,9 @@ struct LSTMOptionsBuilder { void add_proj_clip(float proj_clip) { fbb_.AddElement(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); } + void add_kernel_type(LSTMKernelType kernel_type) { + fbb_.AddElement(LSTMOptions::VT_KERNEL_TYPE, static_cast(kernel_type), 0); + } explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -2928,10 +2967,12 @@ inline flatbuffers::Offset CreateLSTMOptions( flatbuffers::FlatBufferBuilder &_fbb, ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, float cell_clip = 0.0f, - float proj_clip = 0.0f) { + float proj_clip = 0.0f, + LSTMKernelType kernel_type = LSTMKernelType_FULL) { LSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); builder_.add_cell_clip(cell_clip); + builder_.add_kernel_type(kernel_type); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } @@ -6226,6 +6267,7 @@ inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_ { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; { auto _e = cell_clip(); _o->cell_clip = _e; }; { auto _e = proj_clip(); _o->proj_clip = _e; }; + { auto _e = kernel_type(); _o->kernel_type = _e; }; } inline flatbuffers::Offset LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -6239,11 +6281,13 @@ inline 
flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBuffe auto _fused_activation_function = _o->fused_activation_function; auto _cell_clip = _o->cell_clip; auto _proj_clip = _o->proj_clip; + auto _kernel_type = _o->kernel_type; return tflite::CreateLSTMOptions( _fbb, _fused_activation_function, _cell_clip, - _proj_clip); + _proj_clip, + _kernel_type); } inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 74fc32a12b..80e4c5a4dd 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -155,6 +155,7 @@ cc_library( deps = [ ":split", ":test_runner", + "//tensorflow/contrib/lite:builtin_op_data", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/kernels:builtin_ops", ], diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index f07e36fc7d..9bb7a4600d 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -118,6 +118,8 @@ class ExtraTocoOptions(object): self.allow_custom_ops = False # Rnn states that are used to support rnn / lstm cells. self.rnn_states = None + # Split the LSTM inputs from 5 inoputs to 18 inputs for TFLite. 
+ self.split_tflite_lstm_inputs = None def toco_options(data_types, @@ -155,6 +157,11 @@ def toco_options(data_types, s += " --allow_custom_ops" if extra_toco_options.rnn_states: s += (" --rnn_states='" + extra_toco_options.rnn_states + "'") + if extra_toco_options.split_tflite_lstm_inputs is not None: + if extra_toco_options.split_tflite_lstm_inputs: + s += " --split_tflite_lstm_inputs=true" + else: + s += " --split_tflite_lstm_inputs=false" return s @@ -461,6 +468,11 @@ def make_zip_of_tests(zip_path, sess, tf.global_variables() + inputs + outputs) if use_frozen_graph else sess.graph_def + + if "split_tflite_lstm_inputs" in param_dict_real: + extra_toco_options.split_tflite_lstm_inputs = param_dict_real[ + "split_tflite_lstm_inputs"] + tflite_model_binary, toco_log = toco_convert( graph_def.SerializeToString(), input_tensors, output_tensors, extra_toco_options) @@ -2019,6 +2031,7 @@ def make_lstm_tests(zip_path): "time_step_size": [1], "input_vec_size": [3], "num_cells": [4], + "split_tflite_lstm_inputs": [True, False], }, ] diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 8cab6cd8cd..fc28faf524 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -16,6 +16,7 @@ limitations under the License. #include +#include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/testing/split.h" namespace tflite { @@ -290,12 +291,24 @@ void TfLiteDriver::ResetLSTMStateTensors() { const auto& node_and_reg = interpreter_->node_and_registration(node_index); const auto& node = node_and_reg->first; const auto& registration = node_and_reg->second; - if (registration.builtin_code == tflite::BuiltinOperator_LSTM && - node.outputs->size >= 2) { - // The first 2 outputs of LSTM are state tensors. 
- for (int i = 0; i < 2; ++i) { - int node_index = node.outputs->data[i]; - ResetTensor(node_index); + + if (registration.builtin_code == tflite::BuiltinOperator_LSTM) { + const auto* params = + reinterpret_cast(node.builtin_data); + if (params->kernel_type == kTfLiteLSTMFullKernel && + node.outputs->size >= 2) { + // The first 2 outputs of LSTM are state tensors. + for (int i = 0; i < 2; ++i) { + int node_index = node.outputs->data[i]; + ResetTensor(node_index); + } + } else if (params->kernel_type == kTfLiteLSTMBasicKernel && + node.inputs->size == 5) { + // The 2th and 5th inputs are state tensors. + for (int i : {1, 4}) { + int node_index = node.inputs->data[i]; + ResetTensor(node_index); + } } } } diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index 6c0311af0a..77bc54f191 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -242,6 +242,7 @@ struct ParsedTocoFlags { Arg propagate_fake_quant_num_bits = Arg(false); Arg allow_nudging_weights_to_use_fast_gemm_kernel = Arg(false); Arg dedupe_array_min_size_bytes = Arg(64); + Arg split_tflite_lstm_inputs = Arg(true); }; } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc index 3f768bfee1..5b6a984ee1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc @@ -33,9 +33,10 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { return false; } - // Already a compact LstmCell with LstmCellOperator::NUM_INPUTS of inputs, - // do not need to merge cell inputs. - if (src_op->inputs.size() == LstmCellOperator::NUM_INPUTS) { + // Already a compact LstmCell. Do not need to merge cell inputs. 
+ const auto* src_lstm_op = static_cast(src_op); + if (src_lstm_op->kernel_type != LstmCellOperator::KERNEL_FULL || + src_lstm_op->inputs.size() != kExtendedLstmInputCount) { return false; } @@ -136,6 +137,7 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { // Emplace a new LSTM cell operator (use basic 5 inputs kernel). auto lstm_cell_op = absl::make_unique(); + lstm_cell_op->kernel_type = LstmCellOperator::KERNEL_BASIC; // Compact LstmCell's 5 inputs. lstm_cell_op->inputs.resize(LstmCellOperator::NUM_INPUTS); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc index 8e66323bd7..e6e3dfa1de 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc @@ -33,9 +33,10 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { return false; } - // Already an extended LstmCell with kExtendedLstmInputCount of inputs, - // do not need to split cell inputs. - if (curr_op->inputs.size() == kExtendedLstmInputCount) { + const auto* curr_lstm_op = static_cast(curr_op); + // Already an extended LstmCell. Do not need to split cell inputs. + if (curr_lstm_op->kernel_type != LstmCellOperator::KERNEL_BASIC || + curr_lstm_op->inputs.size() != LstmCellOperator::NUM_INPUTS) { return false; } @@ -56,6 +57,7 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { // Emplace a new LstmCell operator with extended inputs (kernel/lstm.cc). 
auto lstm_cell_op = absl::make_unique(); + lstm_cell_op->kernel_type = LstmCellOperator::KERNEL_FULL; lstm_cell_op->inputs.resize(kExtendedLstmInputCount); int num_input = model->GetArray(curr_op->inputs[LstmCellOperator::DATA_INPUT]) .shape() diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 9062c03c73..1a4f87e363 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -527,7 +527,15 @@ struct LstmCellOperator : Operator { ACTIV_TEMP = 3, NUM_OUTPUTS = 4 }; - LstmCellOperator() : Operator(OperatorType::kLstmCell) {} + enum KernelType { + KERNEL_BASIC = 0, + KERNEL_FULL = 1, + }; + + LstmCellOperator() + : Operator(OperatorType::kLstmCell), kernel_type(KERNEL_BASIC) {} + + KernelType kernel_type; }; // Element-wise multiplication operator. diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 84a5410839..a8518adefc 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -626,11 +626,21 @@ class Lstm : public BuiltinOperator WriteOptions( const TocoOperator& op, flatbuffers::FlatBufferBuilder* builder) const override { + ::tflite::LSTMKernelType kernel_type; + switch (op.kernel_type) { + case LstmCellOperator::KERNEL_BASIC: + kernel_type = ::tflite::LSTMKernelType_BASIC; + break; + case LstmCellOperator::KERNEL_FULL: + kernel_type = ::tflite::LSTMKernelType_FULL; + break; + } + // Current toco converter only supports tanh, no clip. 
return ::tflite::CreateLSTMOptions(*builder, /*fused_activation_function=*/ ::tflite::ActivationFunctionType_TANH, /*cell_clip=*/0.0, - /*proj_clip=*/0.0); + /*proj_clip=*/0.0, kernel_type); } void ReadOptions(const TfLiteOptions& options, @@ -638,9 +648,26 @@ class Lstm : public BuiltinOperatorkernel_type = LstmCellOperator::KERNEL_BASIC; + break; + case ::tflite::LSTMKernelType_FULL: + op->kernel_type = LstmCellOperator::KERNEL_FULL; + break; + } } - int GetVersion(const Operator& op) const override { return 1; } + int GetVersion(const Operator& op) const override { + const auto& lstm_op = static_cast(op); + switch (lstm_op.kernel_type) { + case LstmCellOperator::KERNEL_FULL: + return 1; + case LstmCellOperator::KERNEL_BASIC: + return 2; + } + } }; class Mean : public BuiltinOperator Date: Fri, 1 Jun 2018 16:32:20 -0700 Subject: [PATCH 419/902] Allow user to opt out of saving metagraph for TPU with TPUEstimator.export_output(). PiperOrigin-RevId: 198944144 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 4465833f88..a155de3844 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1830,6 +1830,7 @@ class TPUEstimator(estimator_lib.Estimator): predict_batch_size=None, batch_axis=None, eval_on_tpu=True, + export_to_tpu=True, warm_start_from=None): """Constructs an `TPUEstimator` instance. @@ -1872,6 +1873,8 @@ class TPUEstimator(estimator_lib.Estimator): False or `PER_HOST_V2`, batch_axis is ignored. eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`. + export_to_tpu: If True, `export_savedmodel()` exports a metagraph for + serving on TPU besides the one on CPU. 
warm_start_from: Optional string filepath to a checkpoint or SavedModel to warm-start from, or a `tf.estimator.WarmStartSettings` object to fully configure warm-starting. If the string @@ -1943,6 +1946,8 @@ class TPUEstimator(estimator_lib.Estimator): use_tpu, eval_on_tpu) + self._export_to_tpu = export_to_tpu + self._is_input_fn_invoked = None def _add_meta_graph_for_mode(self, @@ -1965,11 +1970,11 @@ class TPUEstimator(estimator_lib.Estimator): save_variables, mode=mode) - input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: - input_receiver_fn_map[mode]} - export_tags = [tag_constants.SERVING, tag_constants.TPU] - mode = _REWRITE_FOR_INFERENCE_MODE - try: + if self._export_to_tpu: + input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: + input_receiver_fn_map[mode]} + export_tags = [tag_constants.SERVING, tag_constants.TPU] + mode = _REWRITE_FOR_INFERENCE_MODE (super(TPUEstimator, self). _add_meta_graph_for_mode(builder, input_receiver_fn_map, @@ -1978,9 +1983,6 @@ class TPUEstimator(estimator_lib.Estimator): save_variables=False, mode=mode, export_tags=export_tags)) - except Exception as error: # pylint: disable=broad-except - logging.warning('Saving meta graph for TPU failed: {}.' - .format(str(error))) def _call_model_fn(self, features, labels, mode, config): if mode == _REWRITE_FOR_INFERENCE_MODE: -- GitLab From f84e8257aa88fa45cc7a15835ad386565cd60237 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 16:48:10 -0700 Subject: [PATCH 420/902] Change the Eigen reduction code to use a tree to improve numerical stability. This changes the InnerMostDimReducer to use a summation tree, which is more numerically stable than the previous approach of sequential addition into an accumulator. This solves the issue for reduction over all or a trailing subset of dimensions. This change does not improve the numerical accuracy for MeanReducer, which maintains state. 
Benchmarks show a 40% (AVX) to 50% (SSE) slowdown for small row reductions (sum, float). column- and full reductions are unchanged. Cleaned up TensorFunctors.h a bit by moving the traits to reducer_traits and updating the code that uses the reducers accordingly. Introduced a new trait "IsExactlyAssociative" and new template specializations of InnerMostDimReducer to ensure that we only invoke the new and slightly more expensive codepath when it is needed, i.e. for sum reduction of non-integer types. PiperOrigin-RevId: 198946075 --- tensorflow/core/kernels/eigen_pooling.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h index 2f83780525..56de6b1d43 100644 --- a/tensorflow/core/kernels/eigen_pooling.h +++ b/tensorflow/core/kernels/eigen_pooling.h @@ -372,16 +372,23 @@ struct reducer_traits, Device> { Cost = 1, #if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) // We only support packet access for floats. - PacketAccess = true + PacketAccess = true, #else - PacketAccess = false + PacketAccess = false, #endif + IsStateful = true, + IsExactlyAssociative = false }; }; template <> struct reducer_traits, GpuDevice> { - enum { Cost = 1, PacketAccess = false }; + enum { + Cost = 1, + PacketAccess = false, + IsStateful = true, + IsExactlyAssociative = false + }; }; } // namespace internal -- GitLab From da63752d84b65b238dfcdacb550b41661d0cf211 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 1 Jun 2018 17:07:29 -0700 Subject: [PATCH 421/902] Internal change. 
PiperOrigin-RevId: 198948296 --- tensorflow/workspace.bzl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e4b7f9a695..c072f89965 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -167,8 +167,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "gemmlowp", urls = [ - # TODO (yongtang): uncomment once mirror.bazel.build is propagated. - # "https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", + "https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", "https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", ], sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658", -- GitLab From 3dd460bb419776e6a4804843eec98e4bf14fdcdd Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 1 Jun 2018 17:21:55 -0700 Subject: [PATCH 422/902] Add an explanatory comment. PiperOrigin-RevId: 198949796 --- tensorflow/compiler/aot/tests/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD index fd2cf2b67d..0ecc3feeb6 100644 --- a/tensorflow/compiler/aot/tests/BUILD +++ b/tensorflow/compiler/aot/tests/BUILD @@ -7,6 +7,10 @@ package( load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library") load("//tensorflow:tensorflow.bzl", "tf_cc_test") +# We disable some tfcompile tests in the open source build with the +# "manual" tag to avoid making our OSS users build LLVM twice +# (once for host and once for target). + test_suite( name = "all_tests", tags = ["manual"], -- GitLab From b33ba9a8e7e20e4b2378937204fe74af69982906 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 1 Jun 2018 18:00:43 -0700 Subject: [PATCH 423/902] Remove use of absl::make_unique absl is not yet ready for use by open source TensorFlow. 
:-( PiperOrigin-RevId: 198952953 --- tensorflow/contrib/cloud/kernels/gcs_config_ops.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc index ef4998212e..648a219fb8 100644 --- a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc +++ b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/platform/cloud/curl_http_request.h" #include "tensorflow/core/platform/cloud/gcs_file_system.h" #include "tensorflow/core/platform/cloud/oauth_client.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace { @@ -96,7 +97,8 @@ class GcsCredentialsOpKernel : public OpKernel { errors::InvalidArgument("JSON format incompatible; did not find fields " "`refresh_token` or `private_key`.")); - auto provider = absl::make_unique(json, ctx->env()); + auto provider = + tensorflow::MakeUnique(json, ctx->env()); // Test getting a token string dummy_token; @@ -121,7 +123,7 @@ class GcsCredentialsOpKernel : public OpKernel { initial_retry_delay_usec_(initial_retry_delay_usec) {} ConstantAuthProvider(const Json::Value& json, Env* env) - : ConstantAuthProvider(json, absl::make_unique(), env, + : ConstantAuthProvider(json, tensorflow::MakeUnique(), env, kInitialRetryDelayUsec) {} ~ConstantAuthProvider() override {} -- GitLab From 6e5606fce0e4615880e2685a3674c498756b9cfb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 18:01:58 -0700 Subject: [PATCH 424/902] Extract FoldMultiplyIntoConv optimization stage. 
PiperOrigin-RevId: 198953044 --- .../optimizers/arithmetic_optimizer.cc | 214 ++++++++++-------- .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 76 ++++--- 3 files changed, 172 insertions(+), 119 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index ca3f84a81d..400af82627 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1958,6 +1958,127 @@ class ReorderCastAndTranspose : public ArithmeticOptimizerStage { bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); } }; +// Fold a multiply of a scalar into the following convolution. This folding +// can jump across nodes that merely reorders data (such as reshape and +// transpose). For example, we can optimize +// +// +// Conv2D Conv2D +// / \ / \ +// Transpose weights* -> Transpose Mul +// | | / \ +// Mul | weights scale +// / \ | +// input scale** input +// +// *) weights must be a const +// **) scale must be a const scalar +// +// When `weights` and `scale` are constant, `Mul` in the optimized graph can be +// constant-folded, also weights tend to be smaller than the activations. +// +// TODO(jingyue): Fold scalar multiplies to Conv?DBackpropFilter and +// Conv?DBackpropInput. +class FoldMultiplyIntoConv : public ArithmeticOptimizerStage { + public: + explicit FoldMultiplyIntoConv(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("FoldMultiplyIntoConv", ctx, ctx_ext) {} + ~FoldMultiplyIntoConv() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsConv2D(*node) || IsConv3D(*node); + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { +#define TF_RETURN_IF_TRUE(...) 
\ + if ((__VA_ARGS__)) return Status::OK() + + NodeDef* conv = node; + + NodeDef* weights; + TF_RETURN_IF_ERROR(GetInputNode(conv->input(1), &weights)); + + // Fold the multiply to conv only when the weights are constant, so the + // multiply can be constant-folded. + // + // TODO(jingyue): When the weights aren't constant, this should also help + // performance a bit and memory usage a lot, since the weights tend to be + // smaller than the activations. + TF_RETURN_IF_TRUE(!IsConstant(*weights)); + + // Verify that this node was not already optimized. + const string scaled_weights_node_name = + OptimizedNodeName(ParseNodeScopeAndName(weights->name()), + strings::StrCat("scaled", "_", conv->name())); + + TF_RETURN_IF_TRUE(ctx().node_map->NodeExists(scaled_weights_node_name)); + + // Find the tail of value preserving chain entering the Conv node. + NodeDef* tail = GetTailOfValuePreservingChain(*conv, *ctx().node_map, + *ctx().nodes_to_preserve); + + NodeDef* source; + TF_RETURN_IF_ERROR(GetInputNode(tail->input(0), &source)); + + // Check that value preserving chain is the only consumer of the Mul output. + TF_RETURN_IF_TRUE(!IsMul(*source)); + TF_RETURN_IF_TRUE(NumNonControlOutputs(*source, *ctx().node_map) != 1); + + const NodeDef* mul = source; + + // TODO(jingyue): handle the case where `scale` is 0-th operand. + NodeDef* scale; // scalar multiplier fot the input tensor + NodeDef* input; + TF_RETURN_IF_ERROR(GetInputNode(mul->input(1), &scale)); + TF_RETURN_IF_ERROR(GetInputNode(mul->input(0), &input)); + + // Check that 'scale * weight' can be const folded. + TF_RETURN_IF_TRUE(!IsConstant(*scale)); + TF_RETURN_IF_TRUE(scale->attr().at("dtype").type() != + weights->attr().at("dtype").type()); + + // Check that `scale` is a scalar. 
+ const TensorProto& scale_tensor = scale->attr().at("value").tensor(); + bool scale_is_a_scalar = scale_tensor.has_tensor_shape() && + scale_tensor.tensor_shape().dim_size() == 0; + TF_RETURN_IF_TRUE(!scale_is_a_scalar); + + // At this point all preconditions are met, and we safely do the rewrite. + VLOG(3) << "Fold multiply into conv: conv=" << conv->name() + << " mul=" << mul->name() << " weights=" << weights->name(); + + // Create new node `scaled_weights`. + NodeDef* scaled_weights = AddEmptyNode(scaled_weights_node_name); + scaled_weights->set_op("Mul"); + scaled_weights->set_device(weights->device()); + (*scaled_weights->mutable_attr())["T"] = weights->attr().at("dtype"); + AddToOptimizationQueue(scaled_weights); + + // Link in its inputs. + scaled_weights->add_input(conv->input(1)); + ctx().node_map->AddOutput(weights->name(), scaled_weights->name()); + scaled_weights->add_input(mul->input(1)); + ctx().node_map->AddOutput(scale->name(), scaled_weights->name()); + ForwardControlDependencies(scaled_weights, {source}); + + // Update `conv`'s weights to `scaled_weights`. + conv->set_input(1, scaled_weights->name()); + ctx().node_map->UpdateInput(conv->name(), weights->name(), + scaled_weights->name()); + AddToOptimizationQueue(conv); + + // Update `tail` node to bypass `mul` because it's folded to the weights. + tail->set_input(0, mul->input(0)); + ctx().node_map->UpdateInput(tail->name(), mul->name(), input->name()); + AddToOptimizationQueue(tail); + *simplified_node_name = conv->name(); + + return Status::OK(); +#undef TF_RETURN_IF_TRUE + } +}; + } // namespace class UniqueNodes { @@ -2210,97 +2331,6 @@ void ArithmeticOptimizer::ForwardControlDependencies( // ArithmeticOptimizerStage string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, SetVector* nodes_to_simplify) { - // Fold a multiply of a scalar into the following convolution. This folding - // can jump across nodes that merely reorders data (such as reshape and - // transpose). 
For example, we can optimize - // - // - // Conv2D - // / \ - // Transpose weights - // | - // Mul - // / \ - // inputs 255.0 - // - // to - // - // Conv2D - // / \ - // Transpose Mul - // | / \ - // | weights 255.0 - // | - // inputs - // - // when `weights` are constant. `Mul` in the optimized graph can be - // constant-folded. - // - // TODO(jingyue): Fold scalar multiplies to Conv?DBackpropFilter and - // Conv?DBackpropInput. - if (node->op() == "Conv2D" || node->op() == "Conv3D") { - NodeDef* conv = const_cast(node); - const NodeDef* weights = node_map_->GetNode(NodeName(conv->input(1))); - // Fold the multiply to conv only when the weights are constant, so the - // multiply can be constant-folded. TODO(jingyue): When the weights aren't - // constant, this should also help performance a bit and memory usage a lot, - // since the weights tend to be smaller than the activations. - if (weights->op() == "Const" && - !OptimizedNodeExists(*weights, StrCat("scaled_", conv->name()))) { - const NodeDef* source = node_map_->GetNode( - GetTailOfValuePreservingChain(*node, *node_map_, nodes_to_preserve_) - ->input(0)); - if (source->op() == "Mul" && - node_map_->GetOutputs(source->name()).size() == 1) { - const NodeDef* mul = source; - // `scale` is the scalar multiplier, and `other` is the other operand. - // TODO(jingyue): handle the case where `scale` is 0-th operand. - const NodeDef* scale = node_map_->GetNode(mul->input(1)); - const NodeDef* other = node_map_->GetNode(mul->input(0)); - if (scale->op() == "Const" && scale->attr().at("dtype").type() == - weights->attr().at("dtype").type()) { - const TensorProto& scale_tensor = scale->attr().at("value").tensor(); - // Test whether `scale` is a scalar. - if (scale_tensor.has_tensor_shape() && - scale_tensor.tensor_shape().dim_size() == 0) { - // Create new node `scaled_weights`. 
- NodeDef* scaled_weights = AddNode( - *weights, StrCat("scaled_", conv->name()), /*copy_node=*/false); - scaled_weights->set_op("Mul"); - scaled_weights->set_device(weights->device()); - (*scaled_weights->mutable_attr())["T"] = - weights->attr().at("dtype"); - nodes_to_simplify->PushBack(scaled_weights); - - // Link in its inputs. - scaled_weights->add_input(conv->input(1)); - node_map_->AddOutput(weights->name(), scaled_weights->name()); - scaled_weights->add_input(mul->input(1)); - node_map_->AddOutput(scale->name(), scaled_weights->name()); - ForwardControlDependencies(scaled_weights, {source}); - - // Update `conv`'s weights to `scaled_weights`. - conv->set_input(1, scaled_weights->name()); - node_map_->UpdateInput(conv->name(), weights->name(), - scaled_weights->name()); - nodes_to_simplify->PushBack(conv); - - // Update `mul`'s consumer to bypass `mul` because it's folded to - // the weights. - CHECK_EQ(node_map_->GetOutputs(mul->name()).size(), 1); - NodeDef* consumer_of_mul = - *node_map_->GetOutputs(mul->name()).begin(); - consumer_of_mul->set_input(0, mul->input(0)); - node_map_->UpdateInput(consumer_of_mul->name(), mul->name(), - other->name()); - nodes_to_simplify->PushBack(consumer_of_mul); - return conv->name(); - } - } - } - } - } - if (node->op() == "Mul" && node->input(0) == node->input(1) && !OptimizedNodeExists(*node, "square")) { const DataType type = GetDataTypeFromAttr(*node, "T"); @@ -2480,6 +2510,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { if (options_.combine_add_to_addn && can_use_shapes) pipeline.AddStage(ctx, ctx_ext); + if (options_.fold_multiply_into_conv) + pipeline.AddStage(ctx, ctx_ext); if (options_.hoist_common_factor_out_of_aggregation && can_use_shapes) pipeline.AddStage(ctx, ctx_ext); if (options_.minimize_broadcasts && can_use_shapes) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 
0fce23a40a..ce3c633baf 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -61,6 +61,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool combine_add_to_addn = true; bool convert_sqrt_div_to_rsqrt_mul = false; bool dedup_computations = true; + bool fold_multiply_into_conv = true; bool hoist_common_factor_out_of_aggregation = true; bool hoist_cwise_unary_chains = false; bool minimize_broadcasts = true; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 02f76df025..b9fec0f860 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -126,6 +126,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { options.enable_try_simplify_and_replace = false; options.combine_add_to_addn = false; options.convert_sqrt_div_to_rsqrt_mul = false; + options.fold_multiply_into_conv = false; options.hoist_common_factor_out_of_aggregation = false; options.hoist_cwise_unary_chains = false; options.minimize_broadcasts = false; @@ -150,6 +151,11 @@ class ArithmeticOptimizerTest : public GrapplerTest { optimizer->options_.combine_add_to_addn = true; } + void EnableOnlyFoldMultipleIntoConv(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.fold_multiply_into_conv = true; + } + void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); optimizer->options_.hoist_common_factor_out_of_aggregation = true; @@ -1462,18 +1468,24 @@ TEST_F(ArithmeticOptimizerTest, FoldMulToTransposeConv) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; 
+ EnableOnlyFoldMultipleIntoConv(&optimizer); + OptimizeTwiceAndPrune(&optimizer, &item, &output); NodeMap node_map(&output); + // `conv` is now a folded convolution with scaled weights. const NodeDef* folded_conv = node_map.GetNode(conv.node()->name()); - CHECK_EQ(node_map.GetNode(NodeName(folded_conv->input(1)))->op(), "Mul"); + ASSERT_NE(folded_conv, nullptr); + + const NodeDef* folded_conv_weights = node_map.GetNode(folded_conv->input(1)); + ASSERT_NE(folded_conv_weights, nullptr); + EXPECT_EQ("Mul", folded_conv_weights->op()); + // Its input should be a transpose of `inputs`. const NodeDef* transpose = node_map.GetNode(NodeName(folded_conv->input(0))); - CHECK_EQ(NodeName(transpose->input(0)), inputs.node()->name()); + ASSERT_NE(transpose, nullptr); + EXPECT_EQ("inputs", transpose->input(0)); } TEST_F(ArithmeticOptimizerTest, NotFoldMulAcrossPreservedTranspose) { @@ -1574,28 +1586,32 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - ArithmeticOptimizer optimizer; + ArithmeticOptimizer optimizer; // all optimization stages are on OptimizeTwiceAndPrune(&optimizer, &item, &output, /*const_folding=*/true); NodeMap node_map(&output); - // Expected names for the optimized nodes. + // Expected names for reordered cast and transpose. const string p = "ArithmeticOptimizer/ReorderCastAndTranspose_"; const string optimized_cast_name = strings::StrCat(p, "float_Cast"); const string optimized_transpose_name = strings::StrCat(p, "uint8_Transpose"); + // Expected names for folded multiply and conv. 
+ const string optimized_weights = + "ArithmeticOptimizer/FoldMultiplyIntoConv_scaled_Conv2D_weights"; + const NodeDef* inputs_node = node_map.GetNode("Placeholder"); const NodeDef* transpose_node = node_map.GetNode(optimized_transpose_name); const NodeDef* cast_node = node_map.GetNode(optimized_cast_name); - const NodeDef* weights_node = - node_map.GetNode(OptimizedName("weights_scaled_Conv2D")); + + const NodeDef* weights_node = node_map.GetNode(optimized_weights); const NodeDef* conv_node = node_map.GetNode("Conv2D"); - ASSERT_TRUE(inputs_node != nullptr); - ASSERT_TRUE(transpose_node != nullptr); - ASSERT_TRUE(cast_node != nullptr); - ASSERT_TRUE(weights_node != nullptr); - ASSERT_TRUE(conv_node != nullptr); + ASSERT_NE(inputs_node, nullptr); + ASSERT_NE(transpose_node, nullptr); + ASSERT_NE(cast_node, nullptr); + ASSERT_NE(weights_node, nullptr); + ASSERT_NE(conv_node, nullptr); EXPECT_EQ(output.node_size(), 7); EXPECT_EQ(transpose_node->input(0), inputs_node->name()); @@ -1627,23 +1643,27 @@ TEST_F(ArithmeticOptimizerTest, OptimizeMultipleMulTransposeConv) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyFoldMultipleIntoConv(&optimizer); + OptimizeTwiceAndPrune(&optimizer, &item, &output, /*const_folding=*/true); - item.graph.Swap(&output); - TF_EXPECT_OK( - ConstantFolding(/*cpu_device=*/nullptr).Optimize(nullptr, item, &output)); + NodeMap node_map(&output); - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + using strings::StrCat; + const string p = "ArithmeticOptimizer/FoldMultiplyIntoConv_"; + const string optimized_weights = StrCat(p, "scaled_Conv2D_weights"); + const string optimized_weights_1 = StrCat(p, "scaled_Conv2D_1_weights_1"); - NodeMap node_map(&output); - const NodeDef* weights_node = - CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D"))); - const NodeDef* 
conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); + const NodeDef* weights_node = node_map.GetNode(optimized_weights); + const NodeDef* weights_node_1 = node_map.GetNode(optimized_weights_1); + const NodeDef* conv_node = node_map.GetNode("Conv2D"); + const NodeDef* conv_node_1 = node_map.GetNode("Conv2D_1"); + + ASSERT_NE(weights_node, nullptr); + ASSERT_NE(weights_node_1, nullptr); + ASSERT_NE(conv_node, nullptr); + ASSERT_NE(conv_node_1, nullptr); - const NodeDef* weights_node_1 = - CHECK_NOTNULL(node_map.GetNode(OptimizedName("weights_scaled_Conv2D_1"))); - const NodeDef* conv_node_1 = CHECK_NOTNULL(node_map.GetNode("Conv2D_1")); EXPECT_EQ(conv_node->input(1), weights_node->name()); EXPECT_EQ(conv_node_1->input(1), weights_node_1->name()); } -- GitLab From d81328115bd10de70570c46dbfc683cd0238d779 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Fri, 1 Jun 2018 18:09:31 -0700 Subject: [PATCH 425/902] [XLA] Add comments for the Reduce->Reshape simplifier pass. Also forcing reduction order for init to be on lhs for ReduceWindow->Map pass. PiperOrigin-RevId: 198953817 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index e1a45e453e..dc5f1b31bf 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1774,6 +1774,10 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* reduce) { new_reduce_dimensions, function)); } + // If the reduction results in the same number of elements, then the only + // possible side effect would be a reshape. Since the init_value is an + // identity of the reduction function, we can therefore replace the reduce + // with a simple reshape, ignoring the reduction function completely. 
if (ShapeUtil::ElementsIn(reduce->shape()) == ShapeUtil::ElementsIn(arg->shape())) { return ReplaceWithNewInstruction( @@ -1842,7 +1846,7 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( return ReplaceWithNewInstruction( reduce_window, HloInstruction::CreateMap(reduce_window->shape(), - {operand, reduce_window->mutable_operand(1)}, + {reduce_window->mutable_operand(1), operand}, function)); } -- GitLab From dbdd276a05c417963b3f06f71e801540bde9ab7c Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Fri, 1 Jun 2018 18:30:32 -0700 Subject: [PATCH 426/902] Quantize weights transformation for toco. Finds float weight tensors, quantizes them to 8 bits, and adds Dequantize operations after them. PiperOrigin-RevId: 198955123 --- tensorflow/contrib/lite/toco/BUILD | 1 + tensorflow/contrib/lite/toco/args.h | 1 + .../lite/toco/g3doc/cmdline_reference.md | 4 + .../graph_transformations.h | 1 + .../graph_transformations/quantize_weights.cc | 108 +++++++++++ .../toco/graph_transformations/tests/BUILD | 20 ++- .../tests/quantize_weights_test.cc | 167 ++++++++++++++++++ .../resolve_constant_concatenation_test.cc | 4 +- .../contrib/lite/toco/toco_cmdline_flags.cc | 11 ++ tensorflow/contrib/lite/toco/toco_flags.proto | 7 +- tensorflow/contrib/lite/toco/toco_tooling.cc | 3 + 11 files changed, 319 insertions(+), 8 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/quantize_weights.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/quantize_weights_test.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index b8acc9a8e0..7ea4f32ef6 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -245,6 +245,7 @@ cc_library( "graph_transformations/quantization_util.cc", "graph_transformations/quantization_util.h", "graph_transformations/quantize.cc", + "graph_transformations/quantize_weights.cc", "graph_transformations/read_fake_quant_min_max.cc", 
"graph_transformations/remove_final_dequantize_op.cc", "graph_transformations/remove_tensorflow_assert.cc", diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index 77bc54f191..9f5ca66d05 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -234,6 +234,7 @@ struct ParsedTocoFlags { Arg drop_fake_quant = Arg(false); Arg reorder_across_fake_quant = Arg(false); Arg allow_custom_ops = Arg(false); + Arg quantize_weights = Arg(false); // Deprecated flags Arg input_type; Arg input_types; diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md index 9e99287f82..a8381169b8 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md @@ -203,6 +203,10 @@ have. graph transformations on them, at the cost of no longer faithfully matching inference and training arithmetic. +* `--quantize_weights`. Type: boolean. Default: false. Store weights as + quantized weights followed by dequantize operations. Computation is still + done in float, but reduces model size (at the cost of accuracy and latency). 
+ ## Logging flags The following are standard Google logging flags: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 8da242aa9c..1bc7557d46 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -139,6 +139,7 @@ DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits); DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes) DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax) DECLARE_GRAPH_TRANSFORMATION(Quantize) +DECLARE_GRAPH_TRANSFORMATION(QuantizeWeights) DECLARE_GRAPH_TRANSFORMATION(RemoveFinalDequantizeOp) DECLARE_GRAPH_TRANSFORMATION(RemoveTensorFlowAssert) DECLARE_GRAPH_TRANSFORMATION(RemoveTensorFlowIdentity) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize_weights.cc new file mode 100644 index 0000000000..88ea0945e7 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize_weights.cc @@ -0,0 +1,108 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/quantization_util.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +// The minimum number of elements a weights array must have to be quantized +// by this transformation. +// TODO(suharshs): Make this minimum size configurable. +const int kWeightsMinSize = 1024; + +// Gets the quantization params from the float array. +void GetQuantizationParamsFromArray(const Array& array, + QuantizationParams* params) { + const std::vector& float_vals = + array.GetBuffer().data; + auto minmax = std::minmax_element(float_vals.begin(), float_vals.end()); + MinMax toco_minmax; + toco_minmax.min = *minmax.first; + toco_minmax.max = *minmax.second; + GetQuantizationParams(ArrayDataType::kUint8, toco_minmax, params); +} + +} // namespace + +bool QuantizeWeights::Run(Model* model, std::size_t op_index) { + const auto op_it = model->operators.begin() + op_index; + Operator* op = op_it->get(); + + // Get the weights tensor, if the current operator has one. + int weights_index; + if (op->type == OperatorType::kConv || + op->type == OperatorType::kDepthwiseConv || + op->type == OperatorType::kFullyConnected) { + weights_index = 1; + } else if (op->type == OperatorType::kLstmCell) { + weights_index = LstmCellOperator::WEIGHTS_INPUT; + } else { + return false; + } + + // Return early if the array isn't a constant param, this can happen in early + // transformation passes until transpose operations following the weight array + // are resolved. + const string weights = op->inputs[weights_index]; + if (!IsConstantParameterArray(*model, weights)) { + return false; + } + + // Return early if the weight tensor is not type float. 
+ Array& weights_array = model->GetArray(weights); + if (weights_array.data_type != ArrayDataType::kFloat) { + return false; + } + + // Return early if the tensor is too small. Small tensors don't take up too + // much space and can result in bad quantization results. + if (weights_array.GetBuffer().data.size() < + kWeightsMinSize) { + return false; + } + + // Quantize the weight tensor to type kUint8. + QuantizationParams params; + GetQuantizationParamsFromArray(weights_array, &params); + QuantizeArray(this, model, weights, ArrayDataType::kUint8, params); + + // Insert a Dequantize operation after the quantized weights tensor. + auto* dequantize_op = new DequantizeOperator; + model->operators.emplace(op_it, dequantize_op); + + // Create a new intermediate tensor to connect the Dequantize op to the + // original op. + const string dequantized_output = + AvailableArrayName(*model, weights + "_dequantized"); + Array& dequantized_output_array = model->GetOrCreateArray(dequantized_output); + dequantized_output_array.data_type = ArrayDataType::kFloat; + + // Connect up the new Dequantize op with the weights and original op. 
+ op->inputs[weights_index] = dequantized_output; + dequantize_op->inputs = {weights}; + dequantize_op->outputs = {dequantized_output}; + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index 8dcd4adc90..95e8433be2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -8,8 +8,8 @@ load( ) tf_cc_test( - name = "resolve_constant_concatenation_test", - srcs = ["resolve_constant_concatenation_test.cc"], + name = "lstm_utils_test", + srcs = ["lstm_utils_test.cc"], deps = [ "//tensorflow/contrib/lite/toco:graph_transformations", "//tensorflow/contrib/lite/toco:model", @@ -19,8 +19,20 @@ tf_cc_test( ) tf_cc_test( - name = "lstm_utils_test", - srcs = ["lstm_utils_test.cc"], + name = "quantize_weights_test", + srcs = ["quantize_weights_test.cc"], + deps = [ + "//tensorflow/contrib/lite/toco:graph_transformations", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +tf_cc_test( + name = "resolve_constant_concatenation_test", + srcs = ["resolve_constant_concatenation_test.cc"], deps = [ "//tensorflow/contrib/lite/toco:graph_transformations", "//tensorflow/contrib/lite/toco:model", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/quantize_weights_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/quantize_weights_test.cc new file mode 100644 index 0000000000..c05eb0929f --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/quantize_weights_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include +#include +#include "absl/memory/memory.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +class QuantizeWeightsTest : public ::testing::Test { + protected: + QuantizeWeightsTest() {} + + // The name of the weights input array. + const string kWeightsName = "weights"; + // The zero_point of the values in the input array. + const int kZeroPoint = 128; + + // Prepare a hypothetical TOCO model of a quantizable fully connected float + // layer. + void PrepareModel(Model* model, int elements_per_dim) { + std::vector fc_input_names = {"inputs", kWeightsName}; + + const int kDim = 4; + const int buf_size = std::pow(elements_per_dim, static_cast(kDim)); + auto in_buf = absl::make_unique(buf_size); + // Initialize the array with values from -128.0 to 127.0, since these values + // should be exactly representable by quantization. + for (int i = 0; i < buf_size; i++) { + in_buf[i] = static_cast(i % 256 - kZeroPoint); + } + + for (const string& fc_input_name : fc_input_names) { + Array& in_array = model->GetOrCreateArray(fc_input_name); + in_array.data_type = ArrayDataType::kFloat; + + // Initialize shape for the input array. 
+ Shape* in_array_shape = in_array.mutable_shape(); + std::vector* in_array_shape_dim = in_array_shape->mutable_dims(); + in_array_shape_dim->resize(kDim, elements_per_dim); + auto& in_array_buffer = + in_array.GetMutableBuffer(); + in_array_buffer.data.resize(buf_size); + float* buf_ptr = + in_array.GetMutableBuffer().data.data(); + std::copy(in_buf.get(), in_buf.get() + buf_size, buf_ptr); + } + + auto* fc_op = new FullyConnectedOperator; + fc_op->inputs = fc_input_names; + fc_op->outputs = {"fc_op_outputs"}; + Array& out_array = model->GetOrCreateArray(fc_op->outputs[0]); + out_array.data_type = ArrayDataType::kFloat; + Shape* out_array_shape = out_array.mutable_shape(); + std::vector* out_array_shape_dim = out_array_shape->mutable_dims(); + out_array_shape_dim->resize(kDim, elements_per_dim); + model->operators.push_back(std::unique_ptr(fc_op)); + } +}; + +TEST_F(QuantizeWeightsTest, QuantizedFullyConnected) { + // Test that weight arrays that are large enough are quantized. + Model model; + // 6 elements per dim gives us 1296 elements, which is sufficient to be + // quantized. + PrepareModel(&model, 6); + + // Check the state of the graph before the transformation. + const auto& float_array_map = model.GetArrayMap(); + EXPECT_EQ(float_array_map.size(), 3); + // Before the transformation, all arrays should be type float. + for (const auto& element : float_array_map) { + EXPECT_EQ(element.second->data_type, ArrayDataType::kFloat); + } + const std::vector float_weight_vals = + model.GetArray(kWeightsName).GetBuffer().data; + + // Invoke the transformation. + GraphTransformationsSet graph_transformation_set; + graph_transformation_set.Add(new toco::QuantizeWeights); + (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + + // Check the state of the graph after the transformation. 
+ const auto& quantized_array_map = model.GetArrayMap(); + EXPECT_EQ(quantized_array_map.size(), 4); + // After the transformation, three arrays should be type float and one array + // should be uint8. + int num_float = 0; + int num_uint8 = 0; + for (const auto& element : quantized_array_map) { + if (element.second->data_type == ArrayDataType::kFloat) { + num_float++; + } else if (element.second->data_type == ArrayDataType::kUint8) { + num_uint8++; + } else { + FAIL() << "Unexpected array type."; + } + } + EXPECT_EQ(num_float, 3); + EXPECT_EQ(num_uint8, 1); + // Ensure that the values were quantized correctly. + const std::vector& quantized_weight_vals = + model.GetArray(kWeightsName).GetBuffer().data; + for (int i = 0; i < quantized_weight_vals.size(); i++) { + EXPECT_EQ(quantized_weight_vals[i], float_weight_vals[i] + kZeroPoint); + } + + // Ensure that a Dequantize operator has been inserted before the + // FullyConnectedLayer. + EXPECT_EQ(model.operators[0]->type, OperatorType::kDequantize); +} + +TEST_F(QuantizeWeightsTest, NotQuantizedFullyConnected) { + // Test that weight arrays that are too small are left untouched. + Model model; + // 5 elements per dim gives us 625 elements, which is NOT sufficient to be + // quantized. + PrepareModel(&model, 5); + + // Check the state of the graph before the transformation. + const auto& float_array_map = model.GetArrayMap(); + EXPECT_EQ(float_array_map.size(), 3); + // Before the transformation, all arrays should be type float. + for (auto it = float_array_map.begin(); it != float_array_map.end(); it++) { + EXPECT_EQ(it->second->data_type, ArrayDataType::kFloat); + } + std::vector float_weight_vals = + model.GetArray(kWeightsName).GetBuffer().data; + + // Invoke the transformation. + GraphTransformationsSet graph_transformation_set; + graph_transformation_set.Add(new toco::QuantizeWeights); + (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + + // Check the state of the graph after the transformation. 
+ const auto& post_array_map = model.GetArrayMap(); + EXPECT_EQ(post_array_map.size(), 3); + for (auto it = post_array_map.begin(); it != post_array_map.end(); it++) { + EXPECT_EQ(it->second->data_type, ArrayDataType::kFloat); + } + // Ensure that the values remain unchanged. + std::vector const& quantized_weight_vals = + model.GetArray(kWeightsName).GetBuffer().data; + for (int i = 0; i < quantized_weight_vals.size(); i++) { + EXPECT_EQ(quantized_weight_vals[i], float_weight_vals[i]); + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc index 3a1d175b98..66cfed4ac2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc @@ -12,9 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include #include -#include #include #include @@ -126,7 +124,7 @@ class ResolveConstantConcatenationTest : public ::testing::Test { Array& in_array = model->GetOrCreateArray(concat_input_name); in_array.data_type = ArrayDataType::kFloat; - // Initialize shape for the input array. + // Initialize shape for the input array. 
Shape* in_array_shape = in_array.mutable_shape(); std::vector* in_array_shape_dim = in_array_shape->mutable_dims(); for (int i = 0; i < kDim; i++) { diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc index 9c6ad673ab..87a1e429b9 100644 --- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc @@ -158,6 +158,11 @@ bool ParseTocoFlagsFromCommandLineFlags( parsed_flags.split_tflite_lstm_inputs.default_value(), "Split the LSTM inputs from 5 tensors to 18 tensors for TFLite. " "Ignored if the output format is not TFLite."), + Flag("quantize_weights", parsed_flags.quantize_weights.bind(), + parsed_flags.quantize_weights.default_value(), + "Store weights as quantized weights followed by dequantize " + "operations. Computation is still done in float, but reduces model " + "size (at the cost of accuracy and latency)."), }; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); @@ -251,6 +256,7 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, FlagRequirement::kNone); READ_TOCO_FLAG(dedupe_array_min_size_bytes, FlagRequirement::kNone); READ_TOCO_FLAG(split_tflite_lstm_inputs, FlagRequirement::kNone); + READ_TOCO_FLAG(quantize_weights, FlagRequirement::kNone); // Deprecated flag handling. 
if (parsed_toco_flags.input_type.specified()) { @@ -284,6 +290,11 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, QCHECK(toco::IODataType_Parse(input_types[0], &input_type)); toco_flags->set_inference_input_type(input_type); } + if (parsed_toco_flags.quantize_weights.value()) { + QCHECK_NE(toco_flags->inference_type(), IODataType::QUANTIZED_UINT8) + << "quantize_weights is not supported with inference_type " + "QUANTIZED_UINT8."; + } #undef READ_TOCO_FLAG #undef PARSE_TOCO_FLAG diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 15f755c104..4fe57879fb 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -37,7 +37,7 @@ enum FileFormat { // of as properties of models, instead describing how models are to be // processed in the context of the present tooling job. // -// Next ID to use: 20. +// Next ID to use: 21. message TocoFlags { // Input file format optional FileFormat input_format = 1; @@ -169,4 +169,9 @@ message TocoFlags { // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite. // Ignored if the output format is not TFLite. optional bool split_tflite_lstm_inputs = 19 [default = true]; + + // Store weights as quantized weights followed by dequantize operations. + // Computation is still done in float, but reduces model size (at the cost of + // accuracy and latency). 
+ optional bool quantize_weights = 20 [default = false]; } diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index a648883d1f..1fe76f8163 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -269,6 +269,9 @@ void Transform(const TocoFlags& toco_flags, Model* model) { transformations.Add(new toco::MergeLstmCellInputs); } } + if (toco_flags.quantize_weights()) { + transformations.Add(new QuantizeWeights); + } transformations.Add(new ResolveConstantConcatenation); RunGraphTransformations(model, "general graph transformations", transformations); -- GitLab From d077fb3bcc0483f6326714161bb4b3f51a078332 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 21:20:58 -0700 Subject: [PATCH 427/902] Replace boilerplate code with function template. PiperOrigin-RevId: 198963930 --- .../contrib/lite/toco/import_tensorflow.cc | 561 ++---------------- 1 file changed, 64 insertions(+), 497 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 94ec7c24d4..0a57015d29 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -656,81 +656,6 @@ void ConvertRandomUniform(const NodeDef& node, model->operators.emplace_back(std::move(op)); } -void ConvertReluOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Relu"); - CheckInputsCount(node, tf_import_flags, 1); - const auto& input_name = node.input(0); - auto* relu = new ReluOperator; - relu->inputs.push_back(input_name); - relu->outputs.push_back(node.name()); - model->operators.emplace_back(relu); -} - -void ConvertRelu6Operator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Relu6"); - CheckInputsCount(node, tf_import_flags, 1); - - 
const auto& input_name = node.input(0); - auto* op = new Relu6Operator; - op->inputs.push_back(input_name); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertLogOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Log"); - CheckInputsCount(node, tf_import_flags, 1); - - auto op = absl::make_unique(); - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(std::move(op)); -} - -void ConvertLogisticOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Sigmoid"); - CheckInputsCount(node, tf_import_flags, 1); - - const auto& input_name = node.input(0); - auto* op = new LogisticOperator; - op->inputs.push_back(input_name); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertTanhOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Tanh"); - CheckInputsCount(node, tf_import_flags, 1); - - const auto& input_name = node.input(0); - auto* op = new TanhOperator; - op->inputs.push_back(input_name); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertDivOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK(node.op() == "Div" || node.op() == "RealDiv"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new DivOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertIdentityOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -787,38 +712,6 @@ void ConvertFakeQuantWithMinMaxVars( model->operators.emplace_back(op); } -void ConvertNegOperator(const NodeDef& node, - const TensorFlowImportFlags& 
tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Neg"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new NegOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertRsqrtOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Rsqrt"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new TensorFlowRsqrtOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertSqrtOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Sqrt"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new TensorFlowSqrtOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} void ConvertSqueezeOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, @@ -840,66 +733,6 @@ void ConvertSqueezeOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertSquareOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Square"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new TensorFlowSquareOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertAddOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Add"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new AddOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertAddNOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* 
model) { - CHECK_EQ(node.op(), "AddN"); - const int num_inputs = GetInputsCount(node, tf_import_flags); - auto* op = new AddNOperator; - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertMulOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Mul"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new MulOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertSubOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Sub"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new SubOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertSumOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -915,67 +748,6 @@ void ConvertSumOperator(const NodeDef& node, } } -void ConvertTileOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Tile"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TensorFlowTileOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertSliceOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Slice"); - CheckInputsCount(node, tf_import_flags, 3); - auto* op = new SliceOperator; - for (int i = 0; i < 3; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - 
-void ConvertPadOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Pad"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new PadOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertPadV2Operator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "PadV2"); - CheckInputsCount(node, tf_import_flags, 3); - auto* op = new PadV2Operator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->inputs.push_back(node.input(2)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertShapeOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Shape"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new TensorFlowShapeOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertSplitOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -993,18 +765,6 @@ void ConvertSplitOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertMergeOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Merge"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TensorFlowMergeOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertSwitchOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1034,18 +794,6 @@ void ConvertSoftmaxOperator(const NodeDef& node, model->operators.emplace_back(softmax); } -void 
ConvertLogSoftmaxOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "LogSoftmax"); - CheckInputsCount(node, tf_import_flags, 1); - const auto& input_name = node.input(0); - auto* log_softmax = new LogSoftmaxOperator; - log_softmax->inputs.push_back(input_name); - log_softmax->outputs.push_back(node.name()); - model->operators.emplace_back(log_softmax); -} - void ConvertLRNOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1142,17 +890,6 @@ void ConvertAvgPoolOperator(const NodeDef& node, model->operators.emplace_back(avgpool); } -void ConvertReshapeOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Reshape"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TensorFlowReshapeOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} void ConvertBatchMatMulOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, @@ -1215,24 +952,12 @@ void ConvertConcatOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertAllOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "All"); - auto* op = new TensorFlowAllOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertAssertOperator(const NodeDef& node, +// This method supports simple operators without additional attributes. 
+template <typename Op> +void ConvertSimpleOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { - CHECK_EQ(node.op(), "Assert"); - auto* op = new TensorFlowAssertOperator; + auto* op = new Op; const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); @@ -1241,69 +966,13 @@ void ConvertAssertOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertLessOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Less"); - auto* op = new TensorFlowLessOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertLessEqualOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "LessEqual"); - auto* op = new TensorFlowLessEqualOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertSinOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Sin"); - auto* op = new SinOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertGreaterOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Greater"); - auto* op = new TensorFlowGreaterOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs;
++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertGreaterEqualOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "GreaterEqual"); - auto* op = new TensorFlowGreaterEqualOperator; - const int num_inputs = GetInputsCount(node, tf_import_flags); - for (int i = 0; i < num_inputs; ++i) { - op->inputs.push_back(node.input(i)); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); +// This method supports simple operators without additional attributes. +template <typename Op, unsigned int NumInputs> +void ConvertSimpleOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CheckInputsCount(node, tf_import_flags, NumInputs); + ConvertSimpleOperator<Op>(node, tf_import_flags, model); } void ConvertMaxOperator(const NodeDef& node, @@ -1336,29 +1005,6 @@ void ConvertMinOperator(const NodeDef& node, } } -void ConvertMaximumOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Maximum"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TensorFlowMaximumOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertMinimumOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Minimum"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TensorFlowMinimumOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} void ConvertUnsupportedOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, @@ -1387,19 +1033,6 @@ void ConvertUnsupportedOperator(const NodeDef& node, } } -void ConvertSelectOperator(const
NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CheckInputsCount(node, tf_import_flags, 3); - - auto* op = new SelectOperator; - for (const auto& input : node.input()) { - op->inputs.push_back(input); - } - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertStridedSliceOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1678,17 +1311,6 @@ void ConvertBatchToSpaceNDOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertExpOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Exp"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new ExpOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertMeanOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1802,53 +1424,6 @@ void ConvertTransposeConvOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertExpandDimsOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "ExpandDims"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new ExpandDimsOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertFillOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Fill"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new FillOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertFloorDivOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* 
model) { - CHECK_EQ(node.op(), "FloorDiv"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new FloorDivOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - -void ConvertFloorModOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "FloorMod"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new FloorModOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} void ConvertRangeOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, @@ -1869,17 +1444,6 @@ void ConvertRangeOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertRankOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Rank"); - CheckInputsCount(node, tf_import_flags, 1); - auto* op = new RankOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} - void ConvertStackOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1900,17 +1464,6 @@ void ConvertStackOperator(const NodeDef& node, model->operators.emplace_back(op); } -void ConvertTransposeOperator(const NodeDef& node, - const TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Transpose"); - CheckInputsCount(node, tf_import_flags, 2); - auto* op = new TransposeOperator; - op->inputs.push_back(node.input(0)); - op->inputs.push_back(node.input(1)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); -} // Some TensorFlow ops only occur in graph cycles, representing // control flow. 
We do not currently support control flow, so we wouldn't @@ -2174,25 +1727,26 @@ Status ImportTensorFlowNode(const tensorflow::NodeDef& node, } else if (node.op() == "BiasAdd") { ConvertBiasAddOperator(node, tf_import_flags, model); } else if (node.op() == "Relu") { - ConvertReluOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Relu6") { - ConvertRelu6Operator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Sigmoid") { - ConvertLogisticOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Tanh") { - ConvertTanhOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "MaxPool") { ConvertMaxPoolOperator(node, tf_import_flags, model); } else if (node.op() == "AvgPool") { ConvertAvgPoolOperator(node, tf_import_flags, model); } else if (node.op() == "Reshape") { - ConvertReshapeOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "BatchMatMul") { ConvertBatchMatMulOperator(node, tf_import_flags, model); } else if (node.op() == "MatMul") { ConvertMatMulOperator(node, tf_import_flags, model); } else if (node.op() == "Div" || node.op() == "RealDiv") { - ConvertDivOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Identity" || node.op() == "CheckNumerics" || node.op() == "StopGradient") { ConvertIdentityOperator(node, tf_import_flags, model); @@ -2201,27 +1755,31 @@ Status ImportTensorFlowNode(const tensorflow::NodeDef& node, } else if (node.op() == "FakeQuantWithMinMaxArgs") { ConvertFakeQuantWithMinMaxArgs(node, tf_import_flags, model); } else if (node.op() == "Neg") { - ConvertNegOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else 
if (node.op() == "Rsqrt") { - ConvertRsqrtOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Squeeze") { ConvertSqueezeOperator(node, tf_import_flags, model); } else if (node.op() == "Sqrt") { - ConvertSqrtOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Square") { - ConvertSquareOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Add") { - ConvertAddOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "AddN") { - ConvertAddNOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Mul") { - ConvertMulOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Sub") { - ConvertSubOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Sum") { ConvertSumOperator(node, tf_import_flags, model); } else if (node.op() == "Tile") { - ConvertTileOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Concat" || node.op() == "ConcatV2") { ConvertConcatOperator(node, tf_import_flags, model); } else if (node.op() == "LRN") { @@ -2229,41 +1787,50 @@ Status ImportTensorFlowNode(const tensorflow::NodeDef& node, } else if (node.op() == "Softmax") { ConvertSoftmaxOperator(node, tf_import_flags, model); } else if (node.op() == "Log") { - ConvertLogOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "LogSoftmax") { - ConvertLogSoftmaxOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "All") { - ConvertAllOperator(node, 
tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Assert") { - ConvertAssertOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Less") { - ConvertLessOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "LessEqual") { - ConvertLessEqualOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Greater") { - ConvertGreaterOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "GreaterEqual") { - ConvertGreaterEqualOperator(node, tf_import_flags, model); + ConvertSimpleOperator( + node, tf_import_flags, model); } else if (node.op() == "Max") { ConvertMaxOperator(node, tf_import_flags, model); } else if (node.op() == "Min") { ConvertMinOperator(node, tf_import_flags, model); } else if (node.op() == "Maximum") { - ConvertMaximumOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Minimum") { - ConvertMinimumOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Merge") { - ConvertMergeOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if (node.op() == "Pad") { - ConvertPadOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "PadV2") { - ConvertPadV2Operator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "StridedSlice") { ConvertStridedSliceOperator(node, tf_import_flags, model); } else if (node.op() == "Shape") { - ConvertShapeOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, + model); } else if 
(node.op() == "Slice") { - ConvertSliceOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Split") { ConvertSplitOperator(node, tf_import_flags, model); } else if (node.op() == "Switch") { @@ -2300,25 +1867,25 @@ Status ImportTensorFlowNode(const tensorflow::NodeDef& node, } else if (node.op() == "NextIteration") { ConvertOperatorSpecialCasedAsRNNBackEdge(node, tf_import_flags, model); } else if (node.op() == "ExpandDims") { - ConvertExpandDimsOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Fill") { - ConvertFillOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "FloorDiv") { - ConvertFloorDivOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "FloorMod") { - ConvertFloorModOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Range") { ConvertRangeOperator(node, tf_import_flags, model); } else if (node.op() == "Rank") { - ConvertRankOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Stack" || node.op() == "Pack") { ConvertStackOperator(node, tf_import_flags, model); } else if (node.op() == "Transpose") { - ConvertTransposeOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "ArgMax") { ConvertArgMaxOperator(node, tf_import_flags, model); } else if (node.op() == "Exp") { - ConvertExpOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "TopK" || node.op() == "TopKV2") { ConvertTopKV2Operator(node, tf_import_flags, model); } else if (node.op() == "DynamicPartition") { @@ -2329,9 +1896,9 @@ Status ImportTensorFlowNode(const 
tensorflow::NodeDef& node, } else if (node.op() == "RandomUniform") { ConvertRandomUniform(node, tf_import_flags, model); } else if (node.op() == "Sin") { - ConvertSinOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "Select") { - ConvertSelectOperator(node, tf_import_flags, model); + ConvertSimpleOperator(node, tf_import_flags, model); } else if (node.op() == "SparseToDense") { ConvertSparseToDenseOperator(node, tf_import_flags, model); } else { -- GitLab From 14daf02aed8d54d14c0b235fe331e3757a0640df Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 2 Jun 2018 12:29:12 +0800 Subject: [PATCH 428/902] [XLA] Explicitly use ::xla::Layout MSVC uses delayed template parsing, so it confuses `Layout` as `::xla::match::Layout` below instead of `::xla::Layout`. --- tensorflow/compiler/xla/service/pattern_matcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index d3bc47e61e..2515222cf2 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -204,7 +204,7 @@ class LayoutPattern { // Modifies the pattern to match only if the layout equals the given proto. // The layout must outlive the returned pattern. constexpr LayoutPattern> EqualTo( - const Layout* layout) const { + const ::xla::Layout* layout) const { return LayoutPattern>( LayoutPatternEqualImpl(impl_, layout), matched_layout_); } -- GitLab From 0303c029d99c4080a3929a8320d9972cc4b973d5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 2 Jun 2018 15:28:04 +0000 Subject: [PATCH 429/902] Remove duplicate imports Inside ffmpeg/__init__.py the last import line: ``` from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video ``` is a duplicate of the previous import. This fix removes the duplicate. 
Signed-off-by: Yong Tang --- tensorflow/contrib/ffmpeg/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/ffmpeg/__init__.py b/tensorflow/contrib/ffmpeg/__init__.py index daba965a98..484ffee3e7 100644 --- a/tensorflow/contrib/ffmpeg/__init__.py +++ b/tensorflow/contrib/ffmpeg/__init__.py @@ -28,7 +28,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio -from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_video from tensorflow.python.util.all_util import remove_undocumented -- GitLab From 72307dfb415e44d95bf72850bff7b7106385cda0 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 2 Jun 2018 15:29:59 +0000 Subject: [PATCH 430/902] Remove duplicate import of gen_decode_video_op_py Signed-off-by: Yong Tang --- tensorflow/contrib/ffmpeg/ffmpeg_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py index 020b5c99c6..b1b5126d9e 100644 --- a/tensorflow/contrib/ffmpeg/ffmpeg_ops.py +++ b/tensorflow/contrib/ffmpeg/ffmpeg_ops.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py -from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader -- GitLab From a06e521204d7b5a2dd27de44efbab352ff918aa7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 2 Jun 2018 12:35:32 -0700 Subject: [PATCH 431/902] Adding support for the int() and float() built-ins. 
PiperOrigin-RevId: 199001807 --- .../autograph/converters/builtin_functions.py | 2 +- tensorflow/contrib/autograph/utils/BUILD | 2 ++ .../contrib/autograph/utils/builtins.py | 23 ++++++++++++++++++- .../contrib/autograph/utils/builtins_test.py | 17 +++++++++++++- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/builtin_functions.py b/tensorflow/contrib/autograph/converters/builtin_functions.py index 46e39da16a..231e4ee35a 100644 --- a/tensorflow/contrib/autograph/converters/builtin_functions.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions.py @@ -48,7 +48,7 @@ class BuiltinFunctionTransformer(transformer.Base): # TODO(mdan): This won't work if the function was hidden. # TODO(mdan): Rely on the live_val and use inspect_utils.is_builtin instead. if (isinstance(node.func, gast.Name) and - node.func.id in ('len', 'range', 'xrange')): + node.func.id in ('len', 'range', 'xrange', 'float', 'int')): return self._convert_builtin(node) # Print needs to be handled separately because it can be read as statement. 
if isinstance(node.func, gast.Name) and node.func.id == 'print': diff --git a/tensorflow/contrib/autograph/utils/BUILD b/tensorflow/contrib/autograph/utils/BUILD index d3a1b94688..d82c17bf2a 100644 --- a/tensorflow/contrib/autograph/utils/BUILD +++ b/tensorflow/contrib/autograph/utils/BUILD @@ -33,6 +33,8 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], deps = [ + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/python:dtypes", "//tensorflow/python:list_ops", "//tensorflow/python:script_ops", "//tensorflow/python/data/ops:dataset_ops", diff --git a/tensorflow/contrib/autograph/utils/builtins.py b/tensorflow/contrib/autograph/utils/builtins.py index 211e8eaee9..998087e056 100644 --- a/tensorflow/contrib/autograph/utils/builtins.py +++ b/tensorflow/contrib/autograph/utils/builtins.py @@ -24,6 +24,7 @@ import six from tensorflow.contrib.autograph.utils import py_func from tensorflow.contrib.autograph.utils import type_check +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops @@ -38,7 +39,13 @@ def dynamic_builtin(f, *args, **kwargs): return dynamic_range(*args, **kwargs) if f is range: return dynamic_range(*args, **kwargs) - raise ValueError('%s is not supported' % f) + if f is int: + return dynamic_int(*args, **kwargs) + if f is float: + return dynamic_float(*args, **kwargs) + + raise NotImplementedError( + 'The "%s" builtin is not yet supported.' 
% f.__name__) def dynamic_len(list_or_tensor): @@ -52,6 +59,20 @@ def dynamic_len(list_or_tensor): return len(list_or_tensor) +def dynamic_int(num_or_tensor, **kwargs): + """Implementation of int() using dynamic dispatch.""" + if tensor_util.is_tensor(num_or_tensor): + return math_ops.cast(num_or_tensor, dtype=dtypes.int32, **kwargs) + return int(num_or_tensor) + + +def dynamic_float(num_or_tensor, **kwargs): + """Implementation of float() using dynamic dispatch.""" + if tensor_util.is_tensor(num_or_tensor): + return math_ops.cast(num_or_tensor, dtype=dtypes.float32, **kwargs) + return float(num_or_tensor) + + def dynamic_range(start_or_stop, stop=None, step=None): """Implementation of range using dynamic dispatch.""" if type_check.is_tensor(start_or_stop, stop, step): diff --git a/tensorflow/contrib/autograph/utils/builtins_test.py b/tensorflow/contrib/autograph/utils/builtins_test.py index 163e698407..0c2312178a 100644 --- a/tensorflow/contrib/autograph/utils/builtins_test.py +++ b/tensorflow/contrib/autograph/utils/builtins_test.py @@ -24,6 +24,7 @@ import six from tensorflow.contrib.autograph.utils import builtins from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.platform import test @@ -77,7 +78,7 @@ class BuiltinsTest(test.TestCase): return x # Functions that just have the names of builtins are rejected. 
- with self.assertRaises(ValueError): + with self.assertRaises(NotImplementedError): self.assertEqual(builtins.dynamic_builtin(range, 1), 1) if six.PY2: self.assertListEqual( @@ -87,6 +88,20 @@ class BuiltinsTest(test.TestCase): self.assertListEqual( list(builtins.dynamic_builtin(six.moves.xrange, 3)), [0, 1, 2]) + def test_casts(self): + i = constant_op.constant(2, dtype=dtypes.int32) + f = constant_op.constant(1.0, dtype=dtypes.float32) + + self.assertEqual(builtins.dynamic_builtin(int, i).dtype, dtypes.int32) + self.assertEqual(builtins.dynamic_builtin(int, f).dtype, dtypes.int32) + self.assertEqual(builtins.dynamic_builtin(float, i).dtype, dtypes.float32) + self.assertEqual(builtins.dynamic_builtin(float, f).dtype, dtypes.float32) + + self.assertEqual(builtins.dynamic_builtin(int, True), 1) + self.assertEqual(builtins.dynamic_builtin(int, False), 0) + self.assertEqual(builtins.dynamic_builtin(float, True), 1.0) + self.assertEqual(builtins.dynamic_builtin(float, False), 0.0) + def test_dynamic_print_tf(self): try: out_capturer = six.StringIO() -- GitLab From d23f115d89ad6111674f53135d669cb2d2c086f0 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Sat, 2 Jun 2018 14:06:14 -0700 Subject: [PATCH 432/902] Don't cluster Identity nodes that forward tensor refs XLA cannot implement the forward-tensor-ref semantic -- there is no guaranteed aliasing between the input and output of the XLA cluster. 
PiperOrigin-RevId: 199005227 --- .../compiler/jit/mark_for_compilation_pass.cc | 26 ++++++++++ .../jit/mark_for_compilation_pass_test.cc | 47 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 8e2ee0f1d7..07ee93d79e 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -46,6 +46,12 @@ const char* const kXlaOutsideCompilationAttr = "_XlaOutsideCompilation"; namespace { +// Returns true if, when executed in TensorFlow, `node` is guaranteed to forward +// a ref tensor input to its output. +static bool AlwaysForwardsRefInput(const Node& node) { + return node.IsIdentity(); +} + bool HasXLAKernel(const Node& node, const DeviceType& jit_device_type) { // There is a SymbolicGradient kernel on the XLA_JIT device, but the gradient // is really a kind of function call and will be handled by @@ -60,6 +66,26 @@ bool HasXLAKernel(const Node& node, const DeviceType& jit_device_type) { return false; } } + + // XLA does not offer guaranteed aliasing between the input and output of the + // XLA cluster so it can't implement the forward-tensor-ref semantic. Leave + // such nodes out of XLA clusters. 
+ if (AlwaysForwardsRefInput(node)) { + for (const Edge* incoming_edge : node.in_edges()) { + if (incoming_edge->IsControlEdge()) { + continue; + } + + Node* incoming_node = incoming_edge->src(); + if (IsRefType(incoming_node->output_type(incoming_edge->src_output()))) { + VLOG(2) << "Not clustering " << node.def().ShortDebugString() + << " because of ref input " << incoming_node->name() << " " + << incoming_node->type_string(); + return false; + } + } + } + return FindKernelDef(jit_device_type, node.def(), nullptr, nullptr).ok(); } diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 703d8825d7..772c92d369 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -633,5 +633,52 @@ TEST(XlaCompilationTest, ConstOp) { } } +TEST(XlaCompilationTest, DontClusterIdentityWithRefInput) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output variable = ops::Variable(root.WithOpName("variable"), + PartialTensorShape{}, DT_FLOAT); + Output read = ops::Identity(root.WithOpName("read"), variable); + Output neg = ops::Negate(root.WithOpName("negate"), read); + Output add = ops::Add(root.WithOpName("add"), neg, neg); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + + std::unordered_map clusters = GetClusters(*graph); + + ASSERT_FALSE(clusters.empty()); + string cluster_name = clusters.begin()->second; + + std::unordered_map expected_clusters( + {{"negate", cluster_name}, {"add", cluster_name}}); + EXPECT_EQ(clusters, expected_clusters); +} + +TEST(XlaCompilationTest, ClusterIdentityWithNonRefInput) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output variable = ops::Variable(root.WithOpName("variable"), + PartialTensorShape{}, DT_FLOAT); + Output read = ops::Identity(root.WithOpName("read"), variable); 
+ Output neg = ops::Negate(root.WithOpName("negate"), read); + Output identity = ops::Negate(root.WithOpName("identity"), neg); + Output add = ops::Add(root.WithOpName("add"), identity, neg); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + TF_ASSERT_OK(root.ToGraph(graph.get())); + TF_ASSERT_OK(MarkForCompilation(&graph)); + + std::unordered_map clusters = GetClusters(*graph); + + ASSERT_FALSE(clusters.empty()); + string cluster_name = clusters.begin()->second; + + std::unordered_map expected_clusters( + {{"negate", cluster_name}, + {"identity", cluster_name}, + {"add", cluster_name}}); + EXPECT_EQ(clusters, expected_clusters); +} + } // namespace } // namespace tensorflow -- GitLab From 5cc568290d9039e360e5705aeee64ed24984b9e7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 24 May 2018 21:20:41 +0000 Subject: [PATCH 433/902] Add complex numbers to the supported data types for UnsortedSegmentProd In the kernel implementation both UnsortedSegmentProd and UnsortedSegmentSum supports complex numbers. However, unlike UnsortedSegmentSum, the op of UnsortedSegmentProd does not register complex number types in math_ops.cc. This fix adds the supported complex number types to math_ops.cc, and enables test cases for it. 
Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 8c0b073ce4..929213656c 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1080,7 +1080,7 @@ REGISTER_OP("UnsortedSegmentProd") .Input("segment_ids: Tindices") .Input("num_segments: Tnumsegments") .Output("output: T") - .Attr("T: realnumbertype") + .Attr("T: numbertype") .Attr("Tindices: {int32,int64}") .Attr("Tnumsegments: {int32,int64} = DT_INT32") .SetShapeFn(UnsortedSegmentReductionShapeFn); -- GitLab From 32b6cb87a349bb6b2866a6ae2f2c24dcd3ad738f Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 24 May 2018 21:23:33 +0000 Subject: [PATCH 434/902] Enable test case for complex number types with unsorted_segment_prod Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/segment_reduction_ops_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 794be096b7..b3e1e8bec5 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -263,8 +263,7 @@ class UnsortedSegmentTest(SegmentReductionHelper): math_ops.unsorted_segment_max, lambda t: t.min)] # A subset of ops has been enabled for complex numbers - self.complex_ops_list = [(np.add, None, - math_ops.unsorted_segment_sum, lambda t: 0)] + self.complex_ops_list = [(np.add, None, math_ops.unsorted_segment_sum, lambda t: 0), (np.ndarray.__mul__, None, math_ops.unsorted_segment_prod, lambda t: 1)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + -- GitLab From 51d8cc8bff7c4455ee8054240facf44da846e492 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 2 Jun 2018 
21:57:32 +0000 Subject: [PATCH 435/902] Pylint fix Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/segment_reduction_ops_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index b3e1e8bec5..a82855dfeb 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -263,7 +263,10 @@ class UnsortedSegmentTest(SegmentReductionHelper): math_ops.unsorted_segment_max, lambda t: t.min)] # A subset of ops has been enabled for complex numbers - self.complex_ops_list = [(np.add, None, math_ops.unsorted_segment_sum, lambda t: 0), (np.ndarray.__mul__, None, math_ops.unsorted_segment_prod, lambda t: 1)] + self.complex_ops_list = [(np.add, None, + math_ops.unsorted_segment_sum, lambda t: 0), + (np.ndarray.__mul__, None, + math_ops.unsorted_segment_prod, lambda t: 1)] self.differentiable_dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] self.all_dtypes = (self.differentiable_dtypes + -- GitLab From 18526a0d2f85c32269d40e621a492759bee3aaf2 Mon Sep 17 00:00:00 2001 From: Karan Kaw Date: Sun, 3 Jun 2018 13:37:45 +0530 Subject: [PATCH 436/902] Mentioned Visual C++ 2015 dependency for Windows JNI library --- tensorflow/docs_src/install/install_java.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1256fb99c4..bbbabb6086 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -181,7 +181,7 @@ Take the following steps to install TensorFlow for Java on Windows: [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). 3. Extract this .zip file. 
- +__Note__: Please ensure that _MS Visual C++ 2015 Redistributable_ package is installed on Windows system as tensorflow JNI library (*tensorflow_jni.dll*) uses them at runtime. ### Validate the installation -- GitLab From c045937787d6dd221e0fac0f040d7bf68b2101be Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 3 Jun 2018 15:11:45 +0000 Subject: [PATCH 437/902] Add int16 support for `tf.as_string` In `tf.as_string`, integers are mostly supported (`int8`, `int32`, `int64`) but not `int16`. This fix adds the `int16` support for `tf.as_string`. Signed-off-by: Yong Tang --- tensorflow/core/kernels/as_string_op.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index 66c4aff3e3..a7757d1361 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -73,6 +73,7 @@ class AsStringOp : public OpKernel { } switch (dtype) { case DT_INT8: + case DT_INT16: case DT_INT32: strings::Appendf(&format_, "d"); break; @@ -129,6 +130,7 @@ class AsStringOp : public OpKernel { ENCODE_TYPE(DT_FLOAT, float, format_); ENCODE_TYPE(DT_DOUBLE, double, format_); ENCODE_TYPE(DT_INT8, int8, format_); + ENCODE_TYPE(DT_INT16, int16, format_); case (DT_BOOL): { const auto& input_flat = input_tensor->flat(); for (int i = 0; i < input_flat.size(); ++i) { -- GitLab From 56666ab5b3d807e4b070c4035e74d645f11ae817 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 3 Jun 2018 15:14:21 +0000 Subject: [PATCH 438/902] Register int16 as supported ops for AsString in string_ops.cc Signed-off-by: Yong Tang --- tensorflow/core/ops/string_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index 1d5c743a56..03bd4994bd 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("ReduceJoin") REGISTER_OP("AsString") .Input("input: T") 
.Output("output: string") - .Attr("T: {int32, int64, complex64, float, double, bool, int8}") + .Attr("T: {int8, int16, int32, int64, complex64, float, double, bool}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") -- GitLab From 82bedc89eb3a865ff56577822828a1c30105aff3 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 3 Jun 2018 15:14:48 +0000 Subject: [PATCH 439/902] Add test cases for int16 support of `tf.as_string` Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/as_string_op_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 9d54add264..94ed8ebd31 100644 --- a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -130,6 +130,16 @@ class AsStringOpTest(test.TestCase): result = output.eval(feed_dict={input_: int_inputs_}) self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testHalfInt(self): + s = lambda strs: [x.decode("ascii") for x in strs] + + with self.test_session(): + input_ = array_ops.placeholder(dtypes.int16) + int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] + output = string_ops.as_string(input_) + result = output.eval(feed_dict={input_: int_inputs_}) + self.assertAllEqual(s(result), ["%d" % x for x in int_inputs_]) + def testBool(self): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] -- GitLab From d836210e7d7c8bf54676fd4154f40920310cdb27 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sun, 3 Jun 2018 12:08:00 -0700 Subject: [PATCH 440/902] Re-Merge accidentally reverted change (#19727) * Add IBM ppc64le build to README. 
* ppc64le -> ppc64le CPU -- GitLab From 45198062b58245711d7446aa389f3b9aa2c1535f Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Sun, 3 Jun 2018 12:43:16 -0700 Subject: [PATCH 441/902] New NN API interface that uses the TensorFlow Lite delegate API. - Make nn_api a delegate in its own directory. - Use the delegate API to rewrite the graph. - Use only on static APIs right now. - This is initial preview of the delegate that only supports add and conv. PiperOrigin-RevId: 199055747 --- tensorflow/contrib/lite/BUILD | 10 + tensorflow/contrib/lite/context_util.h | 48 ++ tensorflow/contrib/lite/delegates/nnapi/BUILD | 31 ++ .../lite/delegates/nnapi/nnapi_delegate.cc | 464 ++++++++++++++++++ .../lite/delegates/nnapi/nnapi_delegate.h | 31 ++ .../delegates/nnapi/nnapi_delegate_test.cc | 82 ++++ tensorflow/contrib/lite/kernels/test_util.cc | 6 + tensorflow/contrib/lite/kernels/test_util.h | 10 + 8 files changed, 682 insertions(+) create mode 100644 tensorflow/contrib/lite/context_util.h create mode 100644 tensorflow/contrib/lite/delegates/nnapi/BUILD create mode 100644 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc create mode 100644 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h create mode 100644 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 55b984f260..9c804d2785 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -90,6 +90,16 @@ cc_library( deps = [":context"], ) +cc_library( + name = "kernel_api", + hdrs = [ + "builtin_op_data.h", + "builtin_ops.h", + "context.h", + "context_util.h", + ], +) + exports_files(["builtin_ops.h"]) cc_library( diff --git a/tensorflow/contrib/lite/context_util.h b/tensorflow/contrib/lite/context_util.h new file mode 100644 index 0000000000..abe802e342 --- /dev/null +++ b/tensorflow/contrib/lite/context_util.h @@ -0,0 +1,48 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// This provides a few C++ helpers that are useful for manipulating C structures +// in C++. +#ifndef TENSORFLOW_CONTRIB_LITE_CONTEXT_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_CONTEXT_UTIL_H_ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// Provide a range iterable wrapper for TfLiteIntArray* (C lists that TfLite +// C api uses. Can't use the google array_view, since we can't depend on even +// absl for embedded device reasons. +class TfLiteIntArrayView { + public: + // Construct a view of a TfLiteIntArray*. Note, `int_array` should be non-null + // and this view does not take ownership of it. 
+ explicit TfLiteIntArrayView(const TfLiteIntArray* int_array) + : int_array_(int_array) {} + + TfLiteIntArrayView(const TfLiteIntArrayView&) = default; + TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default; + + typedef const int* const_iterator; + const_iterator begin() const { return int_array_->data; } + const_iterator end() const { return &int_array_->data[int_array_->size]; } + size_t size() const { return end() - begin(); } + + private: + const TfLiteIntArray* int_array_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_CONTEXT_UTIL_H_ diff --git a/tensorflow/contrib/lite/delegates/nnapi/BUILD b/tensorflow/contrib/lite/delegates/nnapi/BUILD new file mode 100644 index 0000000000..35a8f6ca41 --- /dev/null +++ b/tensorflow/contrib/lite/delegates/nnapi/BUILD @@ -0,0 +1,31 @@ +package(default_visibility = [ + "//visibility:public", +]) + +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "nnapi_delegate", + srcs = ["nnapi_delegate.cc"], + hdrs = ["nnapi_delegate.h"], + deps = [ + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:kernel_api", + "//tensorflow/contrib/lite/kernels:kernel_util", + "//tensorflow/contrib/lite/nnapi:nnapi_lib", + ], +) + +tf_cc_test( + name = "nnapi_delegate_test", + size = "small", + srcs = ["nnapi_delegate_test.cc"], + deps = [ + ":nnapi_delegate", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc new file mode 100644 index 0000000000..0731d14419 --- /dev/null +++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc @@ -0,0 +1,464 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/builtin_ops.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/context_util.h" +#include "tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h" + +namespace tflite { +namespace { + +// TODO(b/80621585): Consider printing error string, but don't for now to +// minimize binary size. +#define CHECK_NN(context, code) \ + if (code != ANEURALNETWORKS_NO_ERROR) { \ + context->ReportError(context, "NN API returned error (%d).\n", code); \ + return kTfLiteError; \ + } + +// RAII NN API Model Destructor for use with std::unique_ptr +struct NNFreeModel { + void operator()(ANeuralNetworksModel* model) { + ANeuralNetworksModel_free(model); + } +}; +// RAII NN API Compilation Destructor for use with std::unique_ptr +struct NNFreeCompilation { + void operator()(ANeuralNetworksCompilation* model) { + ANeuralNetworksCompilation_free(model); + } +}; + +// Track tensor indices to NN API tensor indices mapping. +class OperandMapping { + public: + // Given a TFLite index return the ANN index. If it doesn't exist + // return -1. 
+ int lite_index_to_ann(int index) const { + if (index < lite_tensor_to_ann_tensor_.size()) + return lite_tensor_to_ann_tensor_[index]; + else + return -1; + } + + // NN API uses non tensor operands instead of structs. This creates one + // and returns the index. It uses a std::vector and resizes it as needed + // keeping -1 to unmapped values. Intermediate tensors likely will not + // be mapped. + int add_new_non_tensor_operand() { return next_ann_tensor_index_++; } + + // Add a new mapping from `tflite_index` and return the NN API tensor index. + int add_new_ann_tensor_index(int tflite_index) { + if (tflite_index >= lite_tensor_to_ann_tensor_.size()) { + lite_tensor_to_ann_tensor_.resize(tflite_index + 1); + } + int new_tensor_index = next_ann_tensor_index_++; + lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index; + return new_tensor_index; + } + + private: + // Next index of ann tensor + int next_ann_tensor_index_ = 0; + + // Mapping from lite index. Use a std::vector for speed and code size + // rather than a map. + std::vector lite_tensor_to_ann_tensor_; +}; + +// Abstract builder for building an op in the NN API graph. This handles +// the disparity between TFLite and NN API operand types. NN API has singular +// operands for both tensors and parameters, and TFLite separates the two. 
+class NNAPIOpBuilder { + public: + NNAPIOpBuilder(TfLiteContext* context, OperandMapping* tensor_mapping, + ANeuralNetworksModel* nn_model) + : context_(context), + operand_mapping_(tensor_mapping), + nn_model_(nn_model) {} + + TfLiteStatus AddScalarInt32Operand(int value) { + ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32}; + CHECK_NN(context_, + ANeuralNetworksModel_addOperand(nn_model_, &operand_type)); + int ann_operand = operand_mapping_->add_new_non_tensor_operand(); + CHECK_NN(context_, ANeuralNetworksModel_setOperandValue( + nn_model_, ann_operand, &value, sizeof(int32_t))); + augmented_inputs_.push_back(ann_operand); + return kTfLiteOk; + } + + TfLiteStatus AddTensorInput(int tensor_index) { + int ann_index; + TF_LITE_ENSURE_STATUS(AddTensor(tensor_index, &ann_index)); + augmented_inputs_.push_back(ann_index); + return kTfLiteOk; + } + + TfLiteStatus AddTensorOutput(int tensor_index) { + int ann_index; + TF_LITE_ENSURE_STATUS(AddTensor(tensor_index, &ann_index)); + augmented_outputs_.push_back(ann_index); + return kTfLiteOk; + } + + // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`. + // This returns the NN API tensor index corresponding to the created tensor. + // If another caller previously created a NN API tensor for `tensor_index` + // then the existing one is returned. + TfLiteStatus AddTensor(int tensor_index, int* ann_tensor_index_out) { + int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index); + if (ann_tensor_index != -1) { + *ann_tensor_index_out = ann_tensor_index; + return kTfLiteOk; + } + // Allocate a new tensor index + ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index); + + // Parameters needed for new type. 
+ int32_t nn_type = 0; + float scale = 0.0f; + int32_t zeroPoint = 0; + TfLiteTensor* tensor = &context_->tensors[tensor_index]; + switch (tensor->type) { + case kTfLiteNoType: + // Tensors added during initialization of Ops don't have a type yet and + // should not be registered with the NNAPI. + *ann_tensor_index_out = -1; + return kTfLiteOk; + case kTfLiteFloat32: + nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; + scale = 0.f; + break; + case kTfLiteUInt8: + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + scale = tensor->params.scale; + zeroPoint = tensor->params.zero_point; + break; + case kTfLiteInt32: + nn_type = ANEURALNETWORKS_TENSOR_INT32; + scale = 0.f; + zeroPoint = 0; + break; + default: + context_->ReportError(context_, "Logic error in NN API Delegate.\n"); + return kTfLiteError; + } + + ANeuralNetworksOperandType operand_type{ + nn_type, static_cast(tensor->dims->size), + reinterpret_cast(tensor->dims->data), scale, zeroPoint}; + CHECK_NN(context_, + ANeuralNetworksModel_addOperand(nn_model_, &operand_type)); + + if (tensor->allocation_type == kTfLiteMmapRo) { + // TODO(b/80630405): Use NNAPIAllocation. + CHECK_NN(context_, ANeuralNetworksModel_setOperandValue( + nn_model_, ann_tensor_index, tensor->data.raw, + tensor->bytes)); + } + + *ann_tensor_index_out = ann_tensor_index; + return kTfLiteOk; + } + + // Finish emitting the op (of type `type`) into the NN API. + TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type) { + // Actually add a NN API operation + CHECK_NN(context_, ANeuralNetworksModel_addOperation( + nn_model_, type, + static_cast(augmented_inputs_.size()), + augmented_inputs_.data(), + static_cast(augmented_outputs_.size()), + augmented_outputs_.data())); + augmented_outputs_.clear(); + augmented_outputs_.clear(); + return kTfLiteOk; + } + + private: + // TfLiteContext for error handling. Must be named context for macros to + // work. 
+ TfLiteContext* context_; + + // Tracks relationship between indices + OperandMapping* operand_mapping_; + + // The model + ANeuralNetworksModel* nn_model_; + + // Inputs and outputs for the current op. These are augmented in the sense + // that NN API uses operands for all arguments, not just tensors, unlike + // TensorFlow lite. + std::vector augmented_inputs_; + std::vector augmented_outputs_; +}; + +// The kernel that represents the subgraph of TF Lite being run on NN API. +class NNAPIDelegateKernel { + public: + NNAPIDelegateKernel() = default; + + typedef ANeuralNetworksOperationType (*MappingFn)(TfLiteContext*, + NNAPIOpBuilder* builder, + TfLiteNode* node); + + // Return a function that knows how to translate a node into its operands + // when called. You can use this function to see if a node is supported + // (i.e. that MappingFn is not nullptr). + MappingFn Map(TfLiteContext* context, int builtin_code, TfLiteNode* node) { + switch (builtin_code) { + case kTfLiteBuiltinAdd: + return [](TfLiteContext* context, NNAPIOpBuilder* builder, + TfLiteNode* node) -> ANeuralNetworksOperationType { + auto builtin = reinterpret_cast(node->builtin_data); + builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_ADD; + }; + break; + case kTfLiteBuiltinAveragePool2d: + return [](TfLiteContext* context, NNAPIOpBuilder* builder, + TfLiteNode* node) -> ANeuralNetworksOperationType { + auto builtin = + reinterpret_cast(node->builtin_data); + builder->AddScalarInt32Operand(builtin->padding); + builder->AddScalarInt32Operand(builtin->stride_width); + builder->AddScalarInt32Operand(builtin->stride_height); + builder->AddScalarInt32Operand(builtin->filter_width); + builder->AddScalarInt32Operand(builtin->filter_height); + builder->AddScalarInt32Operand(builtin->activation); + return ANEURALNETWORKS_AVERAGE_POOL_2D; + }; + break; + default: + return nullptr; + } + } + + // Initialize the kernel (a NN model). 
+ TfLiteStatus Init(TfLiteContext* context, + const TfLiteDelegateParams* params) { + for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) { + nodes_.push_back(node_index); + } + + if (!nn_model_) { + ANeuralNetworksModel* model; + CHECK_NN(context, ANeuralNetworksModel_create(&model)); + nn_model_.reset(model); + + TF_LITE_ENSURE_STATUS( + BuildGraph(context, params->input_tensors, params->output_tensors)); + } + + if (!nn_compilation_) { + ANeuralNetworksCompilation* compilation; + CHECK_NN(context, ANeuralNetworksCompilation_create(nn_model_.get(), + &compilation)); + CHECK_NN(context, ANeuralNetworksCompilation_finish(compilation)); + nn_compilation_.reset(compilation); + } + return kTfLiteOk; + } + + TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) { + ANeuralNetworksExecution* execution = nullptr; + CHECK_NN(context, ANeuralNetworksExecution_create(nn_compilation_.get(), + &execution)); + + // Set the input tensor buffers. Note: we access tflite tensors using + // absolute indices but NN api indices inputs by relative indices. + int relative_input_index = 0; + for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) { + TfLiteTensor* tensor = &context->tensors[absolute_input_index]; + CHECK_NN(context, ANeuralNetworksExecution_setInput( + execution, relative_input_index, nullptr, + tensor->data.raw, tensor->bytes)); + relative_input_index++; + } + + // Set the output tensor buffers. + int relative_output_index = 0; + for (auto output_index : TfLiteIntArrayView(node->outputs)) { + TfLiteTensor* tensor = &context->tensors[output_index]; + CHECK_NN(context, ANeuralNetworksExecution_setOutput( + execution, relative_output_index, nullptr, + tensor->data.raw, tensor->bytes)); + relative_output_index++; + } + // Invoke ANN in blocking fashion. 
+ ANeuralNetworksEvent* event = nullptr; + CHECK_NN(context, ANeuralNetworksExecution_startCompute(execution, &event)); + CHECK_NN(context, ANeuralNetworksEvent_wait(event)); + ANeuralNetworksEvent_free(event); + ANeuralNetworksExecution_free(execution); + + return kTfLiteOk; + } + + private: + // ANN API state. + std::unique_ptr nn_model_; + std::unique_ptr + nn_compilation_; + // Node indices that this delegate is responsible for. Indices here + // indexes into the nodes array in the TfLiteContext. + std::vector nodes_; + // Track indices we use + OperandMapping operand_mapping_; + + TfLiteStatus AddOpsAndTensors(TfLiteContext* context) { + // The operand builder allows creating a single op. We create it at this + // reduced power position rather than in the for loop to avoid reallocating + // the vectors. + NNAPIOpBuilder builder(context, &operand_mapping_, nn_model_.get()); + // Add Tensors + // allocate outside to avoid realloc + for (auto node_index : nodes_) { + // Obtain the op and registration. + TfLiteNode* node; + TfLiteRegistration* reg; + context->GetNodeAndRegistration(context, node_index, &node, ®); + // Map inputs to NN API tensor indices. + for (auto input_index : TfLiteIntArrayView(node->inputs)) { + TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index)); + } + // Get op type and operands + int nn_op_type = + Map(context, reg->builtin_code, node)(context, &builder, node); + // Map outputs to NN API tensor indices. + for (auto output_index : TfLiteIntArrayView(node->outputs)) { + TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(output_index)); + } + + builder.FinalizeAddOperation(nn_op_type); + } + return kTfLiteOk; + } + + TfLiteStatus BuildGraph(TfLiteContext* context, + const TfLiteIntArray* input_tensors, + const TfLiteIntArray* output_tensors) { + // Build the ops and tensors. 
+ TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context)); + // Map input and output tensor indices to ANN + std::vector inputs; + inputs.reserve(input_tensors->size); + std::vector outputs; + outputs.reserve(output_tensors->size); + // Make the TensorFlow lite inputs and outputs to ann_indices. + for (int i : TfLiteIntArrayView(input_tensors)) + inputs.push_back(operand_mapping_.lite_index_to_ann(i)); + for (int i : TfLiteIntArrayView(output_tensors)) + outputs.push_back(operand_mapping_.lite_index_to_ann(i)); + // Tell ANN to declare inputs/outputs + CHECK_NN(context, ANeuralNetworksModel_identifyInputsAndOutputs( + nn_model_.get(), inputs.size(), inputs.data(), + outputs.size(), outputs.data())); + // Finalize the model + CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get())); + + return kTfLiteOk; + } +}; + +} // namespace + +// Return a NN API Delegate struct that can check for support of ops. +TfLiteDelegate* NnApiDelegate() { + static TfLiteDelegate delegate = { + .data_ = nullptr, + .Prepare = [](TfLiteContext* context, + TfLiteDelegate* delegate) -> TfLiteStatus { + // Do not check nodes_ if NN API is unavailable. + if (!NNAPIExists()) return kTfLiteOk; + + std::vector supported_nodes(1); + // We don't care about all nodes_, we only care about ones in the + // current plan. + TfLiteIntArray* plan; + TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan)); + int total_supported_nodes = 0; + // Check for every node if it is supported + // TODO(b/80625235): Fix this to do more careful checking of versioning. + for (int node_index : TfLiteIntArrayView(plan)) { + TfLiteNode* node; + TfLiteRegistration* registration; + TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration( + context, node_index, &node, ®istration)); + NNAPIDelegateKernel dummy_kernel; + if (dummy_kernel.Map(context, registration->builtin_code, node)) { + supported_nodes.push_back(node_index); + } + total_supported_nodes += 1; + } + // Put the size at the beginning of the array. 
+ supported_nodes[0] = supported_nodes.size() - 1; + + // NN API Delegate Registration (the pseudo kernel that will invoke NN + // API subgraphs) + static const TfLiteRegistration nnapi_delegate_kernel = { + .init = [](TfLiteContext* context, const char* buffer, + size_t length) -> void* { + const TfLiteDelegateParams* params = + reinterpret_cast(buffer); + NNAPIDelegateKernel* kernel_state = new NNAPIDelegateKernel; + kernel_state->Init(context, params); + return kernel_state; + }, + + .free = [](TfLiteContext* context, void* buffer) -> void { + delete reinterpret_cast(buffer); + }, + + .prepare = [](TfLiteContext* context, + TfLiteNode* node) -> TfLiteStatus { + // Since the underlying resize happened ahead of delegation + // worked. This does nothing. + return kTfLiteOk; + }, + + .invoke = [](TfLiteContext* context, + TfLiteNode* node) -> TfLiteStatus { + NNAPIDelegateKernel* state = + reinterpret_cast(node->user_data); + return state->Invoke(context, node); + }, + + .builtin_code = kTfLiteBuiltinDelegate, + }; + + // Request TFLite to partition the graph and make kernels + // for each independent subgraph a new nnapi_delegate_kernel. + context->ReplaceSubgraphsWithDelegateKernels( + context, nnapi_delegate_kernel, + reinterpret_cast(supported_nodes.data()), + delegate); + return kTfLiteOk; + }}; + + return &delegate; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h new file mode 100644 index 0000000000..44cca2fd28 --- /dev/null +++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h @@ -0,0 +1,31 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// Return a delegate that can be used to use the NN API. +// e.g. +// NnApiDelegate* delegate = NnApiDelegate(); +// interpreter->ModifyGraphWithDelegate(&delegate); +// NnApiDelegate() returns a singleton, so you should not free this +// pointer or worry about its lifetime. +TfLiteDelegate* NnApiDelegate(); +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_ diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc new file mode 100644 index 0000000000..ff2e721423 --- /dev/null +++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -0,0 +1,82 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h" +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class FloatAddOpModel : public SingleOpModel { + public: + FloatAddOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + this->SetApplyDelegate([](Interpreter* interpreter) { + interpreter->ModifyGraphWithDelegate(NnApiDelegate()); + }); + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions, + CreateAddOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + std::vector GetOutput() { return ExtractVector(output_); } + + protected: + int input1_; + int input2_; + int output_; +}; + +// Do a test with the NN API using no activation. +TEST(NNAPIDelegate, AddWithNoActivation) { + FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3})); +} + +// Do a test with the NN api with relu. 
+TEST(NNAPIDelegate, AddWithRelu) { + FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0.0, 0.4, 1.0, 1.3})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 1a01ee0936..d23ec201b4 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -112,6 +112,12 @@ void SingleOpModel::BuildInterpreter( if (shape.empty()) continue; CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk); } + + // Modify delegate with function. + if (apply_delegate_fn_) { + apply_delegate_fn_(interpreter_.get()); + } + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) << "Cannot allocate tensors"; } diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h index 55edc97d19..db80c0082c 100644 --- a/tensorflow/contrib/lite/kernels/test_util.h +++ b/tensorflow/contrib/lite/kernels/test_util.h @@ -114,6 +114,13 @@ class SingleOpModel { SingleOpModel() {} ~SingleOpModel() {} + // Set a function callback that is run right after graph is prepared + // that allows applying external delegates. This is useful for testing + // other runtimes like NN API or GPU. + void SetApplyDelegate(std::function apply_delegate_fn) { + apply_delegate_fn_ = apply_delegate_fn; + } + // Copying or assignment is disallowed to simplify ownership semantics. 
SingleOpModel(const SingleOpModel&) = delete; SingleOpModel& operator=(const SingleOpModel&) = delete; @@ -317,6 +324,9 @@ class SingleOpModel { std::vector> operators_; std::vector> buffers_; std::map> custom_registrations_; + // A function pointer that gets called after the interpreter is created but + // before evaluation happens. This is useful for applying a delegate. + std::function apply_delegate_fn_; }; // Base class for single op unit tests. -- GitLab From bab05a2191383b3c66e9ea9ee192aef0aa36c218 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Sun, 3 Jun 2018 18:18:12 -0700 Subject: [PATCH 442/902] [tf.data] Input pipeline rewrites prototype. This CL: - adds `tf.contrib.data.optimize()` transformation that can be used to trigger rewrite-based optimization for the input pipeline. - adds `tf.data.Dataset._as_serialized_graph()` method that returns the serialized graph representation of the dataset PiperOrigin-RevId: 199068055 --- .../contrib/data/python/kernel_tests/BUILD | 13 ++ .../kernel_tests/optimize_dataset_op_test.py | 89 ++++++++ tensorflow/contrib/data/python/ops/BUILD | 15 ++ .../contrib/data/python/ops/optimization.py | 80 +++++++ .../base_api/api_def_DatasetToGraph.pbtxt | 20 ++ .../base_api/api_def_IdentityDataset.pbtxt | 14 ++ .../base_api/api_def_OptimizeDataset.pbtxt | 20 ++ tensorflow/core/framework/dataset.h | 19 ++ tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/kernels/data/BUILD | 47 ++++ tensorflow/core/kernels/data/dataset_ops.cc | 47 ++++ .../core/kernels/data/identity_dataset_op.cc | 102 +++++++++ .../core/kernels/data/optimize_dataset_op.cc | 210 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 20 ++ tensorflow/python/data/kernel_tests/BUILD | 11 + .../data/kernel_tests/dataset_ops_test.py | 37 +++ tensorflow/python/data/ops/dataset_ops.py | 9 + 17 files changed, 754 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py create mode 100644 
tensorflow/contrib/data/python/ops/optimization.py create mode 100644 tensorflow/core/api_def/base_api/api_def_DatasetToGraph.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_IdentityDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_OptimizeDataset.pbtxt create mode 100644 tensorflow/core/kernels/data/dataset_ops.cc create mode 100644 tensorflow/core/kernels/data/identity_dataset_op.cc create mode 100644 tensorflow/core/kernels/data/optimize_dataset_op.cc create mode 100644 tensorflow/python/data/kernel_tests/dataset_ops_test.py diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 523d1f2f71..ba707d8d6e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -280,6 +280,19 @@ py_test( ], ) +py_test( + name = "optimize_dataset_op_test", + size = "small", + srcs = ["optimize_dataset_op_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:optimization", + "//tensorflow/python:platform", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_test( name = "prefetch_dataset_op_test", size = "small", diff --git a/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py new file mode 100644 index 0000000000..30f1847dcd --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py @@ -0,0 +1,89 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base +from tensorflow.contrib.data.python.ops import optimization +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + + +class OptimizeDatasetTest(test.TestCase): + + def testDefaultOptimizations(self): + dataset = dataset_ops.Dataset.range(10).map(lambda x: x * x).batch( + 10).apply(optimization.optimize()) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.test_session() as sess: + graph = graph_pb2.GraphDef().FromString( + sess.run(dataset._as_serialized_graph())) + self.assertTrue( + all([node.op != "MapAndBatchDatasetV2" for node in graph.node])) + self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testEmptyOptimizations(self): + dataset = dataset_ops.Dataset.range(10).map(lambda x: x * x).batch( + 10).apply(optimization.optimize([])) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.test_session() as sess: + graph = graph_pb2.GraphDef().FromString( + sess.run(dataset._as_serialized_graph())) + 
self.assertTrue( + all([node.op != "MapAndBatchDatasetV2" for node in graph.node])) + self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testOptimization(self): + dataset = dataset_ops.Dataset.range(10).map(lambda x: x * x).batch( + 10).apply(optimization.optimize(["map_and_batch_fusion"])) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.test_session() as sess: + graph = graph_pb2.GraphDef().FromString( + sess.run(dataset._as_serialized_graph())) + self.assertTrue( + any([node.op == "MapAndBatchDatasetV2" for node in graph.node])) + self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + +class OptimizeDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def testCore(self): + + def build_dataset(num_elements, batch_size): + return dataset_ops.Dataset.range(num_elements).map(lambda x: x * x).batch( + batch_size).apply(optimization.optimize(["map_and_batch_fusion"])) + + self.run_core_tests(lambda: build_dataset(200, 10), None, 20) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index eceecfd174..086661adb7 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -208,6 +208,20 @@ py_library( ], ) +py_library( + name = "optimization", + srcs = ["optimization.py"], + srcs_version = "PY2AND3", + deps = [ + ":contrib_op_loader", + ":gen_dataset_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + py_library( name = "resampling", srcs = ["resampling.py"], @@ -368,6 +382,7 @@ py_library( ":get_single_element", ":grouping", ":interleave_ops", + 
":optimization", ":prefetching_ops", ":readers", ":resampling", diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py new file mode 100644 index 0000000000..cad41bce29 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/optimization.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Experimental API for optimizing `tf.data` pipelines.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops + + +def optimize(optimizations=None): + """A transformation that applies optimizations. + + Args: + optimizations: (Optional.) A `tf.string` vector `tf.Tensor` identifying + optimizations to use. If not specified, the default set of optimizations + is applied. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. 
+ """ + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + return OptimizeDataset(dataset, optimizations) + + return _apply_fn + + +class OptimizeDataset(dataset_ops.Dataset): + """A `Dataset` that acts as an identity, and applies optimizations.""" + + def __init__(self, input_dataset, optimizations): + """See `optimize()` for details.""" + super(OptimizeDataset, self).__init__() + self._input_dataset = input_dataset + if optimizations is None: + optimizations = [] + self._optimizations = ops.convert_to_tensor( + optimizations, dtype=dtypes.string, name="optimizations") + + def _as_variant_tensor(self): + return gen_dataset_ops.optimize_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._optimizations, + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types diff --git a/tensorflow/core/api_def/base_api/api_def_DatasetToGraph.pbtxt b/tensorflow/core/api_def/base_api/api_def_DatasetToGraph.pbtxt new file mode 100644 index 0000000000..55dd6179dd --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_DatasetToGraph.pbtxt @@ -0,0 +1,20 @@ +op { + graph_op_name: "DatasetToGraph" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: <
| Version: | CPU/GPU: | Python Version: | Compiler: | Build Tools: | cuDNN: | CUDA: |
| --- | --- | --- | --- | --- | --- | --- |
| tensorflow-1.9.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A |
| tensorflow_gpu-1.9.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9 |
| tensorflow-1.8.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A |
| tensorflow_gpu-1.8.0 | GPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | 7 | 9 |
| tensorflow-1.7.0 | CPU | 3.5-3.6 | MSVC 2015 update 3 | Cmake v3.6.3 | N/A | N/A |
- +
| Quantized | Float |
| --- | --- |
| 0 | -10.0 |
| 255 | 30.0 |
| 128 | 10.0 |
| 255 | 30.0 |
Table 2: Example quantized value range -- GitLab From 506eaaaee694a19d271eba87a8e3f9023931a384 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Mon, 4 Jun 2018 13:11:34 +0800 Subject: [PATCH 455/902] Fix some minor incorrect anchor links (#18348) * Fix the incorrect link of PrepareLinux or PrepareMacOS * Fix incorrect link of common_installation_problems also * Fix not work anchor PrepareLinux issue --- tensorflow/docs_src/install/install_sources.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 5ba522b436..cc29074757 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -81,7 +81,7 @@ or [macOS](#PrepareMac) - + ## Prepare environment for Linux Before building TensorFlow on Linux, install the following build @@ -373,9 +373,9 @@ The build and installation problems you encounter typically depend on the operating system. See the "Common installation problems" section of one of the following guides: - * @{$install_linux#CommonInstallationProblems$Installing TensorFlow on Linux} - * @{$install_mac#CommonInstallationProblems$Installing TensorFlow on Mac OS} - * @{$install_windows#CommonInstallationProblems$Installing TensorFlow on Windows} + * @{$install_linux#common_installation_problems$Installing TensorFlow on Linux} + * @{$install_mac#common_installation_problems$Installing TensorFlow on Mac OS} + * @{$install_windows#common_installation_problems$Installing TensorFlow on Windows} Beyond the errors documented in those two guides, the following table notes additional errors specific to building TensorFlow. 
Note that we -- GitLab From b933be02b97cdb42a86548f73697654d4c5d0f56 Mon Sep 17 00:00:00 2001 From: Sergei Lebedev Date: Mon, 4 Jun 2018 07:12:36 +0200 Subject: [PATCH 456/902] Fallback to dynamic loader even if HADOOP_HDFS_HOME is not defined (#19336) * Fallback to dynamic loader even if HADOOP_HDFS_HOME is not defined Prior to this commit HadoopFileSystem required HADOOP_HDFS_HOME to be defined to initialize the filesystem, even if libhdfs.so is located outside of the standard location. This limitation is unnecessary and can be safely removed. As a nice side-effect, the error message is now more informative. Before: Environment variable HADOOP_HDFS_HOME not set After: libhdfs.so: cannot open shared object file: No such file or directory Change-Id: Ief6a8679d7ef353003aa387f7767ebaa8ef290ce * Addressed review comments Change-Id: I703d57e022744e26d1b47732beeaa48c073bd5fc --- .../platform/hadoop/hadoop_file_system.cc | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 72c12318ca..ff4b4436bb 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -115,18 +115,17 @@ class LibHDFS { const char* kLibHdfsDso = "libhdfs.so"; #endif char* hdfs_home = getenv("HADOOP_HDFS_HOME"); - if (hdfs_home == nullptr) { - status_ = errors::FailedPrecondition( - "Environment variable HADOOP_HDFS_HOME not set"); - return; - } - string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); - status_ = TryLoadAndBind(path.c_str(), &handle_); - if (!status_.ok()) { - // try load libhdfs.so using dynamic loader's search path in case - // libhdfs.so is installed in non-standard location - status_ = TryLoadAndBind(kLibHdfsDso, &handle_); + if (hdfs_home != nullptr) { + string path = io::JoinPath(hdfs_home, "lib", "native", kLibHdfsDso); + status_ = 
TryLoadAndBind(path.c_str(), &handle_); + if (status_.ok()) { + return; + } } + + // Try to load the library dynamically in case it has been installed + // to a in non-standard location. + status_ = TryLoadAndBind(kLibHdfsDso, &handle_); } Status status_; -- GitLab From a8ae26ae1aa7a33b48cca8bf12c42ab7503a45cf Mon Sep 17 00:00:00 2001 From: Evgeniy Zheltonozhskiy Date: Mon, 4 Jun 2018 08:12:47 +0300 Subject: [PATCH 457/902] Fix fake quantization link (#19278) --- tensorflow/contrib/quantize/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index c83623ec94..27a933c0f9 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -6,7 +6,7 @@ inference. The details of the transformation implemented in this package is described here [1]. This is done using the -[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). +[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization). Literature has shown that fixed point networks provide comparable performance to floating point networks [2]. This is achieved by modeling the quantization -- GitLab From c36bda171673884c0f3829fac3a342733d6040f8 Mon Sep 17 00:00:00 2001 From: jsawruk Date: Mon, 4 Jun 2018 01:40:23 -0400 Subject: [PATCH 458/902] Update mobile prepare models documentation: correct location of freeze_graph (#18968) --- tensorflow/docs_src/mobile/prepare_models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8b22c04d87..2b84dbb973 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -105,8 +105,8 @@ inline constants so everything’s in one file. 
To handle the conversion, you need the `freeze_graph.py` script, that’s held in [`tensorflow/python/tools/freeze_graph.py`](https://www.tensorflow.org/code/tensorflow/python/tools/freeze_graph.py). You’ll run it like this: - bazel build tensorflow/tools:freeze_graph - bazel-bin/tensorflow/tools/freeze_graph \ + bazel build tensorflow/python/tools:freeze_graph + bazel-bin/tensorflow/python/tools/freeze_graph \ --input_graph=/tmp/model/my_graph.pb \ --input_checkpoint=/tmp/model/model.ckpt-1000 \ --output_graph=/tmp/frozen_graph.pb \ -- GitLab From a0fd55070bb83e369d1d73e777fc1ea9f1c3a6ae Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 3 Jun 2018 22:41:13 -0700 Subject: [PATCH 459/902] Replace direct download link with bazel mirror (mirror.bazel.build) (#19713) * Replace direct download link with bazel mirror (mirror.bazel.build) Since the download package for gemmlowp has been propagated to the bazel mirror (mirror.bazel.build), this fix replaced the direct link with the mirrored one, and removed the related TODO. Signed-off-by: Yong Tang * Remove TODO in tensorflow/contrib/lite/download_dependencies.sh Signed-off-by: Yong Tang --- tensorflow/contrib/lite/download_dependencies.sh | 4 +--- tensorflow/contrib/makefile/download_dependencies.sh | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/download_dependencies.sh b/tensorflow/contrib/lite/download_dependencies.sh index 436c3e1d4c..840015a7fa 100755 --- a/tensorflow/contrib/lite/download_dependencies.sh +++ b/tensorflow/contrib/lite/download_dependencies.sh @@ -30,9 +30,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. 
-GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" NEON_2_SSE_URL="https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip" diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index eff9081e35..48953e2e38 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -27,9 +27,7 @@ if [ ! -f $BZL_FILE_PATH ]; then fi EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" -# TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' once -# the archive has been propagated in mirror.bazel.build. -GEMMLOWP_URL="$(grep -o 'https://github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" +GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" -- GitLab From 5d44932cda0e88537eb2526c7a420ee4ba320619 Mon Sep 17 00:00:00 2001 From: "William D. Irons" Date: Mon, 4 Jun 2018 00:42:12 -0500 Subject: [PATCH 460/902] fix iris example to work with python3 (#19335) iris.py did not work with python3 as urllib.urlopen is not in python3. Switched to urlretrive from six. 
Same was done in: tensorflow/examples/image_retraining/retrain.py --- tensorflow/examples/learn/iris.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/examples/learn/iris.py b/tensorflow/examples/learn/iris.py index 03e60972aa..86f5204ec3 100644 --- a/tensorflow/examples/learn/iris.py +++ b/tensorflow/examples/learn/iris.py @@ -21,7 +21,8 @@ from __future__ import division from __future__ import print_function import os -import urllib + +from six.moves.urllib.request import urlretrieve import tensorflow as tf @@ -38,9 +39,7 @@ FEATURE_KEYS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] def maybe_download_iris_data(file_name, download_url): """Downloads the file and returns the number of data.""" if not os.path.exists(file_name): - raw = urllib.urlopen(download_url).read() - with open(file_name, 'w') as f: - f.write(raw) + urlretrieve(download_url, file_name) # The first line is a comma-separated string. The first one is the number of # total data in the file. -- GitLab From 869dc9165e9d58c6a6f49c2ff54a837346fa9b1d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 01:07:18 -0700 Subject: [PATCH 461/902] Add debug output to CHECK for compatible shapes of multi-output fusions. PiperOrigin-RevId: 199091580 --- tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc index 0728ccfff7..dc2934a34c 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc @@ -83,7 +83,9 @@ LoopEmitter::LoopEmitter(const ElementGenerator& target_element_generator, // Sanity check: In multi-output fusion, all shapes produced must have the // same dimensions. 
for (const IrArray& array : target_arrays) { - CHECK(ShapeUtil::SameDimensions(shape_, array.GetShape())); + CHECK(ShapeUtil::SameDimensions(shape_, array.GetShape())) + << ": '" << shape_.ShortDebugString() << "' does not match '" + << array.GetShape().ShortDebugString() << "'"; } } -- GitLab From 5b498d5d759aa0545990e20778884b465eeb1ad3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Jun 2018 03:57:01 -0700 Subject: [PATCH 462/902] [XLA] Remove unnecessary std::vector copies We can just pass along the original ArraySlice. PiperOrigin-RevId: 199109815 --- .../compiler/xla/service/llvm_ir/llvm_util.cc | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index bd45f83fb1..ff64da87e9 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -87,18 +87,10 @@ llvm::Value* EmitCallToIntrinsic( tensorflow::gtl::ArraySlice operands, tensorflow::gtl::ArraySlice overloaded_types, llvm::IRBuilder<>* ir_builder) { - std::vector types; - for (auto type : overloaded_types) { - types.push_back(type); - } llvm::Module* module = ModuleFromIRBuilder(ir_builder); - llvm::Function* intrinsic = - llvm::Intrinsic::getDeclaration(module, intrinsic_id, types); - std::vector operands_vec; - for (auto operand : operands) { - operands_vec.push_back(operand); - } - return ir_builder->CreateCall(intrinsic, operands_vec); + llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration( + module, intrinsic_id, AsArrayRef(overloaded_types)); + return ir_builder->CreateCall(intrinsic, AsArrayRef(operands)); } llvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value, -- GitLab From 92415c09b8d00f200429e994b08e302f4ca85e67 Mon Sep 17 00:00:00 2001 From: Vikram Tankasali Date: Mon, 4 Jun 2018 05:40:33 -0700 Subject: [PATCH 463/902] Update README.md for tf.contrib.kfac 
and add deprecation warning. PiperOrigin-RevId: 199119904 --- tensorflow/contrib/kfac/README.md | 5 +++++ tensorflow/contrib/kfac/python/ops/optimizer.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tensorflow/contrib/kfac/README.md b/tensorflow/contrib/kfac/README.md index 762a2f0b57..102626925d 100644 --- a/tensorflow/contrib/kfac/README.md +++ b/tensorflow/contrib/kfac/README.md @@ -1,5 +1,10 @@ # K-FAC: Kronecker-Factored Approximate Curvature +# WARNING: +# ==third_party/tensorflow/contrib/kfac is deprecated. This will be== +# ==removed on 15-07-2018. Please import third_party/tensorflow_kfac.== +# ==== + **K-FAC in TensorFlow** is an implementation of [K-FAC][kfac-paper], an approximate second-order optimization method, in TensorFlow. When applied to feedforward and convolutional neural networks, K-FAC can converge `>3.5x` diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index b7f63d8d94..03b9da7933 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import warnings + # pylint disable=long-line from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products as cmvp from tensorflow.contrib.kfac.python.ops import estimator as est @@ -107,6 +109,10 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): ValueError: If momentum is non-zero and momentum_type is not 'regular' or 'adam'. """ + warnings.warn( + "third_party.tensorflow.contrib.kfac is deprecated." + "This will be removed on 15-07-2018. 
Check README for further details.", + DeprecationWarning) # Parameters to be passed to the Fisher estimator: self._variables = var_list or tf_variables.trainable_variables self._cov_ema_decay = cov_ema_decay -- GitLab From 256ef4232d6551c2d1099eb2b932737e83f33f77 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Mon, 4 Jun 2018 06:47:07 -0700 Subject: [PATCH 464/902] Add stored eager variables to graph collections. PiperOrigin-RevId: 199125920 --- tensorflow/python/framework/ops.py | 17 +++--------- .../kernel_tests/variable_scope_test.py | 26 +++++++++++++++++++ .../python/ops/resource_variable_ops.py | 3 +++ tensorflow/python/ops/variable_scope.py | 10 ++++++- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6f3bb5563b..eceea5276a 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3882,7 +3882,6 @@ class Graph(object): contains many standard names for collections. value: The value to add to the collection. """ # pylint: disable=g-doc-exception - _assert_collection_is_ok(name) self._check_not_finalized() with self._lock: if name not in self._collections: @@ -3929,7 +3928,6 @@ class Graph(object): The list of values in the collection with the given `name`, or an empty list if no value has been added to that collection. """ # pylint: disable=g-doc-exception - _assert_collection_is_ok(name) with self._lock: coll_list = self._collections.get(name, None) if coll_list is None: @@ -3959,7 +3957,6 @@ class Graph(object): list contains the values in the order under which they were collected. """ # pylint: disable=g-doc-exception - _assert_collection_is_ok(name) with self._lock: collection = self._collections.get(name, None) if collection is None: @@ -5822,7 +5819,8 @@ def add_to_collection(name, value): value: The value to add to the collection. @compatibility(eager) - Collections are not supported when eager execution is enabled. 
+ Collections are only supported in eager when variables are created inside an + EagerVariableStore (e.g. as part of a layer or template). @end_compatibility """ get_default_graph().add_to_collection(name, value) @@ -5840,7 +5838,8 @@ def add_to_collections(names, value): value: The value to add to the collections. @compatibility(eager) - Collections are not supported when eager execution is enabled. + Collections are only supported in eager when variables are created inside an + EagerVariableStore (e.g. as part of a layer or template). @end_compatibility """ get_default_graph().add_to_collections(names, value) @@ -6133,14 +6132,6 @@ def get_from_proto_function(collection_name): return None -def _assert_collection_is_ok(collection_name): - if context.executing_eagerly(): - if collection_name in GraphKeys._VARIABLE_COLLECTIONS: # pylint: disable=protected-access - raise ValueError( - "variable collections are not supported when eager execution is enabled." - ) - - def _operation_conversion_error(op, dtype=None, name=None, as_ref=False): """Produce a nice error if someone converts an Operation to a Tensor.""" raise TypeError(("Can't convert Operation '%s' to Tensor " diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 9dc4ec0f96..2ee53df931 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -197,6 +197,32 @@ class VariableScopeTest(test.TestCase): self.assertAllEqual([v1, v2], [v3, v4]) f() + @test_util.run_in_graph_and_eager_modes() + def testEagerVariablesStoreAddsToCollections(self): + store = variable_scope.EagerVariableStore() + with store.as_default(): + trainable = variable_scope.get_variable("v1", [], trainable=True) + not_trainable = variable_scope.get_variable("v2", [], trainable=False) + concat = variable_scope.get_variable( + "v3", [], collections=[ops.GraphKeys.CONCATENATED_VARIABLES]) + 
self.assertEqual( + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES), + [trainable, not_trainable]) + self.assertEqual( + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES), + [trainable, concat]) + self.assertEqual( + ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES), [concat]) + + @test_util.run_in_graph_and_eager_modes() + def testEagerVariablesOutsideStoreNotAddedToCollections(self): + if not context.executing_eagerly(): + return + variable_scope.get_variable("v1", [], trainable=True) + variable_scope.get_variable("v2", [], trainable=False) + self.assertFalse(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) + self.assertFalse(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) + @test_util.run_in_graph_and_eager_modes() def testInitFromNonTensorValue(self): v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 7061b32808..c137bfacb2 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -507,6 +507,9 @@ class ResourceVariable(variables.Variable): else: self._cached_value = None if not context.executing_eagerly(): + # Eager variables are only added to collections if they are part of an + # eager variable store (otherwise in an interactive session they would + # hog memory and cause OOM). This is done in ops/variable_scope.py. ops.add_to_collections(collections, self) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index fa34774622..23234e2e61 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. + # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -794,6 +794,14 @@ class _VariableStore(object): validate_shape=validate_shape, constraint=constraint, use_resource=use_resource) + if context.executing_eagerly() and self._store_eager_variables: + if collections: + ops.add_to_collections(collections, v) + else: + ops.add_to_collection(ops.GraphKeys.GLOBAL_VARIABLES, v) + if trainable: + ops.add_to_collection(ops.GraphKeys.TRAINABLE_VARIABLES, v) + if not context.executing_eagerly() or self._store_eager_variables: # In eager mode we do not want to keep default references to Variable # objects as this will prevent their memory from being released. -- GitLab From edd936e4ea1bd9f1f9ee05af92efc3bae5f1515a Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 4 Jun 2018 07:43:19 -0700 Subject: [PATCH 465/902] Temporary patch: properly handle expressions in subscripts. The long term fix is either of: (a) dropping support for tracking specific slices of a symbol (b) track slices along with the symbols on which they depend. Background: So far we tracked symbols like `a[b]` and allow conversions of the kind `if : a[b] = c` -> `a[b] = ag__.if_stmt(, lambda: c, lambda: a[b])`. That construct allowed a to be anything, including e.g. Python lists, objects. etc. This is incomplete and will in the future become obsolete as we override the slice operator. In effect the statement above will be converted to `a = ag__.if_stmt(, lambda: ag__.set_item(a, b, c), lambda: a)`. However, this latter form does not support objects, so there is a tradeoff. 
PiperOrigin-RevId: 199131573 --- tensorflow/contrib/autograph/pyct/qual_names.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/pyct/qual_names.py b/tensorflow/contrib/autograph/pyct/qual_names.py index 583cf7ecd7..da07013cf4 100644 --- a/tensorflow/contrib/autograph/pyct/qual_names.py +++ b/tensorflow/contrib/autograph/pyct/qual_names.py @@ -205,6 +205,7 @@ class QnResolver(gast.NodeTransformer): return node def visit_Subscript(self, node): + # TODO(mdan): This may no longer apply if we overload getitem. node = self.generic_visit(node) s = node.slice if not isinstance(s, gast.Index): @@ -216,7 +217,11 @@ class QnResolver(gast.NodeTransformer): elif isinstance(s.value, gast.Str): subscript = QN(StringLiteral(s.value.s)) else: - subscript = anno.getanno(node.slice.value, anno.Basic.QN) + # The index may be an expression, case in which a name doesn't make sense. + if anno.hasanno(node.slice.value, anno.Basic.QN): + subscript = anno.getanno(node.slice.value, anno.Basic.QN) + else: + return node if anno.hasanno(node.value, anno.Basic.QN): anno.setanno(node, anno.Basic.QN, QN(anno.getanno(node.value, anno.Basic.QN), -- GitLab From 01c4773f435c556712c5465792f2936b5c762a1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 07:52:01 -0700 Subject: [PATCH 466/902] [XLA:GPU] Add error message to CHECK for preconditions to lower fusions with multiple reduce outputs. 
PiperOrigin-RevId: 199132442 --- tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 0f5c003341..b40b557cab 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -2443,8 +2443,11 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( case HloOpcode::kReduce: return inst->operand(1); case HloOpcode::kTuple: - CHECK(hlo->IsMultiOutputFusion() && - inst->operand(index.back())->opcode() == HloOpcode::kReduce); + CHECK(hlo->IsMultiOutputFusion()) + << ": " << hlo->ToString() << " is not a multi-output fusion."; + CHECK(inst->operand(index.back())->opcode() == HloOpcode::kReduce) + << ": Found '" << inst->operand(index.back())->opcode() << "' in " + << inst->ToString() << " but expected 'reduce'."; // For multi-output fusion look through the tuple. return inst->operand(index.back())->operand(1); default: -- GitLab From 1b4336cd5ab851404d18976169d396247ec40f10 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 08:12:37 -0700 Subject: [PATCH 467/902] Add LRN as unchanged rf layer operations for the receptive field calculator. 
PiperOrigin-RevId: 199134753 --- .../receptive_field/python/util/parse_layer_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py index bc383a8034..0e3c46f17d 100644 --- a/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py +++ b/tensorflow/contrib/receptive_field/python/util/parse_layer_parameters.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import tf_logging as logging _UNCHANGED_RF_LAYER_OPS = [ "Add", "BiasAdd", "Cast", "Ceil", "ConcatV2", "Const", "Floor", "FusedBatchNorm", "Identity", "Log", "Mul", "Pow", "RealDiv", "Relu", - "Relu6", "Round", "Rsqrt", "Softplus", "Sub", "VariableV2" + "Relu6", "Round", "Rsqrt", "Softplus", "Sub", "VariableV2", "LRN" ] # Different ways in which padding modes may be spelled. -- GitLab From 1a9f69583876c50c98fc3ccd9ded1f81731a9492 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 4 Jun 2018 09:00:06 -0700 Subject: [PATCH 468/902] Disable flaky test tensorflow/contrib/distribute/python:minimize_loss_test_gpu from continuous builds. PiperOrigin-RevId: 199140117 --- tensorflow/contrib/distribute/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 3118deaa47..a91c54153f 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -311,6 +311,7 @@ cuda_py_test( tags = [ "multi_and_single_gpu", "no_pip", + "noguitar", # TODO(b/109653107): test is flaky. ], ) -- GitLab From 33c84aa99fab76ddce7e0a8a5420e8cd63cd2a76 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 16:04:12 +0000 Subject: [PATCH 469/902] Expose `tf.broadcast_to` op This fix is a follow up of 15243 to expose `tf.broadcast_to`. Previously the op was exposed as `tf.contrib.framework.broadcast_to. 
This fix unhide the BroadcastTo so that it is exposed in `tf.broadcast_to`. Signed-off-by: Yong Tang --- tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt deleted file mode 100644 index 083eeced81..0000000000 --- a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt +++ /dev/null @@ -1,4 +0,0 @@ -op { - graph_op_name: "BroadcastTo" - visibility: HIDDEN -} -- GitLab From af3c646a03033db3074b5d6f6f40d2ead430a53d Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 16:06:19 +0000 Subject: [PATCH 470/902] Remove exposure of tf.contrib.framework.broadcast_to Signed-off-by: Yong Tang --- tensorflow/contrib/framework/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 10d1ecc738..dc49383c5c 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -119,14 +119,13 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.array_ops import broadcast_to from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.ops.init_ops import convolutional_orthogonal_1d from tensorflow.python.ops.init_ops import convolutional_orthogonal_2d from tensorflow.python.ops.init_ops import convolutional_orthogonal_3d from tensorflow.python.util.all_util import remove_undocumented -_allowed_symbols = ['nest', 'broadcast_to'] +_allowed_symbols = ['nest'] _nest_allowed_symbols = [ 
'assert_same_structure', 'is_sequence', -- GitLab From a1e24ebca75ff21188c131f28952401d9708dd5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 09:00:08 -0700 Subject: [PATCH 471/902] Internal change PiperOrigin-RevId: 199140124 --- tensorflow/core/kernels/resize_area_op_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/resize_area_op_test.cc b/tensorflow/core/kernels/resize_area_op_test.cc index a7e06ef15a..84ff090b54 100644 --- a/tensorflow/core/kernels/resize_area_op_test.cc +++ b/tensorflow/core/kernels/resize_area_op_test.cc @@ -124,7 +124,8 @@ class ResizeAreaOpTest : public OpsTestBase { ? (j + 1 > in_x1 ? width_scale : j + 1 - in_x) : (j + 1 > in_x1 ? in_x1 - j : 1.0); for (int64 c = 0; c < channels; ++c) { -#define BOUND(val, limit) std::min(((limit)-1ll), (std::max(0ll, (val)))) +#define BOUND(val, limit) \ + std::min(((limit)-int64{1}), (std::max(int64{0}, (val)))) sum_data(c) += static_cast(input_data(b, BOUND(i, in_height), BOUND(j, in_width), c)) * -- GitLab From 736e8fa3b83ca801af64c1bbc8afabdf8a00436b Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 4 Jun 2018 09:09:32 -0700 Subject: [PATCH 472/902] Enable cross-device dependency grouping optimization in non-AGGRESSIVE modes. 
PiperOrigin-RevId: 199141605 --- .../optimizers/dependency_optimizer.cc | 24 +++++++++++-------- .../optimizers/dependency_optimizer_test.cc | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index fb2aea3b3d..78a6d0d835 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -581,7 +581,8 @@ void DependencyOptimizer::GroupCrossDeviceControlEdges() { for (int j = 0; j < node->input_size(); ++j) { if (IsControlInput(node->input(j))) { const NodeDef* input = node_map_->GetNode(node->input(j)); - if (!input->device().empty() && input->device() != node->device()) { + if (input != nullptr && !input->device().empty() && + input->device() != node->device()) { auto emplace_result = noops.emplace(input->device(), nullptr); if (!emplace_result.second && emplace_result.first->second == nullptr) { @@ -615,14 +616,19 @@ void DependencyOptimizer::GroupCrossDeviceControlEdges() { const string& input_name = node->input(pos); if (IsControlInput(input_name)) { NodeDef* input = node_map_->GetNode(input_name); - auto it = noops.find(input->device()); - if (it == noops.end() || it->second == nullptr) { + if (input == nullptr) { ++pos; } else { - node->mutable_input()->SwapElements(pos, node->input_size() - 1); - node->mutable_input()->RemoveLast(); - it->second->add_input(AsControlDependency(*input)); - node_map_->UpdateOutput(input_name, node->name(), it->second->name()); + auto it = noops.find(input->device()); + if (it == noops.end() || it->second == nullptr) { + ++pos; + } else { + node->mutable_input()->SwapElements(pos, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + it->second->add_input(AsControlDependency(*input)); + node_map_->UpdateOutput(input_name, node->name(), + it->second->name()); + } } } else { ++pos; @@ -669,9 +675,7 @@ 
Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // Dedup control inputs. CleanControlInputs(); - if (opt_level_ == RewriterConfig::AGGRESSIVE) { - GroupCrossDeviceControlEdges(); - } + GroupCrossDeviceControlEdges(); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc index 931d073cd3..0ae3b4ec34 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -774,7 +774,7 @@ TEST_F(DependencyOptimizerTest, GroupCrossDeviceControlDeps) { TF_CHECK_OK(s.ToGraphDef(&expected)); } - DependencyOptimizer optimizer(RewriterConfig::AGGRESSIVE); + DependencyOptimizer optimizer; GraphDef output; TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); CompareGraphs(expected, output); -- GitLab From 077612963303c428a1effb9a8791537c131308c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 09:14:49 -0700 Subject: [PATCH 473/902] Update the distributed SDCA test. PiperOrigin-RevId: 199142338 --- .../python/kernel_tests/sdca_ops_test.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index d0c32b43cc..ef0e08a777 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -377,7 +377,10 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op.run() def testDistributedSimple(self): - # Setup test data + # Distributed SDCA may not converge if the workers update concurrently the + # same example. In this test the examples are partitioned across workers. 
+ # The examples are the same for all workers, just the example_ids are + # different. example_protos = [ make_example_proto({ 'age': [0], @@ -389,13 +392,19 @@ class SdcaWithLogisticLossTest(SdcaModelTest): }, 1), ] example_weights = [1.0, 1.0] + examples = make_example_dict(example_protos, example_weights) + example_ids = array_ops.placeholder( + dtypes.string, shape=(len(example_weights),)) + examples['example_ids'] = example_ids + variables = make_variable_dict(1, 1) for num_shards in _SHARD_NUMBERS: for num_loss_partitions in _NUM_LOSS_PARTITIONS: with self._single_threaded_test_session(): - examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) options = dict( - symmetric_l2_regularization=1, + # Keep the same solution as for TestSimple: since the number of + # examples is multplied by num_loss_partitions, multiply also + # L2 by the same value. + symmetric_l2_regularization=num_loss_partitions, symmetric_l1_regularization=0, loss_type='logistic_loss', num_table_shards=num_shards, @@ -411,32 +420,30 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() - def minimize(): + def minimize(worker_id): with self._single_threaded_test_session(): + feed_dict = {example_ids: [ + str(i + worker_id*len(example_weights)) for i in range( + len(example_weights))]} for _ in range(_MAX_ITERATIONS): - train_op.run() # pylint: disable=cell-var-from-loop + train_op.run(feed_dict=feed_dict) # pylint: disable=cell-var-from-loop threads = [] - for _ in range(num_loss_partitions): - threads.append(threading.Thread(target=minimize)) + for worker_id in range(num_loss_partitions): + threads.append(threading.Thread(target=minimize, args=(worker_id,))) threads[-1].start() for t in threads: t.join() - lr.update_weights(train_op).run() - - # The high tolerance in unregularized_loss comparisons is due to the - # fact that it's possible to trade off unregularized_loss vs. 
- # regularization and still have a sum that is quite close to the - # optimal regularized_loss value. SDCA's duality gap only ensures - # that the regularized_loss is within 0.01 of optimal. - # 0.525457 is the optimal regularized_loss. - # 0.411608 is the unregularized_loss at that optimum. - self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) - self.assertAllClose(0.525457, loss.eval(), atol=0.01) + lr.update_weights(train_op).run(feed_dict={ + example_ids: [str(i) for i in range(len(example_weights))]}) + + # Test only the unregularized loss because the optimal value of the + # regularized loss depends on num_loss_partitions. + self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.02) predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllEqual([0, 1], predicted_labels.eval()) - self.assertTrue(lr.approximate_duality_gap().eval() < 0.02) + self.assertNear(0.0, lr.approximate_duality_gap().eval(), 0.02) def testSimpleNoL2(self): # Same as test above (so comments from above apply) but without an L2. -- GitLab From 52f3f70b8bd6953e3f2437289ac078d5a1f439d0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 4 Jun 2018 09:39:17 -0700 Subject: [PATCH 474/902] Build TF on Windows with --config=opt --config=opt will enable /arch:AVX cc option on Windows -c opt is already specified in tools/bazel.rc, no it's OK to remove it here PiperOrigin-RevId: 199145562 --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 73520bb2ac..1b1c3815d8 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -77,7 +77,7 @@ echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc run_configure_for_cpu_build -bazel build --announce_rc -c opt tensorflow/tools/pip_package:build_pip_package || exit $? +bazel build --announce_rc --config=opt tensorflow/tools/pip_package:build_pip_package || exit $? 
if [[ "$skip_test" == 1 ]]; then exit 0 @@ -98,7 +98,7 @@ N_JOBS="${NUMBER_OF_PROCESSORS}" # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore, # which will result testing system installed tensorflow -bazel test -c opt -k --test_output=errors \ +bazel test --announce_rc --config=opt -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ -- GitLab From dc14f35972c8757ab65cdb54f0797e548fe3a579 Mon Sep 17 00:00:00 2001 From: mrTsjolder Date: Mon, 4 Jun 2018 18:42:33 +0200 Subject: [PATCH 475/902] Fix variance initialisers (#18854) * Fix std in variance_scaling initialiser * style improvement variance fix * clean up (own) tests * revert irrelevant changes to tests * fix keras initializers_test --- tensorflow/python/keras/initializers_test.py | 26 +++++++++--------- .../python/kernel_tests/init_ops_test.py | 27 +++++++++++++++++++ tensorflow/python/ops/init_ops.py | 3 ++- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index a54d6da839..c519e194bd 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -71,7 +71,7 @@ class KerasInitializersTest(test.TestCase): stddev=1, seed=126), tensor_shape, - target_mean=0., target_std=None, target_max=2) + target_mean=0., target_max=2, target_min=-2) def test_constant(self): tensor_shape = (5, 6, 4) @@ -83,49 +83,49 @@ class KerasInitializersTest(test.TestCase): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(3. / fan_in) + std = np.sqrt(1. 
/ fan_in) self._runner(keras.initializers.lecun_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(6. / fan_in) + std = np.sqrt(2. / fan_in) self._runner(keras.initializers.he_uniform(seed=123), tensor_shape, - target_mean=0., target_max=scale, target_min=-scale) + target_mean=0., target_std=std) def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(1. / fan_in) + std = np.sqrt(1. / fan_in) self._runner(keras.initializers.lecun_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, fan_out = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / (fan_in + fan_out)) + std = np.sqrt(2. / (fan_in + fan_out)) self._runner(keras.initializers.glorot_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.test_session(): fan_in, _ = init_ops._compute_fans(tensor_shape) - scale = np.sqrt(2. / fan_in) + std = np.sqrt(2. 
/ fan_in) self._runner(keras.initializers.he_normal(seed=123), tensor_shape, - target_mean=0., target_std=None, target_max=2 * scale) + target_mean=0., target_std=std) def test_orthogonal(self): tensor_shape = (20, 20) diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index a9b55854f1..795aa67248 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -362,6 +362,33 @@ class UniformUnitScalingInitializationTest(test.TestCase): dtype=dtypes.string) +class VarianceScalingInitializationTest(test.TestCase): + + def testNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='normal') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + def testUniformDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer(distribution='uniform') + + with self.test_session(use_gpu=True): + x = init(shape).eval() + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + # TODO(vrv): move to sequence_ops_test? class RangeTest(test.TestCase): diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 1f8d8dc4f3..055d42815c 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -463,7 +463,8 @@ class VarianceScaling(Initializer): else: scale /= max(1., (fan_in + fan_out) / 2.) if self.distribution == "normal": - stddev = math.sqrt(scale) + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) 
+ stddev = math.sqrt(scale) / .87962566103423978 return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) else: -- GitLab From 301e800623b3a463267c09e8be43972af609d710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Branchaud-Charron?= Date: Mon, 4 Jun 2018 12:42:48 -0400 Subject: [PATCH 476/902] Add globs from Lambda before calling it (#18926) --- tensorflow/python/estimator/keras_test.py | 14 ++++++------ tensorflow/python/keras/layers/core.py | 26 ++++++++++++++++++++++- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py index 6688a84130..5e094ae92b 100644 --- a/tensorflow/python/estimator/keras_test.py +++ b/tensorflow/python/estimator/keras_test.py @@ -31,10 +31,10 @@ from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras import backend as K from tensorflow.python.keras import testing_utils from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.optimizers import SGD +from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -146,13 +146,13 @@ def randomize_io_type(array, name): def multi_inputs_multi_outputs_model(): a = keras.layers.Input(shape=(16,), name='input_a') b = keras.layers.Input(shape=(16,), name='input_b') - m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m') + m = keras.layers.Input(shape=(8,), dtype='string', name='input_m') dense = keras.layers.Dense(8, name='dense_1') a_2 = dense(a) - # Apply a mask - s_2 = keras.layers.Lambda(lambda k: - K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2]) + # Read m + m_2 = 
keras.layers.Lambda(gen_parsing_ops.string_to_number)(m) + s_2 = keras.layers.Lambda(lambda k: k[0] * k[1])([m_2, a_2]) b_2 = dense(b) merged = keras.layers.concatenate([s_2, b_2], name='merge') c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged) @@ -372,13 +372,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): def train_input_fn(): input_dict = {'input_a': a_train, 'input_b': b_train, - 'input_m': input_m_train > 0} + 'input_m': input_m_train.astype(np.str)} output_dict = {'dense_2': c_train, 'dense_3': d_train} return input_dict, output_dict def eval_input_fn(): input_dict = {'input_a': a_test, 'input_b': b_test, - 'input_m': input_m_test > 0} + 'input_m': input_m_test.astype(np.str)} output_dict = {'dense_2': c_test, 'dense_3': d_test} return input_dict, output_dict diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index df4c3915a3..db0c220380 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function import copy +import sys import types as python_types +import warnings import numpy as np @@ -714,6 +716,7 @@ class Lambda(Layer): return self.mask def get_config(self): + module = self.function.__module__ if isinstance(self.function, python_types.LambdaType): function = generic_utils.func_dump(self.function) function_type = 'lambda' @@ -721,21 +724,26 @@ class Lambda(Layer): function = self.function.__name__ function_type = 'function' + output_shape_module = None if isinstance(self._output_shape, python_types.LambdaType): output_shape = generic_utils.func_dump(self._output_shape) output_shape_type = 'lambda' + output_shape_module = self._output_shape.__module__ elif callable(self._output_shape): output_shape = self._output_shape.__name__ output_shape_type = 'function' + output_shape_module = self._output_shape.__module__ else: output_shape = self._output_shape 
output_shape_type = 'raw' config = { 'function': function, + 'module': module, 'function_type': function_type, 'output_shape': output_shape, 'output_shape_type': output_shape_type, + 'output_shape_module': output_shape_module, 'arguments': self.arguments } base_config = super(Lambda, self).get_config() @@ -745,8 +753,16 @@ class Lambda(Layer): def from_config(cls, config, custom_objects=None): config = config.copy() globs = globals() + module = config.pop('module', None) + if module in sys.modules: + globs.update(sys.modules[module].__dict__) + elif module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. ' + 'It may cause errors.'.format(module) + , UserWarning) if custom_objects: - globs = dict(list(globs.items()) + list(custom_objects.items())) + globs.update(custom_objects) function_type = config.pop('function_type') if function_type == 'function': # Simple lookup in custom objects @@ -760,6 +776,14 @@ class Lambda(Layer): else: raise TypeError('Unknown function type:', function_type) + output_shape_module = config.pop('output_shape_module', None) + if output_shape_module in sys.modules: + globs.update(sys.modules[output_shape_module].__dict__) + elif output_shape_module is not None: + # Note: we don't know the name of the function if it's a lambda. + warnings.warn('{} is not loaded, but a Lambda layer uses it. 
' + 'It may cause errors.'.format(output_shape_module) + , UserWarning) output_shape_type = config.pop('output_shape_type') if output_shape_type == 'function': # Simple lookup in custom objects -- GitLab From a3b9e75063201c78c75e2f717a2ff24b0ffa6f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 5 Jun 2018 00:43:00 +0800 Subject: [PATCH 477/902] DOC: add more explanation for auxiliary_name_scope (#18948) --- tensorflow/python/ops/variable_scope.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index fa34774622..9c969d61c0 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1778,6 +1778,23 @@ class variable_scope(object): assert v.name == "foo/bar/v:0" ``` + Simple example of how to reenter a premade variable scope safely: + + ```python + with tf.variable_scope("foo") as vs: + pass + + # Re-enter the variable scope. + with tf.variable_scope(vs, + auxiliary_name_scope=False) as vs1: + # Restore the original name_scope. + with tf.name_scope(vs1.original_name_scope): + v = tf.get_variable("v", [1]) + assert v.name == "foo/v:0" + c = tf.constant([1], name="c") + assert c.name == "foo/c:0" + ``` + Basic example of sharing a variable AUTO_REUSE: ```python @@ -1915,7 +1932,9 @@ class variable_scope(object): (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. auxiliary_name_scope: If `True`, we create an auxiliary name scope with - the scope. If `False`, we don't touch name scope. + the scope. If `False`, we don't create it. Note that the argument is + not inherited, and it only takes effect for once when creating. You + should only use it for re-entering a premade variable scope. Returns: A scope that can be captured and reused. 
-- GitLab From 440e3850bd197332876f391e79cf06c723d69885 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 09:44:20 -0700 Subject: [PATCH 478/902] Fix issue in Keras model complie with float64 mode (#19328) * Fix issue in Keras model complie with float64 mode This fix tries to address the issue raised in 19318 where Keras model complie for `model.compile('rmsprop', 'mse')` does not work in float64 mode. The issue comes from `placeholder_with_default([1.]...`, which returns dtype float32 by default (as `[1.]` was inteprated as float32). Since placeholder does not have a output_dtype to pass, this fix converts `[1.]` to float64 first before passing in. This fix fixes 19318. Signed-off-by: Yong Tang * Fix pylint issue Signed-off-by: Yong Tang * Add test case for float64 and model compile Signed-off-by: Yong Tang --- tensorflow/python/keras/engine/training.py | 7 +++++-- tensorflow/python/keras/models_test.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 04a2aa7664..aca63f822b 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -409,11 +410,13 @@ class Model(Network): else: if sample_weight_mode == 'temporal': sample_weights.append(array_ops.placeholder_with_default( - [[1.]], shape=[None, None], name=name + '_sample_weights')) + constant_op.constant([[1.]], dtype=K.floatx()), + shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: 
sample_weights.append(array_ops.placeholder_with_default( - [1.], shape=[None], name=name + '_sample_weights')) + constant_op.constant([1.], dtype=K.floatx()), + shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index c616d8f24f..e6e45902a8 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -144,5 +144,19 @@ class CheckpointingTests(test.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) +class TestModelBackend(test.TestCase): + + def test_model_backend_float64_use_cases(self): + # Test case for GitHub issue 19318 + floatx = keras.backend.floatx() + keras.backend.set_floatx('float64') + + x = keras.Input((5,)) + y = keras.layers.Dense(1)(x) + model = keras.models.Model(x, y) + model.compile('rmsprop', 'mse') + + keras.backend.set_floatx(floatx) + if __name__ == '__main__': test.main() -- GitLab From b940fb6ac1234d73fbb50053edf21600bacdda18 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 16:46:03 +0000 Subject: [PATCH 479/902] Update golden API The golden API is updated with: ``` bazel-bin/tensorflow/tools/api/tests/api_compatibility_test \ --update_goldens True ``` Signed-off-by: Yong Tang --- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 3051c4437e..01b8058118 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -792,6 +792,10 @@ tf_module { name: "broadcast_static_shape" argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "broadcast_to" + argspec: "args=[\'input\', \'shape\', \'name\'], 
varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "case" argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], " -- GitLab From b5f1ba290053893376bea31b8c4629b7efcd8c0a Mon Sep 17 00:00:00 2001 From: Yanan Cao Date: Mon, 4 Jun 2018 09:56:21 -0700 Subject: [PATCH 480/902] Minor error message fix in TPUEstimator. PiperOrigin-RevId: 199148136 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index a155de3844..f63e9e8bda 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -2641,7 +2641,7 @@ class _CapturedObject(object): def capture(self, o): if self._captured: raise RuntimeError( - 'InternalError: Object can be captured only. Please file bug .') + 'InternalError: Object can capture only once. Please file bug.') self._captured = True self._object = o @@ -2650,7 +2650,7 @@ class _CapturedObject(object): if not self._captured: raise RuntimeError( 'InternalError: Object is not captured properly before `get`. ' - 'Please file bug .') + 'Please file bug.') return self._object -- GitLab From f277fb608d5e278d04e81b82f57b69afe723d973 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Mon, 4 Jun 2018 10:24:33 -0700 Subject: [PATCH 481/902] [TF2XLA] Change to resize bilinear to between match a BackpropInput convolution by swapping the kernel input and output feature dimension. 
PiperOrigin-RevId: 199153010 --- tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc index 91bff995a1..79d3a6979c 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc @@ -197,8 +197,8 @@ xla::XlaOp ResizeUsingDilationAndConvolution(xla::XlaBuilder* builder, dimension_numbers.add_output_spatial_dimensions(1 + i); dimension_numbers.add_kernel_spatial_dimensions(i); } - dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims); - dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims + 1); + dimension_numbers.set_kernel_input_feature_dimension(num_spatial_dims + 1); + dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims); ResizeConvolutionDims dims = ComputeResizeConvolutionParameters(in_size, out_size); -- GitLab From 4a1197c4c09ca4383cf7fc24c08d83a1641c7735 Mon Sep 17 00:00:00 2001 From: G K Date: Mon, 4 Jun 2018 19:30:17 +0200 Subject: [PATCH 482/902] added crucial documentation on SELU activation (#15337) * added crucial documentation on SELU activation * changed from layers. to tf. --- tensorflow/core/api_def/base_api/api_def_Selu.pbtxt | 4 ++++ tensorflow/go/op/wrappers.go | 6 +++--- tensorflow/python/keras/activations.py | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt index cbe76de415..985f09312f 100644 --- a/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Selu.pbtxt @@ -4,6 +4,10 @@ op { description: < Date: Mon, 4 Jun 2018 10:25:23 -0700 Subject: [PATCH 483/902] Computing the volume of the set of correlation matrices with bounded determinant. 
This is useful for testing the LKJ distribution on correlation matrices. PiperOrigin-RevId: 199153115 --- .../python/kernel_tests/util/BUILD | 48 +++ .../util/correlation_matrix_volumes.py | 98 ++++++ .../util/correlation_matrix_volumes_lib.py | 323 ++++++++++++++++++ .../util/correlation_matrix_volumes_test.py | 150 ++++++++ 4 files changed, 619 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/BUILD create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD b/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD new file mode 100644 index 0000000000..03e26b198e --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD @@ -0,0 +1,48 @@ +# Description: +# Internal testing utilities, e.g., computing the correct answer to +# put in a unit test. 
+ +licenses(["notice"]) # Apache 2.0 + +py_library( + name = "correlation_matrix_volumes_py", + srcs = [ + "correlation_matrix_volumes_lib.py", + ], + deps = [ + "//tensorflow/contrib/distributions:distributions_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:errors", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//third_party/py/numpy", + ], +) + +py_binary( + name = "correlation_matrix_volumes", + srcs = [ + "correlation_matrix_volumes.py", + ], + deps = [ + ":correlation_matrix_volumes_py", + ], +) + +py_test( + name = "correlation_matrix_volumes_test", + size = "medium", + srcs = ["correlation_matrix_volumes_test.py"], + tags = ["no_pip"], + deps = [ + ":correlation_matrix_volumes_py", + # For statistical testing + "//tensorflow/contrib/distributions:distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + ], +) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py new file mode 100644 index 0000000000..2eab51cd30 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py @@ -0,0 +1,98 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Executable to estimate the volume of various sets of correlation matrices. + +See correlation_matrix_volumes_lib.py for purpose and methodology. + +Invocation example: +``` +python correlation_matrix_volumes.py --num_samples 1e7 +``` + +This will compute 10,000,000-sample confidence intervals for the +volumes of several sets of correlation matrices. Which sets, and the +desired statistical significance, are hard-coded in this source file. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pprint + +from absl import app +from absl import flags + +from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr + +FLAGS = flags.FLAGS + +# Float to support giving the number of samples in scientific notation. +# The production run used for the LKJ test used 1e7 samples. +flags.DEFINE_float('num_samples', 1e4, 'Number of samples to use.') + + +def ctv_debatched(det_bounds, dim, num_samples, error_rate=1e-6, seed=42): + # This wrapper undoes the batching in compute_true_volumes, because + # apparently several 5x5x9x1e7 Tensors of float32 can strain RAM. + bounds = {} + for db in det_bounds: + bounds[db] = corr.compute_true_volumes( + [db], dim, num_samples, error_rate=error_rate, seed=seed)[db] + return bounds + + +# The particular bounds in all three of these functions were chosen by +# a somewhat arbitrary walk through an empirical tradeoff, for the +# purpose of testing the LKJ distribution. 
Setting the determinant +# bound lower +# - Covers more of the testee's sample space, and +# - Increases the probability that the rejection sampler will hit, thus +# - Decreases the relative error (at a fixed sample count) in the +# rejection-based volume estimate; +# but also +# - Increases the variance of the estimator used in the LKJ test. +# This latter variance is also affected by the dimension and the +# tested concentration parameter, and can be compensated for with more +# compute (expensive) or a looser discrepancy limit (unsatisfying). +# The values here are the projection of the points in that test design +# space that ended up getting chosen. +def compute_3x3_volumes(num_samples): + det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45] + return ctv_debatched( + det_bounds, 3, num_samples, error_rate=5e-7, seed=46) + + +def compute_4x4_volumes(num_samples): + det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45] + return ctv_debatched( + det_bounds, 4, num_samples, error_rate=5e-7, seed=47) + + +def compute_5x5_volumes(num_samples): + det_bounds = [0.01, 0.2, 0.25, 0.3, 0.35, 0.4] + return ctv_debatched( + det_bounds, 5, num_samples, error_rate=5e-7, seed=48) + + +def main(_): + full_bounds = {} + full_bounds[3] = compute_3x3_volumes(int(FLAGS.num_samples)) + full_bounds[4] = compute_4x4_volumes(int(FLAGS.num_samples)) + full_bounds[5] = compute_5x5_volumes(int(FLAGS.num_samples)) + pprint.pprint(full_bounds) + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py new file mode 100644 index 0000000000..455e71f00c --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py @@ -0,0 +1,323 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Estimating the volume of the correlation matrices with bounded determinant. + +Why? Because lkj_test.py tests the sampler for the LKJ distribution +by estimating the same volume another way. + +How? Rejection sampling. Or, more precisely, importance sampling, +proposing from the uniform distribution on symmetric matrices with +diagonal 1s and entries in [-1, 1]. Such a matrix is a correlation +matrix if and only if it is also positive semi-definite. + +The samples can then be converted into a confidence interval on the +volume in question by the [Clopper-Pearson +method](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval), +also implemented here. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import sys + +import numpy as np + +from tensorflow.python.client import session +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import uniform +from tensorflow.python.ops.distributions import util +from tensorflow.python.platform import tf_logging + +__all__ = [ + "correlation_matrix_volume_rejection_samples", + "compute_true_volumes", +] + + +def try_import(name): # pylint: disable=invalid-name + module = None + try: + module = importlib.import_module(name) + except ImportError as e: + tf_logging.warning("Could not import %s: %s" % (name, str(e))) + return module + +optimize = try_import("scipy.optimize") +stats = try_import("scipy.stats") + + +def _psd_mask(x): + """Computes whether each square matrix in the input is positive semi-definite. + + Args: + x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`. + + Returns: + mask: A floating-point `Tensor` of shape `[B1, ... Bn]`. Each + scalar is 1 if the corresponding matrix was PSD, otherwise 0. + """ + # Allegedly + # https://scicomp.stackexchange.com/questions/12979/testing-if-a-matrix-is-positive-semi-definite + # it is more efficient to test for positive semi-definiteness by + # trying to compute the Cholesky decomposition -- the matrix is PSD + # if you succeed and not PSD if you fail. However, TensorFlow's + # Cholesky raises an exception if _any_ of the input matrices are + # not PSD, from which I don't know how to extract _which ones_, so I + # proceed by explicitly computing all the eigenvalues and checking + # whether they are all positive or not. + # + # Also, as was discussed in the answer, it is somewhat dangerous to + # treat SPD-ness as binary in floating-point arithmetic. 
Cholesky + # factorization can complete and 'look' like everything is fine + # (e.g., O(1) entries and a diagonal of all ones) but the matrix can + # have an exponential condition number. + eigenvalues, _ = linalg_ops.self_adjoint_eig(x) + return math_ops.cast( + math_ops.reduce_min(eigenvalues, axis=-1) >= 0, dtype=x.dtype) + + +def _det_large_enough_mask(x, det_bounds): + """Returns whether the input matches the given determinant limit. + + Args: + x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`. + det_bounds: A floating-point `Tensor` that must broadcast to shape + `[B1, ..., Bn]`, giving the desired lower bound on the + determinants in `x`. + + Returns: + mask: A floating-point `Tensor` of shape [B1, ..., Bn]. Each + scalar is 1 if the corresponding matrix had determinant above + the corresponding bound, otherwise 0. + """ + # For the curious: I wonder whether it is possible and desirable to + # use a Cholesky decomposition-based algorithm for this, since the + # only matrices whose determinant this code cares about will be PSD. + # Didn't figure out how to code that in TensorFlow. + # + # Expert opinion is that it would be about twice as fast since + # Cholesky is roughly half the cost of Gaussian Elimination with + # Partial Pivoting. But this is less of an impact than the switch in + # _psd_mask. + return math_ops.cast( + linalg_ops.matrix_determinant(x) > det_bounds, dtype=x.dtype) + + +def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed): + """Returns a uniformly random `Tensor` of "correlation-like" matrices. + + A "correlation-like" matrix is a symmetric square matrix with all entries + between -1 and 1 (inclusive) and 1s on the main diagonal. Of these, + the ones that are positive semi-definite are exactly the correlation + matrices. + + Args: + num_rows: Python `int` dimension of the correlation-like matrices. + batch_shape: `Tensor` or Python `tuple` of `int` shape of the + batch to return. 
+ dtype: `dtype` of the `Tensor` to return. + seed: Random seed. + + Returns: + matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]` + and dtype `dtype`. Each entry is in [-1, 1], and each matrix + along the bottom two dimensions is symmetric and has 1s on the + main diagonal. + """ + num_entries = num_rows * (num_rows + 1) / 2 + ones = array_ops.ones(shape=[num_entries], dtype=dtype) + # It seems wasteful to generate random values for the diagonal since + # I am going to throw them away, but `fill_triangular` fills the + # diagonal, so I probably need them. + # It's not impossible that it would be more efficient to just fill + # the whole matrix with random values instead of messing with + # `fill_triangular`. Then would need to filter almost half out with + # `matrix_band_part`. + unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed) + tril = util.fill_triangular(unifs) + symmetric = tril + array_ops.matrix_transpose(tril) + diagonal_ones = array_ops.ones( + shape=util.pad(batch_shape, axis=0, back=True, value=num_rows), + dtype=dtype) + return array_ops.matrix_set_diag(symmetric, diagonal_ones) + + +def correlation_matrix_volume_rejection_samples( + det_bounds, dim, sample_shape, dtype, seed): + """Returns rejection samples from trying to get good correlation matrices. + + The proposal being rejected from is the uniform distribution on + "correlation-like" matrices. We say a matrix is "correlation-like" + if it is a symmetric square matrix with all entries between -1 and 1 + (inclusive) and 1s on the main diagonal. Of these, the ones that + are positive semi-definite are exactly the correlation matrices. + + The rejection algorithm, then, is to sample a `Tensor` of + `sample_shape` correlation-like matrices of dimensions `dim` by + `dim`, and check each one for (i) being a correlation matrix (i.e., + PSD), and (ii) having determinant at least the corresponding entry + of `det_bounds`. 
+ + Args: + det_bounds: A `Tensor` of lower bounds on the determinants of + acceptable matrices. The shape must broadcast with `sample_shape`. + dim: A Python `int` dimension of correlation matrices to sample. + sample_shape: Python `tuple` of `int` shape of the samples to + compute, excluding the two matrix dimensions. + dtype: The `dtype` in which to do the computation. + seed: Random seed. + + Returns: + weights: A `Tensor` of shape `sample_shape`. Each entry is 0 if the + corresponding matrix was not a correlation matrix, or had too + small of a determinant. Otherwise, the entry is the + multiplicative inverse of the density of proposing that matrix + uniformly, i.e., the volume of the set of `dim` by `dim` + correlation-like matrices. + volume: The volume of the set of `dim` by `dim` correlation-like + matrices. + """ + with ops.name_scope("rejection_sampler"): + rej_proposals = _uniform_correlation_like_matrix( + dim, sample_shape, dtype, seed=seed) + rej_proposal_volume = 2. ** (dim * (dim - 1) / 2.) + # The density of proposing any given point is 1 / rej_proposal_volume; + # The weight of that point should be scaled by + # 1 / density = rej_proposal_volume. + rej_weights = rej_proposal_volume * _psd_mask( + rej_proposals) * _det_large_enough_mask(rej_proposals, det_bounds) + return rej_weights, rej_proposal_volume + + +def _clopper_pearson_confidence_interval(samples, error_rate): + """Computes a confidence interval for the mean of the given 1-D distribution. + + Assumes (and checks) that the given distribution is Bernoulli, i.e., + takes only two values. This licenses using the CDF of the binomial + distribution for the confidence, which is tighter (for extreme + probabilities) than the DKWM inequality. The method is known as the + [Clopper-Pearson method] + (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval). + + Assumes: + + - The given samples were drawn iid from the distribution of interest. 
+ + - The given distribution is a Bernoulli, i.e., supported only on + low and high. + + Guarantees: + + - The probability (over the randomness of drawing the given sample) + that the true mean is outside the returned interval is no more + than the given error_rate. + + Args: + samples: `np.ndarray` of samples drawn iid from the distribution + of interest. + error_rate: Python `float` admissible rate of mistakes. + + Returns: + low: Lower bound of confidence interval. + high: Upper bound of confidence interval. + + Raises: + ValueError: If `samples` has rank other than 1 (batch semantics + are not implemented), or if `samples` contains values other than + `low` or `high` (as that makes the distribution not Bernoulli). + """ + # TODO(b/78025336) Migrate this confidence interval function + # to statistical_testing.py. In order to do that + # - Get the binomial CDF from the Binomial distribution + # - Implement scalar root finding in TF. Batch bisection search + # shouldn't be too hard, and is definitely good enough for this + # problem. Batching the Brent algorithm (from scipy) that is used + # here may be more involved, but may also not be necessary---it's + # only used here because scipy made it convenient. In particular, + # robustness is more important than speed here, which may make + # bisection search actively better. + # - The rest is just a matter of rewriting in the appropriate style. 
+ if optimize is None or stats is None: + raise ValueError( + "Scipy is required for computing Clopper-Pearson confidence intervals") + if len(samples.shape) != 1: + raise ValueError("Batch semantics not implemented") + n = len(samples) + low = np.amin(samples) + high = np.amax(samples) + successes = np.count_nonzero(samples - low) + failures = np.count_nonzero(samples - high) + if successes + failures != n: + uniques = np.unique(samples) + msg = ("Purportedly Bernoulli distribution had distinct samples" + " {}, {}, and {}".format(uniques[0], uniques[1], uniques[2])) + raise ValueError(msg) + def p_small_enough(p): + prob = stats.binom.logcdf(successes, n, p) + return prob - np.log(error_rate / 2.) + def p_big_enough(p): + prob = stats.binom.logsf(successes, n, p) + return prob - np.log(error_rate / 2.) + high_p = optimize.brentq( + p_small_enough, float(successes) / n, 1., rtol=1e-9) + low_p = optimize.brentq( + p_big_enough, 0., float(successes) / n, rtol=1e-9) + low_interval = low + (high - low) * low_p + high_interval = low + (high - low) * high_p + return (low_interval, high_interval) + + +def compute_true_volumes( + det_bounds, dim, num_samples, error_rate=1e-6, seed=42): + """Returns confidence intervals for the desired correlation matrix volumes. + + The confidence intervals are computed by the [Clopper-Pearson method] + (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval). + + Args: + det_bounds: A rank-1 numpy array of lower bounds on the + determinants of acceptable matrices. Entries must be unique. + dim: A Python `int` dimension of correlation matrices to sample. + num_samples: The number of samples to draw. + error_rate: The statistical significance of the returned + confidence intervals. The significance is broadcast: Each + returned interval separately may be incorrect with probability + (under the sample of correlation-like matrices drawn internally) + at most `error_rate`. + seed: Random seed. 
+ + Returns: + bounds: A Python `dict` mapping each determinant bound to the low, high + tuple giving the confidence interval. + """ + bounds = {} + with session.Session() as sess: + rej_weights, _ = correlation_matrix_volume_rejection_samples( + det_bounds, dim, [num_samples, len(det_bounds)], np.float32, seed=seed) + rej_weights = sess.run(rej_weights) + for rw, det in zip(np.rollaxis(rej_weights, 1), det_bounds): + template = ("Estimating volume of {}x{} correlation " + "matrices with determinant >= {}.") + print(template.format(dim, dim, det)) + sys.stdout.flush() + bounds[det] = _clopper_pearson_confidence_interval( + rw, error_rate=error_rate) + return bounds diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py new file mode 100644 index 0000000000..8f99300e63 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py @@ -0,0 +1,150 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for correlation_matrix_volumes_lib.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr +from tensorflow.contrib.distributions.python.ops import statistical_testing as st +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.platform import test + + +# NxN correlation matrices are determined by the N*(N-1)/2 +# lower-triangular entries. In addition to being between -1 and 1, +# they must also obey the constraint that the determinant of the +# resulting symmetric matrix is non-negative. In 2x2, we can even +# analytically compute the volume when the determinant is bounded to > +# epsilon, as that boils down to the one lower-triangular entry being +# less than sqrt(1 - epsilon) in absolute value. +def two_by_two_volume(det_bound): + return 2 * np.sqrt(1.0 - det_bound) + + +# The post +# https://psychometroscar.com/the-volume-of-a-3-x-3-correlation-matrix/ +# derives (with elementary calculus) that the volume (with respect to +# Lebesgue^3 measure) of the set of 3x3 correlation matrices is +# pi^2/2. The same result is also obtained by [1]. +def three_by_three_volume(): + return np.pi**2 / 2. + + +# The volume of the unconstrained set of correlation matrices is also +# the normalization constant of the LKJ distribution from [2]. As +# part of defining the distribution, that reference derives a general +# formula for this volume for all dimensions. A TensorFlow +# computation thereof gave the below result for 4x4: +def four_by_four_volume(): + # This constant computed as math_ops.exp(lkj.log_norm_const(4, [1.0])) + return 11.6973076 + +# [1] Rousseeuw, P. J., & Molenberghs, G. 
(1994). "The shape of +# correlation matrices." The American Statistician, 48(4), 276-279. + +# [2] Daniel Lewandowski, Dorota Kurowicka, and Harry Joe, "Generating +# random correlation matrices based on vines and extended onion +# method," Journal of Multivariate Analysis 100 (2009), pp 1989-2001. + + +class CorrelationMatrixVolumesTest(test.TestCase): + + def testRejection2D(self): + num_samples = int(1e5) # Chosen for a small min detectable discrepancy + det_bounds = np.array( + [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32) + exact_volumes = two_by_two_volume(det_bounds) + (rej_weights, + rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples( + det_bounds, 2, [num_samples, 9], dtype=np.float32, seed=43) + # shape of rej_weights: [num_samples, 9, 2, 2] + chk1 = st.assert_true_mean_equal_by_dkwm( + rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes, + false_fail_rate=1e-6) + chk2 = check_ops.assert_less( + st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, low=0., high=rej_proposal_volume, + # Correct the false fail rate due to different broadcasting + false_fail_rate=1.1e-7, false_pass_rate=1e-6), + 0.036) + with ops.control_dependencies([chk1, chk2]): + rej_weights = array_ops.identity(rej_weights) + self.evaluate(rej_weights) + + def testRejection3D(self): + num_samples = int(1e5) # Chosen for a small min detectable discrepancy + det_bounds = np.array([0.0], dtype=np.float32) + exact_volumes = np.array([three_by_three_volume()], dtype=np.float32) + (rej_weights, + rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples( + det_bounds, 3, [num_samples, 1], dtype=np.float32, seed=44) + # shape of rej_weights: [num_samples, 1, 3, 3] + chk1 = st.assert_true_mean_equal_by_dkwm( + rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes, + false_fail_rate=1e-6) + chk2 = check_ops.assert_less( + st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, 
low=0., high=rej_proposal_volume, + false_fail_rate=1e-6, false_pass_rate=1e-6), + # Going for about a 3% relative error + 0.15) + with ops.control_dependencies([chk1, chk2]): + rej_weights = array_ops.identity(rej_weights) + self.evaluate(rej_weights) + + def testRejection4D(self): + num_samples = int(1e5) # Chosen for a small min detectable discrepancy + det_bounds = np.array([0.0], dtype=np.float32) + exact_volumes = [four_by_four_volume()] + (rej_weights, + rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples( + det_bounds, 4, [num_samples, 1], dtype=np.float32, seed=45) + # shape of rej_weights: [num_samples, 1, 4, 4] + chk1 = st.assert_true_mean_equal_by_dkwm( + rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes, + false_fail_rate=1e-6) + chk2 = check_ops.assert_less( + st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, low=0., high=rej_proposal_volume, + false_fail_rate=1e-6, false_pass_rate=1e-6), + # Going for about a 10% relative error + 1.1) + with ops.control_dependencies([chk1, chk2]): + rej_weights = array_ops.identity(rej_weights) + self.evaluate(rej_weights) + + def testVolumeEstimation2D(self): + # Test that the confidence intervals produced by + # corr.compute_true_volumes are sound, in the sense of containing + # the exact volume. 
+ num_samples = int(1e5) # Chosen by symmetry with testRejection2D + det_bounds = np.array( + [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32) + volume_bounds = corr.compute_true_volumes( + det_bounds, 2, num_samples, error_rate=1e-6, seed=47) + exact_volumes = two_by_two_volume(det_bounds) + for det, volume in zip(det_bounds, exact_volumes): + computed_low, computed_high = volume_bounds[det] + self.assertLess(computed_low, volume) + self.assertGreater(computed_high, volume) + +if __name__ == "__main__": + test.main() -- GitLab From 5f315a292a65bd898a736cd305152f348846718a Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 4 Jun 2018 11:11:06 -0700 Subject: [PATCH 484/902] Fix visibility for tf.keras.__version__ PiperOrigin-RevId: 199161696 --- tensorflow/python/keras/__init__.py | 4 ++++ tensorflow/python/keras/integration_test.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py index 197f306097..3493069a5b 100644 --- a/tensorflow/python/keras/__init__.py +++ b/tensorflow/python/keras/__init__.py @@ -41,8 +41,12 @@ from tensorflow.python.keras.layers import Input from tensorflow.python.keras.models import Model from tensorflow.python.keras.models import Sequential +from tensorflow.python.util.tf_export import tf_export + __version__ = '2.1.6-tf' +tf_export('keras.__version__').export_constant(__name__, '__version__') + del absolute_import del division del print_function diff --git a/tensorflow/python/keras/integration_test.py b/tensorflow/python/keras/integration_test.py index 2e83544d97..2a05699407 100644 --- a/tensorflow/python/keras/integration_test.py +++ b/tensorflow/python/keras/integration_test.py @@ -29,6 +29,9 @@ from tensorflow.python.platform import test class KerasIntegrationTest(test.TestCase): + def test_version(self): + self.assertTrue(keras.__version__.endswith('-tf')) + def test_vector_classification_sequential(self): with self.test_session(): 
np.random.seed(1337) -- GitLab From add0043e9d6233d9fabf2676e449d26ecd257ec5 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 4 Jun 2018 11:25:24 -0700 Subject: [PATCH 485/902] - Fix typo in evaluator PiperOrigin-RevId: 199164433 --- tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index b1b58642ec..13f46407e3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1962,7 +1962,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // TODO(b/74360564): This is implementation defined behavior, but is // currently respected by all implementations. Change this if we ever decide - // to oficially document different behavior. + // to officially document different behavior. for (int64 i = 0; i < start.size(); ++i) { start[i] = std::min( std::max(int64{0}, start[i]), -- GitLab From afb0950cf4acf1ec920287066154cc1b21b2a7bf Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 4 Jun 2018 11:45:53 -0700 Subject: [PATCH 486/902] Add a special functions module that contains non-Python abstractions, like the list stack operation. 
PiperOrigin-RevId: 199167953 --- tensorflow/contrib/autograph/__init__.py | 16 +++++- tensorflow/contrib/autograph/impl/BUILD | 11 ++++ .../autograph/impl/special_functions.py | 48 ++++++++++++++++++ .../autograph/impl/special_functions_test.py | 50 +++++++++++++++++++ 4 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/autograph/impl/special_functions.py create mode 100644 tensorflow/contrib/autograph/impl/special_functions_test.py diff --git a/tensorflow/contrib/autograph/__init__.py b/tensorflow/contrib/autograph/__init__.py index 3386c4eca4..310eb34a70 100644 --- a/tensorflow/contrib/autograph/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -29,12 +29,24 @@ from tensorflow.contrib.autograph.impl.api import do_not_convert from tensorflow.contrib.autograph.impl.api import RunMode from tensorflow.contrib.autograph.impl.api import to_code from tensorflow.contrib.autograph.impl.api import to_graph +from tensorflow.contrib.autograph.impl.special_functions import stack from tensorflow.contrib.autograph.pyct.transformer import AutographParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'utils', 'convert', 'converted_call', 'do_not_convert', 'RunMode', - 'to_code', 'to_graph', 'AutographParseError' + # Main API + 'RunMode', + 'convert', + 'converted_call', + 'do_not_convert', + 'to_code', + 'to_graph', + # Special functions + 'stack', + # Exceptions + 'AutographParseError', + # Utilities: to be removed + 'utils', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/autograph/impl/BUILD b/tensorflow/contrib/autograph/impl/BUILD index 54424e2647..91ae0b9b82 100644 --- a/tensorflow/contrib/autograph/impl/BUILD +++ b/tensorflow/contrib/autograph/impl/BUILD @@ -21,6 +21,7 @@ py_library( "config.py", "conversion.py", "naming.py", + "special_functions.py", ], srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], @@ -69,3 +70,13 @@ 
py_test( "//tensorflow/python:client_testlib", ], ) + +py_test( + name = "special_functions_test", + srcs = ["special_functions_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":impl", + "//tensorflow/python:client_testlib", + ], +) diff --git a/tensorflow/contrib/autograph/impl/special_functions.py b/tensorflow/contrib/autograph/impl/special_functions.py new file mode 100644 index 0000000000..b7a8177c44 --- /dev/null +++ b/tensorflow/contrib/autograph/impl/special_functions.py @@ -0,0 +1,48 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Special functions that only make sense for AutoGraph. + +These functions are meant to ensure feature parity between Python and AutoGraph, +so that the exact same code works in both modes. In general, AutoGraph will +replace these calls. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.operators import data_structures + + +def stack(list_or_tensor, element_dtype=None): + """Stacks the input, if it admits the notion of stacking. No-op otherwise. + + For example, a list of tensors can be stacked into a larger tensor. This + function is similar to tf.stack, but it accepts non-lists and lists of + non-tensors as arguments. In the latter case, the function does nothing. 
+ + Args: + list_or_tensor: Any entity. + element_dtype: Optional dtype for the elements in the list. Required if the + input is stackable, and the list is untyped. + + Returns: + If the input is stackable, a new object representing the stacked inputs. + Otherwise it returns list_or_tensor unchanged. + """ + return data_structures.list_stack( + list_or_tensor, + data_structures.ListStackOpts( + element_dtype=element_dtype, original_call=lambda x: x)) diff --git a/tensorflow/contrib/autograph/impl/special_functions_test.py b/tensorflow/contrib/autograph/impl/special_functions_test.py new file mode 100644 index 0000000000..9b52d2a59b --- /dev/null +++ b/tensorflow/contrib/autograph/impl/special_functions_test.py @@ -0,0 +1,50 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for special_functions module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.impl import special_functions +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import list_ops +from tensorflow.python.platform import test + + +class SpecialFunctionsTest(test.TestCase): + + def test_basic(self): + self.assertEqual(special_functions.stack(1), 1) + self.assertListEqual(special_functions.stack([1, 2, 3]), [1, 2, 3]) + # TODO(mdan): This should probably forward to tf.stack. + self.assertTrue( + isinstance( + special_functions.stack( + [constant_op.constant(1), + constant_op.constant(2)]), list)) + + t = constant_op.constant([1.0, 2.0]) + l = list_ops.tensor_list_from_tensor( + t, element_shape=constant_op.constant([], dtype=dtypes.int32)) + self.assertTrue( + tensor_util.is_tensor( + special_functions.stack(l, element_dtype=dtypes.float32))) + + +if __name__ == '__main__': + test.main() -- GitLab From 008fc03ab6ec74a3b9acca1b182e243c55da0956 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 4 Jun 2018 11:47:29 -0700 Subject: [PATCH 487/902] [TF:XLA] Bump open source llvm revision to r333878 PiperOrigin-RevId: 199168290 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c072f89965..e66af3c8bc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -452,11 +452,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/48c1879dcedb834e95a95da8715b30897a49edbe.tar.gz", - 
"https://github.com/llvm-mirror/llvm/archive/48c1879dcedb834e95a95da8715b30897a49edbe.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f.tar.gz", ], - sha256 = "0e0767199c169f738718461d05d3fdada80b533a6e8e2e07c9ae852356be3c0a", - strip_prefix = "llvm-48c1879dcedb834e95a95da8715b30897a49edbe", + sha256 = "6f782a0d2e9d7946bdf20807e0fcd8f5eaed8afd93bdd610cdefbe9435ca551f", + strip_prefix = "llvm-40c66c3d40377cf85640b3a35e6ec5c5b1cbc41f", build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) -- GitLab From 836fc096c77a3b1170b91242e30b6075e7805cec Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 4 Jun 2018 12:05:14 -0700 Subject: [PATCH 488/902] Fix test user ops PiperOrigin-RevId: 199171316 --- tensorflow/tools/ci_build/builds/test_user_ops.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index c342367bac..25ecee4725 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -239,8 +239,9 @@ function run_op() { fi } -run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") -run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}))") " in eager mode" +run_op "$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))")" +run_op "$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}).numpy())")" " in eager mode" + popd -- GitLab From 
d16877ce0372df0c1ff5b8046fbe8985cfb796f9 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 4 Jun 2018 12:08:15 -0700 Subject: [PATCH 489/902] Fix Python API. PiperOrigin-RevId: 199171845 --- tensorflow/contrib/lite/python/convert_saved_model.py | 4 ++-- .../contrib/lite/python/convert_saved_model_test.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py index b952a72aab..5dad49f1ed 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -216,9 +216,9 @@ def set_tensor_shapes(tensors, shapes): """ if shapes: for tensor in tensors: - shape = shapes.get(tensor.name) + shape = shapes.get(tensor_name(tensor)) if shape is not None: - tensor.set_shape(shapes[tensor.name]) + tensor.set_shape(shape) def freeze_saved_model(saved_model_dir, input_arrays, input_shapes, diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index 80e5dc6e46..1e570d2c89 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -73,10 +73,15 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) self.assertEqual([None, 3, 5], tensor.shape.as_list()) - convert_saved_model.set_tensor_shapes([tensor], - {"Placeholder:0": [5, 3, 5]}) + convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [5, 3, 5]}) self.assertEqual([5, 3, 5], tensor.shape.as_list()) + def testSetTensorShapeNoneValid(self): + tensor = array_ops.placeholder(dtype=dtypes.float32) + + convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [1, 3, 5]}) + self.assertEqual([1, 3, 5], tensor.shape.as_list()) + def testSetTensorShapeInvalid(self): tensor = 
array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) self.assertEqual([None, 3, 5], tensor.shape.as_list()) -- GitLab From d88e8719833b409042c03d20a9a4acaac1d1f531 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 12:15:47 -0700 Subject: [PATCH 490/902] added clearer description for invalid behavior when executing in eager mode. PiperOrigin-RevId: 199173022 --- tensorflow/python/keras/engine/input_layer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py index b04dc3c60b..7996110829 100644 --- a/tensorflow/python/keras/engine/input_layer.py +++ b/tensorflow/python/keras/engine/input_layer.py @@ -119,6 +119,12 @@ class InputLayer(base_layer.Layer): self.is_placeholder = False self._batch_input_shape = tuple(input_tensor.get_shape().as_list()) + if context.executing_eagerly(): + raise ValueError('You should not pass an input tensor when executing ' + 'in eager mode. For example, instead of creating an ' + 'InputLayer, you should instantiate your model and ' + 'directly call it on your input.') + # Create an input node to add to self.outbound_node # and set output_tensors' _keras_history. input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access -- GitLab From 48acc50c8d5ddf641e5fe0f8f3b27c9085854edd Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 4 Jun 2018 12:42:39 -0700 Subject: [PATCH 491/902] Turns on optimization to convert division of sqrt to multiplication of rsqrt PiperOrigin-RevId: 199177029 --- tensorflow/core/grappler/optimizers/arithmetic_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index ce3c633baf..e6fc311929 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -59,7 +59,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool enable_try_simplify_and_replace = true; bool combine_add_to_addn = true; - bool convert_sqrt_div_to_rsqrt_mul = false; + bool convert_sqrt_div_to_rsqrt_mul = true; bool dedup_computations = true; bool fold_multiply_into_conv = true; bool hoist_common_factor_out_of_aggregation = true; -- GitLab From 8c7a504699f35fb5252640d7319fe516ff0a19a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 12:57:33 -0700 Subject: [PATCH 492/902] Fix a couple of doc typos. PiperOrigin-RevId: 199179067 --- .../api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt index 41a9cfaa27..9b500d0b58 100644 --- a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt @@ -44,6 +44,7 @@ END summary: "Quantizes then dequantizes a tensor." description: < Date: Mon, 4 Jun 2018 13:01:31 -0700 Subject: [PATCH 493/902] Fix broken distributed_runtime/remote_device_test by adding missing std::shared_ptr. 
PiperOrigin-RevId: 199179607 --- tensorflow/core/distributed_runtime/remote_device_test.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/remote_device_test.cc b/tensorflow/core/distributed_runtime/remote_device_test.cc index 778060daaf..a04e79328b 100644 --- a/tensorflow/core/distributed_runtime/remote_device_test.cc +++ b/tensorflow/core/distributed_runtime/remote_device_test.cc @@ -49,8 +49,9 @@ class RemoteDeviceTest : public ::testing::Test { TF_CHECK_OK(spec.AddHostPortsJob("localhost", {hostport})); ChannelCreationFunction channel_func = ConvertToChannelCreationFunction(NewHostPortGrpcChannel); - worker_cache_.reset( - NewGrpcWorkerCache(NewGrpcChannelCache(spec, channel_func))); + std::shared_ptr channel_cache( + NewGrpcChannelCache(spec, channel_func)); + worker_cache_.reset(NewGrpcWorkerCache(channel_cache)); remote_name_ = "/job:localhost/replica:0/task:0"; wi_ = worker_cache_->CreateWorker(remote_name_); } -- GitLab From 06a7049f29b0148659693ec53db530c2c895a6a6 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 4 Jun 2018 13:23:40 -0700 Subject: [PATCH 494/902] I've made the updates Rajat requested. Please note the links will not work until after we have launched. --- RELEASE.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 600294478d..c1ed69bd45 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,8 +4,10 @@ * Update tf.keras to the Keras 2.1.6 API. * `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. * Adding support of core feature columns and losses to gradient boosted trees estimators. -* The Bijector API now requires 'event_ndims' passed in to the `log_det_jacobian` methods, while `event_ndims` is removed from the base class and replaced with `forward_min_event_ndims`. The signature is now `log_det_jacobian(x, event_ndims)`. The main rationale for this change is that it allows Bijectors to broadcast. 
-RELNOTES: If you were using layers from `tf.keras.layers` in conjunction with custom variable scopes, your layer variable names might have changed. If you were using layers from `tf.layers` in a subclassed `tf.keras.Model` class, then your variable names have changed (you can restore the prior names by importing the same layers from `tf.keras.layers` instead of `tf.layers`). +* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. +* Layered variable names have changed in the following conditions: + * Using `tf.keras.layers` with custom variable scopes. + * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details ## Breaking Chances * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). -- GitLab From 279b899642c22734a5bd3b375a2fa9f84aa4738c Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 4 Jun 2018 13:42:17 -0700 Subject: [PATCH 495/902] Improve TOCO error handling. 
PiperOrigin-RevId: 199186109 --- .../lite/python/convert_saved_model_test.py | 1 + tensorflow/contrib/lite/python/lite.py | 6 +++++- tensorflow/contrib/lite/python/lite_test.py | 18 ++++++++++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index 1e570d2c89..92c4ebb246 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -78,6 +78,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): def testSetTensorShapeNoneValid(self): tensor = array_ops.placeholder(dtype=dtypes.float32) + self.assertEqual(None, tensor.shape) convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [1, 3, 5]}) self.assertEqual([1, 3, 5], tensor.shape.as_list()) diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 253b5eadf3..2cb06e2559 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -254,15 +254,19 @@ class TocoConverter(object): Raises: ValueError: + Input shape is not specified. None value for dimension in input_tensor. """ # Checks dimensions in input tensor. for tensor in self._input_tensors: + if not tensor.get_shape(): + raise ValueError("Provide an input shape for input array '{0}'.".format( + tensor_name(tensor))) shape = tensor.get_shape().as_list() if None in shape[1:]: raise ValueError( "None is only supported in the 1st dimension. 
Tensor '{0}' has " - "invalid shape '{1}'.".format(tensor.name, shape)) + "invalid shape '{1}'.".format(tensor_name(tensor), shape)) elif shape[0] is None: self._set_batch_size(batch_size=1) diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index 53d1878293..5f8dfc0dc1 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -131,21 +131,31 @@ class FromSessionTest(test_util.TensorFlowTestCase): 'Quantization input stats are not available for input tensors ' '\'inputB\'.', str(error.exception)) - def testBatchSizeInvalid(self): - in_tensor = array_ops.placeholder( - shape=[None, 16, 16, 3], dtype=dtypes.float32) + def testSizeNoneInvalid(self): + in_tensor = array_ops.placeholder(dtype=dtypes.float32) out_tensor = in_tensor + in_tensor sess = session.Session() # Test invalid shape. None after 1st dimension. + converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) + with self.assertRaises(ValueError) as error: + converter.convert() + self.assertEqual('Provide an input shape for input array \'Placeholder\'.', + str(error.exception)) + + def testBatchSizeInvalid(self): in_tensor = array_ops.placeholder( shape=[1, None, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Test invalid shape. None after 1st dimension. converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) with self.assertRaises(ValueError) as error: converter.convert() self.assertEqual( 'None is only supported in the 1st dimension. 
Tensor ' - '\'Placeholder_1:0\' has invalid shape \'[1, None, 16, 3]\'.', + '\'Placeholder\' has invalid shape \'[1, None, 16, 3]\'.', str(error.exception)) def testBatchSizeValid(self): -- GitLab From 204fcd9a002aa8678c42d076553e38d69e8724a6 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Mon, 4 Jun 2018 14:20:46 -0700 Subject: [PATCH 496/902] [XLA:GPU] Propagate layouts in a better order for performance and fusion. PiperOrigin-RevId: 199193181 --- .../compiler/xla/service/gpu/gpu_layout_assignment.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 178457721a..8bf62dde8b 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -159,7 +159,13 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall( Status GpuLayoutAssignment::AddBackendConstraints( LayoutConstraints* constraints) { - for (auto* instruction : constraints->computation()->instructions()) { + // Add convolution constraints in reverse postorder so that the earliest + // convolution layout propagates first. This reduces the likelihood of fusion + // nodes with copies.
+ auto post_order = constraints->computation()->MakeInstructionPostOrder(); + for (auto iterator = post_order.rbegin(); iterator != post_order.rend(); + ++iterator) { + HloInstruction* instruction = *iterator; if (IsCustomCallToDnnConvolution(*instruction)) { TF_RETURN_IF_ERROR( AddBackendConstraintsToDnnConvCustomCall(instruction, constraints)); -- GitLab From 3c87b99d8c8052c3b6d67190bca14ea89137221a Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 4 Jun 2018 14:26:09 -0700 Subject: [PATCH 497/902] Remove --distinct_host_configuration=false from tools/bazel.rc Don't use --distinct_host_configuration=false by default, because it would break cross compiling, like android build and Raspberry Pi build. Instead, we add it for builds that we know they have the same host and target platforms. PiperOrigin-RevId: 199194260 --- tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 1 - .../tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++++ tools/bazel.rc | 6 ------ 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 30ea8539aa..1bd1852ffc 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -100,7 +100,6 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ - --distinct_host_configuration=true \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 1b1c3815d8..0b13b97209 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -73,6 +73,10 @@ if [[ "$release_build" != 1 ]]; then echo "build 
--define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" fi +# The host and target platforms are the same in Windows build. So we don't have +# to distinguish them. This helps avoid building the same targets twice. +echo "build --distinct_host_configuration=false" >> "${TMP_BAZELRC}" + echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc run_configure_for_cpu_build diff --git a/tools/bazel.rc b/tools/bazel.rc index 03aa52da1f..1c1e6afb65 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -1,14 +1,8 @@ -# By default, we don't distinct target and host platfroms. -# When doing cross compilation, use --config=cross_compile to distinct them. -build --distinct_host_configuration=false -build:cross_compile --distinct_host_configuration=true - # Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the # target CPU to build transient dependencies correctly. See # https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -build:android --config=cross_compile build:android_arm --config=android build:android_arm --cpu=armeabi-v7a build:android_arm --fat_apk_cpu=armeabi-v7a -- GitLab From 6b2a088fb263af2428ca672a62088646a7f54219 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 4 Jun 2018 14:46:38 -0700 Subject: [PATCH 498/902] Add various missing aliases for symbols in tf.keras submodules.
PiperOrigin-RevId: 199198086 --- tensorflow/python/keras/losses.py | 35 ++++++++++++--- tensorflow/python/ops/init_ops.py | 21 +++++---- ...nsorflow.keras.initializers.constant.pbtxt | 18 ++++++++ ...nsorflow.keras.initializers.identity.pbtxt | 18 ++++++++ ...tensorflow.keras.initializers.normal.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.ones.pbtxt | 18 ++++++++ ...orflow.keras.initializers.orthogonal.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.pbtxt | 40 +++++++++++++++++ ...low.keras.initializers.random_normal.pbtxt | 18 ++++++++ ...ow.keras.initializers.random_uniform.pbtxt | 18 ++++++++ ....keras.initializers.truncated_normal.pbtxt | 18 ++++++++ ...ensorflow.keras.initializers.uniform.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.zeros.pbtxt | 18 ++++++++ .../api/golden/tensorflow.keras.losses.pbtxt | 44 +++++++++++++++++++ .../api/golden/tensorflow.keras.metrics.pbtxt | 44 +++++++++++++++++++ 15 files changed, 350 insertions(+), 14 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt diff --git a/tensorflow/python/keras/losses.py 
b/tensorflow/python/keras/losses.py index d82ebd9c31..9f548bfe04 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -30,19 +30,31 @@ from tensorflow.python.util.tf_export import tf_export @tf_export('keras.metrics.mean_squared_error', - 'keras.losses.mean_squared_error') + 'keras.metrics.mse', + 'keras.metrics.MSE', + 'keras.losses.mean_squared_error', + 'keras.losses.mse', + 'keras.losses.MSE') def mean_squared_error(y_true, y_pred): return K.mean(math_ops.square(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_error', - 'keras.losses.mean_absolute_error') + 'keras.metrics.mae', + 'keras.metrics.MAE', + 'keras.losses.mean_absolute_error', + 'keras.losses.mae', + 'keras.losses.MAE') def mean_absolute_error(y_true, y_pred): return K.mean(math_ops.abs(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_percentage_error', - 'keras.losses.mean_absolute_percentage_error') + 'keras.metrics.mape', + 'keras.metrics.MAPE', + 'keras.losses.mean_absolute_percentage_error', + 'keras.losses.mape', + 'keras.losses.MAPE') def mean_absolute_percentage_error(y_true, y_pred): diff = math_ops.abs( (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None)) @@ -50,7 +62,11 @@ def mean_absolute_percentage_error(y_true, y_pred): @tf_export('keras.metrics.mean_squared_logarithmic_error', - 'keras.losses.mean_squared_logarithmic_error') + 'keras.metrics.msle', + 'keras.metrics.MSLE', + 'keras.losses.mean_squared_logarithmic_error', + 'keras.losses.msle', + 'keras.losses.MSLE') def mean_squared_logarithmic_error(y_true, y_pred): first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.) second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.) 
@@ -117,7 +133,11 @@ def binary_crossentropy(y_true, y_pred): @tf_export('keras.metrics.kullback_leibler_divergence', - 'keras.losses.kullback_leibler_divergence') + 'keras.metrics.kld', + 'keras.metrics.KLD', + 'keras.losses.kullback_leibler_divergence', + 'keras.losses.kld', + 'keras.losses.KLD') def kullback_leibler_divergence(y_true, y_pred): y_true = K.clip(y_true, K.epsilon(), 1) y_pred = K.clip(y_pred, K.epsilon(), 1) @@ -129,7 +149,10 @@ def poisson(y_true, y_pred): return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1) -@tf_export('keras.metrics.cosine_proximity', 'keras.losses.cosine_proximity') +@tf_export('keras.metrics.cosine_proximity', + 'keras.metrics.cosine', + 'keras.losses.cosine_proximity', + 'keras.losses.cosine') def cosine_proximity(y_true, y_pred): y_true = nn.l2_normalize(y_true, axis=-1) y_pred = nn.l2_normalize(y_pred, axis=-1) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 1f8d8dc4f3..2df230d470 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -86,7 +86,7 @@ class Initializer(object): @tf_export("keras.initializers.Zeros", "initializers.zeros", - "zeros_initializer") + "zeros_initializer", "keras.initializers.zeros") class Zeros(Initializer): """Initializer that generates tensors initialized to 0.""" @@ -102,7 +102,8 @@ class Zeros(Initializer): return {"dtype": self.dtype.name} -@tf_export("keras.initializers.Ones", "initializers.ones", "ones_initializer") +@tf_export("keras.initializers.Ones", "initializers.ones", "ones_initializer", + "keras.initializers.ones") class Ones(Initializer): """Initializer that generates tensors initialized to 1.""" @@ -119,7 +120,7 @@ class Ones(Initializer): @tf_export("keras.initializers.Constant", "initializers.constant", - "constant_initializer") + "constant_initializer", "keras.initializers.constant") class Constant(Initializer): """Initializer that generates tensors with constant values. 
@@ -225,7 +226,8 @@ class Constant(Initializer): @tf_export("keras.initializers.RandomUniform", "initializers.random_uniform", - "random_uniform_initializer") + "random_uniform_initializer", "keras.initializers.uniform", + "keras.initializers.random_uniform") class RandomUniform(Initializer): """Initializer that generates tensors with a uniform distribution. @@ -262,7 +264,8 @@ class RandomUniform(Initializer): @tf_export("keras.initializers.RandomNormal", "initializers.random_normal", - "random_normal_initializer") + "random_normal_initializer", "keras.initializers.normal", + "keras.initializers.random_normal") class RandomNormal(Initializer): """Initializer that generates tensors with a normal distribution. @@ -299,7 +302,8 @@ class RandomNormal(Initializer): @tf_export("keras.initializers.TruncatedNormal", - "initializers.truncated_normal", "truncated_normal_initializer") + "initializers.truncated_normal", "truncated_normal_initializer", + "keras.initializers.truncated_normal") class TruncatedNormal(Initializer): """Initializer that generates a truncated normal distribution. @@ -482,7 +486,7 @@ class VarianceScaling(Initializer): @tf_export("keras.initializers.Orthogonal", "initializers.orthogonal", - "orthogonal_initializer") + "orthogonal_initializer", "keras.initializers.orthogonal") class Orthogonal(Initializer): """Initializer that generates an orthogonal matrix. @@ -1062,7 +1066,8 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal): return self._dict_to_tensor(p, ksize, ksize, ksize) -@tf_export("keras.initializers.Identity", "initializers.identity") +@tf_export("keras.initializers.Identity", "initializers.identity", + "keras.initializers.identity") class Identity(Initializer): """Initializer that generates the identity matrix. 
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt new file mode 100644 index 0000000000..bddc37b907 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.constant" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'value\', \'dtype\', \'verify_shape\'], varargs=None, keywords=None, defaults=[\'0\', \"\", \'False\'], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt new file mode 100644 index 0000000000..a4c5a61490 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.identity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'gain\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt new file mode 100644 index 0000000000..7485772784 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt @@ -0,0 +1,18 
@@ +path: "tensorflow.keras.initializers.normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt new file mode 100644 index 0000000000..a89f78d1e1 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.ones" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=[\"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt new file mode 100644 index 0000000000..ee1e9bbae2 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.orthogonal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'gain\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + 
argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt index 093c56595b..14a667870d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt @@ -40,6 +40,46 @@ tf_module { name: "Zeros" mtype: "" } + member { + name: "constant" + mtype: "" + } + member { + name: "identity" + mtype: "" + } + member { + name: "normal" + mtype: "" + } + member { + name: "ones" + mtype: "" + } + member { + name: "orthogonal" + mtype: "" + } + member { + name: "random_normal" + mtype: "" + } + member { + name: "random_uniform" + mtype: "" + } + member { + name: "truncated_normal" + mtype: "" + } + member { + name: "uniform" + mtype: "" + } + member { + name: "zeros" + mtype: "" + } member_method { name: "deserialize" argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt new file mode 100644 index 0000000000..a6df1e87a3 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.random_normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" 
+ argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt new file mode 100644 index 0000000000..37a0fa0d55 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.random_uniform" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'minval\', \'maxval\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt new file mode 100644 index 0000000000..f97e93f0b7 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.truncated_normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt new file mode 100644 index 0000000000..58186b1383 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.uniform" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'minval\', \'maxval\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt new file mode 100644 index 0000000000..a262390687 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.zeros" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=[\"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt index ae5f6305b7..eca6b91538 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt @@ -1,5 +1,25 @@ path: "tensorflow.keras.losses" tf_module { + member_method { + name: "KLD" + argspec: "args=[\'y_true\', 
\'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAPE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSLE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "binary_crossentropy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -12,6 +32,10 @@ tf_module { name: "categorical_hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "cosine" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "cosine_proximity" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -28,6 +52,10 @@ tf_module { name: "hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "kld" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "kullback_leibler_divergence" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -36,6 +64,14 @@ tf_module { name: "logcosh" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mae" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "mape" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "mean_absolute_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -52,6 +88,14 
@@ tf_module { name: "mean_squared_logarithmic_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mse" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "msle" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "poisson" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt index 42729e4237..a97a9b5758 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt @@ -1,5 +1,25 @@ path: "tensorflow.keras.metrics" tf_module { + member_method { + name: "KLD" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAPE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSLE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "binary_accuracy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -16,6 +36,10 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "cosine" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "cosine_proximity" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, 
defaults=None" @@ -32,10 +56,22 @@ tf_module { name: "hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "kld" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "kullback_leibler_divergence" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mae" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "mape" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "mean_absolute_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -52,6 +88,14 @@ tf_module { name: "mean_squared_logarithmic_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mse" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "msle" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "poisson" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" -- GitLab From 06c4fb61f269e18ca2f4b9a73d1b92e48bd095bf Mon Sep 17 00:00:00 2001 From: Vinu Rajashekhar Date: Mon, 4 Jun 2018 14:48:32 -0700 Subject: [PATCH 499/902] Fixes a cleanup bug in BatchFunction op. 
PiperOrigin-RevId: 199198413 --- .../batching/python/ops/batch_ops_test.py | 28 +++++++++++++- tensorflow/core/kernels/batch_kernels.cc | 37 +++++++++++-------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/batching/python/ops/batch_ops_test.py b/tensorflow/contrib/batching/python/ops/batch_ops_test.py index 68e8a88ca0..ea8339334f 100644 --- a/tensorflow/contrib/batching/python/ops/batch_ops_test.py +++ b/tensorflow/contrib/batching/python/ops/batch_ops_test.py @@ -24,6 +24,7 @@ import time from tensorflow.contrib.batching.python.ops import batch_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import function +from tensorflow.python.framework.errors import InvalidArgumentError from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_batch_ops from tensorflow.python.ops import gradients_impl @@ -208,7 +209,7 @@ class BatchOpsTest(test.TestCase): self.assertEqual(main_results[0], [3]) def testBatchFunctionOp(self): - """Tests that the batch_func works.""" + """Tests that the batch_function op works.""" with self.test_session() as sess: @function.Defun(dtypes.int32) @@ -237,7 +238,7 @@ class BatchOpsTest(test.TestCase): self.assertEqual(main_results[0], [3]) def testBatchFunctionOpWithCapturedInput(self): - """Tests that batch_func with timeout.""" + """Tests that batch_function op works with captured input.""" with self.test_session() as sess: captured_inp0 = array_ops.placeholder_with_default(2, shape=[]) captured_inp1 = array_ops.placeholder_with_default(1, shape=[]) @@ -270,6 +271,29 @@ class BatchOpsTest(test.TestCase): self.assertEqual(thread_results[0], [2]) self.assertEqual(main_results[0], [3]) + def testBatchFunctionOpWithInputError(self): + """Tests that batch_function op works with error in the inputs.""" + with self.test_session() as sess: + inp = array_ops.placeholder(dtype=dtypes.int32, shape=[1]) + + @function.Defun(dtypes.int32, dtypes.int32) + def 
computation(in0, in1): + return in0 + in1 + + result = gen_batch_ops.batch_function( + [inp], # computation actually expects 2 inputs. + num_batch_threads=1, + max_batch_size=10, + batch_timeout_micros=100000, # 100ms + batching_queue="", + f=computation, + captured_tensors=computation.captured_inputs, + Tout=[o.type for o in computation.definition.signature.output_arg]) + + with self.assertRaisesRegexp(InvalidArgumentError, + ".*2 arguments.*but 1.*"): + sess.run([result], feed_dict={inp: [2]}) + def testBasicUnbatchDecoratedWithReshape(self): """Tests that the batch_function decorator works.""" with self.test_session() as sess: diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index c0eef229ce..35ddda0ec0 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -523,21 +523,28 @@ class BatchResource : public ResourceBase { const auto& captured_inputs = batch->task(batch->num_tasks() - 1).captured_inputs; args.insert(args.end(), captured_inputs.begin(), captured_inputs.end()); - flib->Run(opts, fhandle_, args, &combined_outputs, - [&](const Status& run_status) { - if (!run_status.ok()) { - return; - } - const auto split_status = - SplitOutputTensors(combined_outputs, batch.get()); - // We do the cleanup here as an optimization, so that it runs in - // the underlying TF inter-op threadpool. Running it in the - // threadpool, let's the ensuing ops be scheduled faster, - // because the executor will add them to the front of the - // threadpool's task queue rather than the end. - cleanup_fn(split_status); - done.Notify(); - }); + + // Releases the cleanup method here, because the callback of the function + // library runtime will handle it now. 
+ finally.release(); + flib->Run( + opts, fhandle_, args, &combined_outputs, [&](const Status& run_status) { + Status final_status; + auto run_finally = gtl::MakeCleanup([&]() { + // We do the cleanup here as an optimization, so that it runs in + // the underlying TF inter-op threadpool. Running it in the + // threadpool lets the ensuing ops be scheduled faster, + // because the executor will add them to the front of the + // threadpool's task queue rather than the end. + cleanup_fn(final_status); + done.Notify(); + }); + final_status = run_status; + if (!final_status.ok()) { + return; + } + final_status = SplitOutputTensors(combined_outputs, batch.get()); + }); // By waiting for the notification we are ensuring that this thread isn't // used for processing other batches, which gives the batches time to // coalesce upstream. So overall the number of batches going through the -- GitLab From 142ccf3666e07d011aa83fdd6be8c17f721fbc99 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 14:52:29 -0700 Subject: [PATCH 500/902] Add rip-offs of LLVM's cast, dyn_cast, cast_or_null, dyn_cast_or_null in preparation to split HloInstruction into subclasses. This initial implementation uses C++ dynamic_cast, so it also adds vtable to HloInstruction.
PiperOrigin-RevId: 199199109 --- tensorflow/compiler/xla/service/BUILD | 16 +++ .../compiler/xla/service/hlo_casting_utils.h | 101 ++++++++++++++++ .../xla/service/hlo_casting_utils_test.cc | 112 ++++++++++++++++++ .../compiler/xla/service/hlo_instruction.h | 11 +- 4 files changed, 235 insertions(+), 5 deletions(-) create mode 100644 tensorflow/compiler/xla/service/hlo_casting_utils.h create mode 100644 tensorflow/compiler/xla/service/hlo_casting_utils_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 0102e4f003..c5b637419c 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -3020,3 +3020,19 @@ cc_library( "//tensorflow/core:regexp_internal", ], ) + +cc_library( + name = "hlo_casting_utils", + hdrs = ["hlo_casting_utils.h"], + deps = [":hlo"], +) + +tf_cc_test( + name = "hlo_casting_utils_test", + srcs = ["hlo_casting_utils_test.cc"], + deps = [ + ":hlo_casting_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + "//tensorflow/core:test", + ], +) diff --git a/tensorflow/compiler/xla/service/hlo_casting_utils.h b/tensorflow/compiler/xla/service/hlo_casting_utils.h new file mode 100644 index 0000000000..b15f1f24c6 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_casting_utils.h @@ -0,0 +1,101 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// Casting utility functions for HLO instructions. + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" + +namespace xla { + +template +using EnableIfDerivedFromHlo = + typename std::enable_if::value>::type; + +// TODO(b/93238915): Switch implementation from C++'s dynamic_cast to LLVM-like +// RTTI if it turns out to be a performance issue. +// Casts an HloInstruction pointer to one of its subclasses, dies if argument is +// nullptr or runtime information does not match. +// +// Similar to LLVM's cast. +template * = nullptr> +const T* Cast(const HloInstruction* instruction) { + CHECK(instruction != nullptr); + const T* casted = dynamic_cast(instruction); + CHECK(casted != nullptr); + return casted; +} + +// Non-const overload of Cast. +template * = nullptr> +T* Cast(HloInstruction* instruction) { + return const_cast( + Cast(const_cast(instruction))); +} + +// Works just like the Cast, except that it allows for a null pointer as an +// argument which it then propagates. +// +// Similar to LLVM's cast_or_null. +template * = nullptr> +const T* CastOrNull(const HloInstruction* instruction) { + return instruction != nullptr ? Cast(instruction) : nullptr; +} + +// Non-const overload of CastOrNull. +template * = nullptr> +T* CastOrNull(HloInstruction* instruction) { + return const_cast( + CastOrNull(const_cast(instruction))); +} + +// Casts an HloInstruction pointer to one of its subclasses, dies if argument is +// nullptr, returns nullptr if runtime information does not match. +// +// Similar to LLVM's dyn_cast. +template * = nullptr> +const T* DynCast(const HloInstruction* instruction) { + CHECK(instruction != nullptr); + return dynamic_cast(instruction); +} + +// Non-const overload of DynCast.
+template * = nullptr> +T* DynCast(HloInstruction* instruction) { + return const_cast( + DynCast(const_cast(instruction))); +} + +// Works just like the DynCast, except that it allows for a null pointer as an +// argument which it then propagates. +// +// Similar to LLVM's dyn_cast_or_null. +template * = nullptr> +const T* DynCastOrNull(const HloInstruction* instruction) { + return instruction != nullptr ? DynCast(instruction) : nullptr; +} + +// Non-const overload of DynCastOrNull. +template * = nullptr> +T* DynCastOrNull(HloInstruction* instruction) { + return const_cast( + DynCastOrNull(const_cast(instruction))); +} + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CASTING_UTILS_H_ diff --git a/tensorflow/compiler/xla/service/hlo_casting_utils_test.cc b/tensorflow/compiler/xla/service/hlo_casting_utils_test.cc new file mode 100644 index 0000000000..436a922234 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_casting_utils_test.cc @@ -0,0 +1,112 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" + +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class DummyInstruction : public HloInstruction { + public: + DummyInstruction() + : HloInstruction(HloOpcode::kConstant, ShapeUtil::MakeShape(F32, {})) {} +}; + +class AnotherDummyInstruction : public HloInstruction { + public: + AnotherDummyInstruction() + : HloInstruction(HloOpcode::kParameter, ShapeUtil::MakeShape(F32, {})) {} +}; + +TEST(HloCastingUtilsTest, CastSucceeds) { + DummyInstruction instruction; + DummyInstruction* casted = + Cast(static_cast(&instruction)); + ASSERT_EQ(casted, &instruction); +} + +TEST(HloCastingUtilsTest, CastDiesForWrongType) { + AnotherDummyInstruction instruction; + ASSERT_DEATH( + Cast(static_cast(&instruction)), ""); +} + +TEST(HloCastingUtilsTest, CastDiesForNullptr) { + HloInstruction* null = nullptr; + ASSERT_DEATH(Cast(null), ""); +} + +TEST(HloCastingUtilsTest, CastOrNullSucceeds) { + DummyInstruction instruction; + DummyInstruction* casted = + Cast(static_cast(&instruction)); + ASSERT_EQ(casted, &instruction); +} + +TEST(HloCastingUtilsTest, CastOrNullDiesForWrongType) { + AnotherDummyInstruction instruction; + ASSERT_DEATH( + Cast(static_cast(&instruction)), ""); +} + +TEST(HloCastingUtilsTest, CastOrNullReturnsNullptrForNullptr) { + HloInstruction* null = nullptr; + DummyInstruction* casted = CastOrNull(null); + ASSERT_EQ(casted, nullptr); +} + +TEST(HloCastingUtilsTest, DynCastSucceeds) { + DummyInstruction instruction; + DummyInstruction* casted = + DynCast(static_cast(&instruction)); + ASSERT_EQ(casted, &instruction); +} + +TEST(HloCastingUtilsTest, DynCastReturnsNullptrForWrongType) { + AnotherDummyInstruction instruction; + DummyInstruction* casted = + DynCast(static_cast(&instruction)); + ASSERT_EQ(casted, nullptr); +} + +TEST(HloCastingUtilsTest, DynCastDiesForNullptr) { + HloInstruction* 
null = nullptr; + ASSERT_DEATH(DynCast(null), ""); +} + +TEST(HloCastingUtilsTest, DynCastOrNullSucceeds) { + DummyInstruction instruction; + DummyInstruction* casted = DynCastOrNull( + static_cast(&instruction)); + ASSERT_EQ(casted, &instruction); +} + +TEST(HloCastingUtilsTest, DynCastOrNullReturnsNullptrForWrongType) { + AnotherDummyInstruction instruction; + DummyInstruction* casted = DynCastOrNull( + static_cast(&instruction)); + ASSERT_EQ(casted, nullptr); +} + +TEST(HloCastingUtilsTest, DynCastOrNullReturnsNullptrForNullptr) { + HloInstruction* null = nullptr; + DummyInstruction* casted = DynCastOrNull(null); + ASSERT_EQ(casted, nullptr); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d47af6c018..905ea5310d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -322,7 +322,7 @@ class HloInstruction { kCustom, }; - ~HloInstruction(); + virtual ~HloInstruction(); // Creates an instruction from the given proto. Arguments: // @@ -1515,6 +1515,11 @@ class HloInstruction { void RelayoutConstant(const Layout& new_layout, const ShapeIndex& shape_index = {}); + protected: + // Internal constructor for a given opcode/shape, other fields must be filled + // by factory methods. + HloInstruction(HloOpcode opcode, const Shape& shape); + private: // Prints an instruction to a string. // @@ -1560,10 +1565,6 @@ class HloInstruction { // Removes a user for this instruction. void RemoveUser(HloInstruction* user); - // Internal constructor for a given opcode/shape, other fields must be filled - // by factory methods. - HloInstruction(HloOpcode opcode, const Shape& shape); - // Fuses the given instruction into this fusion instruction. When add_output // is false (which is the default), instruction_to_fuse is cloned and the // clone is placed in the fusion instruction. 
instruction_to_fuse is -- GitLab From e2d300823f410823b1b5fee4e5159a754247e219 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Mon, 4 Jun 2018 15:00:11 -0700 Subject: [PATCH 501/902] Move benchmarking code to a new directory and add some documentation. PiperOrigin-RevId: 199200246 --- .../lite/profiling/profile_summarizer.h | 3 - tensorflow/contrib/lite/tools/BUILD | 81 --------- tensorflow/contrib/lite/tools/benchmark/BUILD | 91 +++++++++ .../contrib/lite/tools/benchmark/README.md | 172 ++++++++++++++++++ .../tools/{ => benchmark}/benchmark_main.cc | 4 +- .../tools/{ => benchmark}/benchmark_model.cc | 4 +- .../tools/{ => benchmark}/benchmark_model.h | 4 +- .../{ => benchmark}/benchmark_tflite_model.cc | 4 +- .../{ => benchmark}/benchmark_tflite_model.h | 4 +- .../{ => benchmark}/command_line_flags.cc | 47 ++--- .../{ => benchmark}/command_line_flags.h | 2 +- .../command_line_flags_test.cc | 2 +- .../lite/tools/{ => benchmark}/logging.h | 3 +- tensorflow/core/BUILD | 1 - tensorflow/core/util/stat_summarizer.cc | 8 + tensorflow/core/util/stat_summarizer.h | 2 +- tensorflow/core/util/stats_calculator.cc | 27 +-- tensorflow/core/util/stats_calculator.h | 3 - 18 files changed, 321 insertions(+), 141 deletions(-) create mode 100644 tensorflow/contrib/lite/tools/benchmark/BUILD create mode 100644 tensorflow/contrib/lite/tools/benchmark/README.md rename tensorflow/contrib/lite/tools/{ => benchmark}/benchmark_main.cc (89%) rename tensorflow/contrib/lite/tools/{ => benchmark}/benchmark_model.cc (97%) rename tensorflow/contrib/lite/tools/{ => benchmark}/benchmark_model.h (97%) rename tensorflow/contrib/lite/tools/{ => benchmark}/benchmark_tflite_model.cc (98%) rename tensorflow/contrib/lite/tools/{ => benchmark}/benchmark_tflite_model.h (94%) rename tensorflow/contrib/lite/tools/{ => benchmark}/command_line_flags.cc (84%) rename tensorflow/contrib/lite/tools/{ => benchmark}/command_line_flags.h (98%) rename tensorflow/contrib/lite/tools/{ => 
benchmark}/command_line_flags_test.cc (98%) rename tensorflow/contrib/lite/tools/{ => benchmark}/logging.h (96%) diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer.h b/tensorflow/contrib/lite/profiling/profile_summarizer.h index 6fe6ca04f5..a529ff8742 100644 --- a/tensorflow/contrib/lite/profiling/profile_summarizer.h +++ b/tensorflow/contrib/lite/profiling/profile_summarizer.h @@ -45,9 +45,6 @@ class ProfileSummarizer { return stats_calculator_->GetShortSummary(); } - // Prints the string returned by GetOutputString(). - void PrintStepStats() const { stats_calculator_->PrintStepStats(); } - private: std::unique_ptr stats_calculator_; }; diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 7fb7517600..5913847329 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -30,87 +30,6 @@ tf_cc_binary( ], ) -tf_cc_binary( - name = "benchmark_model", - srcs = [ - "benchmark_main.cc", - "logging.h", - ], - copts = common_copts, - linkopts = select({ - "//tensorflow:android": [ - "-pie", - "-landroid", - "-lm", - "-z defs", - "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export - ], - "//conditions:default": [], - }), - deps = [ - ":benchmark_tflite_model_lib", - "//tensorflow/core:stats_calculator_portable", - ], -) - -cc_library( - name = "command_line_flags", - srcs = ["command_line_flags.cc"], - hdrs = ["command_line_flags.h"], - copts = common_copts, - visibility = ["//visibility:private"], -) - -cc_test( - name = "command_line_flags_test", - srcs = ["command_line_flags_test.cc"], - copts = common_copts, - visibility = ["//visibility:private"], - deps = [ - ":command_line_flags", - "//tensorflow/contrib/lite/testing:util", - "@com_google_googletest//:gtest", - ], -) - -cc_library( - name = "benchmark_tflite_model_lib", - srcs = [ - "benchmark_tflite_model.cc", - "logging.h", - ], - hdrs = ["benchmark_tflite_model.h"], - copts = common_copts, - deps = [ - 
":benchmark_model_lib", - "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite:string_util", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "//tensorflow/contrib/lite/profiling:profile_summarizer", - "//tensorflow/contrib/lite/profiling:profiler", - ], -) - -cc_library( - name = "benchmark_model_lib", - srcs = [ - "benchmark_model.cc", - "logging.h", - ], - hdrs = ["benchmark_model.h"], - copts = common_copts, - deps = [ - ":command_line_flags", - "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite:string_util", - "//tensorflow/contrib/lite/kernels:builtin_ops", - "//tensorflow/contrib/lite/profiling:profile_summarizer", - "//tensorflow/contrib/lite/profiling:profiler", - "//tensorflow/contrib/lite/profiling:time", - "//tensorflow/core:stats_calculator_portable", - ], -) - cc_library( name = "gen_op_registration", srcs = ["gen_op_registration.cc"], diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD new file mode 100644 index 0000000000..4824a4dbde --- /dev/null +++ b/tensorflow/contrib/lite/tools/benchmark/BUILD @@ -0,0 +1,91 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") + +common_copts = ["-Wall"] + +cc_binary( + name = "benchmark_model", + srcs = [ + "benchmark_main.cc", + "logging.h", + ], + copts = common_copts, + linkopts = select({ + "//tensorflow:android": [ + "-pie", + "-landroid", + "-lm", + "-z defs", + "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export + ], + "//conditions:default": [], + }), + deps = [ + ":benchmark_tflite_model_lib", + ], +) + +cc_library( + name = "command_line_flags", + srcs = ["command_line_flags.cc"], + hdrs = ["command_line_flags.h"], + copts = common_copts, + visibility = ["//visibility:private"], +) + +cc_test( + name = "command_line_flags_test", + srcs = 
["command_line_flags_test.cc"], + copts = common_copts, + visibility = ["//visibility:private"], + deps = [ + ":command_line_flags", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "benchmark_tflite_model_lib", + srcs = [ + "benchmark_tflite_model.cc", + "logging.h", + ], + hdrs = ["benchmark_tflite_model.h"], + copts = common_copts, + deps = [ + ":benchmark_model_lib", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/profiling:profile_summarizer", + "//tensorflow/contrib/lite/profiling:profiler", + ], +) + +cc_library( + name = "benchmark_model_lib", + srcs = [ + "benchmark_model.cc", + "logging.h", + ], + hdrs = ["benchmark_model.h"], + copts = common_copts, + deps = [ + ":command_line_flags", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite:string_util", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/profiling:profile_summarizer", + "//tensorflow/contrib/lite/profiling:profiler", + "//tensorflow/contrib/lite/profiling:time", + "//tensorflow/core:stats_calculator_portable", + ], +) + +tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/tools/benchmark/README.md b/tensorflow/contrib/lite/tools/benchmark/README.md new file mode 100644 index 0000000000..e6f333aa5b --- /dev/null +++ b/tensorflow/contrib/lite/tools/benchmark/README.md @@ -0,0 +1,172 @@ +# TFLite Model Benchmark Tool + +## Description + +A simple C++ binary to benchmark a TFLite model and its individual operators, +both on desktop machines and on Android. + +## To build/install/run + +### On Android: + +(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the android NDK/SDK. 
+ +(1) Build for your specific platform, e.g.: + +``` +bazel build -c opt \ + --config=android_arm \ + --cxxopt='--std=c++11' \ + tensorflow/contrib/lite/tools/benchmark:benchmark_model +``` + +(2) Connect your phone. Push the binary to your phone with adb push + (make the directory if required): + +``` +adb push bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model /data/local/tmp +``` + +(3) Make the binary executable. + +``` +adb shell chmod +x /data/local/tmp/benchmark_model +``` + +(4) Push the compute graph that you need to test. For example: + +``` +adb push mobilenet_quant_v1_224.tflite /data/local/tmp +``` + +(5) Run the benchmark. For example: + +``` +adb shell /data/local/tmp/benchmark_model \ + --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \ + --input_layer="Placeholder" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="uint8" \ + --output_layer="MobilenetV1/Predictions/Reshape_1" \ + --num_threads=4 +``` + +### On desktop: +(1) Build the binary. + +``` +bazel build -c opt tensorflow/contrib/lite/tools/benchmark:benchmark_model +``` + +(2) Run on your compute graph, similar to the Android case but without the need for adb shell. +For example: + +``` +bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model \ + --graph=mobilenet_quant_v1_224.tflite \ + --input_layer="Placeholder" \ + --input_layer_shape="1,224,224,3" \ + --input_layer_type="uint8" \ + --output_layer="MobilenetV1/Predictions/Reshape_1" \ + --num_threads=4 +``` + +The MobileNet graph used as an example here may be downloaded from +https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip + +## Profiling model operators +The benchmark model binary also allows you to profile operators and reports the execution time of each operator. To do this, +compile the binary with a compiler flag that enables profiling to be compiled in.
Pass **--copt=-DTFLITE_PROFILING_ENABLED** +to compile benchmark with profiling support. +For example, to compile with profiling support on Android, add this flag to the previous command: + +``` +bazel build -c opt \ + --config=android_arm \ + --cxxopt='--std=c++11' \ + --copt=-DTFLITE_PROFILING_ENABLED \ + tensorflow/contrib/lite/tools/benchmark:benchmark_model +``` +This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below: + +``` + +============================== Run Order ============================== + [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + CONV_2D 0.000 9.132 9.132 0.121% 0.121% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] + DEPTHWISE_CONV_2D 9.135 3.280 3.280 0.043% 0.165% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6] + CONV_2D 12.419 6.877 6.877 0.091% 0.256% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + DEPTHWISE_CONV_2D 19.299 1.708 1.708 0.023% 0.278% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6] + CONV_2D 21.012 4.162 4.162 0.055% 0.334% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6] + DEPTHWISE_CONV_2D 25.177 3.520 3.520 0.047% 0.380% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6] + CONV_2D 28.701 10.218 10.218 0.136% 0.516% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + DEPTHWISE_CONV_2D 38.922 0.827 0.827 0.011% 0.527% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6] + CONV_2D 39.752 1.401 1.401 0.019% 0.545% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6] + DEPTHWISE_CONV_2D 41.156 1.290 1.290 0.017% 0.563% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6] + CONV_2D 42.448 5.995 5.995 0.080% 0.642% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + DEPTHWISE_CONV_2D 48.445 0.409 0.409 0.005% 0.647% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] + 
CONV_2D 48.856 6.167 6.167 0.082% 0.729% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] + DEPTHWISE_CONV_2D 55.026 0.629 0.629 0.008% 0.738% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] + CONV_2D 55.656 6.464 6.464 0.086% 0.823% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + DEPTHWISE_CONV_2D 62.124 0.647 0.647 0.009% 0.832% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] + CONV_2D 62.774 14.666 14.666 0.195% 1.026% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + DEPTHWISE_CONV_2D 77.444 0.635 0.635 0.008% 1.035% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] + CONV_2D 78.081 7.186 7.186 0.095% 1.130% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + DEPTHWISE_CONV_2D 85.270 0.646 0.646 0.009% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] + CONV_2D 85.918 9.529 9.529 0.126% 1.265% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + DEPTHWISE_CONV_2D 95.451 0.628 0.628 0.008% 1.273% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] + CONV_2D 96.081 2.077 2.077 0.028% 1.301% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + DEPTHWISE_CONV_2D 98.162 0.168 0.168 0.002% 1.303% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] + CONV_2D 98.332 1.007 1.007 0.013% 1.317% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] + DEPTHWISE_CONV_2D 99.342 0.288 0.288 0.004% 1.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] + CONV_2D 99.632 8.197 8.197 0.109% 1.429% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + AVERAGE_POOL_2D 107.832 0.045 0.045 0.001% 1.430% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] + CONV_2D 107.878 0.325 0.325 0.004% 1.434% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] + RESHAPE 108.206 0.003 0.003 0.000% 1.434% 0.000 0 [MobilenetV1/Predictions/Reshape] + SOFTMAX 108.211 0.038 0.038 0.001% 1.434% 0.000 0 [MobilenetV1/Predictions/Softmax] + 
+============================== Top by Computation Time ============================== + [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + CONV_2D 62.774 14.666 14.666 0.195% 0.195% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + CONV_2D 28.701 10.218 10.218 0.136% 0.330% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + CONV_2D 85.918 9.529 9.529 0.126% 0.456% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + CONV_2D 0.000 9.132 9.132 0.121% 0.578% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] + CONV_2D 99.632 8.197 8.197 0.109% 0.686% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + CONV_2D 78.081 7.186 7.186 0.095% 0.782% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + CONV_2D 12.419 6.877 6.877 0.091% 0.873% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + CONV_2D 55.656 6.464 6.464 0.086% 0.958% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + CONV_2D 48.856 6.167 6.167 0.082% 1.040% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] + CONV_2D 42.448 5.995 5.995 0.080% 1.120% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + +============================== Top by Memory Use ============================== + [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + SOFTMAX 108.211 0.038 0.038 0.001% 0.001% 0.000 0 [MobilenetV1/Predictions/Softmax] + RESHAPE 108.206 0.003 0.003 0.000% 0.001% 0.000 0 [MobilenetV1/Predictions/Reshape] + CONV_2D 78.081 7.186 7.186 0.095% 0.096% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + DEPTHWISE_CONV_2D 77.444 0.635 0.635 0.008% 0.104% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] + CONV_2D 62.774 14.666 14.666 0.195% 0.299% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + DEPTHWISE_CONV_2D 62.124 0.647 0.647 0.009% 0.307% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] + CONV_2D 55.656 6.464 6.464 0.086% 0.393% 0.000 0 
[MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + DEPTHWISE_CONV_2D 55.026 0.629 0.629 0.008% 0.401% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] + CONV_2D 48.856 6.167 6.167 0.082% 0.483% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] + DEPTHWISE_CONV_2D 48.445 0.409 0.409 0.005% 0.489% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] + +Number of nodes executed: 31 +============================== Summary by node type ============================== + [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] + CONV_2D 15 1.861 86.679% 86.679% 0.000 0 + DEPTHWISE_CONV_2D 13 0.286 13.321% 100.000% 0.000 0 + SOFTMAX 1 0.000 0.000% 100.000% 0.000 0 + RESHAPE 1 0.000 0.000% 100.000% 0.000 0 + AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0 + +Timings (microseconds): count=50 first=108164 curr=128308 min=102850 max=197072 avg=150805 std=24368 +Memory (bytes): count=0 +31 nodes observed + + +Average inference timings in us: Warmup: 135310, Init: 12123, no stats: 150988 + +``` + + diff --git a/tensorflow/contrib/lite/tools/benchmark_main.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc similarity index 89% rename from tensorflow/contrib/lite/tools/benchmark_main.cc rename to tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc index 1325385e32..372d31e838 100644 --- a/tensorflow/contrib/lite/tools/benchmark_main.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/lite/tools/benchmark_tflite_model.h" -#include "tensorflow/contrib/lite/tools/logging.h" +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" namespace tflite { namespace benchmark { diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_model.cc similarity index 97% rename from tensorflow/contrib/lite/tools/benchmark_model.cc rename to tensorflow/contrib/lite/tools/benchmark/benchmark_model.cc index 550994c662..a8a9a6112c 100644 --- a/tensorflow/contrib/lite/tools/benchmark_model.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_model.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/tools/benchmark_model.h" +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h" #include @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/profiling/time.h" -#include "tensorflow/contrib/lite/tools/logging.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" namespace { void SleepForSeconds(double sleep_seconds) { diff --git a/tensorflow/contrib/lite/tools/benchmark_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_model.h similarity index 97% rename from tensorflow/contrib/lite/tools/benchmark_model.h rename to tensorflow/contrib/lite/tools/benchmark/benchmark_model.h index ef8d6a7d1e..d48f693693 100644 --- a/tensorflow/contrib/lite/tools/benchmark_model.h +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_model.h @@ -23,7 +23,7 @@ limitations under the License. 
#include #include -#include "tensorflow/contrib/lite/tools//command_line_flags.h" +#include "tensorflow/contrib/lite/tools/benchmark/command_line_flags.h" #include "tensorflow/core/util/stats_calculator.h" namespace tflite { @@ -158,4 +158,4 @@ class BenchmarkModel { } // namespace benchmark } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_MODEL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_BENCHMARK_MODEL_H_ diff --git a/tensorflow/contrib/lite/tools/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc similarity index 98% rename from tensorflow/contrib/lite/tools/benchmark_tflite_model.cc rename to tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc index be8f46f599..2e5b866273 100644 --- a/tensorflow/contrib/lite/tools/benchmark_tflite_model.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/tools/benchmark_tflite_model.h" +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" #include #include @@ -27,7 +27,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/op_resolver.h" #include "tensorflow/contrib/lite/string_util.h" -#include "tensorflow/contrib/lite/tools/logging.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" #ifdef TFLITE_CUSTOM_OPS_HEADER void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); diff --git a/tensorflow/contrib/lite/tools/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h similarity index 94% rename from tensorflow/contrib/lite/tools/benchmark_tflite_model.h rename to tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h index e6d03d5211..e70f6de1bf 100644 --- a/tensorflow/contrib/lite/tools/benchmark_tflite_model.h +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/profiling/profile_summarizer.h" -#include "tensorflow/contrib/lite/tools/benchmark_model.h" +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h" namespace tflite { namespace benchmark { @@ -87,4 +87,4 @@ class BenchmarkTfLiteModel : public BenchmarkModel { } // namespace benchmark } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_TFLITE_MODEL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_BENCHMARK_TFLITE_MODEL_H_ diff --git a/tensorflow/contrib/lite/tools/command_line_flags.cc b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc similarity index 84% rename from tensorflow/contrib/lite/tools/command_line_flags.cc rename to tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc index ba72f40689..723bf67e03 100644 --- a/tensorflow/contrib/lite/tools/command_line_flags.cc +++ b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc @@ -10,8 +10,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/lite/tools/command_line_flags.h" +#include "tensorflow/contrib/lite/tools/benchmark/command_line_flags.h" +#include #include #include #include @@ -19,6 +20,13 @@ limitations under the License. namespace tflite { namespace { +template +std::string ToString(T val) { + std::ostringstream stream; + stream << val; + return stream.str(); +} + bool ParseFlag(const std::string& arg, const std::string& flag, const std::function& parse_func, bool* value_parsing_ok) { @@ -35,14 +43,16 @@ bool ParseFlag(const std::string& arg, const std::string& flag, return true; } -bool ParseInt32Flag(const std::string& flag_value, int32_t* value) { - char extra; - return sscanf(flag_value.data(), "%d%c", value, &extra) == 1; -} - -bool ParseInt64Flag(const std::string& flag_value, int64_t* value) { - char extra; - return sscanf(flag_value.data(), "%ld%c", value, &extra) == 1; +template +bool ParseFlag(const std::string& flag_value, T* value) { + std::istringstream stream(flag_value); + T read_value; + stream >> read_value; + if (!stream.eof() && !stream.good()) { + return false; + } + *value = read_value; + return true; } bool ParseBoolFlag(const std::string& flag_value, bool* value) { @@ -54,11 +64,6 @@ bool ParseBoolFlag(const std::string& flag_value, bool* value) { return true; } -bool ParseFloatFlag(const std::string& flag_value, float* value) { - char extra; - return sscanf(flag_value.data(), "%f%c", value, &extra) == 1; -} - bool ParseStringFlag(const std::string& flag_value, std::string* value) { *value = flag_value; return true; @@ -70,27 +75,27 @@ Flag::Flag(const char* name, int32_t* dst, const std::string& usage_text) : name_(name), type_(TYPE_INT32), value_hook_([dst](const std::string& flag_value) { - return ParseInt32Flag(flag_value, dst); + return ParseFlag(flag_value, dst); }), - default_for_display_(std::to_string(*dst)), + default_for_display_(ToString(*dst)), 
usage_text_(usage_text) {} Flag::Flag(const char* name, int64_t* dst, const std::string& usage_text) : name_(name), type_(TYPE_INT64), value_hook_([dst](const std::string& flag_value) { - return ParseInt64Flag(flag_value, dst); + return ParseFlag(flag_value, dst); }), - default_for_display_(std::to_string(*dst)), + default_for_display_(ToString(*dst)), usage_text_(usage_text) {} Flag::Flag(const char* name, float* dst, const std::string& usage_text) : name_(name), type_(TYPE_FLOAT), value_hook_([dst](const std::string& flag_value) { - return ParseFloatFlag(flag_value, dst); + return ParseFlag(flag_value, dst); }), - default_for_display_(std::to_string(*dst)), + default_for_display_(ToString(*dst)), usage_text_(usage_text) {} Flag::Flag(const char* name, bool* dst, const std::string& usage_text) @@ -166,7 +171,7 @@ std::string Flag::GetTypeName() const { } argv[dst++] = nullptr; *argc = unknown_flags.size() + 1; - return result && (*argc < 2 || strcmp(argv[1], "--help") != 0); + return result && (*argc < 2 || std::strcmp(argv[1], "--help") != 0); } /*static*/ std::string Flags::Usage(const std::string& cmdline, diff --git a/tensorflow/contrib/lite/tools/command_line_flags.h b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.h similarity index 98% rename from tensorflow/contrib/lite/tools/command_line_flags.h rename to tensorflow/contrib/lite/tools/benchmark/command_line_flags.h index 0605d3c9d4..36f9e64767 100644 --- a/tensorflow/contrib/lite/tools/command_line_flags.h +++ b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.h @@ -109,4 +109,4 @@ class Flags { } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_COMMAND_LINE_FLAGS_H_ +#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_COMMAND_LINE_FLAGS_H_ diff --git a/tensorflow/contrib/lite/tools/command_line_flags_test.cc b/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc similarity index 98% rename from tensorflow/contrib/lite/tools/command_line_flags_test.cc rename 
to tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc index 463647bec9..74cf59105b 100644 --- a/tensorflow/contrib/lite/tools/command_line_flags_test.cc +++ b/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/tools/command_line_flags.h" +#include "tensorflow/contrib/lite/tools/benchmark/command_line_flags.h" #include #include #include "tensorflow/contrib/lite/testing/util.h" diff --git a/tensorflow/contrib/lite/tools/logging.h b/tensorflow/contrib/lite/tools/benchmark/logging.h similarity index 96% rename from tensorflow/contrib/lite/tools/logging.h rename to tensorflow/contrib/lite/tools/benchmark/logging.h index aa1fa5b827..9e9292e2fe 100644 --- a/tensorflow/contrib/lite/tools/logging.h +++ b/tensorflow/contrib/lite/tools/benchmark/logging.h @@ -18,6 +18,7 @@ limitations under the License. // LOG and CHECK macros for benchmarks. 
+#include #include #include @@ -72,4 +73,4 @@ class LoggingWrapper { #define TFLITE_BENCHMARK_CHECK_EQ(a, b) TFLITE_BENCHMARK_CHECK(a == b) -#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_LOGGING_H_ +#endif // TENSORFLOW_CONTRIB_LITE_TOOLS_BENCHMARK_LOGGING_H_ diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 7e13a07e5e..6bde2a0a4a 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -876,7 +876,6 @@ cc_library( hdrs = [ "util/stats_calculator.h", ], - deps = [":platform_base"], ) cc_library( diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 42a4801dcb..a5c1fda102 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -78,6 +78,14 @@ void StatSummarizer::Validate(const std::vector* outputs, } } +void StatSummarizer::PrintStepStats() const { + string output = GetOutputString(); + std::istringstream iss(output); + for (std::string line; std::getline(iss, line);) { + LOG(INFO) << line; + } +} + namespace { std::string OpType(const DeviceStepStats& ds, const NodeExecStats& ns) { // There is no published specification of how DeviceStats and NodeStats diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index 173ed5cebc..7e6d6f6372 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -68,7 +68,7 @@ class StatSummarizer { } // Prints the string returned by GetOutputString(). - void PrintStepStats() const { stats_calculator_->PrintStepStats(); } + void PrintStepStats() const; // Prints the output tensor sizes and types for each node. void PrintOutputs() const; diff --git a/tensorflow/core/util/stats_calculator.cc b/tensorflow/core/util/stats_calculator.cc index 20353ec76e..c4befbdb84 100644 --- a/tensorflow/core/util/stats_calculator.cc +++ b/tensorflow/core/util/stats_calculator.cc @@ -21,8 +21,6 @@ limitations under the License. 
#include #include -#include "tensorflow/core/platform/logging.h" - namespace tensorflow { StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) @@ -93,7 +91,7 @@ std::string StatsCalculator::ColumnString(const Detail& detail, void StatsCalculator::OrderNodesByMetric( SortingMetric metric, std::vector* details) const { - std::priority_queue> sorted_list; + std::priority_queue> sorted_list; const int num_nodes = details_.size(); for (const auto& det : details_) { @@ -142,7 +140,7 @@ void StatsCalculator::ComputeStatsByType( int64_t run_count = run_total_us_.count(); for (const auto& det : details_) { - const string node_name = det.first; + const std::string node_name = det.first; const Detail& detail = det.second; int64_t curr_time_val = @@ -151,7 +149,7 @@ void StatsCalculator::ComputeStatsByType( int64_t curr_memory_val = detail.mem_used.newest(); - const string& node_type = detail.type; + const std::string& node_type = detail.type; (*node_type_map_count)[node_type] += 1; (*node_type_map_time)[node_type] += curr_time_val; @@ -163,12 +161,12 @@ void StatsCalculator::ComputeStatsByType( std::string StatsCalculator::GetStatsByNodeType() const { std::stringstream stream; + stream << "Number of nodes executed: " << details_.size() << std::endl; + stream << "============================== Summary by node type " "==============================" << std::endl; - LOG(INFO) << "Number of nodes executed: " << details_.size(); - std::map node_type_map_count; std::map node_type_map_time; std::map node_type_map_memory; @@ -180,11 +178,12 @@ std::string StatsCalculator::GetStatsByNodeType() const { &accumulated_us); // Sort them. 
- std::priority_queue>> timings; + std::priority_queue>> + timings; for (const auto& node_type : node_type_map_time) { const int64_t mem_used = node_type_map_memory[node_type.first]; timings.emplace(node_type.second, - std::pair(node_type.first, mem_used)); + std::pair(node_type.first, mem_used)); } InitField(stream, 24) << "[Node type]"; @@ -201,7 +200,7 @@ std::string StatsCalculator::GetStatsByNodeType() const { auto entry = timings.top(); timings.pop(); - const string node_type = entry.second.first; + const std::string node_type = entry.second.first; const float memory = entry.second.second / 1000.0f; const int64_t node_type_total_us = entry.first; @@ -273,14 +272,6 @@ std::string StatsCalculator::GetOutputString() const { return stream.str(); } -void StatsCalculator::PrintStepStats() const { - string output = GetOutputString(); - std::istringstream iss(output); - for (std::string line; std::getline(iss, line);) { - LOG(INFO) << line; - } -} - void StatsCalculator::UpdateDetails( const std::map& details) { details_.insert(details.begin(), details.end()); diff --git a/tensorflow/core/util/stats_calculator.h b/tensorflow/core/util/stats_calculator.h index a1033465fb..39cef816f1 100644 --- a/tensorflow/core/util/stats_calculator.h +++ b/tensorflow/core/util/stats_calculator.h @@ -127,9 +127,6 @@ class StatsCalculator { std::string GetShortSummary() const; - // Prints the string returned by GetOutputString(). - void PrintStepStats() const; - void ComputeStatsByType( std::map* node_type_map_count, std::map* node_type_map_time, -- GitLab From d947e2c172b2eee4338e598a51d80d519907f991 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 4 Jun 2018 15:00:15 -0700 Subject: [PATCH 502/902] Remove tf_export decorator from contrib. tf_export decorators currently aren't supported in contrib. 
PiperOrigin-RevId: 199200258 --- tensorflow/contrib/distributions/python/ops/kumaraswamy.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py index 66682b2ff5..0ff989fc95 100644 --- a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py @@ -31,7 +31,6 @@ from tensorflow.python.ops import special_math_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import transformed_distribution from tensorflow.python.ops.distributions import uniform -from tensorflow.python.util.tf_export import tf_export __all__ = [ "Kumaraswamy", @@ -59,7 +58,6 @@ def _harmonic_number(x): return math_ops.digamma(x + one) - math_ops.digamma(one) -@tf_export("distributions.Kumaraswamy") class Kumaraswamy(transformed_distribution.TransformedDistribution): """Kumaraswamy distribution. -- GitLab From 18995ecf1a0c4a161b296fbafe63289e90437807 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 15:19:39 -0700 Subject: [PATCH 503/902] Adds update_ops to train_op for all heads. 
PiperOrigin-RevId: 199203634 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/python/estimator/head.py | 1 + .../estimator/python/estimator/head_test.py | 29 +++++++ tensorflow/python/estimator/BUILD | 1 + tensorflow/python/estimator/canned/head.py | 11 +++ .../python/estimator/canned/head_test.py | 86 +++++++++++++++++++ 6 files changed, 129 insertions(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 47c7b7fc19..1937ffb583 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -312,6 +312,7 @@ py_test( "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:training", + "//tensorflow/python:variables", "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", "//tensorflow/python/estimator:prediction_keys", diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index 8b97f86db1..b798769d2c 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -845,6 +845,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access train_op = train_op_fn(regularized_training_loss) else: raise ValueError('train_op_fn and optimizer cannot both be None.') + train_op = head_lib._append_update_ops(train_op) # pylint:disable=protected-access # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. 
if self._loss_reduction == losses.Reduction.SUM: diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index d6c158608b..b2b57fa06b 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops +from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses from tensorflow.python.platform import test from tensorflow.python.saved_model import signature_constants @@ -989,6 +990,34 @@ class MultiLabelHead(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) + def test_train_with_update_ops(self): + head = head_lib.multi_label_head(n_classes=2) + + with ops.Graph().as_default(): + w = variables.Variable(1) + update_op = w.assign_add(1) + ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op) + + t = variables.Variable('') + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return t.assign(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), + labels=np.array([[1, 0], [1, 1]], dtype=np.int64), + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + sess.run(spec.train_op) + w_value, t_value = sess.run([w, t]) + self.assertEqual(2, w_value) + self.assertEqual(expected_train_result, t_value) + def test_train_with_regularization_losses(self): head = head_lib.multi_label_head( n_classes=2, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE) diff --git a/tensorflow/python/estimator/BUILD 
b/tensorflow/python/estimator/BUILD index 9c4d58b177..d538c6c415 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -709,6 +709,7 @@ py_test( "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:training", + "//tensorflow/python:variables", "//tensorflow/python/feature_column", "//tensorflow/python/ops/losses", "//tensorflow/python/saved_model:signature_constants", diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 04fe4d97e4..b74ef1015c 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -873,6 +873,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): train_op = train_op_fn(regularized_training_loss) else: raise ValueError('train_op_fn and optimizer cannot both be None.') + train_op = _append_update_ops(train_op) # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -1244,6 +1245,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): train_op = train_op_fn(regularized_training_loss) else: raise ValueError('train_op_fn and optimizer cannot both be None.') + train_op = _append_update_ops(train_op) # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -1506,6 +1508,7 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): train_op = train_op_fn(regularized_training_loss) else: raise ValueError('train_op_fn and optimizer cannot both be None.') + train_op = _append_update_ops(train_op) # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. 
if self._loss_reduction == losses.Reduction.SUM: @@ -1533,6 +1536,14 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): train_op=train_op) +def _append_update_ops(train_op): + """Returns `train_op` appending `UPDATE_OPS` collection if present.""" + update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS) + if update_ops: + return control_flow_ops.group(train_op, *update_ops) + return train_op + + def _assert_range(labels, n_classes, message=None): with ops.name_scope(None, 'assert_range', (labels,)): assert_less = check_ops.assert_less_equal( diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index ecca3e8b0d..08ce5ca8e8 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -39,6 +39,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops +from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses from tensorflow.python.platform import test from tensorflow.python.saved_model import signature_constants @@ -969,6 +970,35 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), train_result) + def test_train_with_update_ops(self): + n_classes = 3 + head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) + + with ops.Graph().as_default(): + w = variables.Variable(1) + update_op = w.assign_add(1) + ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op) + + t = variables.Variable('') + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return t.assign(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=np.array(((10, 0, 0), (0, 10, 0),), 
dtype=np.float32), + labels=np.array(((1,), (1,)), dtype=np.int64), + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + sess.run(spec.train_op) + w_value, t_value = sess.run([w, t]) + self.assertEqual(2, w_value) + self.assertEqual(expected_train_result, t_value) + def test_train_summaries_with_head_name(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( @@ -2102,6 +2132,34 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): self.assertAllClose(expected_loss, loss) self.assertEqual(expected_train_result, train_result) + def test_train_with_update_ops(self): + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() + + with ops.Graph().as_default(): + w = variables.Variable(1) + update_op = w.assign_add(1) + ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op) + + t = variables.Variable('') + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return t.assign(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=np.array(((45,), (-41,),), dtype=np.float32), + labels=np.array(((1,), (1,),), dtype=np.float64), + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + sess.run(spec.train_op) + w_value, t_value = sess.run([w, t]) + self.assertEqual(2, w_value) + self.assertEqual(expected_train_result, t_value) + def test_train_summaries_with_head_name(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( name='some_binary_head') @@ -3278,6 +3336,34 @@ class RegressionHead(test.TestCase): self.assertAllClose(expected_loss, loss) self.assertEqual(expected_train_result, train_result) + def test_train_with_update_ops(self): + head = head_lib._regression_head() + + with ops.Graph().as_default(): + w = variables.Variable(1) + 
update_op = w.assign_add(1) + ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op) + + t = variables.Variable('') + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + del loss + return t.assign(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=np.array(((45,), (41,),), dtype=np.float32), + labels=np.array(((43.,), (44.,),), dtype=np.float64), + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + sess.run(spec.train_op) + w_value, t_value = sess.run([w, t]) + self.assertEqual(2, w_value) + self.assertEqual(expected_train_result, t_value) + def test_train_summaries_with_head_name(self): head = head_lib._regression_head(name='some_regression_head') self.assertEqual(1, head.logits_dimension) -- GitLab From eab2e4d784036568de076317ee40b25dc19eb4a9 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 4 Jun 2018 15:30:59 -0700 Subject: [PATCH 504/902] nit: FlatBuffer -> FrozenGraph PiperOrigin-RevId: 199205459 --- tensorflow/contrib/lite/python/lite_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index 5f8dfc0dc1..019a3a5f69 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -292,7 +292,7 @@ class FromSessionTest(test_util.TensorFlowTestCase): self.assertTrue(output_details[0]['quantization'][0] > 0) # scale -class FromFlatbufferFile(test_util.TensorFlowTestCase): +class FromFrozenGraphFile(test_util.TensorFlowTestCase): def testFloat(self): in_tensor = array_ops.placeholder( -- GitLab From 69613d25c3f82652c636c5a1c1b42029dc427979 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 4 Jun 2018 15:35:58 -0700 Subject: [PATCH 505/902] More handle_data fixing. 
I'm not sure why our existing tests didn't catch this... PiperOrigin-RevId: 199206183 --- tensorflow/python/framework/function.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 259cab6699..79ee57355d 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -720,6 +720,8 @@ class _FuncGraph(ops.Graph): if ops._USE_C_SHAPES: if isinstance(tensor, ops.EagerTensor): handle_data = tensor._handle_data + if handle_data: + handle_data = handle_data.SerializeToString() else: handle_data = c_api.GetResourceHandleShapeAndType( tensor.graph._c_graph, tensor._as_tf_output()) -- GitLab From cf01d118ef0762c0554611bef123bf4559071fbf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 15:51:17 -0700 Subject: [PATCH 506/902] Add support for kDomain parsing in HLO parser. PiperOrigin-RevId: 199208527 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_instruction.cc | 10 ++-- tensorflow/compiler/xla/service/hlo_parser.cc | 56 ++++++++++++++++++- .../compiler/xla/service/hlo_parser_test.cc | 11 ++++ 4 files changed, 71 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index c5b637419c..75961d49a5 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2980,6 +2980,7 @@ cc_library( deps = [ ":hlo", ":hlo_lexer", + ":hlo_sharding_metadata", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 4095b3d337..1c276b9305 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2441,12 +2441,10 @@ std::vector 
HloInstruction::ExtraAttributesToString( extra.push_back(StrCat("exponent_bits=", exponent_bits_)); extra.push_back(StrCat("mantissa_bits=", mantissa_bits_)); } - if (operand_side_metadata_ != nullptr) { - extra.push_back( - StrCat("operand_side=", operand_side_metadata_->ToString())); - } - if (user_side_metadata_ != nullptr) { - extra.push_back(StrCat("user_side=", user_side_metadata_->ToString())); + if (operand_side_metadata_ != nullptr && user_side_metadata_ != nullptr) { + extra.push_back(StrCat("domain={kind=\"", operand_side_metadata_->Kind(), + "\", entry=", operand_side_metadata_->ToString(), + ", exit=", user_side_metadata_->ToString(), "}")); } // By contract, we print the custom call target even if // options.print_subcomputation_mode() == kOff, because the call target is not diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index cefc6ff915..09c05c9821 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -16,7 +16,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_domain_metadata.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -107,6 +109,12 @@ class HloParser { std::vector strides; }; + // The data parsed for the kDomain instruction. + struct DomainData { + std::unique_ptr entry_metadata; + std::unique_ptr exit_metadata; + }; + // Types of attributes. 
enum class AttrTy { kInt64, @@ -125,6 +133,7 @@ class HloParser { kMetadata, kFusionKind, kDistribution, + kDomain, }; struct AttrConfig { @@ -181,6 +190,9 @@ class HloParser { bool ParseSharding(OpSharding* sharding); bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); + // Parses the metadata behind a kDomain instruction. + bool ParseDomain(DomainData* domain); + // Parses a sub-attribute of the window attribute, e.g.,size=1x2x3. bool ParseDxD(const string& name, std::vector* result); // Parses window's pad sub-attriute, e.g., pad=0_0x3x3. @@ -492,7 +504,6 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kClz: case HloOpcode::kCopy: case HloOpcode::kCos: - case HloOpcode::kDomain: case HloOpcode::kExp: case HloOpcode::kExpm1: case HloOpcode::kImag: @@ -1106,6 +1117,18 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, dim_numbers, *window_bounds)); break; } + case HloOpcode::kDomain: { + DomainData domain; + attrs["domain"] = {/*required=*/true, AttrTy::kDomain, &domain}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction(HloInstruction::CreateDomain( + shape, operands[0], std::move(domain.entry_metadata), + std::move(domain.exit_metadata))); + break; + } case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); @@ -1293,6 +1316,34 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, return true; } +// domain ::= '{' 'kind=' domain_kind ',' 'entry=' entry_sharding ',' +// 'exit=' exit_sharding '}' +bool HloParser::ParseDomain(DomainData* domain) { + std::unordered_map attrs; + optional kind; + optional entry_sharding; + optional exit_sharding; + attrs["kind"] = {/*required=*/true, AttrTy::kString, &kind}; + attrs["entry"] = {/*required=*/true, AttrTy::kSharding, &entry_sharding}; + attrs["exit"] = {/*required=*/true,
AttrTy::kSharding, &exit_sharding}; + if (!ParseSubAttributes(attrs)) { + return false; + } + if (*kind == ShardingMetadata::KindName()) { + auto entry_sharding_ptr = MakeUnique( + HloSharding::FromProto(*entry_sharding).ValueOrDie()); + auto exit_sharding_ptr = MakeUnique( + HloSharding::FromProto(*exit_sharding).ValueOrDie()); + domain->entry_metadata = + MakeUnique(std::move(entry_sharding_ptr)); + domain->exit_metadata = + MakeUnique(std::move(exit_sharding_ptr)); + } else { + return TokenError(StrCat("unsupported domain kind: ", *kind)); + } + return true; +} + // '{' name+ '}' bool HloParser::ParseInstructionNames( std::vector* instructions) { @@ -2043,6 +2094,9 @@ bool HloParser::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kDomain: { + return ParseDomain(static_cast(attr_out_ptr)); + } } }(); if (!success) { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 9a18b4f845..84a981675f 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -234,6 +234,17 @@ ENTRY %ShardedTupleCreate.v4 (v1: f32[], v2: f32[3], v3: f32[2,3]) -> (f32[], f3 ROOT %tuple = (f32[], f32[3]{0}, f32[2,3]{1,0}) tuple(f32[] %v1, f32[3]{0} %v2, f32[2,3]{1,0} %v3), sharding={{replicated}, {maximal device=0}, {replicated}} } +)" +}, +{ +"DomainParsing", +R"(HloModule DomainParsing_module + +ENTRY %DomainParsing (v1: f32[]) -> f32[] { + %v1 = f32[] parameter(0) + ROOT %dom = f32[] domain(f32[] %v1), domain={kind="sharding", entry={maximal device=0}, exit={maximal device=1}} +} + )" }, // int32 result = 0; -- GitLab From 14d4d1634dd2bd70ebc1629bc27354309bce0cb4 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Mon, 4 Jun 2018 16:41:46 -0700 Subject: [PATCH 507/902] Add TOKEN primitive type. The token type will be threaded through side-effecting ops to order them. 
Subsequent cls will add new opcodes and change side effecting operations to support this ordering. This CL also does some cleanup in shape_util and layout_util where we have assumed that shapes are either arrays or tuples. PiperOrigin-RevId: 199215963 --- tensorflow/compiler/xla/layout_util.cc | 53 ++-- tensorflow/compiler/xla/layout_util_test.cc | 51 ++++ tensorflow/compiler/xla/shape_util.cc | 263 ++++++++++++-------- tensorflow/compiler/xla/shape_util.h | 26 +- tensorflow/compiler/xla/shape_util_test.cc | 49 +++- tensorflow/compiler/xla/xla_data.proto | 11 +- 6 files changed, 304 insertions(+), 149 deletions(-) diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index 89cafa1a7d..e8f29b8329 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -98,8 +98,13 @@ Layout CreateDefaultLayoutForRank(int64 rank) { } // namespace /* static */ Layout LayoutUtil::GetDefaultLayoutForShape(const Shape& shape) { + if (ShapeUtil::IsOpaque(shape) || ShapeUtil::IsToken(shape)) { + // Opaque and token types have empty layouts. + return Layout(); + } + // A Layout proto corresponds to a single array, not a tuple. - DCHECK(!ShapeUtil::IsTuple(shape)); + CHECK(ShapeUtil::IsArray(shape)); return CreateDefaultLayoutForRank(shape.dimensions_size()); } @@ -126,14 +131,15 @@ Layout CreateDefaultLayoutForRank(int64 rank) { SetToDefaultLayout(&element_shape); } shape->clear_layout(); - } else if (ShapeUtil::IsOpaque(*shape)) { - shape->clear_layout(); - } else { + } else if (ShapeUtil::IsArray(*shape)) { shape->mutable_layout()->set_format(DENSE); tensorflow::protobuf::RepeatedField* minor_to_major = shape->mutable_layout()->mutable_minor_to_major(); minor_to_major->Resize(shape->dimensions_size(), 0); SetDefaultLayoutToContainer(minor_to_major); + } else { + // Opaque, token types etc. have no layout. 
+ shape->clear_layout(); } } @@ -160,18 +166,20 @@ Layout CreateDefaultLayoutForRank(int64 rank) { TF_RETURN_IF_ERROR(ValidateLayoutInShape(element_shape)); } return Status::OK(); - } else if (ShapeUtil::IsOpaque(shape)) { - if (shape.has_layout()) { - return InvalidArgument("opaque should not have a layout field"); - } - return Status::OK(); - } else { - // Array shape. + } else if (ShapeUtil::IsArray(shape)) { if (!shape.has_layout()) { return InvalidArgument("shape %s does not have a layout", ShapeUtil::HumanString(shape).c_str()); } return ValidateLayoutForShape(shape.layout(), shape); + } else { + // Token, opaque, etc. shape. + if (shape.has_layout()) { + return InvalidArgument( + "shape of primitive type %s should not have a layout", + PrimitiveType_Name(shape.element_type()).c_str()); + } + return Status::OK(); } } @@ -181,8 +189,10 @@ Layout CreateDefaultLayoutForRank(int64 rank) { return InvalidArgument("a single Layout is not valid for tuple shapes"); } - if (ShapeUtil::IsOpaque(shape)) { - return Status::OK(); + if (!ShapeUtil::IsArray(shape)) { + return InvalidArgument( + "shape of primitive type %s should not have a layout", + PrimitiveType_Name(shape.element_type()).c_str()); } if (layout.format() == INVALID_FORMAT) { @@ -273,7 +283,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { } /* static */ bool LayoutUtil::IsPadded(const Shape& shape) { - if (ShapeUtil::IsTuple(shape) || !HasLayout(shape) || + if (!ShapeUtil::IsArray(shape) || !HasLayout(shape) || shape.layout().padded_dimensions_size() == 0) { return false; } @@ -323,7 +333,8 @@ Layout CreateDefaultLayoutForRank(int64 rank) { // Tuple shape: all subshapes must have a layout. return std::all_of(shape.tuple_shapes().begin(), shape.tuple_shapes().end(), [](const Shape& s) { return HasLayout(s); }); - } else if (ShapeUtil::IsOpaque(shape)) { + } else if (!ShapeUtil::IsArray(shape)) { + // Opaque, token types etc. ignore layout. 
+ return true; } return shape.has_layout() && shape.layout().format() != INVALID_FORMAT; @@ -432,12 +443,9 @@ Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, Shape* dst) { /* static */ bool LayoutUtil::LayoutsInShapesEqual(const Shape& lhs, const Shape& rhs) { - if (ShapeUtil::IsTuple(lhs) != ShapeUtil::IsTuple(rhs)) { - return false; - } if (ShapeUtil::IsTuple(lhs)) { - if (ShapeUtil::TupleElementCount(lhs) != - ShapeUtil::TupleElementCount(rhs)) { + if (!ShapeUtil::IsTuple(rhs) || ShapeUtil::TupleElementCount(lhs) != + ShapeUtil::TupleElementCount(rhs)) { return false; } for (int i = 0; i < ShapeUtil::TupleElementCount(lhs); ++i) { @@ -446,9 +454,12 @@ Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, Shape* dst) { } } return true; - } else { + } else if (ShapeUtil::IsArray(lhs)) { return ShapeUtil::Rank(lhs) == ShapeUtil::Rank(rhs) && LayoutUtil::Equal(lhs.layout(), rhs.layout()); + } else { + // Layouts of non-array and non-tuple shapes are ignored. + return true; } } diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc index 4fd1d818e3..e4c825450d 100644 --- a/tensorflow/compiler/xla/layout_util_test.cc +++ b/tensorflow/compiler/xla/layout_util_test.cc @@ -218,6 +218,47 @@ TEST_F(LayoutUtilTest, CopyLayoutBogusLayout) { "elements, but shape is rank")); } +TEST_F(LayoutUtilTest, CopyTokenLayout) { + Shape src = ShapeUtil::MakeTokenShape(); + Shape dst = ShapeUtil::MakeTokenShape(); + + // Layouts are trivially the same for token types and copying layouts should + // be a nop. + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); +} + +TEST_F(LayoutUtilTest, CopyOpaqueLayout) { + Shape src = ShapeUtil::MakeOpaqueShape(); + Shape dst = ShapeUtil::MakeOpaqueShape(); + + // Layouts are trivially the same for opaque types and copying layouts should + // be a nop.
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); +} + +TEST_F(LayoutUtilTest, CopyTupleLayoutWithTokenAndOpaque) { + Shape src = ShapeUtil::MakeTupleShape( + {MakeShapeWithLayout(F32, {2, 3}, {0, 1}), + MakeShapeWithLayout(F32, {42, 123}, {1, 0}), ShapeUtil::MakeTokenShape(), + ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeOpaqueShape(), MakeShapeWithLayout(F32, {}, {}), + MakeShapeWithLayout(F32, {1, 2, 3}, {0, 2, 1})})}); + Shape dst = ShapeUtil::MakeTupleShape( + {MakeShapeWithLayout(F32, {2, 3}, {1, 0}), + MakeShapeWithLayout(F32, {42, 123}, {1, 0}), ShapeUtil::MakeTokenShape(), + ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeOpaqueShape(), MakeShapeWithLayout(F32, {}, {}), + MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})}); + + EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); +} + TEST_F(LayoutUtilTest, ClearLayoutTuple) { Shape shape = ShapeUtil::MakeTupleShape( {MakeShapeWithLayout(F32, {2, 3}, {1, 0}), @@ -236,6 +277,16 @@ TEST_F(LayoutUtilTest, ClearLayoutTuple) { EXPECT_FALSE(shape.tuple_shapes(2).tuple_shapes(1).has_layout()); } +TEST_F(LayoutUtilTest, ClearLayoutOpaqueAndToken) { + // Opaque and token types trivially have layouts. 
+ for (Shape shape : + {ShapeUtil::MakeOpaqueShape(), ShapeUtil::MakeTokenShape()}) { + EXPECT_TRUE(LayoutUtil::HasLayout(shape)); + LayoutUtil::ClearLayout(&shape); + EXPECT_TRUE(LayoutUtil::HasLayout(shape)); + } +} + TEST_F(LayoutUtilTest, SetToDefaultLayoutTuple) { Shape shape = ShapeUtil::MakeTupleShape( {MakeShapeWithLayout(F32, {2, 3, 4}, {1, 0, 2}), diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index e8a28d76e9..ce4d0079ee 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/iterator_range.h" @@ -42,17 +41,18 @@ limitations under the License. namespace xla { +using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; + string ShapeIndex::ToString() const { - return tensorflow::strings::StrCat( - "{", tensorflow::str_util::Join(indices_, ","), "}"); + return StrCat("{", tensorflow::str_util::Join(indices_, ","), "}"); } string ShapeIndexView::ToString() const { - return tensorflow::strings::StrCat( - "{", - tensorflow::str_util::Join(tensorflow::gtl::make_range(begin_, end_), - ","), - "}"); + return StrCat("{", + tensorflow::str_util::Join( + tensorflow::gtl::make_range(begin_, end_), ","), + "}"); } bool ShapeIndexView::operator==(const ShapeIndexView& other) const { @@ -84,18 +84,30 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index) { namespace { +// Returns whether the given primitive type corresponds to an array shape. 
+bool IsArrayPrimitiveType(PrimitiveType primitive_type) { + return primitive_type != PRIMITIVE_TYPE_INVALID && primitive_type != TUPLE && + primitive_type != OPAQUE && primitive_type != TOKEN; +} + // Recursive helper for comparing the equality of two shapes. Returns true if // the shapes are the same. If compare_layouts is true, then layouts must also // match. bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) { - if (ShapeUtil::IsTuple(lhs) || ShapeUtil::IsTuple(rhs)) { - return ShapeUtil::IsTuple(lhs) && ShapeUtil::IsTuple(rhs) && - ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), + if (!ShapeUtil::SameElementType(lhs, rhs)) { + VLOG(3) << "CompareShapes: lhs element type != rhs element type"; + return false; + } + + if (ShapeUtil::IsTuple(lhs)) { + return ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), [=](const Shape& l, const Shape& r) { return CompareShapes(l, r, compare_layouts); }); - } else if (ShapeUtil::IsOpaque(lhs) || ShapeUtil::IsOpaque(rhs)) { - return ShapeUtil::IsOpaque(lhs) && ShapeUtil::IsOpaque(rhs); + } else if (!ShapeUtil::IsArray(lhs)) { + // Non-tuple, non-array types such as opaque and token types are trivially + // the same.
+ return true; } if (compare_layouts) { @@ -125,10 +137,6 @@ bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) { VLOG(3) << "CompareShapes: lhs dimensions != rhs dimensions"; return false; } - if (!ShapeUtil::SameElementType(lhs, rhs)) { - VLOG(3) << "CompareShapes: lhs element type != rhs element type"; - return false; - } return true; } @@ -171,8 +179,8 @@ StatusOr MakeShapeWithLayoutInternal( } /* static */ int64 ShapeUtil::Rank(const Shape& shape) { - CHECK(!ShapeUtil::IsTuple(shape)) - << "Tuples do not have a rank, shape: " << shape; + CHECK(ShapeUtil::IsArray(shape)) + << "Non-arrays do not have a rank, shape: " << shape; return shape.dimensions_size(); } @@ -199,8 +207,7 @@ StatusOr MakeShapeWithLayoutInternal( /* static */ Shape ShapeUtil::MakeShape( PrimitiveType element_type, tensorflow::gtl::ArraySlice dimensions) { - DCHECK_NE(TUPLE, element_type); - DCHECK_NE(OPAQUE, element_type); + CHECK(IsArrayPrimitiveType(element_type)); Shape result; PopulateShape(element_type, dimensions, &result); return result; @@ -223,8 +230,7 @@ StatusOr MakeShapeWithLayoutInternal( /* static */ Shape ShapeUtil::MakeShapeWithSparseLayout( PrimitiveType element_type, tensorflow::gtl::ArraySlice dimensions, int64 max_sparse_elements) { - DCHECK_NE(TUPLE, element_type); - DCHECK_NE(OPAQUE, element_type); + CHECK(IsArrayPrimitiveType(element_type)); Shape shape = ShapeUtil::MakeShape(element_type, dimensions); *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements); TF_DCHECK_OK(ShapeUtil::ValidateShape(shape)); @@ -271,6 +277,13 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return result; } +/* static */ Shape ShapeUtil::MakeTokenShape() { + Shape result; + result.set_element_type(TOKEN); + TF_DCHECK_OK(ValidateShapeWithOptionalLayout(result)); + return result; +} + /* static */ void ShapeUtil::AppendShapeToTuple(const Shape& shape, Shape* tuple_shape) { TF_DCHECK_OK(ValidateShapeWithOptionalLayout(shape)); @@ 
-294,7 +307,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( } /* static */ bool ShapeUtil::ElementHasBitWidth(const Shape& shape, int bits) { - if (shape.element_type() == TUPLE || shape.element_type() == OPAQUE) { + if (!IsArray(shape)) { return false; } return primitive_util::BitWidth(shape.element_type()) == bits; @@ -320,6 +333,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( case C64: case TUPLE: case OPAQUE: + case TOKEN: return false; default: @@ -335,6 +349,10 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return primitive_util::IsFloatingPointType(shape.element_type()); } +/* static */ bool ShapeUtil::IsArray(const Shape& shape) { + return IsArrayPrimitiveType(shape.element_type()); +} + /* static */ bool ShapeUtil::IsNestedTuple(const Shape& shape) { return IsTuple(shape) && std::any_of(shape.tuple_shapes().begin(), shape.tuple_shapes().end(), IsTuple); @@ -388,7 +406,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( } /* static */ int64 ShapeUtil::ElementsIn(const Shape& shape) { - CHECK(!IsTuple(shape)) << ShapeUtil::HumanString(shape); + CHECK(IsArray(shape)) << ShapeUtil::HumanString(shape); CHECK_EQ(shape.dimensions_size(), Rank(shape)); return std::accumulate( shape.dimensions().begin(), shape.dimensions().end(), 1LL, @@ -403,23 +421,6 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return shape.element_type() == F32 && Rank(shape) == 0; } -/* static */ string ShapeUtil::HumanString(const Shape& shape) { - if (IsTuple(shape)) { - string text = "("; - const char* prefix = ""; - for (const Shape& elem_shape : shape.tuple_shapes()) { - tensorflow::strings::StrAppend(&text, prefix, HumanString(elem_shape)); - prefix = ", "; - } - text += ")"; - return text; - } else { - return tensorflow::strings::StrCat( - tensorflow::str_util::Lowercase( - PrimitiveType_Name(shape.element_type())), - "[", tensorflow::str_util::Join(shape.dimensions(), ","), "]"); - } -} 
namespace { @@ -470,48 +471,56 @@ StatusOr StringToPrimitiveType(const string& name) { } // namespace -/* static */ string ShapeUtil::HumanStringWithLayout(const Shape& shape) { +/* static */ string ShapeUtil::HumanString(const Shape& shape) { if (IsTuple(shape)) { string text = "("; const char* prefix = ""; for (const Shape& elem_shape : shape.tuple_shapes()) { - tensorflow::strings::StrAppend(&text, prefix, - HumanStringWithLayout(elem_shape)); + StrAppend(&text, prefix, HumanString(elem_shape)); prefix = ", "; } text += ")"; return text; - } else { - string result = tensorflow::strings::StrCat( - LowercasePrimitiveTypeName(shape.element_type()), "["); - for (int i = 0; i < shape.dimensions().size(); i++) { - tensorflow::strings::StrAppend(&result, (i > 0) ? "," : "", - shape.dimensions(i)); + } + return StrCat(LowercasePrimitiveTypeName(shape.element_type()), "[", + tensorflow::str_util::Join(shape.dimensions(), ","), "]"); +} + +/* static */ string ShapeUtil::HumanStringWithLayout(const Shape& shape) { + if (IsTuple(shape)) { + string text = "("; + const char* prefix = ""; + for (const Shape& elem_shape : shape.tuple_shapes()) { + StrAppend(&text, prefix, HumanStringWithLayout(elem_shape)); + prefix = ", "; } - result += "]"; - if (!IsScalar(shape) && !IsOpaque(shape)) { - if (LayoutUtil::HasLayout(shape)) { - tensorflow::strings::StrAppend(&result, - LayoutUtil::HumanString(shape.layout())); - } + text += ")"; + return text; + } + string result = StrCat(LowercasePrimitiveTypeName(shape.element_type()), "["); + for (int i = 0; i < shape.dimensions().size(); i++) { + StrAppend(&result, (i > 0) ? 
"," : "", shape.dimensions(i)); + } + result += "]"; + if (!IsScalar(shape) && IsArray(shape)) { + if (LayoutUtil::HasLayout(shape)) { + StrAppend(&result, LayoutUtil::HumanString(shape.layout())); } - return result; } + return result; } /* static */ string ShapeUtil::HumanString(const ProgramShape& program_shape) { std::vector parameters; for (auto& shape : program_shape.parameters()) { const int i = parameters.size(); - parameters.push_back( - tensorflow::strings::StrCat(i < program_shape.parameter_names_size() - ? program_shape.parameter_names(i) - : "(unknown)", - ": ", HumanString(shape))); + parameters.push_back(StrCat(i < program_shape.parameter_names_size() + ? program_shape.parameter_names(i) + : "(unknown)", + ": ", HumanString(shape))); } - return tensorflow::strings::StrCat( - "(", tensorflow::str_util::Join(parameters, ", "), ") -> ", - HumanString(program_shape.result())); + return StrCat("(", tensorflow::str_util::Join(parameters, ", "), ") -> ", + HumanString(program_shape.result())); } namespace { @@ -581,14 +590,17 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { // Extract the primitive element type. TF_ASSIGN_OR_RETURN(const PrimitiveType primitive_type, StringToPrimitiveType(element_type_string)); - if (primitive_type == PRIMITIVE_TYPE_INVALID || primitive_type == TUPLE || - primitive_type == OPAQUE) { + if (primitive_type == PRIMITIVE_TYPE_INVALID || primitive_type == TUPLE) { return InvalidArgument("Invalid element type string: \"%s\".", element_type_string.c_str()); } Shape result; - if (format_string.empty() && layout_string.empty()) { + if (primitive_type == OPAQUE) { + result = ShapeUtil::MakeOpaqueShape(); + } else if (primitive_type == TOKEN) { + result = ShapeUtil::MakeTokenShape(); + } else if (format_string.empty() && layout_string.empty()) { // Create a shape without a layout set. 
result = ShapeUtil::MakeShape(primitive_type, dimensions); } else if (format_string == "sparse") { @@ -633,43 +645,44 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { } /* static */ bool ShapeUtil::Compatible(const Shape& lhs, const Shape& rhs) { - if (lhs.element_type() == TUPLE) { + if (IsArray(lhs)) { + return SameElementType(lhs, rhs) && SameDimensions(lhs, rhs); + } else if (lhs.element_type() == TUPLE) { return rhs.element_type() == TUPLE && ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), Compatible); + } else { + // Opaque, token, etc types are vacuously compatible. + return true; } - if (lhs.element_type() == OPAQUE) { - return rhs.element_type() == OPAQUE; - } - return SameElementType(lhs, rhs) && SameDimensions(lhs, rhs); } /* static */ bool ShapeUtil::CompatibleIgnoringElementType(const Shape& lhs, const Shape& rhs) { - if (lhs.element_type() == TUPLE) { + if (IsArray(lhs)) { + return IsArray(rhs) && SameDimensions(lhs, rhs); + } else if (lhs.element_type() == TUPLE) { return rhs.element_type() == TUPLE && ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), CompatibleIgnoringElementType); + } else { + // Opaque, token, etc types are vacuously compatible. + return true; } - if (lhs.element_type() == OPAQUE) { - return rhs.element_type() == OPAQUE; - } - return ShapeUtil::IsArray(rhs) && SameDimensions(lhs, rhs); } /* static */ bool ShapeUtil::CompatibleIgnoringFpPrecision(const Shape& lhs, const Shape& rhs) { - if (lhs.element_type() == TUPLE) { + if (IsArray(lhs)) { + return IsArray(rhs) && SameElementTypeIgnoringFpPrecision(lhs, rhs) && + CompatibleIgnoringElementType(lhs, rhs); + } else if (lhs.element_type() == TUPLE) { return rhs.element_type() == TUPLE && ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), CompatibleIgnoringFpPrecision); + } else { + // Opaque, token, etc types are vacuously compatible. 
+ return true; } - if (lhs.element_type() == OPAQUE) { - return rhs.element_type() == OPAQUE; - } - if (SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return CompatibleIgnoringElementType(lhs, rhs); - } - return false; } /* static */ int64 ShapeUtil::GetDimension(const Shape& shape, @@ -691,10 +704,6 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { switch (primitive_type) { case PRED: return sizeof(int8); - case TUPLE: - LOG(FATAL) << "tuples have no definitive size"; - case OPAQUE: - LOG(FATAL) << "opaque have no definitive size"; case S8: return sizeof(int8); case S16: @@ -721,6 +730,13 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return sizeof(double); case C64: return sizeof(complex64); + case TOKEN: + // Tokens require no space. + return 0; + case TUPLE: + case OPAQUE: + LOG(FATAL) << PrimitiveType_Name(primitive_type) + << " primitive type has no definitive size"; default: LOG(FATAL) << "Unhandled primitive type " << primitive_type; } @@ -729,28 +745,32 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { /* static */ int64 ShapeUtil::ByteSizeOf(const Shape& shape, int64 pointer_size) { TF_DCHECK_OK(ValidateShape(shape)); - DCHECK_NE(OPAQUE, shape.element_type()); if (shape.element_type() == TUPLE) { return ByteSizeOfTupleIndexTable(shape, pointer_size); + } else if (IsArray(shape)) { + int64 byte_size = ByteSizeOfElements(shape); + if (LayoutUtil::IsSparseArray(shape)) { + byte_size += ByteSizeOfSparseIndices(shape); + } + return byte_size; + } else if (shape.element_type() == TOKEN) { + return 0; } - int64 byte_size = ByteSizeOfElements(shape); - if (LayoutUtil::IsSparseArray(shape)) { - byte_size += ByteSizeOfSparseIndices(shape); - } - return byte_size; + LOG(FATAL) << PrimitiveType_Name(shape.element_type()) + << " primitive type has no definitive size"; } /* static */ int64 ShapeUtil::ByteSizeOfTupleIndexTable(const Shape& shape, int64 pointer_size) { TF_DCHECK_OK(ValidateShape(shape)); - 
DCHECK_EQ(TUPLE, shape.element_type()); + CHECK_EQ(TUPLE, shape.element_type()); CHECK_GT(pointer_size, 0); return pointer_size * shape.tuple_shapes_size(); } /* static */ int64 ShapeUtil::ByteSizeOfElements(const Shape& shape) { TF_DCHECK_OK(ValidateShape(shape)); - DCHECK(ShapeUtil::IsArray(shape)); + CHECK(ShapeUtil::IsArray(shape)); int64 allocated_element_count; if (LayoutUtil::IsSparseArray(shape)) { @@ -775,13 +795,17 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { /* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) { TF_DCHECK_OK(ValidateShape(shape)); - DCHECK(LayoutUtil::IsSparseArray(shape)); + CHECK(LayoutUtil::IsSparseArray(shape)); return LayoutUtil::MaxSparseElements(shape.layout()) * ShapeUtil::Rank(shape) * sizeof(int64); } /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal( const Shape& shape) { + if (shape.element_type() == PRIMITIVE_TYPE_INVALID) { + return InvalidArgument("shape has invalid element type: %s", + shape.ShortDebugString().c_str()); + } if (shape.element_type() == TUPLE) { if (shape.dimensions_size() != 0) { return InvalidArgument("tuples must not have dimensions specified"); @@ -797,10 +821,24 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { if (shape.tuple_shapes_size() > 0) { return InvalidArgument("non-tuple shape has tuple_shapes field"); } - if (shape.element_type() == PRIMITIVE_TYPE_INVALID) { - return InvalidArgument("shape has invalid element type: %s", - shape.ShortDebugString().c_str()); + + // Tokens and opaques should not have layout or dimensions.
+ if (shape.element_type() == TOKEN || shape.element_type() == OPAQUE) { + if (shape.dimensions_size() != 0) { + return InvalidArgument( + "shape has %s element type, but has dimensions field: %s", + LowercasePrimitiveTypeName(shape.element_type()).c_str(), + shape.ShortDebugString().c_str()); + } + if (shape.has_layout()) { + return InvalidArgument( + "shape has %s element type, but has layout field: %s", + LowercasePrimitiveTypeName(shape.element_type()).c_str(), + shape.ShortDebugString().c_str()); + } + return Status::OK(); } + if (Rank(shape) != shape.dimensions_size()) { return InvalidArgument( "shape's rank is mismatched with dimension count; rank=%lld " @@ -902,6 +940,8 @@ bool ShapeUtil::IsLeafIndex(const Shape& shape, const ShapeIndex& index) { } /* static */ Shape ShapeUtil::StripDegenerateDimensions(const Shape& shape) { + CHECK(IsArray(shape)); + std::vector dimension_sizes; std::vector degenerate_dimensions; for (int64 i = 0; i < shape.dimensions_size(); ++i) { @@ -1066,6 +1106,9 @@ Status ForEachMutableSubshapeHelper( /* static */ std::tuple, std::vector> ShapeUtil::InsertedOrDeleted1SizedDimensions(const Shape& shape_pre, const Shape& shape_post) { + CHECK(IsArray(shape_pre)); + CHECK(IsArray(shape_post)); + auto nil = std::make_tuple(false, std::vector(), std::vector()); std::vector deleted_indices; @@ -1123,6 +1166,9 @@ ShapeUtil::InsertedOrDeleted1SizedDimensions(const Shape& shape_pre, /* static */ std::vector> ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, const Shape& output_shape) { + CHECK(IsArray(input_shape)); + CHECK(IsArray(output_shape)); + // Unmodified dimensions are merely common factors of rank 1. 
auto common_factors = CommonFactors(AsInt64Slice(input_shape.dimensions()), AsInt64Slice(output_shape.dimensions())); @@ -1176,8 +1222,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape) { - CHECK(LayoutUtil::HasLayout(input_shape) && - LayoutUtil::HasLayout(output_shape)); + CHECK(IsArray(input_shape)); + CHECK(IsArray(output_shape)); + CHECK(LayoutUtil::HasLayout(input_shape)); + CHECK(LayoutUtil::HasLayout(output_shape)); if (!SameElementType(input_shape, output_shape)) { return false; @@ -1339,6 +1387,9 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ tensorflow::gtl::optional ShapeUtil::AlignLayouts( const Shape& input_shape, const Shape& output_shape) { + CHECK(IsArray(input_shape)); + CHECK(IsArray(output_shape)); + int64 input_rank = Rank(input_shape); int64 output_rank = Rank(output_shape); @@ -1473,6 +1524,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ Shape ShapeUtil::DeleteDimension(int64 dim_to_delete, Shape shape) { + CHECK(IsArray(shape)); shape.mutable_dimensions()->erase(shape.dimensions().begin() + dim_to_delete); if (LayoutUtil::HasLayout(shape)) { Layout* layout = shape.mutable_layout(); @@ -1494,6 +1546,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ Shape ShapeUtil::FilterDimensions( const std::function& p, Shape shape) { + CHECK(IsArray(shape)); std::vector dims_to_delete; for (int64 i = shape.dimensions().size() - 1; i >= 0; --i) { if (!p(i)) { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 9df31d5d21..3853ada6ba 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -169,7 +169,7 @@ class ShapeUtil { // may not actually be able to store this number of elements. 
See // LayoutUtil::MaxSparseElements(shape) to obtain the maximum number of // elements that can be stored in a sparse shape. - // Precondition: !IsTuple(shape) + // Precondition: IsArray(shape) static int64 ElementsIn(const Shape& shape); // Returns true if 'shape' has zero elements. @@ -180,13 +180,11 @@ class ShapeUtil { // shapes. This includes only the size of the top-level buffer. For example, a // tuple is stored as an array of pointers to other buffers. In this case, // this method only returns the size of the pointer array. - // Precondition: (!ShapeUtil::IsTuple(shape) || pointer_size > 0) && - // !ShapeUtil::IsOpaque(shape) static int64 ByteSizeOf(const Shape& shape, int64 pointer_size = -1); // Returns the number of bytes used to store the primitive_type. // - // Precondition: !ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape) + // Precondition: ShapeUtil::IsArray(shape) static int64 ByteSizeOfPrimitiveType(PrimitiveType primitive_type); // Returns the number of bytes required to store the tuple member pointers for @@ -245,7 +243,7 @@ class ShapeUtil { } // Returns the higher-precision element type if a and b are both floating - // point types; otherwise, checks that they have the same element type + // point types; otherwise, checks that that they have the same element type // and returns it. static PrimitiveType HigherPrecisionElementType(const Shape& a, const Shape& b) { @@ -293,10 +291,10 @@ class ShapeUtil { // Scalar-specific static bool IsScalar(const Shape& shape) { - return !IsTuple(shape) && !IsOpaque(shape) && Rank(shape) == 0; + return IsArray(shape) && Rank(shape) == 0; } static bool IsEffectiveScalar(const Shape& shape) { - return !IsTuple(shape) && !IsOpaque(shape) && TrueRank(shape) == 0; + return IsArray(shape) && TrueRank(shape) == 0; } static bool IsScalarF32(const Shape& shape); @@ -325,6 +323,10 @@ class ShapeUtil { // into a custom operation. static Shape MakeOpaqueShape(); + // Creates a token shape. 
Values of this shape are used for ordering + // side-effecting operations. + static Shape MakeTokenShape(); + // Appends a shape to the given tuple. static void AppendShapeToTuple(const Shape& shape, Shape* tuple_shape); @@ -424,11 +426,15 @@ class ShapeUtil { return shape.element_type() == OPAQUE; } + // Returns whether the shape is an token value used for ordering + // side-effecting operations. + static bool IsToken(const Shape& shape) { + return shape.element_type() == TOKEN; + } + // Returns whether the shape is an array. Note that scalars are considered // arrays. - static bool IsArray(const Shape& shape) { - return !IsTuple(shape) && !IsOpaque(shape); - } + static bool IsArray(const Shape& shape); // Returns whether the shape is a tuple with at least one element which is // also a tuple. diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index f7675e97da..ecdb6532f1 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -93,12 +93,14 @@ TEST(ShapeUtilTest, ParseShapeStringTupleOfArrays) { } TEST(ShapeUtilTest, ParseShapeStringNestedTuple) { - string shape_string = "(f32[1],(f32[2]), f32[3])"; + string shape_string = "(f32[1],(f32[2], token[]), opaque[], f32[3])"; TF_ASSERT_OK_AND_ASSIGN(Shape actual, ShapeUtil::ParseShapeString(shape_string)); Shape expected = ShapeUtil::MakeTupleShape({ ShapeUtil::MakeShape(F32, {1}), - ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {2})}), + ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeTokenShape()}), + ShapeUtil::MakeOpaqueShape(), ShapeUtil::MakeShape(F32, {3}), }); ASSERT_TRUE(ShapeUtil::Equal(expected, actual)) @@ -136,6 +138,23 @@ TEST(ShapeUtilTest, ParseShapeStringWithSparseLayout) { << "actual: " << ShapeUtil::HumanString(actual); } +TEST(ShapeUtilTest, ParseOpaqueType) { + TF_ASSERT_OK_AND_ASSIGN(Shape actual, + ShapeUtil::ParseShapeString("opaque[]")); + Shape expected = 
ShapeUtil::MakeOpaqueShape(); + ASSERT_TRUE(ShapeUtil::Equal(expected, actual)) + << "expected: " << ShapeUtil::HumanString(expected) + << "actual: " << ShapeUtil::HumanString(actual); +} + +TEST(ShapeUtilTest, ParseTokenType) { + TF_ASSERT_OK_AND_ASSIGN(Shape actual, ShapeUtil::ParseShapeString("token[]")); + Shape expected = ShapeUtil::MakeTokenShape(); + ASSERT_TRUE(ShapeUtil::Equal(expected, actual)) + << "expected: " << ShapeUtil::HumanString(expected) + << "actual: " << ShapeUtil::HumanString(actual); +} + TEST(ShapeUtilTest, ParseInvalidShapeString) { string shape_strings[] = { "f32[123,456]foobar{0,1}", "f32[123,456]sparse{0,1}", "f32[123,456]{foo}", @@ -295,6 +314,9 @@ TEST(ShapeUtilTest, ByteSizeOfWithoutPadding) { EXPECT_EQ(8, ShapeUtil::ByteSizeOfPrimitiveType(C64)); EXPECT_EQ(8, ShapeUtil::ByteSizeOf(ShapeUtil::MakeShape(C64, {}))); EXPECT_EQ(1600, ShapeUtil::ByteSizeOf(ShapeUtil::MakeShape(C64, {10, 20}))); + + EXPECT_EQ(0, ShapeUtil::ByteSizeOfPrimitiveType(TOKEN)); + EXPECT_EQ(0, ShapeUtil::ByteSizeOf(ShapeUtil::MakeTokenShape())); } TEST(ShapeUtilTest, ByteSizeOfWithPadding) { @@ -449,19 +471,21 @@ TEST(ShapeUtilTest, IsLeafIndex) { TEST(ShapeUtilTest, HumanString) { Shape opaque = ShapeUtil::MakeOpaqueShape(); + Shape token = ShapeUtil::MakeTokenShape(); Shape scalar = ShapeUtil::MakeShape(F32, {}); Shape matrix = ShapeUtil::MakeShape(U32, {1, 2}); Shape matrix2 = ShapeUtil::MakeShapeWithLayout(S32, {3, 4}, {0, 1}); Shape tuple = ShapeUtil::MakeTupleShape({opaque, scalar, matrix, matrix2}); - Shape nested_tuple = ShapeUtil::MakeTupleShape({tuple, matrix}); + Shape nested_tuple = ShapeUtil::MakeTupleShape({tuple, matrix, token}); EXPECT_EQ("opaque[]", ShapeUtil::HumanString(opaque)); + EXPECT_EQ("token[]", ShapeUtil::HumanString(token)); EXPECT_EQ("f32[]", ShapeUtil::HumanString(scalar)); EXPECT_EQ("u32[1,2]", ShapeUtil::HumanString(matrix)); EXPECT_EQ("s32[3,4]", ShapeUtil::HumanString(matrix2)); EXPECT_EQ("(opaque[], f32[], u32[1,2], s32[3,4])", 
ShapeUtil::HumanString(tuple)); - EXPECT_EQ("((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2])", + EXPECT_EQ("((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2], token[])", ShapeUtil::HumanString(nested_tuple)); EXPECT_EQ("opaque[]", ShapeUtil::HumanStringWithLayout(opaque)); @@ -470,8 +494,10 @@ TEST(ShapeUtilTest, HumanString) { EXPECT_EQ("s32[3,4]{0,1}", ShapeUtil::HumanStringWithLayout(matrix2)); EXPECT_EQ("(opaque[], f32[], u32[1,2]{1,0}, s32[3,4]{0,1})", ShapeUtil::HumanStringWithLayout(tuple)); - EXPECT_EQ("((opaque[], f32[], u32[1,2]{1,0}, s32[3,4]{0,1}), u32[1,2]{1,0})", - ShapeUtil::HumanStringWithLayout(nested_tuple)); + EXPECT_EQ( + "((opaque[], f32[], u32[1,2]{1,0}, s32[3,4]{0,1}), u32[1,2]{1,0}, " + "token[])", + ShapeUtil::HumanStringWithLayout(nested_tuple)); ProgramShape prog = ShapeUtil::MakeProgramShape( {opaque, scalar, matrix, matrix2, tuple, nested_tuple}, nested_tuple); @@ -481,8 +507,9 @@ TEST(ShapeUtilTest, HumanString) { "(unknown): u32[1,2], " "(unknown): s32[3,4], " "(unknown): (opaque[], f32[], u32[1,2], s32[3,4]), " - "(unknown): ((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2])) -> " - "((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2])", + "(unknown): ((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2], token[])) " + "-> " + "((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2], token[])", ShapeUtil::HumanString(prog)); prog.add_parameter_names("arg0"); @@ -497,8 +524,10 @@ TEST(ShapeUtilTest, HumanString) { "matrix: u32[1,2], " "matrix2: s32[3,4], " "tuple: (opaque[], f32[], u32[1,2], s32[3,4]), " - "nested_tuple: ((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2])) -> " - "((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2])", + "nested_tuple: ((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2], " + "token[])) " + "-> " + "((opaque[], f32[], u32[1,2], s32[3,4]), u32[1,2], token[])", ShapeUtil::HumanString(prog)); } diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index b895ac045c..6bdfb0179c 100644 --- 
a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -66,11 +66,16 @@ enum PrimitiveType { // in the dimensions field. TUPLE = 13; - // An opaque type used for passing context specific data to a custom - // operation. + // An opaque type used for passing context-specific data to a custom + // operation. Shapes of this primitive type will have empty dimensions and + // tuple_shapes fields. OPAQUE = 14; - // Next = 17 + // A token type threaded between side-effecting operations. Shapes of this + // primitive type will have empty dimensions and tuple_shapes fields. + TOKEN = 17; + + // Next = 18 } // Describes the value held inside padding elements. -- GitLab From 7d195d0d4936cbf289d2d5c590f82471ee8259ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 16:43:33 -0700 Subject: [PATCH 508/902] Fix a floating point inaccuracy issue in precision_recall_at_equal_thresholds due to accumulating the tp/fp/tn/fn values in float32, which can become highly inaccurate as the number of values increases. In the common case, the method sums the value 1.0f to the tp/fp/tn/fn bucket for every value in the predictions tensor. If the tensor is large (say, it represents an image and we have one tp/fp/tn/fn value per pixel), then we are essentially adding many 1.0f's together, across the entire batch and also across all the batches. By doing it in float32 the value starts becoming inaccurate at around 16M, which is very small. In practice, we see a deviation of 100x when the total reaches about 3e10 (the previous code reports a number about 1e8 when the actual value should be 3e10). We avoid all these issues by always accumulating in float64. Also fix a bug that the method cannot be called with predictions dtype being anything other than float32. Previously it would crash due to the eps code near the end. Added tests for using float64 and float16.
PiperOrigin-RevId: 199216173 --- .../contrib/metrics/python/ops/metric_ops.py | 39 +++-- .../metrics/python/ops/metric_ops_test.py | 137 ++++++++++++++---- 2 files changed, 130 insertions(+), 46 deletions(-) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 00a933e5e0..2ed99d50a4 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -1544,7 +1544,7 @@ def precision_recall_at_equal_thresholds(labels, result: A named tuple (See PrecisionRecallData within the implementation of this function) with properties that are variables of shape `[num_thresholds]`. The names of the properties are tp, fp, tn, fn, - precision, recall, thresholds. + precision, recall, thresholds. Types are same as that of predictions. update_op: An op that accumulates values. Raises: @@ -1570,7 +1570,6 @@ def precision_recall_at_equal_thresholds(labels, check_ops.assert_type(labels, dtypes.bool) - dtype = predictions.dtype with variable_scope.variable_scope(name, 'precision_recall_at_equal_thresholds', (labels, predictions, weights)): @@ -1592,11 +1591,16 @@ def precision_recall_at_equal_thresholds(labels, predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - # We cast to float to ensure we have 0.0 or 1.0. - f_labels = math_ops.cast(labels, dtype) + # It's important we aggregate using float64 since we're accumulating a lot + # of 1.0's for the true/false labels, and accumulating to float32 will + # be quite inaccurate even with just a modest amount of values (~20M). + # We use float64 instead of integer primarily since GPU scatter kernel + # only support floats. + agg_dtype = dtypes.float64 - # Get weighted true/false labels. 
- true_labels = f_labels * weights + f_labels = math_ops.cast(labels, agg_dtype) + weights = math_ops.cast(weights, agg_dtype) + true_labels = f_labels * weights false_labels = (1.0 - f_labels) * weights # Flatten predictions and labels. @@ -1638,9 +1642,9 @@ def precision_recall_at_equal_thresholds(labels, with ops.name_scope('variables'): tp_buckets_v = metrics_impl.metric_variable( - [num_thresholds], dtype, name='tp_buckets') + [num_thresholds], agg_dtype, name='tp_buckets') fp_buckets_v = metrics_impl.metric_variable( - [num_thresholds], dtype, name='fp_buckets') + [num_thresholds], agg_dtype, name='fp_buckets') with ops.name_scope('update_op'): update_tp = state_ops.scatter_add( @@ -1660,18 +1664,21 @@ def precision_recall_at_equal_thresholds(labels, fn = tp[0] - tp # We use a minimum to prevent division by 0. - epsilon = 1e-7 + epsilon = ops.convert_to_tensor(1e-7, dtype=agg_dtype) precision = tp / math_ops.maximum(epsilon, tp + fp) recall = tp / math_ops.maximum(epsilon, tp + fn) + # Convert all tensors back to predictions' dtype (as per function contract). 
+ out_dtype = predictions.dtype + _convert = lambda tensor: math_ops.cast(tensor, out_dtype) result = PrecisionRecallData( - tp=tp, - fp=fp, - tn=tn, - fn=fn, - precision=precision, - recall=recall, - thresholds=math_ops.lin_space(0.0, 1.0, num_thresholds)) + tp=_convert(tp), + fp=_convert(fp), + tn=_convert(tn), + fn=_convert(fn), + precision=_convert(precision), + recall=_convert(recall), + thresholds=_convert(math_ops.lin_space(0.0, 1.0, num_thresholds))) update_op = control_flow_ops.group(update_tp, update_fp) return result, update_op diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index e6f75fcbd7..4ccba4a253 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -2333,47 +2333,24 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): np.random.seed(1) ops.reset_default_graph() - def _testResultsEqual(self, expected_dict, gotten_result): + def _testResultsEqual(self, expected_dict, gotten_result, eps=None): """Tests that 2 results (dicts) represent the same data. Args: expected_dict: A dictionary with keys that are the names of properties of PrecisionRecallData and whose values are lists of floats. gotten_result: A PrecisionRecallData object. + eps: Epsilon value to use for testing output values. If unspecified, use + default from assertAllClose. """ gotten_dict = {k: t.eval() for k, t in gotten_result._asdict().items()} self.assertItemsEqual(list(expected_dict.keys()), list(gotten_dict.keys())) for key, expected_values in expected_dict.items(): - self.assertAllClose(expected_values, gotten_dict[key]) - - def _testCase(self, predictions, labels, expected_result, weights=None): - """Performs a test given a certain scenario of labels, predictions, weights. - - Args: - predictions: The predictions tensor. Of type float32. - labels: The labels tensor. Of type bool. 
- expected_result: The expected result (dict) that maps to tensors. - weights: Optional weights tensor. - """ - with self.test_session() as sess: - predictions_tensor = constant_op.constant( - predictions, dtype=dtypes_lib.float32) - labels_tensor = constant_op.constant(labels, dtype=dtypes_lib.bool) - weights_tensor = None - if weights: - weights_tensor = constant_op.constant(weights, dtype=dtypes_lib.float32) - gotten_result, update_op = ( - metric_ops.precision_recall_at_equal_thresholds( - labels=labels_tensor, - predictions=predictions_tensor, - weights=weights_tensor, - num_thresholds=3)) - - sess.run(variables.local_variables_initializer()) - sess.run(update_op) - - self._testResultsEqual(expected_result, gotten_result) + if eps is not None: + self.assertAllClose(expected_values, gotten_dict[key], atol=eps) + else: + self.assertAllClose(expected_values, gotten_dict[key]) def testVars(self): metric_ops.precision_recall_at_equal_thresholds( @@ -2414,6 +2391,77 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): for _ in range(3): self._testResultsEqual(initial_result, result) + def testLargeCase(self): + shape = [32, 512, 256, 1] + predictions = random_ops.random_uniform( + shape, 0.0, 1.0, dtype=dtypes_lib.float32) + labels = math_ops.greater(random_ops.random_uniform(shape, 0.0, 1.0), 0.5) + + result, update_op = metric_ops.precision_recall_at_equal_thresholds( + labels=labels, predictions=predictions, num_thresholds=201) + # Run many updates, enough to cause highly inaccurate values if the + # code used float32 for accumulation. + num_updates = 71 + + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + for _ in xrange(num_updates): + sess.run(update_op) + + prdata = sess.run(result) + + # Since we use random values, we won't know the tp/fp/tn/fn values, but + # tp and fp at threshold 0 should be the total number of positive and + # negative labels, hence their sum should be total number of pixels. 
+ expected_value = 1.0 * np.product(shape) * num_updates + got_value = prdata.tp[0] + prdata.fp[0] + # They should be at least within 1. + self.assertNear(got_value, expected_value, 1.0) + + def _testCase(self, + predictions, + labels, + expected_result, + dtype=dtypes_lib.float32, + eps=None, + weights=None): + """Performs a test given a certain scenario of labels, predictions, weights. + + Args: + predictions: The predictions tensor. Of type dtype. + labels: The labels tensor. Of type bool. + expected_result: The expected result (dict) that maps to tensors. + dtype: Data type to use for predictions and weights tensor. Default + is float32. + eps: Epsilon value to use for testing output values. If unspecified, use + default from assertAllClose. + weights: Optional weights tensor. + """ + with self.test_session() as sess: + predictions_tensor = constant_op.constant(predictions, dtype=dtype) + labels_tensor = constant_op.constant(labels, dtype=dtypes_lib.bool) + weights_tensor = None + if weights: + weights_tensor = constant_op.constant(weights, dtype=dtype) + gotten_result, update_op = ( + metric_ops.precision_recall_at_equal_thresholds( + labels=labels_tensor, + predictions=predictions_tensor, + weights=weights_tensor, + num_thresholds=3)) + self.assertEqual(gotten_result.tp.dtype, dtype) + self.assertEqual(gotten_result.fp.dtype, dtype) + self.assertEqual(gotten_result.tn.dtype, dtype) + self.assertEqual(gotten_result.fn.dtype, dtype) + self.assertEqual(gotten_result.precision.dtype, dtype) + self.assertEqual(gotten_result.recall.dtype, dtype) + self.assertEqual(gotten_result.thresholds.dtype, dtype) + + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + + self._testResultsEqual(expected_result, gotten_result, eps=eps) + def testAllTruePositives(self): self._testCase( [[1]], [[True]], { @@ -2489,6 +2537,35 @@ class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): }, weights=[[0.0, 0.5, 2.0, 0.0, 0.5, 1.0]]) + def 
testFloat64(self): + self._testCase( + [[0.2, 0.3, 0.4, 0.6, 0.7, 0.8]], + [[True, False, False, True, True, True]], { + 'tp': [4, 3, 0], + 'fp': [2, 0, 0], + 'tn': [0, 2, 2], + 'fn': [0, 1, 4], + 'precision': [2.0 / 3.0, 1.0, 0.0], + 'recall': [1.0, 0.75, 0.0], + 'thresholds': [0.0, 0.5, 1.0], + }, + dtype=dtypes_lib.float64) + + def testFloat16(self): + self._testCase( + [[0.2, 0.3, 0.4, 0.6, 0.7, 0.8]], + [[True, False, False, True, True, True]], { + 'tp': [4, 3, 0], + 'fp': [2, 0, 0], + 'tn': [0, 2, 2], + 'fn': [0, 1, 4], + 'precision': [2.0 / 3.0, 1.0, 0.0], + 'recall': [1.0, 0.75, 0.0], + 'thresholds': [0.0, 0.5, 1.0], + }, + dtype=dtypes_lib.float16, + eps=1e-3) + class StreamingSpecificityAtSensitivityTest(test.TestCase): -- GitLab From ff5ad20576e2c2a5c2295365c396da367428c753 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 16:46:57 -0700 Subject: [PATCH 509/902] Updated include path for internal protobuf implementation. PiperOrigin-RevId: 199216721 --- tensorflow/contrib/lite/toco/tooling_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 1f596ca8e5..3b320e8013 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -26,7 +26,7 @@ limitations under the License. 
#include "absl/strings/string_view.h" #include "tensorflow/core/platform/logging.h" #if TOCO_SUPPORT_PORTABLE_PROTOS -#include "third_party/protobuf/src/google/protobuf/text_format.h" +#include "third_party/protobuf/include/google/protobuf/text_format.h" #endif // TOCO_SUPPORT_PORTABLE_PROTOS #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" -- GitLab From 640cb59e94248c55934fe4e2b59fb3e18957b297 Mon Sep 17 00:00:00 2001 From: vchigrin Date: Tue, 5 Jun 2018 02:50:09 +0300 Subject: [PATCH 510/902] Periodic resample operation gradients and optimization (#16520) * Implement gradient of periodic resample operation. * Set fully defined output shape for periodic_resample when possible. * Speed up periodic_resample operation. Use incremental updates in index computation where possible. * Allow periodic_resample run on multiple CPU kernels. * Small refactoring. * Add test for periodic_resample shape inference. * Fix issues after review. * Add shape inference C++ test. 
* Code style fix --- tensorflow/contrib/periodic_resample/BUILD | 17 +- .../kernels/periodic_resample_op.cc | 5 + .../kernels/periodic_resample_op.h | 415 +++++++++++++----- .../periodic_resample/ops/array_ops.cc | 53 ++- .../periodic_resample/ops/array_ops_test.cc | 40 ++ .../kernel_tests/periodic_resample_op_test.py | 27 +- .../python/ops/periodic_resample_op.py | 8 +- 7 files changed, 445 insertions(+), 120 deletions(-) create mode 100644 tensorflow/contrib/periodic_resample/ops/array_ops_test.cc diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index 6ca7fe8b6e..976b312e83 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -6,12 +6,13 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "py_test", + "tf_cc_test", "tf_gen_op_libs", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_wrapper_py", ) +load("//tensorflow:tensorflow.bzl", "py_test") cc_library( name = "all_ops", @@ -84,6 +85,20 @@ py_test( ":init_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradient_checker", + ], +) + +tf_cc_test( + name = "periodic_resample_op_cc_test", + size = "small", + srcs = [ + "ops/array_ops_test.cc", + ], + deps = [ + ":all_ops", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc index e18923c8aa..514689cf45 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.cc @@ -22,4 +22,9 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("PeriodicResample").Device(DEVICE_CPU), PeriodicResampleOp); + +REGISTER_KERNEL_BUILDER(Name("PeriodicResampleOpGrad") + .Device(DEVICE_CPU), + PeriodicResampleOpGrad); + } // namespace 
tensorflow diff --git a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h index 3ab588c458..42fba81a5c 100644 --- a/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h +++ b/tensorflow/contrib/periodic_resample/kernels/periodic_resample_op.h @@ -25,92 +25,202 @@ #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/util/work_sharder.h" namespace { -template -IndexT compute_input_index( - IndexVecT* target_dimensions, const IndexT& output_index, - const IndexVecT& original_dimensions, const int& adjustable_dimension, - const std::vector& dimension_ceiling, - const std::vector& cumulative_dimensions, IndexT* result, - std::vector* output_indices, const int& rank) { - *result = 0; - output_indices->clear(); +// Computes input tensor index for given output index during forward +// propagation through periodic_resample operation. +class InputIndexer { + public: + InputIndexer(const std::vector& output_dimensions, + const tensorflow::TensorShape& input_shape, + int adjustable_dimension) + : output_dimensions_(output_dimensions), + adjustable_dimension_(adjustable_dimension), + rank_(input_shape.dims()), + linear_output_index_(0), + linear_input_index_(0), + adjustable_dimension_carriage_sum_(0) { + auto input_dimensions = TensorShapeToVector(input_shape); + // factors by which input_dimensions increases/decreases w.r.t. 
+ // output_dimensions + dimension_ceiling_ = + ComputeDimensionCeiling(output_dimensions, input_dimensions); + cumulative_dimensions_ = ComputeCumulativeDimensions(); + + output_indices_.resize(output_dimensions_.size()); + input_indices_.resize(output_dimensions_.size()); + + // Compute index_factors + index_factors_.resize(rank_); + tensorflow::int64 last_index_factor = 1; + for (auto r = rank_ - 1; r >= 0; --r) { + index_factors_[r] = last_index_factor; + last_index_factor *= input_dimensions[r]; + } + } + + tensorflow::int64 linear_input_index() const { return linear_input_index_; } + + void MoveToOutputIndex(tensorflow::int64 output_index); + void IncrementOutputIndex(); + + private: + void RecomputeInputAdjustableDimensionIndex() { + tensorflow::int64 index = adjustable_dimension_carriage_sum_; + index *= output_dimensions_[adjustable_dimension_]; + index += output_indices_[adjustable_dimension_]; + input_indices_[adjustable_dimension_] = index; + } + + std::vector TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape); + + std::vector ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions); + + std::vector ComputeCumulativeDimensions(); + + const std::vector output_dimensions_; + std::vector dimension_ceiling_; + std::vector index_factors_; + std::vector cumulative_dimensions_; + std::vector output_indices_; + std::vector input_indices_; + + const int adjustable_dimension_; + const int rank_; + tensorflow::int64 linear_output_index_; + tensorflow::int64 linear_input_index_; + tensorflow::int64 adjustable_dimension_carriage_sum_; +}; + +void InputIndexer::MoveToOutputIndex(tensorflow::int64 output_index) { + linear_output_index_ = output_index; + linear_input_index_ = 0; // un-rasterize the output index auto last_reduced_i = output_index; - for (auto r = rank - 1; r >= 0; --r) { - (*output_indices)[r] = last_reduced_i % (*target_dimensions)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + 
output_indices_[r] = last_reduced_i % output_dimensions_[r]; last_reduced_i = - (last_reduced_i - (*output_indices)[r]) / (*target_dimensions)[r]; + (last_reduced_i - output_indices_[r]) / output_dimensions_[r]; } + tensorflow::int64 carriage_sum = 0; + for (int qi = 0; qi < rank_; ++qi) { + if (qi == adjustable_dimension_) continue; + carriage_sum += cumulative_dimensions_[qi] * + (output_indices_[qi] % dimension_ceiling_[qi]); + } + adjustable_dimension_carriage_sum_ = carriage_sum; + // rasterize the input index - IndexT last_index_factor = 1; - for (auto r = rank - 1; r >= 0; --r) { - IndexT index = 0; - if (r != adjustable_dimension) - index = (*output_indices)[r] / dimension_ceiling[r]; - else { - for (int qi = 0; qi < rank; ++qi) { - if (qi == adjustable_dimension) continue; - index += cumulative_dimensions[qi] * - ((*output_indices)[qi] % dimension_ceiling[qi]); - } - index *= (*target_dimensions)[adjustable_dimension]; - index += (*output_indices)[r]; + for (auto r = rank_ - 1; r >= 0; --r) { + if (r != adjustable_dimension_) { + input_indices_[r] = output_indices_[r] / dimension_ceiling_[r]; + } else { + RecomputeInputAdjustableDimensionIndex(); } - *result += last_index_factor * index; - last_index_factor *= original_dimensions[r]; } + for (auto r = rank_ - 1; r >= 0; --r) { + linear_input_index_ += index_factors_[r] * input_indices_[r]; + } +} + +void InputIndexer::IncrementOutputIndex() { + linear_output_index_++; + for (auto r = rank_ - 1; r >= 0; --r) { + auto old_carriage_sum_increment = + cumulative_dimensions_[r] * + (output_indices_[r] % dimension_ceiling_[r]); + output_indices_[r] = (output_indices_[r] + 1) % output_dimensions_[r]; + if (r != adjustable_dimension_) { + auto new_input_index = output_indices_[r] / dimension_ceiling_[r]; + linear_input_index_ += + (new_input_index - input_indices_[r]) * index_factors_[r]; + + input_indices_[r] = new_input_index; + + auto new_carriage_sum_increment = + cumulative_dimensions_[r] * + 
(output_indices_[r] % dimension_ceiling_[r]); - return *result; + adjustable_dimension_carriage_sum_ = adjustable_dimension_carriage_sum_ - + old_carriage_sum_increment + + new_carriage_sum_increment; + } + + if (output_indices_[r] != 0) { + // No more carries to higher indices. + break; + } + } + auto old_adjustable_dimension_input_index = + input_indices_[adjustable_dimension_]; + RecomputeInputAdjustableDimensionIndex(); + linear_input_index_ += (input_indices_[adjustable_dimension_] - + old_adjustable_dimension_input_index) * + index_factors_[adjustable_dimension_]; } -template // both types are needed here b/c IndexVecT and - // InputDataT are not related - void - fill_periodic_tensor( - tensorflow::OpKernelContext* context, - const IndexVecT& desired_shape, - const tensorflow::Tensor& input_tensor) { - // input is a strided array (last index is fastest, C-ordered) - auto input = input_tensor.flat(); - const int rank = input_tensor.dims(); - // original and target dimensions - std::vector original_dimensions(rank), - target_dimensions(rank); - tensorflow::int64 total_size(input_tensor.NumElements()), new_sliced_size(1); - // factors by which original_dimensions increases/decreases w.r.t. 
- // target_dimensions - std::vector dimension_ceiling(rank), - cumulative_dimensions(rank); - // index of adjustable dimension - int adjustable_dimension; - tensorflow::TensorShape output_shape; +std::vector InputIndexer::TensorShapeToVector( + const tensorflow::TensorShape& tensor_shape) { + std::vector result(tensor_shape.dims()); + int count = 0; + for (const auto dim_info : tensor_shape) { + result[count] = dim_info.size; + ++count; + } + return result; +} - // requires that the rank of the input tensor and length of the desired shape - // are equal - OP_REQUIRES(context, rank == desired_shape.size(), - tensorflow::errors::InvalidArgument( - "periodic_resample expects the rank of the input tensor, ", - rank, ", to be the same as the length of the desired shape, ", - desired_shape.size(), ".")); +std::vector InputIndexer::ComputeDimensionCeiling( + const std::vector& output_dimensions, + const std::vector& input_dimensions) { + std::vector dimension_ceiling(input_dimensions.size()); + for (size_t i = 0; i < input_dimensions.size(); ++i) { + dimension_ceiling[i] = (output_dimensions[i] + input_dimensions[i] - 1) / + input_dimensions[i]; + } + return dimension_ceiling; +} - bool found = false; - const auto& input_tensor_shape = input_tensor.shape(); +std::vector InputIndexer::ComputeCumulativeDimensions() { + std::vector cumulative_dimensions(rank_); + int count = 0; + for (int i = 0; i < rank_; ++i) { + if (count == 0) { + cumulative_dimensions[count] = 1; + } else { + cumulative_dimensions[count] = + cumulative_dimensions[count - 1] * dimension_ceiling_[count - 1]; + } + ++count; + } + return cumulative_dimensions; +} +template +void process_desired_shape(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& input_tensor_shape, + const IndexVecT& desired_shape, + int* adjustable_dimension, + std::vector* target_dimensions, + tensorflow::int64* output_size) { + tensorflow::int64 new_sliced_size = 1; + bool found = false; + const int rank = 
input_tensor_shape.dims(); for (int i = 0; i < rank; ++i) { - // if (desired_shape(i) < 1) { if (desired_shape[i] < 1) { // only one index can be adjustable OP_REQUIRES(context, !found, tensorflow::errors::InvalidArgument( "periodic_resample expects only " "one index to be marked as adjustable.")); - adjustable_dimension = i; + *adjustable_dimension = i; found = true; } else { OP_REQUIRES( @@ -122,9 +232,8 @@ template +void +do_periodic_resample_op(tensorflow::OpKernelContext* context, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape, + const tensorflow::Tensor& source_tensor) { + const int rank = source_tensor.dims(); + + // requires that the rank of the input tensor and length of the desired shape + // are equal + OP_REQUIRES(context, rank == desired_shape.dims(), + tensorflow::errors::InvalidArgument( + "periodic_resample expects the rank of the input tensor, ", + rank, ", to be the same as the length of the desired shape, ", + desired_shape.dims(), ".")); + + std::vector target_dimensions(rank); + tensorflow::int64 new_size = 0; + // index of adjustable dimension + int adjustable_dimension = 0; + process_desired_shape(context, original_shape, desired_shape.dim_sizes(), + &adjustable_dimension, &target_dimensions, &new_size); // ensure that the new dimension is greater than zero OP_REQUIRES(context, target_dimensions[adjustable_dimension] > 0, @@ -160,11 +293,14 @@ template allocate_output(0, output_shape, &output_tensor)); auto output = output_tensor->flat(); - // memory is allocated for these variables outside the inner loop for - // efficiency (although, I could create a separate class scope for - // this purpose instead) - tensorflow::int64 result = 0; - std::vector output_indices(target_dimensions.size()); + // input is a strided array (last index is fastest, C-ordered) + auto input = source_tensor.flat(); // Fill output tensor with periodically resampled input tensor values - for (tensorflow::int64 
output_index = 0; output_index < new_size; - ++output_index) { - output(output_index) = input(compute_input_index( - &target_dimensions, output_index, original_dimensions, - adjustable_dimension, dimension_ceiling, cumulative_dimensions, &result, - &output_indices, rank)); - } + InputIndexer input_indexer(target_dimensions, original_shape, + adjustable_dimension); + + auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); + auto fill_output_tensor = [&input_indexer, &output, &input]( + tensorflow::int64 start, tensorflow::int64 limit) { + InputIndexer local_indexer(input_indexer); + local_indexer.MoveToOutputIndex(start); + for (tensorflow::int64 output_index = start; output_index < limit; + ++output_index) { + if (mode == Mode::kForward) { + output(output_index) = input(local_indexer.linear_input_index()); + } else { + output(local_indexer.linear_input_index()) = input(output_index); + } + local_indexer.IncrementOutputIndex(); + } + }; + ::tensorflow::Shard(worker_threads.num_threads, worker_threads.workers, + new_size, costPerFillIndex, fill_output_tensor); } +#define DATA_TYPE_SWITCH(data_type, context, CASE) \ + switch (data_type) { \ + CASE(float) \ + CASE(double) \ + CASE(tensorflow::int32) \ + CASE(tensorflow::int64) \ + default: \ + context->CtxFailure(__FILE__, __LINE__, \ + tensorflow::errors::InvalidArgument( \ + "Unsuppored tensor elements type")); \ + break; \ + } + void create_output_tensor( tensorflow::OpKernelContext* context, const tensorflow::Tensor& input_tensor, const tensorflow::DataType& input_tensor_type, - const tensorflow::PartialTensorShape& desired_shape_tensor) { - auto desired_shape = desired_shape_tensor.dim_sizes(); - - // obligatory type switch - switch (input_tensor_type) { - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + 
do_periodic_resample_op( \ + context, input_tensor.shape(), desired_shape, input_tensor); \ break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); - break; - case tensorflow::DataTypeToEnum::value: - fill_periodic_tensor(context, desired_shape, - input_tensor); + + DATA_TYPE_SWITCH(input_tensor_type, context, CASE); +#undef CASE +} + +void create_grad_tensor(tensorflow::OpKernelContext* context, + const tensorflow::Tensor& grad_tensor, + const tensorflow::DataType& grad_tensor_type, + const tensorflow::TensorShape& original_shape, + const tensorflow::PartialTensorShape& desired_shape) { +#define CASE(type) \ + case tensorflow::DataTypeToEnum::value: \ + do_periodic_resample_op( \ + context, original_shape, desired_shape, grad_tensor); \ break; - default:; - } + + DATA_TYPE_SWITCH(grad_tensor_type, context, CASE); +#undef CASE } } // namespace @@ -238,4 +400,25 @@ class PeriodicResampleOp : public tensorflow::OpKernel { tensorflow::PartialTensorShape desired_shape; }; +class PeriodicResampleOpGrad : public tensorflow::OpKernel { + public: + explicit PeriodicResampleOpGrad(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("original_shape", &original_shape)); + OP_REQUIRES_OK(context, context->GetAttr("desired_shape", &desired_shape)); + } + + void Compute(tensorflow::OpKernelContext* context) override { + const tensorflow::Tensor& grad_tensor = context->input(0); + const tensorflow::DataType grad_tensor_type = context->input_dtype(0); + create_grad_tensor(context, grad_tensor, grad_tensor_type, original_shape, + desired_shape); + } + + private: + tensorflow::TensorShape original_shape; + tensorflow::PartialTensorShape desired_shape; +}; + #endif // TENSORFLOW_KERNELS_PERIODICRESAMPLE_OP_H_ diff --git 
a/tensorflow/contrib/periodic_resample/ops/array_ops.cc b/tensorflow/contrib/periodic_resample/ops/array_ops.cc index 82bd796956..fd38cd09b4 100644 --- a/tensorflow/contrib/periodic_resample/ops/array_ops.cc +++ b/tensorflow/contrib/periodic_resample/ops/array_ops.cc @@ -26,7 +26,42 @@ REGISTER_OP("PeriodicResample") .Input("values: T") .Attr("shape: shape") .Output("output: T") - .SetShapeFn(shape_inference::ExplicitShape) + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::PartialTensorShape desired_shape; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &desired_shape)); + shape_inference::ShapeHandle input_tensor_shape = c->input(0); + shape_inference::DimensionHandle num_input_elements = + c->NumElements(input_tensor_shape); + shape_inference::ShapeHandle result_shape_handle; + if (!shape_inference::InferenceContext::ValueKnown(num_input_elements)) { + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( + desired_shape, &result_shape_handle)); + } else { + const int rank = c->Rank(input_tensor_shape); + std::vector target_dimensions(rank); + tensorflow::int64 new_sliced_size = 1; + int adjustable_dimension = 0; + for (int i = 0; i < rank; ++i) { + if (desired_shape.dim_size(i) < 1) { + adjustable_dimension = i; + } else { + target_dimensions[i] = desired_shape.dim_size(i); + new_sliced_size *= target_dimensions[i]; + } + } + target_dimensions[adjustable_dimension] = + shape_inference::InferenceContext::Value( + num_input_elements) / new_sliced_size; + tensorflow::TensorShape result_shape; + for (int i = 0; i < rank; ++i) { + result_shape.AddDim(target_dimensions[i]); + } + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape( + result_shape, &result_shape_handle)); + } + c->set_output(0, result_shape_handle); + return Status::OK(); + }) .Doc(R"doc( Periodically resample elements of a tensor to conform to `shape`. 
@@ -101,4 +136,20 @@ output: Periodically resampled tensor that has dimensions specified as in )doc"); + +REGISTER_OP("PeriodicResampleOpGrad") + .Attr("T: numbertype") + .Input("grad: T") + .Attr("original_shape: shape") + .Attr("desired_shape: shape") + .Output("grad_values: T") + .SetShapeFn([](shape_inference::InferenceContext* c) { + tensorflow::TensorShape original_shape; + TF_RETURN_IF_ERROR(c->GetAttr("original_shape", &original_shape)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(original_shape, &s)); + c->set_output(0, s); + return Status::OK(); +}); + } // namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc new file mode 100644 index 0000000000..55edf76fcd --- /dev/null +++ b/tensorflow/contrib/periodic_resample/ops/array_ops_test.cc @@ -0,0 +1,40 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +TEST(ArrayOpsTest, PeriodicResample_ShapeFn) { + ShapeInferenceTestOp op("PeriodicResample"); + // Case 1: output shape can be fully inferred. + PartialTensorShape shape({4, 4, -1}); + TensorShapeProto shape_proto; + shape.AsProto(&shape_proto); + + TF_ASSERT_OK(NodeDefBuilder("test", "PeriodicResample") + .Input({"values", 0, DT_INT32}) + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "[2,2,4]", "[4,4,1]"); + // Case 2: output shape can not be inferred - report desired shape. + INFER_OK(op, "[2,2,?]", "[4,4,?]"); +} + +} // end namespace tensorflow diff --git a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py index a25de55e18..31a6fe1d94 100644 --- a/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py +++ b/tensorflow/contrib/periodic_resample/python/kernel_tests/periodic_resample_op_test.py @@ -21,8 +21,11 @@ from __future__ import print_function import numpy from tensorflow.contrib.periodic_resample import periodic_resample +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -93,7 +96,6 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): def testPeriodicResampleErrors(self): input_tensor =
numpy.zeros(shape=[1, 2, 2, 4]) with self.test_session(): - variables.global_variables_initializer().run() with self.assertRaisesWithPredicateMatch( errors_impl.InvalidArgumentError, 'Dimension 3 input tensor has size 4, desired shape has size 1'): @@ -103,6 +105,29 @@ class PeriodicResampleTest(test_util.TensorFlowTestCase): '4, to be the same as the length of the desired shape, 3'): periodic_resample(input_tensor, [None, 4, 4]).eval() + def testPeriodicResampleGradient(self): + desired_shape = numpy.array([4, 4, None]) + result_shape = (4, 4, 1) + input_shape = (2, 2, 4) + with self.test_session() as sess: + x = array_ops.placeholder(dtypes.float32, shape=input_shape) + output = periodic_resample(x, desired_shape) + error = gradient_checker.compute_gradient_error( + x, input_shape, output, result_shape) + self.assertLess(error, 1e-4) + + def testPeriodicResampleShapeInference(self): + with self.test_session() as sess: + # Case 1: output shape can be fully inferred. + x = array_ops.placeholder(dtypes.float32, shape=(2, 2, 4)) + output = periodic_resample(x, [4, 4, None]) + self.assertEqual(output.shape, [4, 4, 1]) + # Case 2: output shape can not be inferred - report desired shape.
+ x = array_ops.placeholder(dtypes.float32, shape=(2, 2, None)) + output = periodic_resample(x, [4, 4, None]) + self.assertTrue(output.shape.is_compatible_with([4, 4, None])) + self.assertEqual(output.shape[2].value, None) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py index 348623d8f8..470e300ccb 100644 --- a/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py +++ b/tensorflow/contrib/periodic_resample/python/ops/periodic_resample_op.py @@ -21,11 +21,17 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.periodic_resample.python.ops import gen_periodic_resample_op -from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample +from tensorflow.contrib.periodic_resample.python.ops.gen_periodic_resample_op import periodic_resample, periodic_resample_op_grad from tensorflow.contrib.util import loader +from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader # pylint: enable=unused-import _periodic_resample_op = loader.load_op_library( resource_loader.get_path_to_datafile('_periodic_resample_op.so')) + +@ops.RegisterGradient("PeriodicResample") +def _periodic_resample_grad_cc(op, grad): + return periodic_resample_op_grad( + grad, op.inputs[0].shape, op.get_attr('shape')) -- GitLab From 310a51bd875bbac16cb2829e16428fca04fc3a29 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 4 Jun 2018 17:15:05 -0700 Subject: [PATCH 511/902] HloParser: use uint16 in U16 case PiperOrigin-RevId: 199220422 --- tensorflow/compiler/xla/service/hlo_parser.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 09c05c9821..ec20606d2f 100644 --- 
a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -1391,8 +1391,8 @@ bool HloParser::SetValueInLiteral(tensorflow::int64 value, return SetValueInLiteralHelper(value, linear_index, literal); case U16: - return SetValueInLiteralHelper(value, linear_index, - literal); + return SetValueInLiteralHelper(value, linear_index, + literal); case U32: return SetValueInLiteralHelper(value, linear_index, literal); -- GitLab From 35c8574e49aadcf16d009717e1d31fcce148db02 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 4 Jun 2018 17:23:10 -0700 Subject: [PATCH 512/902] [XLA] Don't dump subgraphs twice in hlo_graph_dumper. Surprisingly, dumping a subgraph twice mostly worked. But it broke the rollover edge highlighting, and it also drew all the edges in the subgraph twice. PiperOrigin-RevId: 199221368 --- .../compiler/xla/service/hlo_graph_dumper.cc | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 05adb45713..61612bebd1 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -590,15 +590,26 @@ bool HloDotDumper::ShouldShowSubcomputation(const HloComputation* subcomp) { string HloDotDumper::DumpSubcomputation(const HloComputation* subcomp, const HloInstruction* parent_instr) { VLOG(2) << "Dumping subcomputation " << subcomp->name(); - const char* computation_fmt = R"(subgraph %s { -%s -label = <%s>; -labelloc = t; -tooltip = " "; -%s -} // %s + // Add an edge from the subcomputation to its parent node. If subcomp + // belongs to a fusion node, it's drawn in place of the fusion instruction, + // so there's no need to link those.
+ if (parent_instr->opcode() != HloOpcode::kFusion) { + const HloInstruction* from = GetNodeForEdge(subcomp->root_instruction()); + VLOG(2) << "Edge: from " << from->name() << " to " << parent_instr->name() + << " as " << next_edge_id_; + edge_ids_.insert({{from, parent_instr}, next_edge_id_++}); + const char* edge_fmt = + R"(%s -> %s [ltail="%s", style="dashed" tooltip="%s -> %s"];)"; + edges_.push_back(Printf( + edge_fmt, InstructionId(from), InstructionId(parent_instr), + SubcomputationId(subcomp), subcomp->name(), parent_instr->name())); + } -)"; + // Have we already dumped this subcomputation? If so, generating the edge + // linking it and parent_instr is all we want to do in this function. + if (cluster_ids_.find(subcomp) != cluster_ids_.end()) { + return ""; + } cluster_ids_[subcomp] = next_cluster_id_++; @@ -645,25 +656,16 @@ tooltip = " "; string comp_body = DumpComputation(subcomp); - // Add an edge from the subcomputation to its parent node. If subcomp - // belongs to a fusion node, it's drawn in place of the fusion instruction, - // so there's no need to link those. 
- if (parent_instr->opcode() != HloOpcode::kFusion) { - const HloInstruction* from = GetNodeForEdge(subcomp->root_instruction()); - VLOG(2) << "Edge: from " << from->name() << " to " << parent_instr->name() - << " as " << next_edge_id_; - edge_ids_.insert({{from, parent_instr}, next_edge_id_++}); - const char* edge_fmt = - R"(%s -> %s [ltail="%s", style="dashed" tooltip="%s -> %s"];)"; - edges_.push_back(Printf( - edge_fmt, InstructionId(from), InstructionId(parent_instr), - SubcomputationId(subcomp), subcomp->name(), parent_instr->name())); - } - - string computation = - Printf(computation_fmt, id, style, subcomp_label, comp_body, id); + const char* computation_fmt = R"(subgraph %s { +%s +label = <%s>; +labelloc = t; +tooltip = " "; +%s +} // %s - return computation; +)"; + return Printf(computation_fmt, id, style, subcomp_label, comp_body, id); } string HloDotDumper::DumpComputation(const HloComputation* comp) { -- GitLab From 76801dda9b4766d729ab88267ee47f48d05eafb7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 18:57:57 -0700 Subject: [PATCH 513/902] Enable XLA fusions as a Grappler optimization. 
PiperOrigin-RevId: 199230907 --- tensorflow/compiler/jit/BUILD | 46 +++ .../compiler/jit/mark_for_compilation_pass.cc | 161 ++------- tensorflow/compiler/jit/xla_cluster_util.cc | 161 +++++++++ tensorflow/compiler/jit/xla_cluster_util.h | 46 +++ .../compiler/jit/xla_fusion_optimizer.cc | 321 ++++++++++++++++++ .../compiler/jit/xla_fusion_optimizer.h | 49 +++ .../compiler/jit/xla_fusion_optimizer_test.cc | 183 ++++++++++ .../custom_graph_optimizer_registry.h | 2 +- .../grappler/optimizers/meta_optimizer.cc | 100 +++--- .../core/grappler/optimizers/meta_optimizer.h | 4 + 10 files changed, 889 insertions(+), 184 deletions(-) create mode 100644 tensorflow/compiler/jit/xla_cluster_util.cc create mode 100644 tensorflow/compiler/jit/xla_cluster_util.h create mode 100644 tensorflow/compiler/jit/xla_fusion_optimizer.cc create mode 100644 tensorflow/compiler/jit/xla_fusion_optimizer.h create mode 100644 tensorflow/compiler/jit/xla_fusion_optimizer_test.cc diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 6d6c030a26..ab8cd8f4bc 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -25,6 +25,7 @@ load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") # Target that bundles up the XLA CPU and GPU JIT devices. 
cc_library( @@ -312,6 +313,7 @@ cc_library( ":common", ":shape_inference_helpers", ":union_find", + ":xla_cluster_util", "//tensorflow/compiler/jit/graphcycles", "//tensorflow/compiler/jit/kernels:parallel_check_op", "//tensorflow/compiler/jit/legacy_flags:encapsulate_subgraphs_pass_flags", @@ -332,6 +334,18 @@ cc_library( ], ) +cc_library( + name = "xla_cluster_util", + srcs = ["xla_cluster_util.cc"], + hdrs = ["xla_cluster_util.h"], + deps = [ + "//tensorflow/compiler/jit/graphcycles", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core/kernels:bounds_check", + ], +) + cc_library( name = "union_find", hdrs = ["union_find.h"], @@ -408,6 +422,38 @@ tf_cc_test( ], ) +cc_library( + name = "xla_fusion_optimizer", + srcs = ["xla_fusion_optimizer.cc"], + hdrs = ["xla_fusion_optimizer.h"], + visibility = ["//visibility:public"], + deps = [ + ":common", + ":union_find", + ":xla_cluster_util", + "//tensorflow/compiler/jit/graphcycles", + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + ], +) + +tf_cuda_cc_test( + name = "xla_fusion_optimizer_test", + srcs = ["xla_fusion_optimizer_test.cc"], + deps = [ + ":common", + ":xla_cluster_util", + ":xla_fusion_optimizer", + "//tensorflow/core:graph", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) + # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library. 
cc_header_only_library( name = "xla_jit_headers_lib", diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 07ee93d79e..74468266b9 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" #include "tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h" #include "tensorflow/compiler/jit/union_find.h" +#include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/function.h" @@ -41,9 +42,6 @@ limitations under the License. namespace tensorflow { -const char* const kXlaClusterAttr = "_XlaCluster"; -const char* const kXlaOutsideCompilationAttr = "_XlaOutsideCompilation"; - namespace { // Returns true if, when executed in TensorFlow, `node` is guaranteed to forward @@ -191,16 +189,6 @@ bool IsCompilableCall(const NodeDef& call_def, return true; } -// Returns the DeviceType corresponding to 'device'. -Status DeviceTypeOfDevice(const string& device, DeviceType* device_type) { - DeviceNameUtils::ParsedName parsed; - if (!DeviceNameUtils::ParseFullName(device, &parsed)) { - return errors::Internal("Malformed assigned device '", device, "'"); - } - *device_type = DeviceType(parsed.type); - return Status::OK(); -} - // Tests whether `node` has a DT_RESOURCE typed input or output. 
bool HasResourceInputOrOutput(const Node& node) { return std::find(node.input_types().begin(), node.input_types().end(), @@ -209,18 +197,11 @@ bool HasResourceInputOrOutput(const Node& node) { DT_RESOURCE) != node.output_types().end(); } -struct NodeCompare { - bool operator()(const Node* a, const Node* b) const { - return a->id() < b->id(); - } -}; -using OrderedNodeSet = std::set; - // Returns true if the op can be decomposed into XLA ops for which // there are fusable elemental implementations. // -// TODO(hpucha): Consider a black list instead of a white list as -// implemented below. +// TODO(hpucha): Remove this code since this functionality is subsumed by +// Grappler XlaFusionOptimizer. bool IsXlaFusable(const NodeDef& node) { static const std::unordered_set* elementwise_ops = new std::unordered_set( @@ -390,7 +371,7 @@ Status FindCompilationCandidates( for (Node* node : graph.op_nodes()) { sorted_nodes.push_back(node); } - std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeCompare()); + std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeComparatorID()); for (Node* node : sorted_nodes) { VLOG(2) << "Fuel: " << fuel; @@ -405,9 +386,13 @@ Status FindCompilationCandidates( DeviceType device_type(""); TF_RETURN_IF_ERROR( - DeviceTypeOfDevice(node->assigned_device_name(), &device_type)); + DeviceToDeviceType(node->assigned_device_name(), &device_type)); - if (is_compilable_fn && !is_compilable_fn(node, device_type)) continue; + if (is_compilable_fn && !is_compilable_fn(node, device_type)) { + VLOG(2) << "Compilation rejected node: not compilable " << node->name() + << ": " << node->type_string(); + continue; + } const XlaOpRegistry::DeviceRegistration* registration; CHECK( @@ -456,46 +441,6 @@ struct Cluster { int representative = -1; }; -// Returns a string describing how an edge from src to dst would -// create a cycle. 
-string DescribeCycle(const GraphCycles& cycles, const Graph& graph, int src, - int dst) { - int32 max_path_size = graph.num_node_ids() + 1; - std::vector path(max_path_size); - int32 path_size = cycles.FindPath(dst, src, max_path_size, path.data()); - if (path_size == 0) { - return ""; - } - - auto node_name = [&cycles, &graph](int node_id) { - if (!FastBoundsCheck(node_id, graph.num_node_ids())) { - return string("(null)"); - } - auto* node = graph.FindNodeId(node_id); - if (node == nullptr) { - return string("(null)"); - } - return node->name(); - }; - - string description; - strings::StrAppend(&description, "Edge from ", node_name(src), " to ", - node_name(dst), " would create a cycle.\n"); - path.resize(path_size); - for (int32 node_id : path) { - string ascii_art; - if (node_id == dst) { - ascii_art = "+-> "; - } else if (node_id != src) { - ascii_art = "| "; - } else { - ascii_art = "+-- "; - } - strings::StrAppend(&description, ascii_art, node_name(node_id), "\n"); - } - return description; -} - } // anonymous namespace bool IsCompilable(FunctionLibraryRuntime* flr, const NodeDef& ndef) { @@ -601,84 +546,13 @@ Status MarkForCompilationPass::RunImpl( : Env::Default(), is_compilable_fn, &compilation_candidates)); - GraphCycles cycles; - for (int i = 0; i < graph->num_node_ids(); ++i) { - // We rely on the node IDs in the cycle detection graph being consecutive - // integers starting from 0. - CHECK_EQ(i, cycles.NewNode()); + if (compilation_candidates.empty()) { + VLOG(2) << "No compilable candidates"; + return Status::OK(); } - // Compute the loop structure of the graph. - std::vector control_flow_info; - TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph, &control_flow_info)); - - // The clustering code must avoid adding cycles to the graph to prevent - // deadlock. However, the graph may contain loops, which would trigger the - // cycle detection code. 
To handle loops, we alter the structure of the cycle - // detection graph, disconnecting each loop from the enclosing graph. - // Specifically, we: - // * add a new "frame" node for each loop. - // * replace edges to "Enter" nodes, and edges from "Exit" nodes with edges - // to/from the corresponding frame node. In essence, we collapse the loop - // into a single node for the purpose of cycle detection in the enclosing - // graph. - // * the body of the loop should now be disconnected from the rest of the - // graph; we make it acyclic by breaking loop backedges (edges outgoing from - // "NextIteration" nodes. - - // Map from frame name strings to node IDs in the cycle detection graph. - std::unordered_map frame_nodes; - - // Get the cycle graph node ID for frame 'frame_name', or add one if none - // exists. - auto GetOrAddFrameNodeId = [&frame_nodes, &cycles](const string& frame_name) { - int& frame_id = frame_nodes.emplace(frame_name, -1).first->second; - if (frame_id < 0) { - // The emplace succeeded; we have not allocated a frame node yet. - frame_id = cycles.NewNode(); - } - return frame_id; - }; - - for (Edge const* edge : graph->edges()) { - if (edge->dst()->IsEnter()) { - // Lift edges to an "Enter" node to the corresponding frame node. - const string& frame_name = - control_flow_info[edge->dst()->id()].frame_name; - int dst = GetOrAddFrameNodeId(frame_name); - if (!cycles.InsertEdge(edge->src()->id(), dst)) { - return errors::Internal( - "Cycle detected when adding enter->frame edge: ", - DescribeCycle(cycles, *graph, edge->src()->id(), dst)); - } - continue; - } - if (edge->src()->IsExit()) { - // Lift edges from an "Exit" node to the corresponding frame node. 
- const string& frame_name = - control_flow_info[edge->src()->id()].frame_name; - int src = GetOrAddFrameNodeId(frame_name); - if (!cycles.InsertEdge(src, edge->dst()->id())) { - return errors::Internal( - "Cycle detected when adding frame->exit edge: ", - DescribeCycle(cycles, *graph, src, edge->dst()->id())); - } - // Drop the original edge. - continue; - } - if (edge->src()->IsNextIteration()) { - // Break loop back-edges. - continue; - } - if (!cycles.InsertEdge(edge->src()->id(), edge->dst()->id())) { - // This should never happen. All cycles in the graph should contain - // a control flow operator. - return errors::Internal( - "Found cycle in graph without control flow operator during XLA " - "compilation: ", - DescribeCycle(cycles, *graph, edge->src()->id(), edge->dst()->id())); - } - } + GraphCycles cycles; + TF_RETURN_IF_ERROR(CreateCycleDetectionGraph(graph, &cycles)); // Each compilation candidate belongs to a cluster. The cluster's // representative @@ -696,6 +570,9 @@ Status MarkForCompilationPass::RunImpl( // Repeatedly contract edges between clusters that are on the same device, // provided the contraction would not create a cycle. + // + // TODO(hpucha): Handle the case where kXlaClusterAttr is already set (for + // example, from the Grappler fusion pass). while (!worklist.empty()) { int from = worklist.front()->Get().representative; worklist.pop_front(); @@ -804,7 +681,7 @@ Status MarkForCompilationPass::RunImpl( // compilation. 
DeviceType device_type(""); TF_RETURN_IF_ERROR( - DeviceTypeOfDevice(n->assigned_device_name(), &device_type)); + DeviceToDeviceType(n->assigned_device_name(), &device_type)); const XlaOpRegistry::DeviceRegistration* registration; XlaOpRegistry::GetCompilationDevice(device_type.type(), ®istration); diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc new file mode 100644 index 0000000000..70bd10336b --- /dev/null +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -0,0 +1,161 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_cluster_util.h" + +#include + +#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/util/device_name_utils.h" + +namespace tensorflow { + +const char* const kXlaClusterAttr = "_XlaCluster"; +const char* const kXlaOutsideCompilationAttr = "_XlaOutsideCompilation"; + +namespace { +// Returns a string describing how an edge from src to dst would +// create a cycle. 
+string DescribeCycle(const GraphCycles* cycles, const Graph& graph, int src, + int dst) { + int32 max_path_size = graph.num_node_ids() + 1; + std::vector<int32> path(max_path_size); + int32 path_size = cycles->FindPath(dst, src, max_path_size, path.data()); + if (path_size == 0) { + return ""; + } + + auto node_name = [cycles, &graph](int node_id) { + if (!FastBoundsCheck(node_id, graph.num_node_ids())) { + return string("(null)"); + } + auto* node = graph.FindNodeId(node_id); + if (node == nullptr) { + return string("(null)"); + } + return node->name(); + }; + + string description; + strings::StrAppend(&description, "Edge from ", node_name(src), " to ", + node_name(dst), " would create a cycle.\n"); + path.resize(path_size); + for (int32 node_id : path) { + string ascii_art; + if (node_id == dst) { + ascii_art = "+-> "; + } else if (node_id != src) { + ascii_art = "| "; + } else { + ascii_art = "+-- "; + } + strings::StrAppend(&description, ascii_art, node_name(node_id), "\n"); + } + return description; +} +} // namespace + +Status DeviceToDeviceType(const string& device, DeviceType* device_type) { + DeviceNameUtils::ParsedName parsed; + if (!DeviceNameUtils::ParseFullName(device, &parsed)) { + return errors::Internal("Malformed assigned device '", device, "'"); + } + *device_type = DeviceType(parsed.type); + return Status::OK(); +} + +Status CreateCycleDetectionGraph(const Graph* graph, GraphCycles* cycles) { + for (int i = 0; i < graph->num_node_ids(); ++i) { + // We rely on the node IDs in the cycle detection graph being consecutive + // integers starting from 0. + CHECK_EQ(i, cycles->NewNode()); + } + + // Compute the loop structure of the graph. + std::vector<ControlFlowInfo> control_flow_info; + TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph, &control_flow_info)); + + // The clustering code must avoid adding cycles to the graph to prevent + // deadlock. However, the graph may contain loops, which would trigger the + // cycle detection code.
To handle loops, we alter the structure of the cycle + // detection graph, disconnecting each loop from the enclosing graph. + // Specifically, we: + // * add a new "frame" node for each loop. + // * replace edges to "Enter" nodes, and edges from "Exit" nodes with edges + // to/from the corresponding frame node. In essence, we collapse the loop + // into a single node for the purpose of cycle detection in the enclosing + // graph. + // * the body of the loop should now be disconnected from the rest of the + // graph; we make it acyclic by breaking loop backedges (edges outgoing from + // "NextIteration" nodes). + + // Map from frame name strings to node IDs in the cycle detection graph. + std::unordered_map<string, int> frame_nodes; + + // Get the cycle graph node ID for frame 'frame_name', or add one if none + // exists. + auto GetOrAddFrameNodeId = [&frame_nodes, cycles](const string& frame_name) { + int& frame_id = frame_nodes.emplace(frame_name, -1).first->second; + if (frame_id < 0) { + // The emplace succeeded; we have not allocated a frame node yet. + frame_id = cycles->NewNode(); + } + return frame_id; + }; + + for (Edge const* edge : graph->edges()) { + if (edge->dst()->IsEnter()) { + // Lift edges to an "Enter" node to the corresponding frame node. + const string& frame_name = + control_flow_info[edge->dst()->id()].frame_name; + int dst = GetOrAddFrameNodeId(frame_name); + if (!cycles->InsertEdge(edge->src()->id(), dst)) { + return errors::Internal( + "Cycle detected when adding enter->frame edge: ", + DescribeCycle(cycles, *graph, edge->src()->id(), dst)); + } + continue; + } + if (edge->src()->IsExit()) { + // Lift edges from an "Exit" node to the corresponding frame node.
+ const string& frame_name = + control_flow_info[edge->src()->id()].frame_name; + int src = GetOrAddFrameNodeId(frame_name); + if (!cycles->InsertEdge(src, edge->dst()->id())) { + return errors::Internal( + "Cycle detected when adding frame->exit edge: ", + DescribeCycle(cycles, *graph, src, edge->dst()->id())); + } + // Drop the original edge. + continue; + } + if (edge->src()->IsNextIteration()) { + // Break loop back-edges. + continue; + } + if (!cycles->InsertEdge(edge->src()->id(), edge->dst()->id())) { + // This should never happen. All cycles in the graph should contain + // a control flow operator. + return errors::Internal( + "Found cycle in graph without control flow operator during XLA " + "compilation: ", + DescribeCycle(cycles, *graph, edge->src()->id(), edge->dst()->id())); + } + } + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_cluster_util.h b/tensorflow/compiler/jit/xla_cluster_util.h new file mode 100644 index 0000000000..5b673bdc27 --- /dev/null +++ b/tensorflow/compiler/jit/xla_cluster_util.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Contains utilities for clustering compilable graph nodes via XLA. 
+ +#ifndef TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ + +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" +#include "tensorflow/core/graph/algorithm.h" + +namespace tensorflow { + +// The attribute that marks nodes to be grouped into functions by the +// encapsulate subgraphs pass. +extern const char* const kXlaClusterAttr; + +// The attribute that marks nodes in a cluster to be placed outside the xla +// compilation by the encapsulate subgraphs pass. +extern const char* const kXlaOutsideCompilationAttr; + +using OrderedNodeSet = std::set<Node*, NodeComparatorID>; + +// Returns the DeviceType corresponding to 'device'. +Status DeviceToDeviceType(const string& device, DeviceType* device_type); + +// Creates a graph representation to enable cycle detection when clustering. +// This representation handles loops in graph by disconnecting each loop from +// the enclosing graph. +Status CreateCycleDetectionGraph(const Graph* graph, GraphCycles* cycles); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer.cc b/tensorflow/compiler/jit/xla_fusion_optimizer.cc new file mode 100644 index 0000000000..96016521ea --- /dev/null +++ b/tensorflow/compiler/jit/xla_fusion_optimizer.cc @@ -0,0 +1,321 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_fusion_optimizer.h" + +#include +#include +#include +#include + +#include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/graphcycles/graphcycles.h" +#include "tensorflow/compiler/jit/union_find.h" +#include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" + +namespace tensorflow { + +// Is 'node' an operator that consumes only the shape of its input, not the +// data itself? +static bool IsShapeConsumerOp(const Node& node) { + return node.type_string() == "Shape" || node.type_string() == "ShapeN" || + node.type_string() == "Rank" || node.type_string() == "Size"; +} + +// Returns true if the op can be decomposed into XLA ops for which +// there are fusable elemental implementations. 
+bool IsXlaFusable(const NodeDef& node) { + static const std::unordered_set* elementwise_ops = + new std::unordered_set( + {// tf2xla/kernels/aggregate_ops.cc + "AddN", + // tf2xla/kernels/binary_ops.cc + "Add", "Sub", "Mul", "Div", "Atan2", "Complex", "FloorDiv", + "FloorMod", "BitwiseAnd", "BitwiseOr", "LeftShift", "RightShift", + "LogicalAnd", "LogicalOr", "Mod", "Maximum", "Minimum", "RealDiv", + "ReciprocalGrad", "RsqrtGrad", "SqrtGrad", "SquaredDifference", + "TruncateDiv", "TruncateMod", "Equal", "NotEqual", "Greater", + "GreaterEqual", "Less", "LessEqual", "SigmoidGrad", "SoftplusGrad", + "SoftsignGrad", "TanhGrad", "Pow", "ApproximateEqual", + // tf2xla/kernels/unary_ops.cc + "ComplexAbs", "Angle", "Conj", "Abs", "Acos", "Acosh", "Asin", + "Asinh", "Atan", "Atanh", "Ceil", "Cos", "Cosh", "Sin", "Exp", + "Expm1", "Floor", "IsFinite", "IsInf", "IsNan", "Inv", "Reciprocal", + "Log", "Log1p", "Invert", "LogicalNot", "Neg", "Rint", "Round", + "Rsqrt", "Sigmoid", "Sign", "Sinh", "Softplus", "Softsign", "Sqrt", + "Square", "Tan", "Tanh", "Real", "Imag", + // tf2xla/kernels/bcast_ops.cc + "BroadcastArgs", "BroadcastGradientArgs", + // tf2xla/kernels/bias_ops.cc + "BiasAdd", "BiasAddV1", "BiasAddGrad" /*(Reduce)*/, + // tf2xla/kernels/cast_op.cc + "Cast", + // tf2xla/kernels/concat_op.cc + "Concat", "ConcatV2", "ConcatOffset", + // tf2xla/kernels/const_op.cc + "Const", + // tf2xla/kernels/elu_op.cc + "Elu", "EluGrad", "Selu", "SeluGrad", + // tf2xla/kernels/fill_op.cc + "Fill", + // tf2xla/kernels/identity_op.cc + "Identity", "IdentityN", "PreventGradient", + "StopGradient", /*"Snapshot",*/ + // tf2xla/kernels/index_ops.cc + "ArgMax", "ArgMin", + // tf2xla/kernels/mirror_pad_op.cc + "MirrorPad", + // tf2xla/kernels/one_hot_op.cc + "OneHot", + // tf2xla/kernels/pack_op.cc + "Pack", + // tf2xla/kernels/pad_op.cc + "Pad", "PadV2", + // tf2xla/kernels/relu_op.cc + "Relu", "Relu6", "ReluGrad", "Relu6Grad", + // tf2xla/kernels/reshape_op.cc + "Reshape", + // 
tf2xla/kernels/reverse_op.cc + "Reverse", "ReverseV2", + // tf2xla/kernels/reverse_sequence_op.cc + "ReverseSequence", + // tf2xla/kernels/shape_op.cc + "Shape", "ShapeN", "Rank", "Size", "ExpandDims", "Squeeze", + "ZerosLike", "OnesLike", + // tf2xla/kernels/slice_op.cc + "Slice", + // tf2xla/kernels/split_op.cc + "Split", "SplitV", + // tf2xla/kernels/strided_slice_op.cc + "StridedSlice", "StridedSliceGrad", "ResourceStridedSliceAssign", + // tf2xla/kernels/tile_ops.cc + "Tile", + // tf2xla/kernels/transpose_op.cc + "Transpose", "InvertPermutation", + // tf2xla/kernels/unpack_op.cc + "Unpack"}); + + return elementwise_ops->count(node.op()) > 0; +} + +Status XlaFusionOptimizer::Optimize(grappler::Cluster* cluster, + const grappler::GrapplerItem& item, + GraphDef* output) { + VLOG(2) << "Here at fusion optimizer"; + + // TODO(hpucha): Implement encapsulation and replacing with XlaLaunch op. + // Once that happens, the expected interaction between this optimizer and when + // the global_jit_level is set is as follows: Fusion optimizer will replace + // appropriate fusion clusters with XlaLaunch nodes. The remaining graph can + // be further compiled where possible via mark_for_compilation_pass. Note that + // this might lead to inefficient clustering, and it is best to use either the + // fusion optimizer or the global_jit flag, and not combine the two. + + // Create a Graph out of GraphDef. This is required currently because the + // helpers around clustering, encapsulation etc work on graphs. 
+ FunctionLibraryDefinition function_library(OpRegistry::Global(), + item.graph.library()); + Graph graph(function_library); + ShapeRefiner shape_refiner(graph.versions(), graph.op_registry()); + shape_refiner.set_require_shape_inference_fns(false); + shape_refiner.set_disable_constant_propagation(true); + ImportGraphDefOptions options; + // Graph optimization happens at the late stage of graph execution, when + // colocation constraints are already validated previously and the device + // placement of nodes has also completed, so there is no need to validate + // colocation constraints again. + options.validate_colocation_constraints = false; + options.validate_shape = false; + TF_RETURN_IF_ERROR( + ImportGraphDef(options, item.graph, &graph, &shape_refiner)); + + // Collect nodes that can be fused via XLA, while ignoring those that + // explicitly ask for XLA: (*) nodes that are marked to be compiled + // explicitly. (*) nodes assigned to XLA device. + OrderedNodeSet compilation_candidates; + for (Node* node : graph.op_nodes()) { + // If there is a _XlaCompile annotation, ignore the node if it is + // true. Nodes are marked with this attr via experimental_jit_scope, and + // will be handled by the mark_for_compilation pass. + bool compile = false; + Status status = GetNodeAttr(node->attrs(), kXlaCompileAttr, &compile); + if (status.ok() && compile) { + continue; + } + // If there is already a _XlaCluster annotation, ignore the node. Nodes are + // marked with this attr to indicate they are already part of a cluster and + // hence ignored. + status = GetNodeAttr(node->attrs(), kXlaClusterAttr, &compile); + if (status.ok()) { + continue; + } + + // If there is an explicit XLA device placement, ignore the node. + DeviceType device_type(""); + TF_RETURN_IF_ERROR(DeviceToDeviceType(node->def().device(), &device_type)); + if (device_type.type_string().find("XLA") != string::npos) continue; + + // Assume all fusable ops are registered. 
+ // TODO(hpucha): Check for registration if possible. + if (!IsXlaFusable(node->def())) { + continue; + } + + compilation_candidates.insert(node); + } + + if (compilation_candidates.empty()) { + VLOG(2) << "No compilable candidates"; + *output = item.graph; + return Status::OK(); + } + + GraphCycles cycles; + TF_RETURN_IF_ERROR(CreateCycleDetectionGraph(&graph, &cycles)); + + // TODO(hpucha): Make clustering more robust. There are two known issues that + // we need to mitigate: (a) Non-resource variables can cause deadlocks + // when clustering changes order of execution. See b/77263461 for a specific + // example. (b) Queue operations can also cause deadlocks. See b/77261498 for + // example. + + struct Cluster { + // Identifies the node that represents this cluster in the cycle detection + // graph. + int representative = -1; + }; + + // Each compilation candidate belongs to a cluster. The cluster's + // representative names the node in the 'cycles' graph that represents the + // cluster. + std::vector<UnionFind<Cluster>> clusters(graph.num_node_ids()); + std::deque<UnionFind<Cluster>*> worklist; + for (Node* node : compilation_candidates) { + Cluster& cluster = clusters[node->id()].Get(); + cluster.representative = node->id(); + worklist.push_back(&clusters[node->id()]); + } + + // Repeatedly contract edges between clusters that are on the same device, + // provided the contraction would not create a cycle. This is a simplified + // version of the clustering in mark_for_compilation_pass that also deals with + // nodes that are explicitly tagged to be compiled/clustered. + while (!worklist.empty()) { + int from = worklist.front()->Get().representative; + worklist.pop_front(); + + Node* node_from = graph.FindNodeId(from); + if (node_from->IsControlFlow()) { + // Control flow nodes aren't compilation candidates and should never + // appear.
+ return errors::Internal( + "Found control flow node in clustering worklist: ", + node_from->type_string()); + } + for (int to : cycles.Successors(from)) { + if (to >= graph.num_node_ids()) { + // Node is a "frame" node that is present only in the cycle detection + // graph. No clustering is possible. + continue; + } + Node* node_to = graph.FindNodeId(to); + if (compilation_candidates.find(node_to) == + compilation_candidates.cend()) { + continue; + } + + // Do not cluster across devices. + if (node_from->def().device() != node_to->def().device()) { + VLOG(2) << "Devices " << node_from->def().device() << " " + << node_to->def().device(); + VLOG(2) << "Device names " << node_from->assigned_device_name() << " " + << node_to->assigned_device_name(); + continue; + } + + // Ops that consume shapes cannot be the root of a cluster. This is an + // optimization. + if (clusters[from].Size() == 1 && IsShapeConsumerOp(*node_from)) { + continue; + } + + // If contracting the edge would create a cycle, bail out. + // However, just because we can't merge the clusters now does not mean + // we won't be able to merge them in the future. + // e.g., if we have edges 1->2, 2->3 and 1->3, we cannot contract edge + // 1->3. But if we first contract 1->2 then we can later contract 1->3. + if (!cycles.ContractEdge(from, to)) continue; + + // Merge the clusters. ContractEdge uses 'from' as the number of the + // merged node, so make sure 'from' is the chosen representative. + clusters[from].Merge(&clusters[to]); + + worklist.push_back(&clusters[from]); + break; + } + } + + // Count the number of non-trivial elements in each cluster. + std::vector effective_cluster_sizes(graph.num_node_ids()); + for (const Node* n : compilation_candidates) { + int cluster = clusters[n->id()].Get().representative; + // Identity nodes will be removed if the node gets marked for compilation. + // Therefore we don't want to count them towards the effective cluster size. 
+ if (n->def().op() != "Identity") { + effective_cluster_sizes[cluster]++; + } + } + + const int min_cluster_size = 2; + int num_clusters = 0; + for (auto size : effective_cluster_sizes) { + if (size >= min_cluster_size) { + VLOG(3) << "Cluster " << num_clusters << " " << size; + num_clusters++; + } + } + + // Names for each cluster. + std::unordered_map<int, string> cluster_names; + // Sequence number generator to ensure clusters have unique names. + static std::atomic<int64> cluster_sequence_num; + + for (Node* n : compilation_candidates) { + int cluster = clusters[n->id()].Get().representative; + + // Compile if this is a cluster of >= min_cluster_size compilable operators. + if (effective_cluster_sizes[cluster] >= min_cluster_size) { + string& name = cluster_names[cluster]; + + if (name.empty()) { + name = strings::StrCat("cluster_", cluster_sequence_num++); + } + n->AddAttr(kXlaClusterAttr, name); + VLOG(3) << "Assigning node " << n->name() << " to cluster " << name; + } + } + + graph.ToGraphDef(output); + return Status::OK(); +} + +REGISTER_GRAPH_OPTIMIZER_AS(XlaFusionOptimizer, "xla-fusion"); + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer.h b/tensorflow/compiler/jit/xla_fusion_optimizer.h new file mode 100644 index 0000000000..3d2309e782 --- /dev/null +++ b/tensorflow/compiler/jit/xla_fusion_optimizer.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_XLA_FUSION_OPTIMIZER_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_FUSION_OPTIMIZER_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { + +// Optimizes graphs by fusing ops where possible, resulting in more efficient +// execution. +class XlaFusionOptimizer : public grappler::CustomGraphOptimizer { + public: + XlaFusionOptimizer() {} + ~XlaFusionOptimizer() override {} + + Status Init( + const RewriterConfig_CustomGraphOptimizer* config = nullptr) override { + return Status::OK(); + } + + string name() const override { return "xla-fusion"; }; + + Status Optimize(grappler::Cluster* cluster, + const grappler::GrapplerItem& item, + GraphDef* output) override; + + void Feedback(grappler::Cluster* cluster, const grappler::GrapplerItem& item, + const GraphDef& optimize_output, double result) override { + // Nothing to do for XlaFusionOptimizer. + } +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_XLA_FUSION_OPTIMIZER_H_ diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc b/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc new file mode 100644 index 0000000000..5736760a87 --- /dev/null +++ b/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc @@ -0,0 +1,183 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_fusion_optimizer.h" +#include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/graph_def_builder_util.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace { + +REGISTER_OP("UncompilableNullary").Output("o: float"); +REGISTER_OP("UncompilableUnary").Input("a: float").Output("o: float"); + +class XlaFusionOptimizerTest : public grappler::GrapplerTest { + protected: + std::unordered_map GetClusters(const GraphDef& graph) { + std::unordered_map ids; + for (const NodeDef& node : graph.node()) { + string cluster; + if (GetNodeAttr(AttrSlice(node), kXlaClusterAttr, &cluster).ok()) { + CHECK(!cluster.empty()); + ids[node.name()] = cluster; + } + } + return ids; + } +}; + +TEST_F(XlaFusionOptimizerTest, Chains) { + GraphDef graph; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = + ops::SourceOp("UncompilableNullary", builder.opts().WithName("A")); + Node* b = ops::UnaryOp("Relu", a, builder.opts().WithName("B")); + Node* c = ops::UnaryOp("Relu", b, builder.opts().WithName("C")); + Node* d = + ops::UnaryOp("UncompilableUnary", c, builder.opts().WithName("D")); + Node* e = ops::UnaryOp("Relu", d, builder.opts().WithName("E")); + ops::UnaryOp("Relu", e, builder.opts().WithName("F")); + TF_ASSERT_OK(builder.ToGraphDef(&graph)); + } + grappler::GrapplerItem item; + item.graph = graph; + + XlaFusionOptimizer optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + auto clusters = GetClusters(output); + EXPECT_EQ(4, clusters.size()); + EXPECT_EQ(clusters["B"], clusters["C"]); + EXPECT_EQ(clusters["E"], clusters["F"]); + EXPECT_NE(clusters["B"], clusters["E"]); + 
EXPECT_TRUE(clusters.find("A") == clusters.cend()); + EXPECT_TRUE(clusters.find("D") == clusters.cend()); +} + +TEST_F(XlaFusionOptimizerTest, FusableOps) { + GraphDef graph; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = ops::SourceOp( + "Placeholder", + builder.opts().WithName("A").WithAttr("dtype", tensorflow::DT_FLOAT)); + Node* b = ops::SourceOp( + "Placeholder", + builder.opts().WithName("B").WithAttr("dtype", tensorflow::DT_FLOAT)); + + Node* c = ops::BinaryOp("Add", a, b, builder.opts().WithName("C")); + ops::BinaryOp("MatMul", a, c, builder.opts().WithName("D")); + ops::UnaryOp("Abs", c, builder.opts().WithName("E")); + + TF_ASSERT_OK(builder.ToGraphDef(&graph)); + } + grappler::GrapplerItem item; + item.graph = graph; + + XlaFusionOptimizer optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + auto clusters = GetClusters(output); + EXPECT_EQ(2, clusters.size()); + EXPECT_EQ(clusters["C"], clusters["E"]); + EXPECT_TRUE(clusters.find("D") == clusters.cend()); +} + +TEST_F(XlaFusionOptimizerTest, IgnoreExplicitXLAAttrs) { + GraphDef graph; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = ops::SourceOp( + "Placeholder", + builder.opts().WithName("A").WithAttr("dtype", tensorflow::DT_FLOAT)); + Node* b = ops::SourceOp( + "Placeholder", + builder.opts().WithName("B").WithAttr("dtype", tensorflow::DT_FLOAT)); + + Node* c = ops::BinaryOp( + "Add", a, b, + builder.opts().WithName("C").WithDevice("/device:XLA_CPU")); + ops::BinaryOp("MatMul", a, c, builder.opts().WithName("D")); + Node* e = ops::UnaryOp("Abs", c, builder.opts().WithName("E")); + ops::UnaryOp("Cos", e, + builder.opts().WithName("F").WithAttr(kXlaCompileAttr, true)); + + TF_ASSERT_OK(builder.ToGraphDef(&graph)); + } + grappler::GrapplerItem item; + item.graph = graph; + + XlaFusionOptimizer optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + auto clusters = 
GetClusters(output); + EXPECT_TRUE(clusters.empty()); +} + +TEST_F(XlaFusionOptimizerTest, UncompilableCycles) { + GraphDef graph; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = ops::SourceOp("Const", builder.opts() + .WithName("A") + .WithAttr("dtype", DT_FLOAT) + .WithAttr("value", Tensor())); + Node* b = + ops::UnaryOp("UncompilableUnary", a, builder.opts().WithName("B")); + ops::BinaryOp("Mul", a, b, builder.opts().WithName("C")); + + TF_ASSERT_OK(builder.ToGraphDef(&graph)); + } + grappler::GrapplerItem item; + item.graph = graph; + + XlaFusionOptimizer optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + auto clusters = GetClusters(output); + EXPECT_TRUE(clusters.empty()); +} + +TEST_F(XlaFusionOptimizerTest, CompilableCycles) { + GraphDef graph; + { + GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); + Node* a = ops::SourceOp("Const", builder.opts() + .WithName("A") + .WithAttr("dtype", DT_FLOAT) + .WithAttr("value", Tensor())); + Node* b = ops::UnaryOp("Relu", a, builder.opts().WithName("B")); + ops::BinaryOp("Mul", a, b, builder.opts().WithName("C")); + TF_ASSERT_OK(builder.ToGraphDef(&graph)); + } + grappler::GrapplerItem item; + item.graph = graph; + + XlaFusionOptimizer optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + auto clusters = GetClusters(output); + EXPECT_EQ(3, clusters.size()); + EXPECT_EQ(clusters["A"], clusters["B"]); + EXPECT_EQ(clusters["A"], clusters["C"]); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h index 3148a5f809..0b8e0b692a 100644 --- a/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h +++ b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h @@ -50,7 +50,7 @@ class CustomGraphOptimizerRegistrar { #define 
REGISTER_GRAPH_OPTIMIZER_AS(MyCustomGraphOptimizerClass, name) \ namespace { \ - static CustomGraphOptimizerRegistrar \ + static ::tensorflow::grappler::CustomGraphOptimizerRegistrar \ MyCustomGraphOptimizerClass##_registrar( \ []() { return new MyCustomGraphOptimizerClass; }, (name)); \ } // namespace diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index e6622486eb..143d9dc1c6 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -217,23 +217,9 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, bool is_optimized = false; GraphOptimizationResult optimization_result(item.id); + GraphOptimizer* fusion_optimizer = nullptr; + GraphOptimizer* sa_optimizer = nullptr; - // ScopedAllocatorOptimizer must run last, so move it to the - // end of optimizers and run only on the last iteration. - { - int sa_index = 0; - for (; sa_index < optimizers.size(); ++sa_index) { - if (optimizers[sa_index]->name() == "scoped_allocator_optimizer") { - break; - } - } - const int last_index = optimizers.size() - 1; - if (sa_index < last_index) { - optimizers[last_index].swap(optimizers[sa_index]); - } - } - - const int last_iteration = NumIterations(cfg_) - 1; for (int iteration = 0; iteration < NumIterations(cfg_); ++iteration) { VLOG(4) << "Starting optimization iteration " << iteration + 1; @@ -241,37 +227,40 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, // Some optimizers can run only once. if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue; // Some must run only on the last iteration. 
- if (optimizer->name() == "scoped_allocator_optimizer" && - iteration != last_iteration) + if (optimizer->name() == "scoped_allocator_optimizer") { + if (sa_optimizer == nullptr) sa_optimizer = optimizer.get(); + continue; + } + if (optimizer->name() == "xla-fusion") { + if (fusion_optimizer == nullptr) fusion_optimizer = optimizer.get(); continue; - - uint64 start_us = Env::Default()->NowMicros(); - // This swaps the current optimized_graph into optimized item and - // resets optimized_graph to an empty graph. - optimized_graph->Swap(&optimized_item.graph); - *optimized_graph = GraphDef(); - Status status = - optimizer->Optimize(cluster, optimized_item, optimized_graph); - uint64 end_us = Env::Default()->NowMicros(); - - string result; - if (!status.ok()) { - optimized_graph->Swap(&optimized_item.graph); - result = status.ToString(); - } else { - is_optimized = true; - float duration_ms = (end_us - start_us) / 1000.0f; - result = strings::StrCat( - PrintSizesBeforeAfter(optimized_item.graph, *optimized_graph), - ", time = ", duration_ms, "ms."); } - VLOG(4) << optimizer->name() << ": " << result; - OptimizerResult optimizer_result{optimizer->name(), result}; - optimization_result.results.push_back(optimizer_result); + Status status = RunOptimizer(optimizer.get(), cluster, &optimized_item, + optimized_graph, &optimization_result); + if (status.ok()) is_optimized = true; } } + // Run fusion optimizer if requested after all other optimizers since: 1) it + // doesn't need to be called more than once. 2) we don't want subsequent + // optimization passes to break the fusion clusters. We could potentially + // encapsulate the fusion clusters right away, but that will prevent a lot of + // optimizations from taking place since we don't have shape inference for + // functions, and we can't optimize across function boundaries. 
+ if (fusion_optimizer != nullptr) { + Status status = RunOptimizer(fusion_optimizer, cluster, &optimized_item, + optimized_graph, &optimization_result); + if (status.ok()) is_optimized = true; + } + + // ScopedAllocatorOptimizer must run last. + if (sa_optimizer != nullptr) { + Status status = RunOptimizer(sa_optimizer, cluster, &optimized_item, + optimized_graph, &optimization_result); + if (status.ok()) is_optimized = true; + } + // Record graph optimization result. optimization_results_.push_back(optimization_result); @@ -286,6 +275,35 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, return Status::OK(); } +Status MetaOptimizer::RunOptimizer( + GraphOptimizer* optimizer, Cluster* cluster, GrapplerItem* optimized_item, + GraphDef* optimized_graph, GraphOptimizationResult* optimization_result) { + uint64 start_us = Env::Default()->NowMicros(); + // This swaps the current optimized_graph into optimized item and + // resets optimized_graph to an empty graph. 
+ optimized_graph->Swap(&optimized_item->graph); + *optimized_graph = GraphDef(); + Status status = + optimizer->Optimize(cluster, *optimized_item, optimized_graph); + uint64 end_us = Env::Default()->NowMicros(); + + string result; + if (!status.ok()) { + optimized_graph->Swap(&optimized_item->graph); + result = status.ToString(); + } else { + float duration_ms = (end_us - start_us) / 1000.0f; + result = strings::StrCat( + PrintSizesBeforeAfter(optimized_item->graph, *optimized_graph), + ", time = ", duration_ms, "ms."); + } + VLOG(4) << optimizer->name() << ": " << result; + + OptimizerResult optimizer_result{optimizer->name(), result}; + optimization_result->results.push_back(optimizer_result); + return status; +} + Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { optimization_results_.clear(); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index e736dd174e..151a54cbdf 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -72,6 +72,10 @@ class MetaOptimizer : public GraphOptimizer { std::vector results; }; + Status RunOptimizer(GraphOptimizer* optimizer, Cluster* cluster, + GrapplerItem* optimized_item, GraphDef* optimized_graph, + GraphOptimizationResult* optimization_result); + std::vector optimization_results_; }; -- GitLab From a3c642c945b4a27e5d826eb9c9cbc07132cb2bba Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 1 Jun 2018 18:00:43 -0700 Subject: [PATCH 514/902] Remove use of absl::make_unique absl is not yet ready for use by open source TensorFlow. 
:-( PiperOrigin-RevId: 198952953 --- tensorflow/contrib/cloud/kernels/gcs_config_ops.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc index ef4998212e..648a219fb8 100644 --- a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc +++ b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/platform/cloud/curl_http_request.h" #include "tensorflow/core/platform/cloud/gcs_file_system.h" #include "tensorflow/core/platform/cloud/oauth_client.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace { @@ -96,7 +97,8 @@ class GcsCredentialsOpKernel : public OpKernel { errors::InvalidArgument("JSON format incompatible; did not find fields " "`refresh_token` or `private_key`.")); - auto provider = absl::make_unique(json, ctx->env()); + auto provider = + tensorflow::MakeUnique(json, ctx->env()); // Test getting a token string dummy_token; @@ -121,7 +123,7 @@ class GcsCredentialsOpKernel : public OpKernel { initial_retry_delay_usec_(initial_retry_delay_usec) {} ConstantAuthProvider(const Json::Value& json, Env* env) - : ConstantAuthProvider(json, absl::make_unique(), env, + : ConstantAuthProvider(json, tensorflow::MakeUnique(), env, kInitialRetryDelayUsec) {} ~ConstantAuthProvider() override {} -- GitLab From 6eb43fc26785c4835747a79b3d6a3e094ef1c60f Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 4 Jun 2018 12:05:14 -0700 Subject: [PATCH 515/902] Fix test user ops PiperOrigin-RevId: 199171316 --- tensorflow/tools/ci_build/builds/test_user_ops.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index c342367bac..25ecee4725 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ 
b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -239,8 +239,9 @@ function run_op() { fi } -run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))") -run_op $("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}))") " in eager mode" +run_op "$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; print(tf.Session('').run(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT})))")" +run_op "$("${PYTHON_BIN_PATH}" -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.load_op_library('./${USER_OP_SO}').${USER_OP}(${OP_INPUT}).numpy())")" " in eager mode" + popd -- GitLab From 0bb7c844dd4375d7f53c88a7eacf78b0d6552498 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 4 Jun 2018 12:08:15 -0700 Subject: [PATCH 516/902] Fix Python API. PiperOrigin-RevId: 199171845 --- tensorflow/contrib/lite/python/convert_saved_model.py | 4 ++-- .../contrib/lite/python/convert_saved_model_test.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/python/convert_saved_model.py b/tensorflow/contrib/lite/python/convert_saved_model.py index b952a72aab..5dad49f1ed 100644 --- a/tensorflow/contrib/lite/python/convert_saved_model.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -216,9 +216,9 @@ def set_tensor_shapes(tensors, shapes): """ if shapes: for tensor in tensors: - shape = shapes.get(tensor.name) + shape = shapes.get(tensor_name(tensor)) if shape is not None: - tensor.set_shape(shapes[tensor.name]) + tensor.set_shape(shape) def freeze_saved_model(saved_model_dir, input_arrays, input_shapes, diff --git a/tensorflow/contrib/lite/python/convert_saved_model_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py index 80e5dc6e46..1e570d2c89 100644 --- 
a/tensorflow/contrib/lite/python/convert_saved_model_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -73,10 +73,15 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) self.assertEqual([None, 3, 5], tensor.shape.as_list()) - convert_saved_model.set_tensor_shapes([tensor], - {"Placeholder:0": [5, 3, 5]}) + convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [5, 3, 5]}) self.assertEqual([5, 3, 5], tensor.shape.as_list()) + def testSetTensorShapeNoneValid(self): + tensor = array_ops.placeholder(dtype=dtypes.float32) + + convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [1, 3, 5]}) + self.assertEqual([1, 3, 5], tensor.shape.as_list()) + def testSetTensorShapeInvalid(self): tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) self.assertEqual([None, 3, 5], tensor.shape.as_list()) -- GitLab From bedf4eeb1361ef1483d9a0a6575f8c74d2eee572 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 4 Jun 2018 14:26:09 -0700 Subject: [PATCH 517/902] Fixing raspberry pi file for conflict. 
--- tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 3 --- .../tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++++ tools/bazel.rc | 6 ------ 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index cbd4a93e6d..4d1a30601e 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -102,9 +102,6 @@ bazel build -c opt ${PI_COPTS} \ --copt=-fomit-frame-pointer --cpu=armeabi \ --crosstool_top=@local_config_arm_compiler//:toolchain \ --verbose_failures \ - --distinct_host_configuration=true \ - //tensorflow:libtensorflow.so \ - //tensorflow:libtensorflow_framework.so \ //tensorflow/tools/benchmark:benchmark_model \ //tensorflow/tools/pip_package:build_pip_package diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 73520bb2ac..f4a0b232ec 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -73,6 +73,10 @@ if [[ "$release_build" != 1 ]]; then echo "build --define=override_eigen_strong_inline=true" >> "${TMP_BAZELRC}" fi +# The host and target platforms are the same in Windows build. So we don't have +# to distinct them. This helps avoid building the same targets twice. +echo "build --distinct_host_configuration=false" >> "${TMP_BAZELRC}" + echo "import %workspace%/${TMP_BAZELRC}" >> .bazelrc run_configure_for_cpu_build diff --git a/tools/bazel.rc b/tools/bazel.rc index 03aa52da1f..1c1e6afb65 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -1,14 +1,8 @@ -# By default, we don't distinct target and host platfroms. -# When doing cross compilation, use --config=cross_compile to distinct them. 
-build --distinct_host_configuration=false -build:cross_compile --distinct_host_configuration=true - # Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the # target CPU to build transient dependencies correctly. See # https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -build:android --config=cross_compile build:android_arm --config=android build:android_arm --cpu=armeabi-v7a build:android_arm --fat_apk_cpu=armeabi-v7a -- GitLab From fedfc47ca6713adbbf82e10d4803c5fe94234bbd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Jun 2018 21:37:43 -0700 Subject: [PATCH 518/902] Resolve device names when passed into DistributionStrategy methods. PiperOrigin-RevId: 199241723 --- .../contrib/distribute/python/combinations.py | 26 +++++++++---------- .../distribute/python/mirrored_strategy.py | 9 ++++--- .../contrib/distribute/python/values.py | 7 ++--- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index e400fa5be2..98e7228f24 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -46,9 +46,9 @@ import unittest from absl.testing import parameterized import six -from tensorflow.contrib.distribute.python import mirrored_strategy -from tensorflow.contrib.distribute.python import one_device_strategy -from tensorflow.contrib.distribute.python import tpu_strategy +from tensorflow.contrib.distribute.python import mirrored_strategy as mirrored_lib +from tensorflow.contrib.distribute.python import one_device_strategy as one_device_lib +from tensorflow.contrib.distribute.python import tpu_strategy as tpu_lib from tensorflow.contrib.optimizer_v2 import adam as adam_v2 from 
tensorflow.contrib.optimizer_v2 import gradient_descent as gradient_descent_v2 from tensorflow.python.eager import context @@ -289,9 +289,9 @@ class NamedObject(object): class NamedDistribution(object): """Translates DistributionStrategy and its data into a good name.""" - def __init__(self, name, distribution, required_gpus=None, + def __init__(self, name, distribution_fn, required_gpus=None, required_tpu=False): - self._distribution = distribution + self._distribution_fn = distribution_fn self._name = name self._required_gpus = required_gpus self._required_tpu = required_tpu @@ -301,7 +301,7 @@ class NamedDistribution(object): @property def strategy(self): - return self._distribution + return self._distribution_fn() @property def required_gpus(self): @@ -312,29 +312,29 @@ class NamedDistribution(object): return self._required_tpu +# pylint: disable=g-long-lambda default_strategy = NamedDistribution( "Default", - distribute_lib._default_distribution_strategy, # pylint: disable=protected-access + lambda: distribute_lib._default_distribution_strategy, # pylint: disable=protected-access required_gpus=None) one_device_strategy = NamedDistribution( - "OneDeviceCPU", one_device_strategy.OneDeviceStrategy("/cpu:0"), + "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"), required_gpus=None) tpu_strategy_single_iteration = NamedDistribution( "TPUSingleIteration", - tpu_strategy.TPUStrategy(iterations_per_step=1), + lambda: tpu_lib.TPUStrategy(iterations_per_step=1), required_tpu=True) -tpu_strategy = NamedDistribution( - "TPU", tpu_strategy.TPUStrategy(), required_tpu=True) +tpu_strategy = NamedDistribution("TPU", tpu_lib.TPUStrategy, required_tpu=True) # Note that we disable prefetching for testing since prefetching makes # the input non-deterministic. 
mirrored_strategy_with_gpu_and_cpu = NamedDistribution( "MirroredCPUAndGPU", - mirrored_strategy.MirroredStrategy( + lambda: mirrored_lib.MirroredStrategy( ["/gpu:0", "/cpu:0"], prefetch_on_device=False), required_gpus=1) mirrored_strategy_with_two_gpus = NamedDistribution( "Mirrored2GPUs", - mirrored_strategy.MirroredStrategy( + lambda: mirrored_lib.MirroredStrategy( ["/gpu:0", "/gpu:1"], prefetch_on_device=False), required_gpus=2) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 14dbbd6e27..6eadba976b 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -84,9 +84,8 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): assert len(set(devices)) == len(devices), ( "No duplicates allowed in `devices` argument.") # TODO(josh11b): Require at least 2 devices? - self._devices = devices - self._canonical_device_set = set( - [device_util.canonicalize(d) for d in devices]) + self._devices = [device_util.resolve(d) for d in devices] + self._canonical_device_set = set(self._devices) self._device_index = values.PerDevice( dict((d, i) for i, d in enumerate(devices))) self._cross_tower_ops = cross_tower_ops @@ -400,7 +399,9 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): # pylint: disable=protected-access return list(colocate_with._index.keys()) elif isinstance(colocate_with, six.string_types): - return [colocate_with] + return [device_util.resolve(colocate_with)] + elif isinstance(colocate_with, list): + return [device_util.resolve(d) for d in colocate_with] else: return colocate_with diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index 49b4e24daa..9572ade8e4 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -65,9 +65,10 @@ class DistributedValues(object): 
device = device_util.canonicalize(device) try: return self._index[device] - except KeyError: - raise ValueError("Device %s not found in %s (current device %s)" % - (device, self._index.keys(), device_util.current())) + except KeyError as e: + six.raise_from( + ValueError("Device %s not found in %s (current device %s)" % + (device, self._index.keys(), device_util.current())), e) def on_device(self, device): device = device_util.canonicalize(device) -- GitLab From d660ab0c392562be89f02400e492bd54a7f9d6b0 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Mon, 4 Jun 2018 22:09:11 -0700 Subject: [PATCH 519/902] [TF:XLA] Add method CreateNewModule to HloVerifiedTestBase, and remember all created modules, to verify at TearDown. PiperOrigin-RevId: 199244092 --- .../xla/service/algebraic_simplifier_test.cc | 47 +++++++++---------- .../xla/tests/hlo_verified_test_base.cc | 20 +++++--- .../xla/tests/hlo_verified_test_base.h | 16 ++++++- 3 files changed, 51 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index cda157f9fa..27eb48181e 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1714,7 +1714,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopPad) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1759,7 +1759,7 @@ TEST_F(AlgebraicSimplifierTest, NegativePadding) { EXPECT_THAT(computation->root_instruction(), op::Pad(param, zero)); EXPECT_TRUE(has_negative_padding(pad)); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Slice(op::Pad(param, 
zero))); EXPECT_FALSE( @@ -1781,7 +1781,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1804,7 +1804,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1932,7 +1932,8 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { b.AddInstruction(HloInstruction::CreateConvolve(out_shape, input, filter, window, dnums)); - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); auto* computation = module->AddEntryComputation(b.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/true, @@ -2060,7 +2061,7 @@ TEST_F(AlgebraicSimplifierTest, MaxMinToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2090,7 +2091,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2121,7 +2122,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxWithBroadcastToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - 
ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2151,7 +2152,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Maximum(param0, max_value), min_value)); @@ -2184,7 +2185,7 @@ TEST_F(AlgebraicSimplifierTest, MinEquationWithMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Add(op::Maximum(param0, max_value), max_value), @@ -2200,10 +2201,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { HloInstruction::CreateParameter(0, r0f32, "scalar_param")); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6, 7}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, scalar_param, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, scalar_param, {})); Shape slice_shape = ShapeUtil::MakeShape(F32, {2, 2, 3, 3}); HloInstruction* slice = builder.AddInstruction(HloInstruction::CreateSlice( @@ -2219,10 +2218,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); // Running simplification again should not result in any further changes. 
- ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_FALSE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(scalar_param)); @@ -2237,10 +2236,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { HloInstruction::CreateConstant(Literal::CreateR0(42.0f))); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, forty_two, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, forty_two, {})); HloInstruction* transpose = builder.AddInstruction(HloInstruction::CreateTranspose( @@ -2259,7 +2256,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(forty_two)); @@ -2268,7 +2265,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { // Test that ReduceWindow(Pad(op, x), y) can simplify to ReduceWindow(op, x). TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2349,7 +2347,8 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { // Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to // ReduceWindow(Convert(op), x). TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. 
+ auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2444,7 +2443,7 @@ TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); HloInstruction* root = computation->root_instruction(); EXPECT_EQ(a, root); diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index c8a05c2e9e..22c664d142 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -41,14 +41,17 @@ void HloVerifiedTestBase::TearDown() { << "TearDown called more than once; it should be called exactly once."; tear_down_called_ = true; if (module_) { - VerifyModule(); + VerifyModule(module_.get()); + } + for (int i = 0; i < modules_.size(); ++i) { + VerifyModule(modules_.at(i).get()); } HloTestBase::TearDown(); } -void HloVerifiedTestBase::VerifyModule() { - HloVerifier verifier; - xla::StatusOr mutated = verifier.Run(module_.get()); +void HloVerifiedTestBase::VerifyModule(HloModule* module) { + HloVerifier verifier(/*allow_mixed_precision=*/true); + xla::StatusOr mutated = verifier.Run(module); if (!mutated.ok()) { ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); } else { @@ -59,15 +62,20 @@ void HloVerifiedTestBase::VerifyModule() { HloModule& HloVerifiedTestBase::module() { if (!module_) { - module_ = CreateNewModule(); + module_ = HloTestBase::CreateNewModule(); } return *module_; } +HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) { + modules_.emplace_back(HloTestBase::CreateNewModule()); + return modules_.back().get(); +} + void HloVerifiedTestBase::ParseAndVerifyModule( tensorflow::StringPiece hlo_text) { CHECK(!module_) << "Called 
ParseModule when test already has a module."; TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); - VerifyModule(); + VerifyModule(module_.get()); } } // namespace xla diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h index e5bb14a883..5b59cc77f6 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -52,11 +52,23 @@ class HloVerifiedTestBase : public HloTestBase { shape_verifier_ = std::move(shape_verifier); } + // Creates a new module for a test, and stores it in modules_ so it can be + // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent + // creation of unverified modules. + HloModule* CreateNewModule(const string& name = TestName()); + + // It is confusing to store modules created by module() and CreateNewModule() + // in different fields, but it allows us to migrate tests to + // HloVerifiedTestBase more easily, so it's a win because we can verify more + // modules. See b/80488902. private: - std::unique_ptr module_; // Lazily populated. Access via module(). + // Lazily populated. Access via module(). + std::unique_ptr module_; + // Populated by calls to CreateNewModule. + std::vector> modules_; std::unique_ptr shape_verifier_; bool tear_down_called_ = false; - void VerifyModule(); + static void VerifyModule(HloModule* module); }; } // namespace xla -- GitLab From bf8d058ccaf30bc05bce5d4b13133d14aca42dfe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 01:00:50 -0700 Subject: [PATCH 520/902] Windows: Refactor bazel_test_lib.sh and common_env.sh - Removed workaround for https://github.com/bazelbuild/bazel/issues/2182 since it's fixed - Removed setting CUDA related environment variables. Assume they are already set. If not, configure.py will set default values for them. - Removed obsolete variables for cc_test targets. 
PiperOrigin-RevId: 199256482 --- .../ci_build/windows/bazel/bazel_test_lib.sh | 116 +----------------- .../ci_build/windows/bazel/common_env.sh | 5 - 2 files changed, 3 insertions(+), 118 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 582188fc00..a3e07737a4 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -14,130 +14,20 @@ # limitations under the License. # ============================================================================== # -# C++ tests -failing_cpu_cc_tests="\ - //tensorflow/core/kernels:control_flow_ops_test + \ - //tensorflow/core:example_example_parser_configuration_test + \ - //tensorflow/core:lib_core_status_test + \ - //tensorflow/core:lib_monitoring_collection_registry_test + \ - //tensorflow/core:lib_strings_numbers_test + \ - //tensorflow/core/platform/hadoop:hadoop_file_system_test + \ - //tensorflow/core:platform_file_system_test + \ - //tensorflow/core:platform_logging_test + \ - //tensorflow/core:util_sparse_sparse_tensor_test + \ - //tensorflow/cc:framework_gradient_checker_test + \ - //tensorflow/cc:framework_gradients_test + \ - //tensorflow/cc:gradients_array_grad_test + \ - //tensorflow/cc:gradients_math_grad_test + \ - //tensorflow/cc:gradients_nn_grad_test + \ - //tensorflow/cc/saved_model:loader_test \ -" - -broken_cpu_cc_tests="\ - //tensorflow/cc:framework_cc_ops_test + \ - //tensorflow/core/platform/cloud:time_util_test + \ - //tensorflow/core/platform/cloud:oauth_client_test + \ - //tensorflow/core/platform/cloud:http_request_test + \ - //tensorflow/core/platform/cloud:google_auth_provider_test + \ - //tensorflow/core/platform/cloud:gcs_file_system_test + \ - //tensorflow/core/kernels/cloud:bigquery_table_accessor_test + \ - //tensorflow/core/kernels/hexagon:graph_transferer_test + \ - 
//tensorflow/core/kernels:remote_fused_graph_execute_utils_test + \ - //tensorflow/core/kernels:requantize_op_test + \ - //tensorflow/core/kernels:requantization_range_op_test + \ - //tensorflow/core/kernels:quantized_reshape_op_test + \ - //tensorflow/core/kernels:quantized_pooling_ops_test + \ - //tensorflow/core/kernels:quantized_matmul_op_test + \ - //tensorflow/core/kernels:quantized_conv_ops_test + \ - //tensorflow/core/kernels:quantized_concat_op_test + \ - //tensorflow/core/kernels:quantized_bias_add_op_test + \ - //tensorflow/core/kernels:quantized_batch_norm_op_test + \ - //tensorflow/core/kernels:quantized_activation_ops_test + \ - //tensorflow/core/kernels:quantize_op_test + \ - //tensorflow/core/kernels:quantize_down_and_shrink_range_op_test + \ - //tensorflow/core/kernels:quantize_and_dequantize_op_test_gpu + \ - //tensorflow/core/kernels:quantize_and_dequantize_op_test + \ - //tensorflow/core/kernels:quantization_utils_test + \ - //tensorflow/core/kernels:debug_ops_test + \ - //tensorflow/core/distributed_runtime/rpc:rpc_rendezvous_mgr_test_gpu + \ - //tensorflow/core/distributed_runtime/rpc:rpc_rendezvous_mgr_test + \ - //tensorflow/core/distributed_runtime/rpc:grpc_tensor_coding_test + \ - //tensorflow/core/distributed_runtime/rpc:grpc_session_test_gpu + \ - //tensorflow/core/distributed_runtime/rpc:grpc_session_test + \ - //tensorflow/core/distributed_runtime/rpc:grpc_channel_test_gpu + \ - //tensorflow/core/distributed_runtime/rpc:grpc_channel_test + \ - //tensorflow/core/distributed_runtime:remote_device_test_gpu + \ - //tensorflow/core/distributed_runtime:remote_device_test + \ - //tensorflow/core/distributed_runtime:executor_test_gpu + \ - //tensorflow/core/distributed_runtime:executor_test + \ - //tensorflow/core/debug:debug_gateway_test + \ - //tensorflow/core/debug:debug_grpc_io_utils_test + \ - //tensorflow/core:util_reporter_test + \ - //tensorflow/core:util_memmapped_file_system_test + \ - //tensorflow/core:platform_subprocess_test + \ - 
//tensorflow/core:platform_profile_utils_cpu_utils_test + \ - //tensorflow/core:lib_jpeg_jpeg_mem_unittest + \ - //tensorflow/core/debug:debug_io_utils_test \ -" - -# lib_core_threadpool_test is timeout, but it passes when running alone -extra_failing_gpu_cc_tests="\ - //tensorflow/core:lib_core_threadpool_test + \ - //tensorflow/core:cuda_libdevice_path_test + \ - //tensorflow/core:common_runtime_direct_session_test + \ - //tensorflow/core:common_runtime_direct_session_with_tracking_alloc_test + \ - //tensorflow/core:device_tracer_test + \ - //tensorflow/core:ops_math_grad_test \ -" - -exclude_cpu_cc_tests="${failing_cpu_cc_tests} + ${broken_cpu_cc_tests}" - -exclude_gpu_cc_tests="${extra_failing_gpu_cc_tests} + ${exclude_cpu_cc_tests}" function run_configure_for_cpu_build { - # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182 - # yes "" | ./configure doesn't work on Windows, so we set all the - # environment variables in advance to avoid interact with the script. - export TF_NEED_CUDA=0 - if [ -z "$TF_ENABLE_XLA" ]; then - export TF_ENABLE_XLA=0 - fi - if [ -z "$TF_NEED_MKL" ]; then - export TF_NEED_MKL=0 - fi - export TF_NEED_VERBS=0 - export TF_NEED_GCP=1 - export TF_NEED_HDFS=0 - export TF_NEED_OPENCL_SYCL=0 - echo "" | ./configure + yes "" | ./configure } function run_configure_for_gpu_build { - # Due to a bug in Bazel: https://github.com/bazelbuild/bazel/issues/2182 - # yes "" | ./configure doesn't work on Windows, so we set all the - # environment variables in advance to avoid interact with the script. 
+ # Enable CUDA support export TF_NEED_CUDA=1 - export TF_CUDA_VERSION=9.0 - export CUDA_TOOLKIT_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0" - export TF_CUDNN_VERSION=7.0 - if [ -z "$CUDNN_INSTALL_PATH" ]; then - export CUDNN_INSTALL_PATH="C:/tools/cuda" - fi - export TF_CUDA_COMPUTE_CAPABILITIES="3.7" - if [ -z "$TF_ENABLE_XLA" ]; then - export TF_ENABLE_XLA=0 - fi - export TF_NEED_VERBS=0 - export TF_NEED_MKL=0 - export TF_NEED_GCP=0 - export TF_NEED_HDFS=0 - export TF_NEED_OPENCL_SYCL=0 # TODO(pcloudy): Remove this after TensorFlow uses its own CRSOOTOOL # for GPU build on Windows export USE_MSVC_WRAPPER=1 - echo "" | ./configure + yes "" | ./configure } function set_gcs_remote_cache_options { diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh index 0e6c0227b7..eefa8ee2d5 100644 --- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh +++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh @@ -49,8 +49,3 @@ export PATH="/c/Program Files/Git/cmd:$PATH" # Make sure we have pip in PATH export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH" - -# Add Cuda and Cudnn dll directories into PATH -export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0/bin:$PATH" -export PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0/extras/CUPTI/libx64:$PATH" -export PATH="/c/tools/cuda/bin:$PATH" -- GitLab From 540333664e90cd64afd99df24bda374368682a60 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 5 Jun 2018 01:57:19 -0700 Subject: [PATCH 521/902] Added missing backtick in tf.ones_like documentation PiperOrigin-RevId: 199262414 --- tensorflow/python/ops/array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 3c4946ae5f..8129334703 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1623,7 +1623,7 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): Args: tensor: A `Tensor`. dtype: A type for the returned `Tensor`. Must be `float32`, `float64`, - `int8`, `uint8`, `int16`, `uint16`, int32`, `int64`, + `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`, `complex64`, `complex128` or `bool`. name: A name for the operation (optional). optimize: if true, attempt to statically determine the shape of 'tensor' -- GitLab From 92789d7a76cfd599c597d4639135241ff9988ef0 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Tue, 5 Jun 2018 03:56:47 -0700 Subject: [PATCH 522/902] Handle scalar input to assert_equal in eager. 
PiperOrigin-RevId: 199274329 --- tensorflow/python/kernel_tests/check_ops_test.py | 7 +++++++ tensorflow/python/ops/check_ops.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 5a83ec8d30..7ef841c96b 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -88,6 +88,13 @@ class AssertEqualTest(test.TestCase): out = array_ops.identity(small) self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() + def test_scalar_comparison(self): + const_true = constant_op.constant(True, name="true") + const_false = constant_op.constant(False, name="false") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(const_true, const_false, message="fail") + def test_returns_none_with_eager(self): with context.eager_mode(): small = constant_op.constant([1, 2], name="small") diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index cabc1e724c..375a5ec2c3 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -341,8 +341,8 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): y_sum, y_np[:y_sum])) index_and_values_str = '' - if x.shape == y.shape: - # If the shapes of x and y are the same, + if x.shape == y.shape and x.shape.as_list(): + # If the shapes of x and y are the same (and not scalars), # Get the values that actually differed and their indices. # If shapes are different this information is more confusing # than useful. -- GitLab From 22a8c240d59a173ff3f17ffda05b521aa3f222de Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 5 Jun 2018 07:27:58 -0700 Subject: [PATCH 523/902] Remove test dependencies that are no longer needed. 
PiperOrigin-RevId: 199293694 --- .../contrib/autograph/converters/control_flow_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/control_flow_test.py b/tensorflow/contrib/autograph/converters/control_flow_test.py index 1a863590f9..9d23d9b5b7 100644 --- a/tensorflow/contrib/autograph/converters/control_flow_test.py +++ b/tensorflow/contrib/autograph/converters/control_flow_test.py @@ -42,7 +42,7 @@ class ControlFlowTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {}) node = control_flow.transform(node, self.ctx) - with self.compiled(node, control_flow_ops.while_loop) as result: + with self.compiled(node) as result: with self.test_session() as sess: self.assertEqual((10, 5, 5), sess.run(result.test_fn(constant_op.constant(5)))) @@ -57,7 +57,7 @@ class ControlFlowTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {}) node = control_flow.transform(node, self.ctx) - with self.compiled(node, control_flow_ops.while_loop) as result: + with self.compiled(node) as result: with self.test_session() as sess: self.assertEqual(0, sess.run(result.test_fn(constant_op.constant(5)))) @@ -75,7 +75,7 @@ class ControlFlowTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {}) node = control_flow.transform(node, self.ctx) - with self.compiled(node, control_flow_ops.cond) as result: + with self.compiled(node) as result: with self.test_session() as sess: self.assertEqual((-1, 0), sess.run(result.test_fn(constant_op.constant(1)))) @@ -92,7 +92,7 @@ class ControlFlowTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {}) node = control_flow.transform(node, self.ctx) - with self.compiled(node, control_flow_ops.cond) as result: + with self.compiled(node) as result: with self.test_session() as sess: self.assertEqual(-1, sess.run(result.test_fn(constant_op.constant(1)))) -- GitLab From c0dc76a3994c743151404b1401599fefb9f37dd4 Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 07:54:24 -0700 Subject: [PATCH 524/902] Fix generated_zip_test failure caused by regex matching failures. PiperOrigin-RevId: 199296333 --- .../testing/generated_examples_zip_test.cc | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 2f069ff8e7..e85020448a 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -48,7 +48,7 @@ tensorflow::Env* env = tensorflow::Env::Default(); // TODO(ahentz): make sure we clean this list up frequently. std::map kBrokenTests = { // Add only supports float32. (and "constant" tests use Add) - {R"(^\/adda.*int32)", "68808744"}, + {R"(^\/add_a.*int32)", "68808744"}, {R"(^\/constant.*int32)", "68808744"}, {R"(^\/mul.*int32)", "68808744"}, {R"(^\/div.*int32)", "68808744"}, @@ -61,25 +61,25 @@ std::map kBrokenTests = { "70527055"}, // L2Norm only supports tensors with 4D or fewer. - {R"(^\/l2normdim=.*,epsilon=.*,input_shape=\[.,.,.,.,.*\])", "67963684"}, + {R"(^\/l2norm_dim=.*,epsilon=.*,input_shape=\[.,.,.,.,.*\])", "67963684"}, // SpaceToBatchND only supports 4D tensors. {R"(^\/space_to_batch_nd.*input_shape=\[1,4,4,4,1,1\])", "70848787"}, // L2Norm only works for dim=-1. 
- {R"(^\/l2normdim=-2,epsilon=.*,input_shape=\[.,.\])", "67963812"}, - {R"(^\/l2normdim=0,epsilon=.*,input_shape=\[.,.\])", "67963812"}, - {R"(^\/l2normdim=-2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, - {R"(^\/l2normdim=-2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, - {R"(^\/l2normdim=2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, - {R"(^\/l2normdim=2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, - {R"(^\/l2normdim=0,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, - {R"(^\/l2normdim=0,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, - {R"(^\/l2normdim=1,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, - {R"(^\/l2normdim=1,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, - {R"(^\/l2normdim=\[2,3\],epsilon=.*,input_shape=\[3,15,14,3\])", + {R"(^\/l2norm_dim=-2,epsilon=.*,input_shape=\[.,.\])", "67963812"}, + {R"(^\/l2norm_dim=0,epsilon=.*,input_shape=\[.,.\])", "67963812"}, + {R"(^\/l2norm_dim=-2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(^\/l2norm_dim=-2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(^\/l2norm_dim=2,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(^\/l2norm_dim=2,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(^\/l2norm_dim=0,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(^\/l2norm_dim=0,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(^\/l2norm_dim=1,epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, + {R"(^\/l2norm_dim=1,epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(^\/l2norm_dim=\[2,3\],epsilon=.*,input_shape=\[3,15,14,3\])", "67963812"}, - {R"(^\/l2normdim=\[2,3\],epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, + {R"(^\/l2norm_dim=\[2,3\],epsilon=.*,input_shape=\[1,3,4,3\])", "67963812"}, // ResizeBilinear looks completely incompatible with Tensorflow {R"(^\/resize_bilinear.*dtype=tf.int32)", "72401107"}, -- GitLab From 274f9510f68f237589df5c6a414e4b8e5ebcdba1 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 
5 Jun 2018 08:13:07 -0700 Subject: [PATCH 525/902] Remove _USE_C_API staging from ops.py. PiperOrigin-RevId: 199298594 --- .../copy_graph/python/util/copy_elements.py | 1 - tensorflow/contrib/graph_editor/transform.py | 5 +- tensorflow/python/framework/ops.py | 544 +++++------------- tensorflow/python/framework/ops_test.py | 3 - 4 files changed, 160 insertions(+), 393 deletions(-) diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py index 102bc460fd..a0dd3881a8 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py @@ -218,7 +218,6 @@ def copy_op_to_graph(org_instance, to_graph, variables, scope=''): new_control_inputs, input_types, new_original_op, op_def) #Use Graph's hidden methods to add the op - to_graph._add_op(new_op) # pylint: disable=protected-access to_graph._record_op_seen_by_control_dependencies(new_op) for device_function in reversed(to_graph._device_function_stack): new_op._set_device(device_function(new_op)) diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 592d37b432..026a3d1200 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -189,9 +189,6 @@ def copy_op_handler(info, op, new_inputs, copy_shape=True, nodedef_fn=None): if op._original_op: op_._original_op = op._original_op - # Add op to the graph - info.graph_._add_op(op_) - return op_, op_.outputs @@ -492,7 +489,7 @@ class Transformer(object): t_ = info.transformed_ts[t] consumer_op_ = info.transformed_ops[consumer_op] t_index_ = list(consumer_op_.inputs).index(tmp_t_) - consumer_op_._update_input(t_index_, t_, update_dtype=False) # pylint: disable=protected-access + consumer_op_._update_input(t_index_, t_) # pylint: disable=protected-access def _connect_control_inputs(self, info): """Connect the previously copied 
ops.""" diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index eceea5276a..b2fd98f431 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -56,6 +56,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import compat from tensorflow.python.util import decorator_utils from tensorflow.python.util import tf_contextlib +from tensorflow.python.util.deprecation import deprecated_args from tensorflow.python.util.tf_export import tf_export @@ -288,15 +289,8 @@ class Tensor(_TensorLike): self._value_index = value_index self._dtype = dtypes.as_dtype(dtype) - if _USE_C_API: - # This will be set by set_shape_and_handle_data_for_outputs. - self._shape_val = None - else: - # The Python code requires all tensors start with a shape to support shape - # inference on imported while loops. This isn't necessary with the C API - # enabled because the C API provides the shapes for imported nodes. - # TODO(skyewm): remove when _USE_C_API is removed. - self._shape_val = tensor_shape.unknown_shape() + # This will be set by self.shape(). + self._shape_val = None # List of operations that use this Tensor as input. We maintain this list # to easily navigate a computation graph. @@ -384,7 +378,6 @@ class Tensor(_TensorLike): if _USE_C_SHAPES: self._shape_val = self._c_api_shape() else: - assert _USE_C_API # Call set_shape_and_handle_data_for_outputs in topological order on all # ops that are needed to compute self.op's shape. We do this instead of # having set_shape_and_handle_data_for_outputs recursively call @@ -508,8 +501,6 @@ class Tensor(_TensorLike): else: self._shape_val = self.shape.merge_with(shape) - if not self._op._graph._c_graph: return - # Update C shape even if _USE_C_SHAPES = False, since we still want # set_shape to be reflected in the C API graph for when we run it. 
if not isinstance(shape, tensor_shape.TensorShape): @@ -545,33 +536,14 @@ class Tensor(_TensorLike): Returns: A list of `Operation`s. """ - if self._op._c_op: # pylint: disable=protected-access - consumer_names = c_api.TF_OperationOutputConsumers_wrapper( - self._as_tf_output()) - # pylint: disable=protected-access - return [ - self.graph._get_operation_by_name_unsafe(name) - for name in consumer_names - ] - # pylint: enable=protected-access - else: - return self._consumers - - def _add_consumer(self, consumer): - """Add a consumer to this tensor. - - Args: - consumer: an Operation. - - Raises: - TypeError: if the consumer is not an Operation. - """ + consumer_names = c_api.TF_OperationOutputConsumers_wrapper( + self._as_tf_output()) # pylint: disable=protected-access - assert not self._op._c_op, "Tensor._add_consumer doesn't work with C API" + return [ + self.graph._get_operation_by_name_unsafe(name) + for name in consumer_names + ] # pylint: enable=protected-access - if not isinstance(consumer, Operation): - raise TypeError("Consumer must be an Operation: %s" % consumer) - self._consumers.append(consumer) def _as_node_def_input(self): """Return a value to use for the NodeDef "input" attribute. @@ -594,7 +566,6 @@ class Tensor(_TensorLike): def _as_tf_output(self): # pylint: disable=protected-access - assert self.op._c_op return c_api_util.tf_output(self.op._c_op, self.value_index) # pylint: enable=protected-access @@ -1722,18 +1693,8 @@ class Operation(object): "a Tensor, or IndexedSlices: %s" % c) control_input_ops.append(control_op) - # Don't set private fields with C API enabled to catch users who need to - # switch to public API. - # TODO(skyewm): delete these fields once we remove _USE_C_API - if not self._graph._c_graph: - self._inputs_val = list(inputs) # Defensive copy. 
- self._input_types_val = input_types - self._control_inputs_val = control_input_ops - self._node_def_val = copy.deepcopy(node_def) - self._op_def_val = op_def - else: - # This will be set by self.inputs. - self._inputs_val = None + # This will be set by self.inputs. + self._inputs_val = None self._id_value = self._graph._next_id() # pylint: disable=protected-access self._original_op = original_op @@ -1742,10 +1703,8 @@ class Operation(object): # Initialize self._c_op. if c_op: - # TODO(skyewm): remove this assert when we remove USE_C_API - assert self._graph._c_graph # pylint: disable=protected-access self._c_op = c_op - elif self._graph._c_graph: # pylint: disable=protected-access + else: if op_def is None: op_def = self._graph._get_op_def(node_def.op) # TODO(skyewm): op_def_library.apply_op() flattens the incoming inputs. @@ -1754,30 +1713,19 @@ class Operation(object): op_def, inputs, node_def.attr) self._c_op = _create_c_op(self._graph, node_def, grouped_inputs, control_input_ops) - else: - self._c_op = None - - # Mark that we consume the inputs. This is unnecessary and unsupported with - # the C API enabled, since the C API tracks the tensor consumers instead. - if not self._c_op: - for input_tensor in self._inputs_val: - input_tensor._add_consumer(self) # pylint: disable=protected-access # Initialize self._outputs. 
- if self._c_op: - num_outputs = c_api.TF_OperationNumOutputs(self._c_op) - output_types = [ - c_api.TF_OperationOutputType(c_api_util.tf_output(self._c_op, i)) - for i in range(num_outputs)] - assert output_types is not None - elif output_types is None: - output_types = [] - self._output_types_val = output_types + num_outputs = c_api.TF_OperationNumOutputs(self._c_op) + output_types = [ + c_api.TF_OperationOutputType(c_api_util.tf_output(self._c_op, i)) + for i in range(num_outputs)] self._outputs = [ Tensor(self, i, output_type) for i, output_type in enumerate(output_types) ] + self._graph._add_op(self) # pylint: disable=protected-access + if not c_op: self._control_flow_post_processing() @@ -1791,7 +1739,6 @@ class Operation(object): control_flow_util.CheckInputFromValidContext(self, input_tensor.op) if self._control_flow_context is not None: self._control_flow_context.AddOp(self) - self._recompute_node_def() def _reconstruct_sequence_inputs(self, op_def, inputs, attrs): """Regroups a flat list of input tensors into scalar and sequence inputs. @@ -1872,10 +1819,7 @@ class Operation(object): @property def name(self): """The full name of this operation.""" - if self._c_op: - return c_api.TF_OperationName(self._c_op) - else: - return self._node_def_val.name + return c_api.TF_OperationName(self._c_op) @property def _id(self): @@ -1891,10 +1835,7 @@ class Operation(object): assigned, or an empty string if it has not been assigned to a device. """ - if self._c_op: - return c_api.TF_OperationDevice(self._c_op) - else: - return self._node_def_val.device + return c_api.TF_OperationDevice(self._c_op) @property def _output_types(self): @@ -1907,28 +1848,21 @@ class Operation(object): The length of this list indicates the number of output endpoints of the operation. 
""" - if self._c_op: - num_outputs = c_api.TF_OperationNumOutputs(self._c_op) - output_types = [ - c_api.TF_OperationOutputType(self._tf_output(i)) - for i in xrange(num_outputs) - ] - # TODO(iga): Remove this assert after converting to C API by default. - # Just being a bit paranoid here. - assert self._output_types_val == output_types - # In all the tests we have output_types that are passed into - # Operation.__init__ are a list of ints (which is illegal according - # to the docstring), but input_types are instances of DType. - # This extra assert is to catch if we ever use DType for output_types. - if output_types: - assert isinstance(output_types[0], int) - return output_types - else: - return self._output_types_val + num_outputs = c_api.TF_OperationNumOutputs(self._c_op) + output_types = [ + c_api.TF_OperationOutputType(self._tf_output(i)) + for i in xrange(num_outputs) + ] + # In all the tests we have output_types that are passed into + # Operation.__init__ are a list of ints (which is illegal according + # to the docstring), but input_types are instances of DType. + # This extra assert is to catch if we ever use DType for output_types. + if output_types: + assert isinstance(output_types[0], int) + return output_types def _tf_output(self, output_idx): """Create and return a new TF_Output for output_idx'th output of this op.""" - assert self._c_op tf_output = c_api.TF_Output() tf_output.oper = self._c_op tf_output.index = output_idx @@ -1936,7 +1870,6 @@ class Operation(object): def _tf_input(self, input_idx): """Create and return a new TF_Input for input_idx'th input of this op.""" - assert self._c_op tf_input = c_api.TF_Input() tf_input.oper = self._c_op tf_input.index = input_idx @@ -1948,47 +1881,12 @@ class Operation(object): Args: device: string or device.. The device to set. 
""" - if self._c_op: - c_api.SetRequestedDevice( - self._graph._c_graph, # pylint: disable=protected-access - self._c_op, # pylint: disable=protected-access - compat.as_str(_device_string(device))) - else: - self._node_def_val.device = _device_string(device) - - def _add_input(self, tensor, dtype=None): - """Add a new input to this operation. - - Args: - tensor: the Tensor to add as an input. - dtype: tf.DType: type of the input; defaults to - the tensor's dtype. + c_api.SetRequestedDevice( + self._graph._c_graph, # pylint: disable=protected-access + self._c_op, # pylint: disable=protected-access + compat.as_str(_device_string(device))) - Raises: - TypeError: if tensor is not a Tensor, - or if input tensor type is not convertible to dtype. - ValueError: if the Tensor is from a different graph. - """ - assert not self._c_op, ( - "Operation._add_input doesn't work with C API") - if not isinstance(tensor, Tensor): - raise TypeError("tensor must be a Tensor: %s" % tensor) - _assert_same_graph(self, tensor) - if dtype is None: - dtype = tensor.dtype - else: - dtype = dtypes.as_dtype(dtype) - if not dtype.is_compatible_with(tensor.dtype): - raise TypeError( - "Cannot convert a tensor of type %s to an input of type %s" % - (tensor.dtype.name, dtype.name)) - self._inputs_val.append(tensor) - self._input_types_val.append(dtype) - tensor._add_consumer(self) # pylint: disable=protected-access - self._recompute_node_def() - - # TODO(skyewm): Remove `update_dtype` when we enable the C API. - def _update_input(self, index, tensor, update_dtype=True): + def _update_input(self, index, tensor): """Update the input to this operation at the given index. NOTE: This is for TF internal use only. Please don't use it. @@ -1996,7 +1894,6 @@ class Operation(object): Args: index: the index of the input to update. tensor: the Tensor to be used as the input at the given index. - update_dtype: If `False`, the type for this input is not updated. 
Raises: TypeError: if tensor is not a Tensor, @@ -2013,20 +1910,12 @@ class Operation(object): if not _USE_C_SHAPES: set_shape_and_handle_data_for_outputs(self) - if self._c_op: - # Reset cached inputs. - self._inputs_val = None - c_api.UpdateEdge( - self._graph._c_graph, # pylint: disable=protected-access - tensor._as_tf_output(), # pylint: disable=protected-access - self._tf_input(index)) - else: - self._inputs_val[index].consumers().remove(self) - self._inputs_val[index] = tensor - if update_dtype: - self._input_types_val[index] = tensor.dtype - tensor._add_consumer(self) # pylint: disable=protected-access - self._recompute_node_def() + # Reset cached inputs. + self._inputs_val = None + c_api.UpdateEdge( + self._graph._c_graph, # pylint: disable=protected-access + tensor._as_tf_output(), # pylint: disable=protected-access + self._tf_input(index)) def _add_control_inputs(self, ops): """Add a list of new control inputs to this operation. @@ -2038,19 +1927,10 @@ class Operation(object): TypeError: if ops is not a list of Operations. ValueError: if any op in ops is from a different graph. """ - if self._c_op: - for op in ops: - if not isinstance(op, Operation): - raise TypeError("op must be an Operation: %s" % op) - c_api.AddControlInput(self._graph._c_graph, self._c_op, op._c_op) # pylint: disable=protected-access - else: - if ops: - for op in ops: - if not isinstance(op, Operation): - raise TypeError("op must be an Operation: %s" % op) - _assert_same_graph(self, op) - self._control_inputs_val.append(op) - self._recompute_node_def() + for op in ops: + if not isinstance(op, Operation): + raise TypeError("op must be an Operation: %s" % op) + c_api.AddControlInput(self._graph._c_graph, self._c_op, op._c_op) # pylint: disable=protected-access def _add_control_input(self, op): """Add a new control input to this operation. @@ -2062,33 +1942,13 @@ class Operation(object): TypeError: if op is not an Operation. ValueError: if op is from a different graph. 
""" - if self._c_op: - if not isinstance(op, Operation): - raise TypeError("op must be an Operation: %s" % op) - c_api.AddControlInput(self._graph._c_graph, self._c_op, op._c_op) # pylint: disable=protected-access - else: - self._add_control_inputs([op]) + if not isinstance(op, Operation): + raise TypeError("op must be an Operation: %s" % op) + c_api.AddControlInput(self._graph._c_graph, self._c_op, op._c_op) # pylint: disable=protected-access def _remove_all_control_inputs(self): """Removes any control inputs to this operation.""" - if self._c_op: - c_api.RemoveAllControlInputs(self._graph._c_graph, self._c_op) # pylint: disable=protected-access - else: - del self.control_inputs[:] - - # Methods below are used when building the NodeDef and Graph proto. - def _recompute_node_def(self): - # TODO(skyewm): remove this function when we switch to C API - if self._c_op: return - - del self._node_def_val.input[:] - # pylint: disable=protected-access - self._node_def_val.input.extend( - [t._as_node_def_input() for t in self._inputs_val]) - # pylint: enable=protected-access - if self._control_inputs_val: - self._node_def_val.input.extend( - ["^%s" % op.name for op in self._control_inputs_val]) + c_api.RemoveAllControlInputs(self._graph._c_graph, self._c_op) # pylint: disable=protected-access def __str__(self): return str(self.node_def) @@ -2129,19 +1989,16 @@ class Operation(object): @property def inputs(self): """The list of `Tensor` objects representing the data inputs of this op.""" - if self._c_op: - if self._inputs_val is None: - tf_outputs = c_api.GetOperationInputs(self._c_op) - # pylint: disable=protected-access - retval = [ - self.graph._get_tensor_by_tf_output(tf_output) - for tf_output in tf_outputs - ] - # pylint: enable=protected-access - self._inputs_val = Operation._InputList(retval) - return self._inputs_val - else: - return Operation._InputList(self._inputs_val) + if self._inputs_val is None: + tf_outputs = c_api.GetOperationInputs(self._c_op) + # pylint: 
disable=protected-access + retval = [ + self.graph._get_tensor_by_tf_output(tf_output) + for tf_output in tf_outputs + ] + # pylint: enable=protected-access + self._inputs_val = Operation._InputList(retval) + return self._inputs_val @property def _inputs(self): @@ -2155,15 +2012,12 @@ class Operation(object): @property def _input_types(self): - if self._c_op: - num_inputs = c_api.TF_OperationNumInputs(self._c_op) - input_types = [ - dtypes.as_dtype(c_api.TF_OperationInputType(self._tf_input(i))) - for i in xrange(num_inputs) - ] - return input_types - else: - return self._input_types_val + num_inputs = c_api.TF_OperationNumInputs(self._c_op) + input_types = [ + dtypes.as_dtype(c_api.TF_OperationInputType(self._tf_input(i))) + for i in xrange(num_inputs) + ] + return input_types @_input_types.setter def _input_types(self, value): @@ -2183,16 +2037,13 @@ class Operation(object): A list of `Operation` objects. """ - if self._c_op: - control_c_ops = c_api.TF_OperationGetControlInputs_wrapper(self._c_op) - # pylint: disable=protected-access - return [ - self.graph._get_operation_by_name_unsafe( - c_api.TF_OperationName(c_op)) for c_op in control_c_ops - ] - # pylint: enable=protected-access - else: - return self._control_inputs_val + control_c_ops = c_api.TF_OperationGetControlInputs_wrapper(self._c_op) + # pylint: disable=protected-access + return [ + self.graph._get_operation_by_name_unsafe( + c_api.TF_OperationName(c_op)) for c_op in control_c_ops + ] + # pylint: enable=protected-access @property def _control_outputs(self): @@ -2205,18 +2056,13 @@ class Operation(object): A list of `Operation` objects. """ - if self._c_op: - control_c_ops = c_api.TF_OperationGetControlOutputs_wrapper(self._c_op) - # pylint: disable=protected-access - return [ - self.graph._get_operation_by_name_unsafe( - c_api.TF_OperationName(c_op)) for c_op in control_c_ops - ] - # pylint: enable=protected-access - else: - # TODO(apassos) this should be less inefficient. 
- return [o for o in self._graph.get_operations() - if self in o.control_inputs] + control_c_ops = c_api.TF_OperationGetControlOutputs_wrapper(self._c_op) + # pylint: disable=protected-access + return [ + self.graph._get_operation_by_name_unsafe( + c_api.TF_OperationName(c_op)) for c_op in control_c_ops + ] + # pylint: enable=protected-access @property def _control_inputs(self): @@ -2240,11 +2086,7 @@ class Operation(object): @property def type(self): """The type of the op (e.g. `"MatMul"`).""" - if self._c_op: - op_type = c_api.TF_OperationOpType(self._c_op) - return op_type - else: - return self._node_def_val.op + return c_api.TF_OperationOpType(self._c_op) @property def graph(self): @@ -2262,15 +2104,12 @@ class Operation(object): protocol buffer. """ # pylint: enable=line-too-long - if self._c_op: - with c_api_util.tf_buffer() as buf: - c_api.TF_OperationToNodeDef(self._c_op, buf) - data = c_api.TF_GetBuffer(buf) - node_def = node_def_pb2.NodeDef() - node_def.ParseFromString(compat.as_bytes(data)) - return node_def - else: - return self._node_def_val + with c_api_util.tf_buffer() as buf: + c_api.TF_OperationToNodeDef(self._c_op, buf) + data = c_api.TF_GetBuffer(buf) + node_def = node_def_pb2.NodeDef() + node_def.ParseFromString(compat.as_bytes(data)) + return node_def @property def _node_def(self): @@ -2289,10 +2128,7 @@ class Operation(object): protocol buffer. 
""" # pylint: enable=line-too-long - if self._c_op: - return self._graph._get_op_def(self.type) - else: - return self._op_def_val + return self._graph._get_op_def(self.type) @property def _op_def(self): @@ -2318,17 +2154,14 @@ class Operation(object): def _set_attr(self, attr_name, attr_value): """Private method used to set an attribute in the node_def.""" - if self._c_op: - buf = c_api.TF_NewBufferFromString( - compat.as_bytes(attr_value.SerializeToString())) - try: - # pylint: disable=protected-access - c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf) - # pylint: enable=protected-access - finally: - c_api.TF_DeleteBuffer(buf) - else: - self._node_def_val.attr[attr_name].CopyFrom(attr_value) + buf = c_api.TF_NewBufferFromString( + compat.as_bytes(attr_value.SerializeToString())) + try: + # pylint: disable=protected-access + c_api.SetAttr(self._graph._c_graph, self._c_op, attr_name, buf) + # pylint: enable=protected-access + finally: + c_api.TF_DeleteBuffer(buf) def get_attr(self, name): """Returns the value of the attr of this op with the given `name`. @@ -2343,21 +2176,15 @@ class Operation(object): ValueError: If this op does not have an attr with the given `name`. """ fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] - if self._c_op: - try: - with c_api_util.tf_buffer() as buf: - c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf) - data = c_api.TF_GetBuffer(buf) - except errors.InvalidArgumentError as e: - # Convert to ValueError for backwards compatibility. 
- raise ValueError(str(e)) - x = attr_value_pb2.AttrValue() - x.ParseFromString(data) - else: - if name not in self._node_def_val.attr: - raise ValueError( - "No attr named '" + name + "' in " + str(self._node_def_val)) - x = self._node_def_val.attr[name] + try: + with c_api_util.tf_buffer() as buf: + c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf) + data = c_api.TF_GetBuffer(buf) + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) + x = attr_value_pb2.AttrValue() + x.ParseFromString(data) # Treat an empty oneof value as an empty list. if not x.WhichOneof("value"): @@ -2577,9 +2404,9 @@ def _set_shape_and_handle_data_for_outputs_c_api(op): def set_shape_and_handle_data_for_outputs(op): """Set the shapes and resource handle data for op's outputs. - When _USE_C_API = True, this is lazily called when a tensor's shape is first - requested. Usually this should work automatically, but some edge cases may - require manually calling this first to make sure Tensor._shape_val and + When _USE_C_SHAPES = False, this is lazily called when a tensor's shape is + first requested. Usually this should work automatically, but some edge cases + may require manually calling this first to make sure Tensor._shape_val and Tensor._handle_data are set (e.g. manually overriding _handle_data, copying a Tensor). """ @@ -3083,15 +2910,12 @@ class Graph(object): A `VersionDef`. 
""" # pylint: enable=line-too-long - if self._c_graph: - with c_api_util.tf_buffer() as buf: - c_api.TF_GraphVersions(self._c_graph, buf) - data = c_api.TF_GetBuffer(buf) - version_def = versions_pb2.VersionDef() - version_def.ParseFromString(compat.as_bytes(data)) - return version_def - else: - return self._graph_def_versions + with c_api_util.tf_buffer() as buf: + c_api.TF_GraphVersions(self._c_graph, buf) + data = c_api.TF_GetBuffer(buf) + version_def = versions_pb2.VersionDef() + version_def.ParseFromString(compat.as_bytes(data)) + return version_def @property def seed(self): @@ -3185,40 +3009,22 @@ class Graph(object): """ # pylint: enable=line-too-long - if self._c_graph: - with self._lock: - with c_api_util.tf_buffer() as buf: - c_api.TF_GraphToGraphDef(self._c_graph, buf) - data = c_api.TF_GetBuffer(buf) - graph = graph_pb2.GraphDef() - graph.ParseFromString(compat.as_bytes(data)) - # Strip the experimental library field iff it's empty. - if not graph.library.function: - graph.ClearField("library") - - if add_shapes: - for node in graph.node: - op = self._nodes_by_name[node.name] - if op.outputs: - node.attr["_output_shapes"].list.shape.extend( - [output.get_shape().as_proto() for output in op.outputs]) - else: - with self._lock: - graph = graph_pb2.GraphDef() - graph.versions.CopyFrom(self._graph_def_versions) - bytesize = 0 - for op_id in sorted(self._nodes_by_id): - op = self._nodes_by_id[op_id] - if from_version is None or op_id > from_version: - graph.node.extend([op.node_def]) - if op.outputs and add_shapes: - assert "_output_shapes" not in graph.node[-1].attr - graph.node[-1].attr["_output_shapes"].list.shape.extend( - [output.get_shape().as_proto() for output in op.outputs]) - bytesize += op.node_def.ByteSize() - if bytesize >= (1 << 31) or bytesize < 0: - raise ValueError("GraphDef cannot be larger than 2GB.") - self._copy_functions_to_graph_def(graph, bytesize) + with self._lock: + with c_api_util.tf_buffer() as buf: + 
c_api.TF_GraphToGraphDef(self._c_graph, buf) + data = c_api.TF_GetBuffer(buf) + graph = graph_pb2.GraphDef() + graph.ParseFromString(compat.as_bytes(data)) + # Strip the experimental library field iff it's empty. + if not graph.library.function: + graph.ClearField("library") + + if add_shapes: + for node in graph.node: + op = self._nodes_by_name[node.name] + if op.outputs: + node.attr["_output_shapes"].list.shape.extend( + [output.get_shape().as_proto() for output in op.outputs]) return graph, self._version def as_graph_def(self, from_version=None, add_shapes=False): @@ -3292,34 +3098,16 @@ class Graph(object): # Add function to graph # pylint: disable=protected-access - if self._c_graph: - # Handle functions created without using the C API. TODO(apassos,skyewm) - # remove this when all functions are generated using the C API by default - # as this will be unnecessary. - if not function._c_func: - serialized = function.definition.SerializeToString() - c_func = c_api.TF_FunctionImportFunctionDef(serialized) - function._c_func = c_api_util.ScopedTFFunction(c_func) - gradient = (function._grad_func._c_func.func if function._grad_func - else None) - c_api.TF_GraphCopyFunction(self._c_graph, function._c_func.func, gradient) - else: - # If there is already a function with the same name, raise an error - # if bodies are different. Else, do nothing. The C API version above - # has the same behavior. - previous = self._functions.get(name, None) - if previous: - # This check is not ideal as we can have a hash collision with only - # 32 bits in the hash, but the non C API mode is being deprecated. - # Don't bother changing it now. - if previous._hash_str == function._hash_str: - return - else: - raise ValueError("Cannot add function (%s, hash %s) to graph (%s). " - "Another function (%s, hash %s) is already defined " - "with that name (%s)" % ( - function, function._hash_str, self, - previous, previous._hash_str, name)) + # Handle functions created without using the C API. 
TODO(apassos,skyewm) + # remove this when all functions are generated using the C API by default + # as this will be unnecessary. + if not function._c_func: + serialized = function.definition.SerializeToString() + c_func = c_api.TF_FunctionImportFunctionDef(serialized) + function._c_func = c_api_util.ScopedTFFunction(c_func) + gradient = (function._grad_func._c_func.func if function._grad_func + else None) + c_api.TF_GraphCopyFunction(self._c_graph, function._c_func.func, gradient) # pylint: enable=protected-access self._functions[name] = function @@ -3334,6 +3122,9 @@ class Graph(object): return self._building_function # Helper functions to create operations. + @deprecated_args(None, + "Shapes are always computed; don't use the compute_shapes " + "as it has no effect.", "compute_shapes") def create_op( self, op_type, @@ -3370,8 +3161,8 @@ class Graph(object): proto). op_def: (Optional.) The `OpDef` proto that describes the `op_type` that the operation will have. - compute_shapes: (Optional.) If True, shape inference will be performed - to compute the shapes of the outputs. + compute_shapes: (Optional.) Deprecated. Has no effect (shapes are always + computed). compute_device: (Optional.) If True, device functions will be executed to compute the device property of the Operation. @@ -3381,8 +3172,9 @@ class Graph(object): Returns: An `Operation` object. - """ + del compute_shapes + self._check_not_finalized() for idx, a in enumerate(inputs): if not isinstance(a, Tensor): @@ -3412,18 +3204,7 @@ class Graph(object): input_types=input_types, original_op=self._default_original_op, op_def=op_def) - - # Note: shapes are lazily computed with the C API enabled. - # - # TODO(skyewm): unlike in the original Python implementation, the C API - # always computes shape information (even for function calls, which the - # original Python shape inference code doesn't handle). Deprecate the - # compute_shapes argument. 
- if not _USE_C_API and compute_shapes: - set_shape_and_handle_data_for_outputs(ret) - - self._create_op_helper(ret, compute_shapes=compute_shapes, - compute_device=compute_device) + self._create_op_helper(ret, compute_device=compute_device) return ret def _create_op_from_tf_operation(self, c_op, compute_device=True): @@ -3457,11 +3238,8 @@ class Graph(object): self._create_op_helper(ret, compute_device=compute_device) return ret - def _create_op_helper(self, op, compute_shapes=True, compute_device=True): + def _create_op_helper(self, op, compute_device=True): """Common logic for creating an op in this graph.""" - # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed. - self._add_op(op) - # Apply any additional attributes requested. Do not overwrite any existing # attributes. for key, value in self._attr_scope_map.items(): @@ -3528,8 +3306,7 @@ class Graph(object): # (2) "is_stateful" is set in OpDef # (3) "container" attribute is in OpDef # (4) "container" attribute is None - # TODO(skyewm): remove op.op_def check when _USE_C_API is removed. - if self._container and op.op_def and op.op_def.is_stateful: + if self._container and op.op_def.is_stateful: try: container_attr = op.get_attr("container") except ValueError: @@ -3816,17 +3593,14 @@ class Graph(object): def _get_op_def(self, type): # pylint: disable=redefined-builtin """Returns the `OpDef` proto for `type`. 
`type` is a string.""" - if self._c_graph: - with c_api_util.tf_buffer() as buf: - # pylint: disable=protected-access - c_api.TF_GraphGetOpDef(self._c_graph, compat.as_bytes(type), buf) - # pylint: enable=protected-access - data = c_api.TF_GetBuffer(buf) - op_def = op_def_pb2.OpDef() - op_def.ParseFromString(compat.as_bytes(data)) - return op_def - else: - return self._registered_ops[type] + with c_api_util.tf_buffer() as buf: + # pylint: disable=protected-access + c_api.TF_GraphGetOpDef(self._c_graph, compat.as_bytes(type), buf) + # pylint: enable=protected-access + data = c_api.TF_GetBuffer(buf) + op_def = op_def_pb2.OpDef() + op_def.ParseFromString(compat.as_bytes(data)) + return op_def def as_default(self): """Returns a context manager that makes this `Graph` the default graph. diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index e7732632f2..81355a279c 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -270,7 +270,6 @@ class OperationTest(test_util.TensorFlowTestCase): op1 = ops.Operation( ops._NodeDef("RefOutputFloatOutput", "op1"), g, [], [dtypes.float32_ref, dtypes.float32]) - g._add_op(op1) self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def) self.assertEquals([], list(op1.inputs)) ref_t, nonref_t = op1.values() @@ -279,14 +278,12 @@ class OperationTest(test_util.TensorFlowTestCase): ops._NodeDef("RefInputFloatInput", "op2"), g, [ref_t, nonref_t], [], input_types=[dtypes.float32_ref, dtypes.float32]) - g._add_op(op2) self.assertProtoEquals( "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'", op2.node_def) self.assertEquals([ref_t, nonref_t], list(op2.inputs)) op3 = ops.Operation( ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], []) - g._add_op(op3) self.assertProtoEquals( "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'", op3.node_def) -- GitLab From 3653e80488f490ad744410a92ac287acf7035bda Mon Sep 17 
00:00:00 2001 From: Derek Murray Date: Tue, 5 Jun 2018 08:20:41 -0700 Subject: [PATCH 526/902] Address compiler warnings in tensorflow/core/distributed_runtime. PiperOrigin-RevId: 199299538 --- tensorflow/core/distributed_runtime/local_master.h | 2 +- tensorflow/core/distributed_runtime/master.cc | 8 ++++---- tensorflow/core/distributed_runtime/master_session.cc | 7 +++---- .../core/distributed_runtime/rpc/grpc_worker_service.cc | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/distributed_runtime/local_master.h b/tensorflow/core/distributed_runtime/local_master.h index cad6babad8..b9c76d0f1d 100644 --- a/tensorflow/core/distributed_runtime/local_master.h +++ b/tensorflow/core/distributed_runtime/local_master.h @@ -79,7 +79,7 @@ class LocalMaster : public MasterInterface { RunCallableResponse* response) override; Status ReleaseCallable(CallOptions* call_options, const ReleaseCallableRequest* request, - ReleaseCallableResponse* response); + ReleaseCallableResponse* response) override; // Registers the mapping from the given `target` to the given `master`. 
// diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc index 4f9d84d158..a48f734d3e 100644 --- a/tensorflow/core/distributed_runtime/master.cc +++ b/tensorflow/core/distributed_runtime/master.cc @@ -473,7 +473,7 @@ void Master::PartialRunSetup(const PartialRunSetupRequest* req, return; } - SchedClosure([this, session, req, resp, done]() { + SchedClosure([session, req, resp, done]() { Status s = session->PartialRunSetup(req, resp); session->Unref(); done(s); @@ -628,7 +628,7 @@ void Master::MakeCallable(const MakeCallableRequest* req, } SchedClosure(std::bind( - [this, session, req, resp](MyClosure done) { + [session, req, resp](MyClosure done) { Status s = session->MakeCallable(*req, resp); session->Unref(); done(s); @@ -645,7 +645,7 @@ void Master::RunCallable(CallOptions* opts, const RunCallableRequest* req, } SchedClosure(std::bind( - [this, session, opts, req, resp](MyClosure done) { + [session, opts, req, resp](MyClosure done) { Status s = session->RunCallable(opts, *req, resp); session->Unref(); done(s); @@ -662,7 +662,7 @@ void Master::ReleaseCallable(const ReleaseCallableRequest* req, } SchedClosure(std::bind( - [this, session, req, resp](MyClosure done) { + [session, req, resp](MyClosure done) { Status s = session->ReleaseCallable(*req, resp); session->Unref(); done(s); diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index bd70eca3f6..e29bb76ddf 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -156,8 +156,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { LoggingResponse* resp = new LoggingResponse; p.worker->LoggingAsync( &req, resp, - [step_id, ss, resp, &scoped_mu, &waiting_for, - &all_done](const Status& s) { + [step_id, ss, resp, &scoped_mu, &all_done](const Status& s) { { mutex_lock l(scoped_mu); if (s.ok()) { @@ -1207,7 
+1206,7 @@ Status MasterSession::CreateWorkerSessions( std::vector workers(worker_names.size()); // Release the workers. - auto cleanup = gtl::MakeCleanup([this, &workers, worker_cache] { + auto cleanup = gtl::MakeCleanup([&workers, worker_cache] { for (auto&& worker_group : workers) { if (worker_group.worker != nullptr) { worker_cache->ReleaseWorker(*worker_group.name, worker_group.worker); @@ -1289,7 +1288,7 @@ Status MasterSession::DeleteWorkerSessions() { std::vector workers(worker_names.size()); // Release the workers. - auto cleanup = gtl::MakeCleanup([this, &workers, worker_cache] { + auto cleanup = gtl::MakeCleanup([&workers, worker_cache] { for (auto&& worker_group : workers) { if (worker_group.worker != nullptr) { worker_cache->ReleaseWorker(*worker_group.name, worker_group.worker); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index 2e7b111963..aa9304a033 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -513,8 +513,8 @@ void GrpcWorker::RecvBufAsync(CallOptions* opts, const RecvBufRequest* request, CollectiveRemoteAccess* rma = ce_handle.get()->remote_access(); rma->buf_rendezvous()->ConsumeBuf( request->buf_rendezvous_key(), - [this, opts, request, response, done](const Status& status, - BufRendezvous::Hook* hook) { + [this, request, response, done](const Status& status, + BufRendezvous::Hook* hook) { Status s = status; if (s.ok()) { if (!DMAHelper::CanUseDMA(hook->prod_value)) { -- GitLab From e1f31d40b9d12e687100a689bc5439d78702124c Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Tue, 5 Jun 2018 08:42:28 -0700 Subject: [PATCH 527/902] Expose `@tfe.run_all_tests_in_graph_and_eager_modes`. 
PiperOrigin-RevId: 199302255 --- tensorflow/contrib/eager/python/tfe.py | 1 + tensorflow/python/framework/test_util.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index 5826700c73..fee9db46fa 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -115,6 +115,7 @@ from tensorflow.python.eager.execution_callbacks import seterr from tensorflow.python.framework.ops import enable_eager_execution from tensorflow.python.framework.ops import eager_run as run from tensorflow.python.framework.test_util import run_in_graph_and_eager_modes as run_test_in_graph_and_eager_modes +from tensorflow.python.framework.test_util import run_all_in_graph_and_eager_modes as run_all_tests_in_graph_and_eager_modes from tensorflow.python.ops.custom_gradient import custom_gradient from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Variable from tensorflow.python.ops.variable_scope import EagerVariableStore diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index b56483f373..0c06d9aa41 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -644,6 +644,7 @@ def assert_no_garbage_created(f): def run_all_in_graph_and_eager_modes(cls): + """Execute all test methods in the given class with and without eager.""" base_decorator = run_in_graph_and_eager_modes() for name, value in cls.__dict__.copy().items(): if callable(value) and name.startswith("test"): -- GitLab From 51445a754dd3d6f3a7b2e89b8d02d0f467c36b63 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 09:16:39 -0700 Subject: [PATCH 528/902] Add computed receptive field parameters from popular convnets. 
PiperOrigin-RevId: 199306977 --- tensorflow/contrib/receptive_field/README.md | 32 +- .../receptive_field/RECEPTIVE_FIELD_TABLE.md | 629 ++++++++++++++++++ .../util/examples/csv_to_markdown_table.py | 82 +++ 3 files changed, 740 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md create mode 100644 tensorflow/contrib/receptive_field/python/util/examples/csv_to_markdown_table.py diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md index 3ff85faf61..79b015a916 100644 --- a/tensorflow/contrib/receptive_field/README.md +++ b/tensorflow/contrib/receptive_field/README.md @@ -6,6 +6,32 @@ region your output features depend on. Better yet, using the parameters computed by the library, you can easily find the exact image region which is used to compute each convnet feature. +This library can be used to compute receptive field parameters of popular +convnets: + +
+ +convnet model | receptive field | effective stride | effective padding +:-----------------: | :-------------: | :--------------: | :---------------: +alexnet_v2 | 195 | 32 | 64 +vgg_16 | 212 | 32 | 90 +inception_v2 | 699 | 32 | 318 +inception_v3 | 1311 | 32 | 618 +inception_v4 | 2071 | 32 | 998 +inception_resnet_v2 | 3039 | 32 | 1482 +mobilenet_v1 | 315 | 32 | 126 +mobilenet_v1_075 | 315 | 32 | 126 +resnet_v1_50 | 483 | 32 | 241 +resnet_v1_101 | 1027 | 32 | 513 +resnet_v1_152 | 1507 | 32 | 753 +resnet_v1_200 | 1763 | 32 | 881 + +
+ +A comprehensive table with pre-computed receptive field parameters for different +end-points, input resolutions, and other variants of these networks can be found +[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md). + ## Basic usage The main function to be called is `compute_receptive_field_from_graph_def`, @@ -96,9 +122,9 @@ The script will write to stdout the receptive field parameters for many variants of several popular convnets: AlexNet, VGG, ResNet, Inception, Mobilenet. They are also written to the file `/tmp/rf_benchmark_results.csv`. -TODO: include here a plot for receptive field sizes of different convnets. - -TODO: include table/link to pre-computed RF parameters. +A comprehensive table with pre-computed receptive field parameters for different +networks can be found +[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md). ## Compute RF parameters from a graph pbtxt diff --git a/tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md b/tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md new file mode 100644 index 0000000000..736fbef6e7 --- /dev/null +++ b/tensorflow/contrib/receptive_field/RECEPTIVE_FIELD_TABLE.md @@ -0,0 +1,629 @@ +# Pre-computed receptive field parameters + +## Table with results + +The table below presents the receptive field parameters for several popular +convolutional neural networks. These are computed using the models from the +[TF-Slim +repository](https://github.com/tensorflow/models/tree/master/research/slim), +by using the [rf_benchmark +script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py). + +Questions? See the [FAQ](#faq). 
+ +CNN | resolution | end-point | RF | effective stride | effective padding +:----------------------------: | :--------: | :------------------: | :--: | :--------------: | :---------------: +alexnet_v2 | None | alexnet_v2/conv1 | 11 | 4 | 0 +alexnet_v2 | None | alexnet_v2/pool1 | 19 | 8 | 0 +alexnet_v2 | None | alexnet_v2/conv2 | 51 | 8 | 16 +alexnet_v2 | None | alexnet_v2/conv3 | 99 | 16 | 32 +alexnet_v2 | None | alexnet_v2/conv4 | 131 | 16 | 48 +alexnet_v2 | None | alexnet_v2/conv5 | 163 | 16 | 64 +alexnet_v2 | None | alexnet_v2/pool5 | 195 | 32 | 64 +alexnet_v2 | 224 | alexnet_v2/conv1 | 11 | 4 | 0 +alexnet_v2 | 224 | alexnet_v2/pool1 | 19 | 8 | 0 +alexnet_v2 | 224 | alexnet_v2/conv2 | 51 | 8 | 16 +alexnet_v2 | 224 | alexnet_v2/conv3 | 99 | 16 | 32 +alexnet_v2 | 224 | alexnet_v2/conv4 | 131 | 16 | 48 +alexnet_v2 | 224 | alexnet_v2/conv5 | 163 | 16 | 64 +alexnet_v2 | 224 | alexnet_v2/pool5 | 195 | 32 | 64 +alexnet_v2 | 321 | alexnet_v2/conv1 | 11 | 4 | 0 +alexnet_v2 | 321 | alexnet_v2/pool1 | 19 | 8 | 0 +alexnet_v2 | 321 | alexnet_v2/conv2 | 51 | 8 | 16 +alexnet_v2 | 321 | alexnet_v2/conv3 | 99 | 16 | 32 +alexnet_v2 | 321 | alexnet_v2/conv4 | 131 | 16 | 48 +alexnet_v2 | 321 | alexnet_v2/conv5 | 163 | 16 | 64 +alexnet_v2 | 321 | alexnet_v2/pool5 | 195 | 32 | 64 +vgg_a | None | vgg_a/conv1/conv1_1 | 3 | 1 | 1 +vgg_a | None | vgg_a/pool1 | 4 | 2 | 1 +vgg_a | None | vgg_a/conv2/conv2_1 | 8 | 2 | 3 +vgg_a | None | vgg_a/pool2 | 10 | 4 | 3 +vgg_a | None | vgg_a/conv3/conv3_1 | 18 | 4 | 7 +vgg_a | None | vgg_a/conv3/conv3_2 | 26 | 4 | 11 +vgg_a | None | vgg_a/pool3 | 30 | 8 | 11 +vgg_a | None | vgg_a/conv4/conv4_1 | 46 | 8 | 19 +vgg_a | None | vgg_a/conv4/conv4_2 | 62 | 8 | 27 +vgg_a | None | vgg_a/pool4 | 70 | 16 | 27 +vgg_a | None | vgg_a/conv5/conv5_1 | 102 | 16 | 43 +vgg_a | None | vgg_a/conv5/conv5_2 | 134 | 16 | 59 +vgg_a | None | vgg_a/pool5 | 150 | 32 | 59 +vgg_a | 224 | vgg_a/conv1/conv1_1 | 3 | 1 | 1 +vgg_a | 224 | vgg_a/pool1 | 4 | 2 | 1 +vgg_a | 224 | 
vgg_a/conv2/conv2_1 | 8 | 2 | 3 +vgg_a | 224 | vgg_a/pool2 | 10 | 4 | 3 +vgg_a | 224 | vgg_a/conv3/conv3_1 | 18 | 4 | 7 +vgg_a | 224 | vgg_a/conv3/conv3_2 | 26 | 4 | 11 +vgg_a | 224 | vgg_a/pool3 | 30 | 8 | 11 +vgg_a | 224 | vgg_a/conv4/conv4_1 | 46 | 8 | 19 +vgg_a | 224 | vgg_a/conv4/conv4_2 | 62 | 8 | 27 +vgg_a | 224 | vgg_a/pool4 | 70 | 16 | 27 +vgg_a | 224 | vgg_a/conv5/conv5_1 | 102 | 16 | 43 +vgg_a | 224 | vgg_a/conv5/conv5_2 | 134 | 16 | 59 +vgg_a | 224 | vgg_a/pool5 | 150 | 32 | 59 +vgg_a | 321 | vgg_a/conv1/conv1_1 | 3 | 1 | 1 +vgg_a | 321 | vgg_a/pool1 | 4 | 2 | 1 +vgg_a | 321 | vgg_a/conv2/conv2_1 | 8 | 2 | 3 +vgg_a | 321 | vgg_a/pool2 | 10 | 4 | 3 +vgg_a | 321 | vgg_a/conv3/conv3_1 | 18 | 4 | 7 +vgg_a | 321 | vgg_a/conv3/conv3_2 | 26 | 4 | 11 +vgg_a | 321 | vgg_a/pool3 | 30 | 8 | 11 +vgg_a | 321 | vgg_a/conv4/conv4_1 | 46 | 8 | 19 +vgg_a | 321 | vgg_a/conv4/conv4_2 | 62 | 8 | 27 +vgg_a | 321 | vgg_a/pool4 | 70 | 16 | 27 +vgg_a | 321 | vgg_a/conv5/conv5_1 | 102 | 16 | 43 +vgg_a | 321 | vgg_a/conv5/conv5_2 | 134 | 16 | 59 +vgg_a | 321 | vgg_a/pool5 | 150 | 32 | 59 +vgg_16 | None | vgg_16/conv1/conv1_1 | 3 | 1 | 1 +vgg_16 | None | vgg_16/pool1 | 6 | 2 | 2 +vgg_16 | None | vgg_16/conv2/conv2_1 | 10 | 2 | 4 +vgg_16 | None | vgg_16/pool2 | 16 | 4 | 6 +vgg_16 | None | vgg_16/conv3/conv3_1 | 24 | 4 | 10 +vgg_16 | None | vgg_16/conv3/conv3_2 | 32 | 4 | 14 +vgg_16 | None | vgg_16/pool3 | 44 | 8 | 18 +vgg_16 | None | vgg_16/conv4/conv4_1 | 60 | 8 | 26 +vgg_16 | None | vgg_16/conv4/conv4_2 | 76 | 8 | 34 +vgg_16 | None | vgg_16/pool4 | 100 | 16 | 42 +vgg_16 | None | vgg_16/conv5/conv5_1 | 132 | 16 | 58 +vgg_16 | None | vgg_16/conv5/conv5_2 | 164 | 16 | 74 +vgg_16 | None | vgg_16/pool5 | 212 | 32 | 90 +vgg_16 | 224 | vgg_16/conv1/conv1_1 | 3 | 1 | 1 +vgg_16 | 224 | vgg_16/pool1 | 6 | 2 | 2 +vgg_16 | 224 | vgg_16/conv2/conv2_1 | 10 | 2 | 4 +vgg_16 | 224 | vgg_16/pool2 | 16 | 4 | 6 +vgg_16 | 224 | vgg_16/conv3/conv3_1 | 24 | 4 | 10 +vgg_16 | 224 | vgg_16/conv3/conv3_2 
| 32 | 4 | 14 +vgg_16 | 224 | vgg_16/pool3 | 44 | 8 | 18 +vgg_16 | 224 | vgg_16/conv4/conv4_1 | 60 | 8 | 26 +vgg_16 | 224 | vgg_16/conv4/conv4_2 | 76 | 8 | 34 +vgg_16 | 224 | vgg_16/pool4 | 100 | 16 | 42 +vgg_16 | 224 | vgg_16/conv5/conv5_1 | 132 | 16 | 58 +vgg_16 | 224 | vgg_16/conv5/conv5_2 | 164 | 16 | 74 +vgg_16 | 224 | vgg_16/pool5 | 212 | 32 | 90 +vgg_16 | 321 | vgg_16/conv1/conv1_1 | 3 | 1 | 1 +vgg_16 | 321 | vgg_16/pool1 | 6 | 2 | 2 +vgg_16 | 321 | vgg_16/conv2/conv2_1 | 10 | 2 | 4 +vgg_16 | 321 | vgg_16/pool2 | 16 | 4 | 6 +vgg_16 | 321 | vgg_16/conv3/conv3_1 | 24 | 4 | 10 +vgg_16 | 321 | vgg_16/conv3/conv3_2 | 32 | 4 | 14 +vgg_16 | 321 | vgg_16/pool3 | 44 | 8 | 18 +vgg_16 | 321 | vgg_16/conv4/conv4_1 | 60 | 8 | 26 +vgg_16 | 321 | vgg_16/conv4/conv4_2 | 76 | 8 | 34 +vgg_16 | 321 | vgg_16/pool4 | 100 | 16 | 42 +vgg_16 | 321 | vgg_16/conv5/conv5_1 | 132 | 16 | 58 +vgg_16 | 321 | vgg_16/conv5/conv5_2 | 164 | 16 | 74 +vgg_16 | 321 | vgg_16/pool5 | 212 | 32 | 90 +inception_v2 | None | Conv2d_1a_7x7 | 7 | 2 | None +inception_v2 | None | MaxPool_2a_3x3 | 11 | 4 | None +inception_v2 | None | Conv2d_2b_1x1 | 11 | 4 | None +inception_v2 | None | Conv2d_2c_3x3 | 19 | 4 | None +inception_v2 | None | MaxPool_3a_3x3 | 27 | 8 | None +inception_v2 | None | Mixed_3b | 59 | 8 | None +inception_v2 | None | Mixed_3c | 91 | 8 | None +inception_v2 | None | Mixed_4a | 123 | 16 | None +inception_v2 | None | Mixed_4b | 187 | 16 | None +inception_v2 | None | Mixed_4c | 251 | 16 | None +inception_v2 | None | Mixed_4d | 315 | 16 | None +inception_v2 | None | Mixed_4e | 379 | 16 | None +inception_v2 | None | Mixed_5a | 443 | 32 | None +inception_v2 | None | Mixed_5b | 571 | 32 | None +inception_v2 | None | Mixed_5c | 699 | 32 | None +inception_v2 | 224 | Conv2d_1a_7x7 | 7 | 2 | 2 +inception_v2 | 224 | MaxPool_2a_3x3 | 11 | 4 | 2 +inception_v2 | 224 | Conv2d_2b_1x1 | 11 | 4 | 2 +inception_v2 | 224 | Conv2d_2c_3x3 | 19 | 4 | 6 +inception_v2 | 224 | MaxPool_3a_3x3 | 27 | 8 | 6 
+inception_v2 | 224 | Mixed_3b | 59 | 8 | 22 +inception_v2 | 224 | Mixed_3c | 91 | 8 | 38 +inception_v2 | 224 | Mixed_4a | 123 | 16 | 46 +inception_v2 | 224 | Mixed_4b | 187 | 16 | 78 +inception_v2 | 224 | Mixed_4c | 251 | 16 | 110 +inception_v2 | 224 | Mixed_4d | 315 | 16 | 142 +inception_v2 | 224 | Mixed_4e | 379 | 16 | 174 +inception_v2 | 224 | Mixed_5a | 443 | 32 | 190 +inception_v2 | 224 | Mixed_5b | 571 | 32 | 254 +inception_v2 | 224 | Mixed_5c | 699 | 32 | 318 +inception_v2 | 321 | Conv2d_1a_7x7 | 7 | 2 | 3 +inception_v2 | 321 | MaxPool_2a_3x3 | 11 | 4 | 5 +inception_v2 | 321 | Conv2d_2b_1x1 | 11 | 4 | 5 +inception_v2 | 321 | Conv2d_2c_3x3 | 19 | 4 | 9 +inception_v2 | 321 | MaxPool_3a_3x3 | 27 | 8 | 13 +inception_v2 | 321 | Mixed_3b | 59 | 8 | 29 +inception_v2 | 321 | Mixed_3c | 91 | 8 | 45 +inception_v2 | 321 | Mixed_4a | 123 | 16 | 61 +inception_v2 | 321 | Mixed_4b | 187 | 16 | 93 +inception_v2 | 321 | Mixed_4c | 251 | 16 | 125 +inception_v2 | 321 | Mixed_4d | 315 | 16 | 157 +inception_v2 | 321 | Mixed_4e | 379 | 16 | 189 +inception_v2 | 321 | Mixed_5a | 443 | 32 | 221 +inception_v2 | 321 | Mixed_5b | 571 | 32 | 285 +inception_v2 | 321 | Mixed_5c | 699 | 32 | 349 +inception_v2-no-separable-conv | None | Conv2d_1a_7x7 | 7 | 2 | None +inception_v2-no-separable-conv | None | MaxPool_2a_3x3 | 11 | 4 | None +inception_v2-no-separable-conv | None | Conv2d_2b_1x1 | 11 | 4 | None +inception_v2-no-separable-conv | None | Conv2d_2c_3x3 | 19 | 4 | None +inception_v2-no-separable-conv | None | MaxPool_3a_3x3 | 27 | 8 | None +inception_v2-no-separable-conv | None | Mixed_3b | 59 | 8 | None +inception_v2-no-separable-conv | None | Mixed_3c | 91 | 8 | None +inception_v2-no-separable-conv | None | Mixed_4a | 123 | 16 | None +inception_v2-no-separable-conv | None | Mixed_4b | 187 | 16 | None +inception_v2-no-separable-conv | None | Mixed_4c | 251 | 16 | None +inception_v2-no-separable-conv | None | Mixed_4d | 315 | 16 | None +inception_v2-no-separable-conv | None | 
Mixed_4e | 379 | 16 | None +inception_v2-no-separable-conv | None | Mixed_5a | 443 | 32 | None +inception_v2-no-separable-conv | None | Mixed_5b | 571 | 32 | None +inception_v2-no-separable-conv | None | Mixed_5c | 699 | 32 | None +inception_v2-no-separable-conv | 224 | Conv2d_1a_7x7 | 7 | 2 | 2 +inception_v2-no-separable-conv | 224 | MaxPool_2a_3x3 | 11 | 4 | 2 +inception_v2-no-separable-conv | 224 | Conv2d_2b_1x1 | 11 | 4 | 2 +inception_v2-no-separable-conv | 224 | Conv2d_2c_3x3 | 19 | 4 | 6 +inception_v2-no-separable-conv | 224 | MaxPool_3a_3x3 | 27 | 8 | 6 +inception_v2-no-separable-conv | 224 | Mixed_3b | 59 | 8 | 22 +inception_v2-no-separable-conv | 224 | Mixed_3c | 91 | 8 | 38 +inception_v2-no-separable-conv | 224 | Mixed_4a | 123 | 16 | 46 +inception_v2-no-separable-conv | 224 | Mixed_4b | 187 | 16 | 78 +inception_v2-no-separable-conv | 224 | Mixed_4c | 251 | 16 | 110 +inception_v2-no-separable-conv | 224 | Mixed_4d | 315 | 16 | 142 +inception_v2-no-separable-conv | 224 | Mixed_4e | 379 | 16 | 174 +inception_v2-no-separable-conv | 224 | Mixed_5a | 443 | 32 | 190 +inception_v2-no-separable-conv | 224 | Mixed_5b | 571 | 32 | 254 +inception_v2-no-separable-conv | 224 | Mixed_5c | 699 | 32 | 318 +inception_v2-no-separable-conv | 321 | Conv2d_1a_7x7 | 7 | 2 | 3 +inception_v2-no-separable-conv | 321 | MaxPool_2a_3x3 | 11 | 4 | 5 +inception_v2-no-separable-conv | 321 | Conv2d_2b_1x1 | 11 | 4 | 5 +inception_v2-no-separable-conv | 321 | Conv2d_2c_3x3 | 19 | 4 | 9 +inception_v2-no-separable-conv | 321 | MaxPool_3a_3x3 | 27 | 8 | 13 +inception_v2-no-separable-conv | 321 | Mixed_3b | 59 | 8 | 29 +inception_v2-no-separable-conv | 321 | Mixed_3c | 91 | 8 | 45 +inception_v2-no-separable-conv | 321 | Mixed_4a | 123 | 16 | 61 +inception_v2-no-separable-conv | 321 | Mixed_4b | 187 | 16 | 93 +inception_v2-no-separable-conv | 321 | Mixed_4c | 251 | 16 | 125 +inception_v2-no-separable-conv | 321 | Mixed_4d | 315 | 16 | 157 +inception_v2-no-separable-conv | 321 | Mixed_4e | 379 
| 16 | 189 +inception_v2-no-separable-conv | 321 | Mixed_5a | 443 | 32 | 221 +inception_v2-no-separable-conv | 321 | Mixed_5b | 571 | 32 | 285 +inception_v2-no-separable-conv | 321 | Mixed_5c | 699 | 32 | 349 +inception_v3 | None | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_v3 | None | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v3 | None | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v3 | None | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_v3 | None | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_v3 | None | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_v3 | None | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_v3 | None | Mixed_5b | 63 | 8 | 18 +inception_v3 | None | Mixed_5c | 95 | 8 | 34 +inception_v3 | None | Mixed_5d | 127 | 8 | 50 +inception_v3 | None | Mixed_6a | 159 | 16 | 58 +inception_v3 | None | Mixed_6b | 351 | 16 | 154 +inception_v3 | None | Mixed_6c | 543 | 16 | 250 +inception_v3 | None | Mixed_6d | 735 | 16 | 346 +inception_v3 | None | Mixed_6e | 927 | 16 | 442 +inception_v3 | None | Mixed_7a | 1055 | 32 | 490 +inception_v3 | None | Mixed_7b | 1183 | 32 | 554 +inception_v3 | None | Mixed_7c | 1311 | 32 | 618 +inception_v3 | 224 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_v3 | 224 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v3 | 224 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v3 | 224 | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_v3 | 224 | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_v3 | 224 | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_v3 | 224 | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_v3 | 224 | Mixed_5b | 63 | 8 | 18 +inception_v3 | 224 | Mixed_5c | 95 | 8 | 34 +inception_v3 | 224 | Mixed_5d | 127 | 8 | 50 +inception_v3 | 224 | Mixed_6a | 159 | 16 | 58 +inception_v3 | 224 | Mixed_6b | 351 | 16 | 154 +inception_v3 | 224 | Mixed_6c | 543 | 16 | 250 +inception_v3 | 224 | Mixed_6d | 735 | 16 | 346 +inception_v3 | 224 | Mixed_6e | 927 | 16 | 442 +inception_v3 | 224 | Mixed_7a | 1055 | 32 | 490 +inception_v3 | 224 | Mixed_7b | 1183 | 32 | 554 +inception_v3 | 224 | Mixed_7c | 1311 | 32 | 618 +inception_v3 | 321 | Conv2d_1a_3x3 | 3 | 
2 | 0 +inception_v3 | 321 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v3 | 321 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v3 | 321 | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_v3 | 321 | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_v3 | 321 | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_v3 | 321 | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_v3 | 321 | Mixed_5b | 63 | 8 | 18 +inception_v3 | 321 | Mixed_5c | 95 | 8 | 34 +inception_v3 | 321 | Mixed_5d | 127 | 8 | 50 +inception_v3 | 321 | Mixed_6a | 159 | 16 | 58 +inception_v3 | 321 | Mixed_6b | 351 | 16 | 154 +inception_v3 | 321 | Mixed_6c | 543 | 16 | 250 +inception_v3 | 321 | Mixed_6d | 735 | 16 | 346 +inception_v3 | 321 | Mixed_6e | 927 | 16 | 442 +inception_v3 | 321 | Mixed_7a | 1055 | 32 | 490 +inception_v3 | 321 | Mixed_7b | 1183 | 32 | 554 +inception_v3 | 321 | Mixed_7c | 1311 | 32 | 618 +inception_v4 | None | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_v4 | None | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v4 | None | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v4 | None | Mixed_3a | 15 | 4 | 2 +inception_v4 | None | Mixed_4a | 47 | 4 | 14 +inception_v4 | None | Mixed_5a | 55 | 8 | 14 +inception_v4 | None | Mixed_5b | 87 | 8 | 30 +inception_v4 | None | Mixed_5c | 119 | 8 | 46 +inception_v4 | None | Mixed_5d | 151 | 8 | 62 +inception_v4 | None | Mixed_5e | 183 | 8 | 78 +inception_v4 | None | Mixed_6a | 215 | 16 | 86 +inception_v4 | None | Mixed_6b | 407 | 16 | 182 +inception_v4 | None | Mixed_6c | 599 | 16 | 278 +inception_v4 | None | Mixed_6d | 791 | 16 | 374 +inception_v4 | None | Mixed_6e | 983 | 16 | 470 +inception_v4 | None | Mixed_6f | 1175 | 16 | 566 +inception_v4 | None | Mixed_6g | 1367 | 16 | 662 +inception_v4 | None | Mixed_6h | 1559 | 16 | 758 +inception_v4 | None | Mixed_7a | 1687 | 32 | 806 +inception_v4 | None | Mixed_7b | 1815 | 32 | 870 +inception_v4 | None | Mixed_7c | 1943 | 32 | 934 +inception_v4 | None | Mixed_7d | 2071 | 32 | 998 +inception_v4 | 224 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_v4 | 224 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v4 
| 224 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v4 | 224 | Mixed_3a | 15 | 4 | 2 +inception_v4 | 224 | Mixed_4a | 47 | 4 | 14 +inception_v4 | 224 | Mixed_5a | 55 | 8 | 14 +inception_v4 | 224 | Mixed_5b | 87 | 8 | 30 +inception_v4 | 224 | Mixed_5c | 119 | 8 | 46 +inception_v4 | 224 | Mixed_5d | 151 | 8 | 62 +inception_v4 | 224 | Mixed_5e | 183 | 8 | 78 +inception_v4 | 224 | Mixed_6a | 215 | 16 | 86 +inception_v4 | 224 | Mixed_6b | 407 | 16 | 182 +inception_v4 | 224 | Mixed_6c | 599 | 16 | 278 +inception_v4 | 224 | Mixed_6d | 791 | 16 | 374 +inception_v4 | 224 | Mixed_6e | 983 | 16 | 470 +inception_v4 | 224 | Mixed_6f | 1175 | 16 | 566 +inception_v4 | 224 | Mixed_6g | 1367 | 16 | 662 +inception_v4 | 224 | Mixed_6h | 1559 | 16 | 758 +inception_v4 | 224 | Mixed_7a | 1687 | 32 | 806 +inception_v4 | 224 | Mixed_7b | 1815 | 32 | 870 +inception_v4 | 224 | Mixed_7c | 1943 | 32 | 934 +inception_v4 | 224 | Mixed_7d | 2071 | 32 | 998 +inception_v4 | 321 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_v4 | 321 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_v4 | 321 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_v4 | 321 | Mixed_3a | 15 | 4 | 2 +inception_v4 | 321 | Mixed_4a | 47 | 4 | 14 +inception_v4 | 321 | Mixed_5a | 55 | 8 | 14 +inception_v4 | 321 | Mixed_5b | 87 | 8 | 30 +inception_v4 | 321 | Mixed_5c | 119 | 8 | 46 +inception_v4 | 321 | Mixed_5d | 151 | 8 | 62 +inception_v4 | 321 | Mixed_5e | 183 | 8 | 78 +inception_v4 | 321 | Mixed_6a | 215 | 16 | 86 +inception_v4 | 321 | Mixed_6b | 407 | 16 | 182 +inception_v4 | 321 | Mixed_6c | 599 | 16 | 278 +inception_v4 | 321 | Mixed_6d | 791 | 16 | 374 +inception_v4 | 321 | Mixed_6e | 983 | 16 | 470 +inception_v4 | 321 | Mixed_6f | 1175 | 16 | 566 +inception_v4 | 321 | Mixed_6g | 1367 | 16 | 662 +inception_v4 | 321 | Mixed_6h | 1559 | 16 | 758 +inception_v4 | 321 | Mixed_7a | 1687 | 32 | 806 +inception_v4 | 321 | Mixed_7b | 1815 | 32 | 870 +inception_v4 | 321 | Mixed_7c | 1943 | 32 | 934 +inception_v4 | 321 | Mixed_7d | 2071 | 32 | 998 +inception_resnet_v2 
| None | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_resnet_v2 | None | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_resnet_v2 | None | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_resnet_v2 | None | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_resnet_v2 | None | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_resnet_v2 | None | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_resnet_v2 | None | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_resnet_v2 | None | Mixed_5b | 63 | 8 | 18 +inception_resnet_v2 | None | Mixed_6a | 415 | 16 | 186 +inception_resnet_v2 | None | PreAuxLogits | 2335 | 16 | 1146 +inception_resnet_v2 | None | Mixed_7a | 2399 | 32 | 1162 +inception_resnet_v2 | None | Conv2d_7b_1x1 | 3039 | 32 | 1482 +inception_resnet_v2 | 224 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_resnet_v2 | 224 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_resnet_v2 | 224 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_resnet_v2 | 224 | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_resnet_v2 | 224 | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_resnet_v2 | 224 | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_resnet_v2 | 224 | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_resnet_v2 | 224 | Mixed_5b | 63 | 8 | 18 +inception_resnet_v2 | 224 | Mixed_6a | 415 | 16 | 186 +inception_resnet_v2 | 224 | PreAuxLogits | 2335 | 16 | 1146 +inception_resnet_v2 | 224 | Mixed_7a | 2399 | 32 | 1162 +inception_resnet_v2 | 224 | Conv2d_7b_1x1 | 3039 | 32 | 1482 +inception_resnet_v2 | 321 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_resnet_v2 | 321 | Conv2d_2a_3x3 | 7 | 2 | 0 +inception_resnet_v2 | 321 | Conv2d_2b_3x3 | 11 | 2 | 2 +inception_resnet_v2 | 321 | MaxPool_3a_3x3 | 15 | 4 | 2 +inception_resnet_v2 | 321 | Conv2d_3b_1x1 | 15 | 4 | 2 +inception_resnet_v2 | 321 | Conv2d_4a_3x3 | 23 | 4 | 2 +inception_resnet_v2 | 321 | MaxPool_5a_3x3 | 31 | 8 | 2 +inception_resnet_v2 | 321 | Mixed_5b | 63 | 8 | 18 +inception_resnet_v2 | 321 | Mixed_6a | 415 | 16 | 186 +inception_resnet_v2 | 321 | PreAuxLogits | 2335 | 16 | 1146 +inception_resnet_v2 | 321 | Mixed_7a | 2399 | 32 | 1162 +inception_resnet_v2 | 321 | 
Conv2d_7b_1x1 | 3039 | 32 | 1482 +inception_resnet_v2-same | None | Conv2d_1a_3x3 | 3 | 2 | None +inception_resnet_v2-same | None | Conv2d_2a_3x3 | 7 | 2 | None +inception_resnet_v2-same | None | Conv2d_2b_3x3 | 11 | 2 | None +inception_resnet_v2-same | None | MaxPool_3a_3x3 | 15 | 4 | None +inception_resnet_v2-same | None | Conv2d_3b_1x1 | 15 | 4 | None +inception_resnet_v2-same | None | Conv2d_4a_3x3 | 23 | 4 | None +inception_resnet_v2-same | None | MaxPool_5a_3x3 | 31 | 8 | None +inception_resnet_v2-same | None | Mixed_5b | 63 | 8 | None +inception_resnet_v2-same | None | Mixed_6a | 415 | 16 | None +inception_resnet_v2-same | None | PreAuxLogits | 2335 | 16 | None +inception_resnet_v2-same | None | Mixed_7a | 2399 | 32 | None +inception_resnet_v2-same | None | Conv2d_7b_1x1 | 3039 | 32 | None +inception_resnet_v2-same | 224 | Conv2d_1a_3x3 | 3 | 2 | 0 +inception_resnet_v2-same | 224 | Conv2d_2a_3x3 | 7 | 2 | 2 +inception_resnet_v2-same | 224 | Conv2d_2b_3x3 | 11 | 2 | 4 +inception_resnet_v2-same | 224 | MaxPool_3a_3x3 | 15 | 4 | 4 +inception_resnet_v2-same | 224 | Conv2d_3b_1x1 | 15 | 4 | 4 +inception_resnet_v2-same | 224 | Conv2d_4a_3x3 | 23 | 4 | 8 +inception_resnet_v2-same | 224 | MaxPool_5a_3x3 | 31 | 8 | 8 +inception_resnet_v2-same | 224 | Mixed_5b | 63 | 8 | 24 +inception_resnet_v2-same | 224 | Mixed_6a | 415 | 16 | 192 +inception_resnet_v2-same | 224 | PreAuxLogits | 2335 | 16 | 1152 +inception_resnet_v2-same | 224 | Mixed_7a | 2399 | 32 | 1168 +inception_resnet_v2-same | 224 | Conv2d_7b_1x1 | 3039 | 32 | 1488 +inception_resnet_v2-same | 321 | Conv2d_1a_3x3 | 3 | 2 | 1 +inception_resnet_v2-same | 321 | Conv2d_2a_3x3 | 7 | 2 | 3 +inception_resnet_v2-same | 321 | Conv2d_2b_3x3 | 11 | 2 | 5 +inception_resnet_v2-same | 321 | MaxPool_3a_3x3 | 15 | 4 | 7 +inception_resnet_v2-same | 321 | Conv2d_3b_1x1 | 15 | 4 | 7 +inception_resnet_v2-same | 321 | Conv2d_4a_3x3 | 23 | 4 | 11 +inception_resnet_v2-same | 321 | MaxPool_5a_3x3 | 31 | 8 | 15 
+inception_resnet_v2-same | 321 | Mixed_5b | 63 | 8 | 31 +inception_resnet_v2-same | 321 | Mixed_6a | 415 | 16 | 207 +inception_resnet_v2-same | 321 | PreAuxLogits | 2335 | 16 | 1167 +inception_resnet_v2-same | 321 | Mixed_7a | 2399 | 32 | 1199 +inception_resnet_v2-same | 321 | Conv2d_7b_1x1 | 3039 | 32 | 1519 +mobilenet_v1 | None | Conv2d_0 | 3 | 2 | None +mobilenet_v1 | None | Conv2d_1_pointwise | 7 | 2 | None +mobilenet_v1 | None | Conv2d_2_pointwise | 11 | 4 | None +mobilenet_v1 | None | Conv2d_3_pointwise | 19 | 4 | None +mobilenet_v1 | None | Conv2d_4_pointwise | 27 | 8 | None +mobilenet_v1 | None | Conv2d_5_pointwise | 43 | 8 | None +mobilenet_v1 | None | Conv2d_6_pointwise | 59 | 16 | None +mobilenet_v1 | None | Conv2d_7_pointwise | 91 | 16 | None +mobilenet_v1 | None | Conv2d_8_pointwise | 123 | 16 | None +mobilenet_v1 | None | Conv2d_9_pointwise | 155 | 16 | None +mobilenet_v1 | None | Conv2d_10_pointwise | 187 | 16 | None +mobilenet_v1 | None | Conv2d_11_pointwise | 219 | 16 | None +mobilenet_v1 | None | Conv2d_12_pointwise | 251 | 32 | None +mobilenet_v1 | None | Conv2d_13_pointwise | 315 | 32 | None +mobilenet_v1 | 224 | Conv2d_0 | 3 | 2 | 0 +mobilenet_v1 | 224 | Conv2d_1_pointwise | 7 | 2 | 2 +mobilenet_v1 | 224 | Conv2d_2_pointwise | 11 | 4 | 2 +mobilenet_v1 | 224 | Conv2d_3_pointwise | 19 | 4 | 6 +mobilenet_v1 | 224 | Conv2d_4_pointwise | 27 | 8 | 6 +mobilenet_v1 | 224 | Conv2d_5_pointwise | 43 | 8 | 14 +mobilenet_v1 | 224 | Conv2d_6_pointwise | 59 | 16 | 14 +mobilenet_v1 | 224 | Conv2d_7_pointwise | 91 | 16 | 30 +mobilenet_v1 | 224 | Conv2d_8_pointwise | 123 | 16 | 46 +mobilenet_v1 | 224 | Conv2d_9_pointwise | 155 | 16 | 62 +mobilenet_v1 | 224 | Conv2d_10_pointwise | 187 | 16 | 78 +mobilenet_v1 | 224 | Conv2d_11_pointwise | 219 | 16 | 94 +mobilenet_v1 | 224 | Conv2d_12_pointwise | 251 | 32 | 94 +mobilenet_v1 | 224 | Conv2d_13_pointwise | 315 | 32 | 126 +mobilenet_v1 | 321 | Conv2d_0 | 3 | 2 | 1 +mobilenet_v1 | 321 | Conv2d_1_pointwise | 7 | 2 | 3 
+mobilenet_v1 | 321 | Conv2d_2_pointwise | 11 | 4 | 5 +mobilenet_v1 | 321 | Conv2d_3_pointwise | 19 | 4 | 9 +mobilenet_v1 | 321 | Conv2d_4_pointwise | 27 | 8 | 13 +mobilenet_v1 | 321 | Conv2d_5_pointwise | 43 | 8 | 21 +mobilenet_v1 | 321 | Conv2d_6_pointwise | 59 | 16 | 29 +mobilenet_v1 | 321 | Conv2d_7_pointwise | 91 | 16 | 45 +mobilenet_v1 | 321 | Conv2d_8_pointwise | 123 | 16 | 61 +mobilenet_v1 | 321 | Conv2d_9_pointwise | 155 | 16 | 77 +mobilenet_v1 | 321 | Conv2d_10_pointwise | 187 | 16 | 93 +mobilenet_v1 | 321 | Conv2d_11_pointwise | 219 | 16 | 109 +mobilenet_v1 | 321 | Conv2d_12_pointwise | 251 | 32 | 125 +mobilenet_v1 | 321 | Conv2d_13_pointwise | 315 | 32 | 157 +mobilenet_v1_075 | None | Conv2d_0 | 3 | 2 | None +mobilenet_v1_075 | None | Conv2d_1_pointwise | 7 | 2 | None +mobilenet_v1_075 | None | Conv2d_2_pointwise | 11 | 4 | None +mobilenet_v1_075 | None | Conv2d_3_pointwise | 19 | 4 | None +mobilenet_v1_075 | None | Conv2d_4_pointwise | 27 | 8 | None +mobilenet_v1_075 | None | Conv2d_5_pointwise | 43 | 8 | None +mobilenet_v1_075 | None | Conv2d_6_pointwise | 59 | 16 | None +mobilenet_v1_075 | None | Conv2d_7_pointwise | 91 | 16 | None +mobilenet_v1_075 | None | Conv2d_8_pointwise | 123 | 16 | None +mobilenet_v1_075 | None | Conv2d_9_pointwise | 155 | 16 | None +mobilenet_v1_075 | None | Conv2d_10_pointwise | 187 | 16 | None +mobilenet_v1_075 | None | Conv2d_11_pointwise | 219 | 16 | None +mobilenet_v1_075 | None | Conv2d_12_pointwise | 251 | 32 | None +mobilenet_v1_075 | None | Conv2d_13_pointwise | 315 | 32 | None +mobilenet_v1_075 | 224 | Conv2d_0 | 3 | 2 | 0 +mobilenet_v1_075 | 224 | Conv2d_1_pointwise | 7 | 2 | 2 +mobilenet_v1_075 | 224 | Conv2d_2_pointwise | 11 | 4 | 2 +mobilenet_v1_075 | 224 | Conv2d_3_pointwise | 19 | 4 | 6 +mobilenet_v1_075 | 224 | Conv2d_4_pointwise | 27 | 8 | 6 +mobilenet_v1_075 | 224 | Conv2d_5_pointwise | 43 | 8 | 14 +mobilenet_v1_075 | 224 | Conv2d_6_pointwise | 59 | 16 | 14 +mobilenet_v1_075 | 224 | Conv2d_7_pointwise | 91 
| 16 | 30 +mobilenet_v1_075 | 224 | Conv2d_8_pointwise | 123 | 16 | 46 +mobilenet_v1_075 | 224 | Conv2d_9_pointwise | 155 | 16 | 62 +mobilenet_v1_075 | 224 | Conv2d_10_pointwise | 187 | 16 | 78 +mobilenet_v1_075 | 224 | Conv2d_11_pointwise | 219 | 16 | 94 +mobilenet_v1_075 | 224 | Conv2d_12_pointwise | 251 | 32 | 94 +mobilenet_v1_075 | 224 | Conv2d_13_pointwise | 315 | 32 | 126 +mobilenet_v1_075 | 321 | Conv2d_0 | 3 | 2 | 1 +mobilenet_v1_075 | 321 | Conv2d_1_pointwise | 7 | 2 | 3 +mobilenet_v1_075 | 321 | Conv2d_2_pointwise | 11 | 4 | 5 +mobilenet_v1_075 | 321 | Conv2d_3_pointwise | 19 | 4 | 9 +mobilenet_v1_075 | 321 | Conv2d_4_pointwise | 27 | 8 | 13 +mobilenet_v1_075 | 321 | Conv2d_5_pointwise | 43 | 8 | 21 +mobilenet_v1_075 | 321 | Conv2d_6_pointwise | 59 | 16 | 29 +mobilenet_v1_075 | 321 | Conv2d_7_pointwise | 91 | 16 | 45 +mobilenet_v1_075 | 321 | Conv2d_8_pointwise | 123 | 16 | 61 +mobilenet_v1_075 | 321 | Conv2d_9_pointwise | 155 | 16 | 77 +mobilenet_v1_075 | 321 | Conv2d_10_pointwise | 187 | 16 | 93 +mobilenet_v1_075 | 321 | Conv2d_11_pointwise | 219 | 16 | 109 +mobilenet_v1_075 | 321 | Conv2d_12_pointwise | 251 | 32 | 125 +mobilenet_v1_075 | 321 | Conv2d_13_pointwise | 315 | 32 | 157 +resnet_v1_50 | None | resnet_v1_50/block1 | 35 | 8 | None +resnet_v1_50 | None | resnet_v1_50/block2 | 99 | 16 | None +resnet_v1_50 | None | resnet_v1_50/block3 | 291 | 32 | None +resnet_v1_50 | None | resnet_v1_50/block4 | 483 | 32 | None +resnet_v1_50 | 224 | resnet_v1_50/block1 | 35 | 8 | 15 +resnet_v1_50 | 224 | resnet_v1_50/block2 | 99 | 16 | 47 +resnet_v1_50 | 224 | resnet_v1_50/block3 | 291 | 32 | 143 +resnet_v1_50 | 224 | resnet_v1_50/block4 | 483 | 32 | 239 +resnet_v1_50 | 321 | resnet_v1_50/block1 | 35 | 8 | 17 +resnet_v1_50 | 321 | resnet_v1_50/block2 | 99 | 16 | 49 +resnet_v1_50 | 321 | resnet_v1_50/block3 | 291 | 32 | 145 +resnet_v1_50 | 321 | resnet_v1_50/block4 | 483 | 32 | 241 +resnet_v1_101 | None | resnet_v1_101/block1 | 35 | 8 | None +resnet_v1_101 | None | 
resnet_v1_101/block2 | 99 | 16 | None +resnet_v1_101 | None | resnet_v1_101/block3 | 835 | 32 | None +resnet_v1_101 | None | resnet_v1_101/block4 | 1027 | 32 | None +resnet_v1_101 | 224 | resnet_v1_101/block1 | 35 | 8 | 15 +resnet_v1_101 | 224 | resnet_v1_101/block2 | 99 | 16 | 47 +resnet_v1_101 | 224 | resnet_v1_101/block3 | 835 | 32 | 415 +resnet_v1_101 | 224 | resnet_v1_101/block4 | 1027 | 32 | 511 +resnet_v1_101 | 321 | resnet_v1_101/block1 | 35 | 8 | 17 +resnet_v1_101 | 321 | resnet_v1_101/block2 | 99 | 16 | 49 +resnet_v1_101 | 321 | resnet_v1_101/block3 | 835 | 32 | 417 +resnet_v1_101 | 321 | resnet_v1_101/block4 | 1027 | 32 | 513 +resnet_v1_152 | None | resnet_v1_152/block1 | 35 | 8 | None +resnet_v1_152 | None | resnet_v1_152/block2 | 163 | 16 | None +resnet_v1_152 | None | resnet_v1_152/block3 | 1315 | 32 | None +resnet_v1_152 | None | resnet_v1_152/block4 | 1507 | 32 | None +resnet_v1_152 | 224 | resnet_v1_152/block1 | 35 | 8 | 15 +resnet_v1_152 | 224 | resnet_v1_152/block2 | 163 | 16 | 79 +resnet_v1_152 | 224 | resnet_v1_152/block3 | 1315 | 32 | 655 +resnet_v1_152 | 224 | resnet_v1_152/block4 | 1507 | 32 | 751 +resnet_v1_152 | 321 | resnet_v1_152/block1 | 35 | 8 | 17 +resnet_v1_152 | 321 | resnet_v1_152/block2 | 163 | 16 | 81 +resnet_v1_152 | 321 | resnet_v1_152/block3 | 1315 | 32 | 657 +resnet_v1_152 | 321 | resnet_v1_152/block4 | 1507 | 32 | 753 +resnet_v1_200 | None | resnet_v1_200/block1 | 35 | 8 | None +resnet_v1_200 | None | resnet_v1_200/block2 | 419 | 16 | None +resnet_v1_200 | None | resnet_v1_200/block3 | 1571 | 32 | None +resnet_v1_200 | None | resnet_v1_200/block4 | 1763 | 32 | None +resnet_v1_200 | 224 | resnet_v1_200/block1 | 35 | 8 | 15 +resnet_v1_200 | 224 | resnet_v1_200/block2 | 419 | 16 | 207 +resnet_v1_200 | 224 | resnet_v1_200/block3 | 1571 | 32 | 783 +resnet_v1_200 | 224 | resnet_v1_200/block4 | 1763 | 32 | 879 +resnet_v1_200 | 321 | resnet_v1_200/block1 | 35 | 8 | 17 +resnet_v1_200 | 321 | resnet_v1_200/block2 | 419 | 16 | 209 
+resnet_v1_200 | 321 | resnet_v1_200/block3 | 1571 | 32 | 785 +resnet_v1_200 | 321 | resnet_v1_200/block4 | 1763 | 32 | 881 +resnet_v2_50 | None | resnet_v2_50/block1 | 35 | 8 | None +resnet_v2_50 | None | resnet_v2_50/block2 | 99 | 16 | None +resnet_v2_50 | None | resnet_v2_50/block3 | 291 | 32 | None +resnet_v2_50 | None | resnet_v2_50/block4 | 483 | 32 | None +resnet_v2_50 | 224 | resnet_v2_50/block1 | 35 | 8 | 15 +resnet_v2_50 | 224 | resnet_v2_50/block2 | 99 | 16 | 47 +resnet_v2_50 | 224 | resnet_v2_50/block3 | 291 | 32 | 143 +resnet_v2_50 | 224 | resnet_v2_50/block4 | 483 | 32 | 239 +resnet_v2_50 | 321 | resnet_v2_50/block1 | 35 | 8 | 17 +resnet_v2_50 | 321 | resnet_v2_50/block2 | 99 | 16 | 49 +resnet_v2_50 | 321 | resnet_v2_50/block3 | 291 | 32 | 145 +resnet_v2_50 | 321 | resnet_v2_50/block4 | 483 | 32 | 241 +resnet_v2_101 | None | resnet_v2_101/block1 | 35 | 8 | None +resnet_v2_101 | None | resnet_v2_101/block2 | 99 | 16 | None +resnet_v2_101 | None | resnet_v2_101/block3 | 835 | 32 | None +resnet_v2_101 | None | resnet_v2_101/block4 | 1027 | 32 | None +resnet_v2_101 | 224 | resnet_v2_101/block1 | 35 | 8 | 15 +resnet_v2_101 | 224 | resnet_v2_101/block2 | 99 | 16 | 47 +resnet_v2_101 | 224 | resnet_v2_101/block3 | 835 | 32 | 415 +resnet_v2_101 | 224 | resnet_v2_101/block4 | 1027 | 32 | 511 +resnet_v2_101 | 321 | resnet_v2_101/block1 | 35 | 8 | 17 +resnet_v2_101 | 321 | resnet_v2_101/block2 | 99 | 16 | 49 +resnet_v2_101 | 321 | resnet_v2_101/block3 | 835 | 32 | 417 +resnet_v2_101 | 321 | resnet_v2_101/block4 | 1027 | 32 | 513 +resnet_v2_152 | None | resnet_v2_152/block1 | 35 | 8 | None +resnet_v2_152 | None | resnet_v2_152/block2 | 163 | 16 | None +resnet_v2_152 | None | resnet_v2_152/block3 | 1315 | 32 | None +resnet_v2_152 | None | resnet_v2_152/block4 | 1507 | 32 | None +resnet_v2_152 | 224 | resnet_v2_152/block1 | 35 | 8 | 15 +resnet_v2_152 | 224 | resnet_v2_152/block2 | 163 | 16 | 79 +resnet_v2_152 | 224 | resnet_v2_152/block3 | 1315 | 32 | 655 
+resnet_v2_152 | 224 | resnet_v2_152/block4 | 1507 | 32 | 751 +resnet_v2_152 | 321 | resnet_v2_152/block1 | 35 | 8 | 17 +resnet_v2_152 | 321 | resnet_v2_152/block2 | 163 | 16 | 81 +resnet_v2_152 | 321 | resnet_v2_152/block3 | 1315 | 32 | 657 +resnet_v2_152 | 321 | resnet_v2_152/block4 | 1507 | 32 | 753 +resnet_v2_200 | None | resnet_v2_200/block1 | 35 | 8 | None +resnet_v2_200 | None | resnet_v2_200/block2 | 419 | 16 | None +resnet_v2_200 | None | resnet_v2_200/block3 | 1571 | 32 | None +resnet_v2_200 | None | resnet_v2_200/block4 | 1763 | 32 | None +resnet_v2_200 | 224 | resnet_v2_200/block1 | 35 | 8 | 15 +resnet_v2_200 | 224 | resnet_v2_200/block2 | 419 | 16 | 207 +resnet_v2_200 | 224 | resnet_v2_200/block3 | 1571 | 32 | 783 +resnet_v2_200 | 224 | resnet_v2_200/block4 | 1763 | 32 | 879 +resnet_v2_200 | 321 | resnet_v2_200/block1 | 35 | 8 | 17 +resnet_v2_200 | 321 | resnet_v2_200/block2 | 419 | 16 | 209 +resnet_v2_200 | 321 | resnet_v2_200/block3 | 1571 | 32 | 785 +resnet_v2_200 | 321 | resnet_v2_200/block4 | 1763 | 32 | 881 + +## FAQ + +### What does a resolution of 'None' mean? + +In this case, the input resolution is undefined. For most models, the receptive +field parameters can be computed even without knowing the input resolution. + +### For some networks, effective_padding shows as 'None' (eg, for Inception_v2 or Mobilenet_v1 when input size is not specified). Why is that? + +This means that the padding for these networks depends on the input size. So, +unless we know exactly the input image dimensionality to be used, it is not +possible to determine the padding applied at the different layers. Look at the +other entries where the input size is fixed; for those cases, effective_padding +is not None. + +This happens due to Tensorflow's implementation of the 'SAME' padding mode, +which may depend on the input feature map size to a given layer. 
For background +on this, see [these notes from the TF +documentation](https://www.tensorflow.org/versions/master/api_guides/python/nn#Notes_on_SAME_Convolution_Padding). + +Also, note that in this case the program is not able to check if the network is +aligned (ie, it could be that the different paths from input to output have +receptive fields which are not consistently centered at the same position in the +input image). + +So you should be aware that such networks might not be aligned -- the program +has no way of checking it when the padding cannot be determined. + +### The receptive field parameters for network X seem different from what I expected... maybe your calculation is incorrect? + +First, note that the results presented here are based on the tensorflow +implementations from the [TF-Slim model +library](https://github.com/tensorflow/models/tree/master/research/slim). + +So, it is possible that due to some implementation details the RF parameters are +different. + +One common case of confusion is the TF-Slim Resnet implementation, which applies +stride in the last residual unit of each block, instead of at the input +activations in the first residual unit of each block (which is what is described +in the Resnet paper) -- see [this +comment](https://github.com/tensorflow/models/blob/master/research/slim/nets/resnet_utils.py#L30). +This makes the stride with respect to each convolution block potentially +different. In this case, though, note that a +[flag](https://github.com/tensorflow/models/blob/master/research/slim/nets/resnet_v1.py#L150) +may be used to recover the original striding convention. + +Second, it could be that we have a bug somewhere. While we include [many +tests](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py) +in our library, it is always possible that we missed something. 
If you suspect +this is happening, please file a GitHub issue +[here](https://github.com/tensorflow/tensorflow/issues). diff --git a/tensorflow/contrib/receptive_field/python/util/examples/csv_to_markdown_table.py b/tensorflow/contrib/receptive_field/python/util/examples/csv_to_markdown_table.py new file mode 100644 index 0000000000..4495d74bbf --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/csv_to_markdown_table.py @@ -0,0 +1,82 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Simple script to convert CSV output from rf_benchmark to Markdown format. + +The input CSV should have the following fields: +- CNN +- input resolution +- end_point +- RF size hor +- RF size ver +- effective stride hor +- effective stride ver +- effective padding hor +- effective padding ver + +Since usually in all cases the parameters in the horizontal and vertical +directions are the same, this is assumed by this script, which only prints one +of them to the Markdown file. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import csv +import sys + +from tensorflow.python.platform import app + +cmd_args = None + + +def main(unused_argv): + with open(cmd_args.markdown_path, 'w') as f: + # Write table header and field size. 
+ f.write('CNN | resolution | end-point | RF | effective stride | ' + 'effective padding|\n') + f.write( + ':--------------------: | :----------: | :---------------: | :-----: |' + ' :----: | :----:|\n') + with open(cmd_args.csv_path) as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + # Make sure horizontal and parameters are the same. + assert row['RF size hor'] == row['RF size ver'] + assert row['effective stride hor'] == row['effective stride ver'] + assert row['effective padding hor'] == row['effective padding ver'] + + f.write('%s|%s|%s|%s|%s|%s\n' % + (row['CNN'], row['input resolution'], row['end_point'], + row['RF size hor'], row['effective stride hor'], + row['effective padding hor'])) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--csv_path', + type=str, + default='/tmp/rf.csv', + help='Path where CSV output of rf_benchmark was saved.') + parser.add_argument( + '--markdown_path', + type=str, + default='/tmp/rf.md', + help='Path where Markdown output will be saved.') + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) -- GitLab From 72f6b4d93059086c453d344103c3bfe308a4e90d Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 5 Jun 2018 09:18:14 -0700 Subject: [PATCH 529/902] Delete "RuntimeWarning" it is not having the intended effect. These `RuntimeWarning` are being interpreted as arguments to the string formatting, raising "TypeError: not all arguments converted during string formatting" errors. 
PiperOrigin-RevId: 199307228 --- tensorflow/python/keras/callbacks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 36782728e8..8061d47295 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -424,7 +424,7 @@ class ModelCheckpoint(Callback): if mode not in ['auto', 'min', 'max']: logging.warning('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode.', (mode), RuntimeWarning) + 'fallback to auto mode.', mode) mode = 'auto' if mode == 'min': @@ -451,7 +451,7 @@ class ModelCheckpoint(Callback): current = logs.get(self.monitor) if current is None: logging.warning('Can save best model only with %s available, ' - 'skipping.', self.monitor, RuntimeWarning) + 'skipping.', self.monitor) else: if self.monitor_op(current, self.best): if self.verbose > 0: @@ -515,7 +515,7 @@ class EarlyStopping(Callback): if mode not in ['auto', 'min', 'max']: logging.warning('EarlyStopping mode %s is unknown, ' - 'fallback to auto mode.', mode, RuntimeWarning) + 'fallback to auto mode.', mode) mode = 'auto' if mode == 'min': @@ -544,7 +544,7 @@ class EarlyStopping(Callback): if current is None: logging.warning('Early stopping conditioned on metric `%s` ' 'which is not available. 
Available metrics are: %s', - self.monitor, ','.join(list(logs.keys())), RuntimeWarning) + self.monitor, ','.join(list(logs.keys()))) return if self.monitor_op(current - self.min_delta, self.best): self.best = current @@ -898,7 +898,7 @@ class ReduceLROnPlateau(Callback): """ if self.mode not in ['auto', 'min', 'max']: logging.warning('Learning Rate Plateau Reducing mode %s is unknown, ' - 'fallback to auto mode.', self.mode, RuntimeWarning) + 'fallback to auto mode.', self.mode) self.mode = 'auto' if (self.mode == 'min' or (self.mode == 'auto' and 'acc' not in self.monitor)): @@ -920,7 +920,7 @@ class ReduceLROnPlateau(Callback): if current is None: logging.warning('Reduce LR on plateau conditioned on metric `%s` ' 'which is not available. Available metrics are: %s', - self.monitor, ','.join(list(logs.keys())), RuntimeWarning) + self.monitor, ','.join(list(logs.keys()))) else: if self.in_cooldown(): -- GitLab From 16a4b1e09f45eb329bdfc9811a3ea84571c6380e Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 5 Jun 2018 09:25:57 -0700 Subject: [PATCH 530/902] Automated g4 rollback of changelist 199244092 PiperOrigin-RevId: 199308328 --- .../xla/service/algebraic_simplifier_test.cc | 47 ++++++++++--------- .../xla/tests/hlo_verified_test_base.cc | 20 +++----- .../xla/tests/hlo_verified_test_base.h | 16 +------ 3 files changed, 32 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 27eb48181e..cda157f9fa 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1714,7 +1714,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopPad) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); 
EXPECT_THAT(computation->root_instruction(), param); } @@ -1759,7 +1759,7 @@ TEST_F(AlgebraicSimplifierTest, NegativePadding) { EXPECT_THAT(computation->root_instruction(), op::Pad(param, zero)); EXPECT_TRUE(has_negative_padding(pad)); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Slice(op::Pad(param, zero))); EXPECT_FALSE( @@ -1781,7 +1781,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1804,7 +1804,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1932,8 +1932,7 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { b.AddInstruction(HloInstruction::CreateConvolve(out_shape, input, filter, window, dnums)); - // TODO(b/80488902): verify this module. 
- auto module = HloTestBase::CreateNewModule(); + auto module = CreateNewModule(); auto* computation = module->AddEntryComputation(b.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/true, @@ -2061,7 +2060,7 @@ TEST_F(AlgebraicSimplifierTest, MaxMinToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2091,7 +2090,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2122,7 +2121,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxWithBroadcastToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2152,7 +2151,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Maximum(param0, max_value), min_value)); @@ -2185,7 +2184,7 @@ TEST_F(AlgebraicSimplifierTest, MinEquationWithMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); 
EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Add(op::Maximum(param0, max_value), max_value), @@ -2201,8 +2200,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { HloInstruction::CreateParameter(0, r0f32, "scalar_param")); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6, 7}); - HloInstruction* broadcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(broadcast_shape, scalar_param, {})); + HloInstruction* broadcast = + builder.AddInstruction(HloInstruction::CreateBroadcast( + broadcast_shape, scalar_param, + AsInt64Slice(broadcast_shape.dimensions()))); Shape slice_shape = ShapeUtil::MakeShape(F32, {2, 2, 3, 3}); HloInstruction* slice = builder.AddInstruction(HloInstruction::CreateSlice( @@ -2218,10 +2219,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); // Running simplification again should not result in any further changes. 
- ASSERT_FALSE(simplifier.Run(module).ValueOrDie()); + ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(scalar_param)); @@ -2236,8 +2237,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { HloInstruction::CreateConstant(Literal::CreateR0(42.0f))); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6}); - HloInstruction* broadcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(broadcast_shape, forty_two, {})); + HloInstruction* broadcast = + builder.AddInstruction(HloInstruction::CreateBroadcast( + broadcast_shape, forty_two, + AsInt64Slice(broadcast_shape.dimensions()))); HloInstruction* transpose = builder.AddInstruction(HloInstruction::CreateTranspose( @@ -2256,7 +2259,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(forty_two)); @@ -2265,8 +2268,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { // Test that ReduceWindow(Pad(op, x), y) can simplify to ReduceWindow(op, x). TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { - // TODO(b/80488902): verify this module. - auto module = HloTestBase::CreateNewModule(); + auto module = CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2347,8 +2349,7 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { // Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to // ReduceWindow(Convert(op), x). TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) { - // TODO(b/80488902): verify this module. 
- auto module = HloTestBase::CreateNewModule(); + auto module = CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2443,7 +2444,7 @@ TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); HloInstruction* root = computation->root_instruction(); EXPECT_EQ(a, root); diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index 22c664d142..c8a05c2e9e 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -41,17 +41,14 @@ void HloVerifiedTestBase::TearDown() { << "TearDown called more than once; it should be called exactly once."; tear_down_called_ = true; if (module_) { - VerifyModule(module_.get()); - } - for (int i = 0; i < modules_.size(); ++i) { - VerifyModule(modules_.at(i).get()); + VerifyModule(); } HloTestBase::TearDown(); } -void HloVerifiedTestBase::VerifyModule(HloModule* module) { - HloVerifier verifier(/*allow_mixed_precision=*/true); - xla::StatusOr mutated = verifier.Run(module); +void HloVerifiedTestBase::VerifyModule() { + HloVerifier verifier; + xla::StatusOr mutated = verifier.Run(module_.get()); if (!mutated.ok()) { ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); } else { @@ -62,20 +59,15 @@ void HloVerifiedTestBase::VerifyModule(HloModule* module) { HloModule& HloVerifiedTestBase::module() { if (!module_) { - module_ = HloTestBase::CreateNewModule(); + module_ = CreateNewModule(); } return *module_; } -HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) { - modules_.emplace_back(HloTestBase::CreateNewModule()); - return modules_.back().get(); -} - void HloVerifiedTestBase::ParseAndVerifyModule( 
tensorflow::StringPiece hlo_text) { CHECK(!module_) << "Called ParseModule when test already has a module."; TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); - VerifyModule(module_.get()); + VerifyModule(); } } // namespace xla diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h index 5b59cc77f6..e5bb14a883 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -52,23 +52,11 @@ class HloVerifiedTestBase : public HloTestBase { shape_verifier_ = std::move(shape_verifier); } - // Creates a new module for a test, and stores it in modules_ so it can be - // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent - // creation of unverified modules. - HloModule* CreateNewModule(const string& name = TestName()); - - // It is confusing to store modules created by module() and CreateNewModule() - // in different fields, but it allows us to migrate tests to - // HloVerifiedTestBase more easily, so it's a win because we can verify more - // modules. See b/80488902. private: - // Lazily populated. Access via module(). - std::unique_ptr module_; - // Populated by calls to CreateNewModule. - std::vector> modules_; + std::unique_ptr module_; // Lazily populated. Access via module(). std::unique_ptr shape_verifier_; bool tear_down_called_ = false; - static void VerifyModule(HloModule* module); + void VerifyModule(); }; } // namespace xla -- GitLab From ad1fc6b020e08c7a1092bfb85a175a3c5ddf4405 Mon Sep 17 00:00:00 2001 From: Christopher Suter Date: Tue, 5 Jun 2018 09:26:45 -0700 Subject: [PATCH 531/902] Eliminate nested try/catch's in Distribution._call_prob and friends. These nested try/catches have the unintended effect of hiding any downstream NotImplementedErrors and replacing them with an earlier exception. 
PiperOrigin-RevId: 199308457 --- .../python/ops/distributions/distribution.py | 61 ++++++------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index 0db4749507..41dcd40188 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -722,11 +722,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._log_prob(value, **kwargs) - except NotImplementedError as original_exception: - try: - return math_ops.log(self._prob(value, **kwargs)) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.log(self._prob(value, **kwargs)) def log_prob(self, value, name="log_prob"): """Log probability density/mass function. @@ -749,11 +746,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._prob(value, **kwargs) - except NotImplementedError as original_exception: - try: - return math_ops.exp(self._log_prob(value, **kwargs)) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.exp(self._log_prob(value, **kwargs)) def prob(self, value, name="prob"): """Probability density/mass function. @@ -776,11 +770,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._log_cdf(value, **kwargs) - except NotImplementedError as original_exception: - try: - return math_ops.log(self._cdf(value, **kwargs)) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.log(self._cdf(value, **kwargs)) def log_cdf(self, value, name="log_cdf"): """Log cumulative distribution function. 
@@ -813,11 +804,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._cdf(value, **kwargs) - except NotImplementedError as original_exception: - try: - return math_ops.exp(self._log_cdf(value, **kwargs)) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.exp(self._log_cdf(value, **kwargs)) def cdf(self, value, name="cdf"): """Cumulative distribution function. @@ -846,11 +834,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._log_survival_function(value, **kwargs) - except NotImplementedError as original_exception: - try: - return math_ops.log1p(-self.cdf(value, **kwargs)) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.log1p(-self.cdf(value, **kwargs)) def log_survival_function(self, value, name="log_survival_function"): """Log survival function. @@ -884,11 +869,8 @@ class Distribution(_BaseDistribution): value = ops.convert_to_tensor(value, name="value") try: return self._survival_function(value, **kwargs) - except NotImplementedError as original_exception: - try: - return 1. - self.cdf(value, **kwargs) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return 1. - self.cdf(value, **kwargs) def survival_function(self, value, name="survival_function"): """Survival function. @@ -933,10 +915,7 @@ class Distribution(_BaseDistribution): def _call_quantile(self, value, name, **kwargs): with self._name_scope(name, values=[value]): value = ops.convert_to_tensor(value, name="value") - try: - return self._quantile(value, **kwargs) - except NotImplementedError as original_exception: - raise original_exception + return self._quantile(value, **kwargs) def quantile(self, value, name="quantile"): """Quantile function. Aka "inverse cdf" or "percent point function". 
@@ -982,11 +961,8 @@ class Distribution(_BaseDistribution): with self._name_scope(name): try: return self._variance() - except NotImplementedError as original_exception: - try: - return math_ops.square(self._stddev()) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.square(self._stddev()) def _stddev(self): raise NotImplementedError("stddev is not implemented") @@ -1014,11 +990,8 @@ class Distribution(_BaseDistribution): with self._name_scope(name): try: return self._stddev() - except NotImplementedError as original_exception: - try: - return math_ops.sqrt(self._variance()) - except NotImplementedError: - raise original_exception + except NotImplementedError: + return math_ops.sqrt(self._variance()) def _covariance(self): raise NotImplementedError("covariance is not implemented") -- GitLab From b8b93f363bbefb02e5a79757f1271e0086468261 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 5 Jun 2018 09:38:46 -0700 Subject: [PATCH 532/902] Edit error message to make it clear which yaml module you need. PiperOrigin-RevId: 199310214 --- tensorflow/python/keras/engine/network.py | 3 ++- tensorflow/python/keras/engine/saving.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index d43aba6875..c096669a5f 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -1457,7 +1457,8 @@ class Network(base_layer.Layer): ImportError: if yaml module is not found. 
""" if yaml is None: - raise ImportError('Requires yaml module installed.') + raise ImportError( + 'Requires yaml module installed (`pip install pyyaml`).') return yaml.dump(self._updated_config(), **kwargs) def summary(self, line_length=None, positions=None, print_fn=None): diff --git a/tensorflow/python/keras/engine/saving.py b/tensorflow/python/keras/engine/saving.py index 99ce64a469..40b693efde 100644 --- a/tensorflow/python/keras/engine/saving.py +++ b/tensorflow/python/keras/engine/saving.py @@ -323,7 +323,7 @@ def model_from_yaml(yaml_string, custom_objects=None): ImportError: if yaml module is not found. """ if yaml is None: - raise ImportError('Requires yaml module installed.') + raise ImportError('Requires yaml module installed (`pip install pyyaml`).') config = yaml.load(yaml_string) from tensorflow.python.keras.layers import deserialize # pylint: disable=g-import-not-at-top return deserialize(config, custom_objects=custom_objects) -- GitLab From 8c9afdf9c6c2e8139e2a0526bc41d5220be3b164 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 09:45:40 -0700 Subject: [PATCH 533/902] Fix docstring formatting. PiperOrigin-RevId: 199311231 --- tensorflow/python/estimator/training.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 522662cd32..fb6a68b4f7 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -295,6 +295,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): model will be trained with three epochs of training data instead of one epoch. Example of local (non-distributed) training: + ```python # Set up feature columns. categorial_feature_a = categorial_column_with_hash_bucket(...) @@ -339,12 +340,14 @@ def train_and_evaluate(estimator, train_spec, eval_spec): Setting environment variable depends on the platform. 
For example, on Linux, it can be done as follows (`$` is the shell prompt): + ``` $ TF_CONFIG='' python train_model.py ``` For the content in `TF_CONFIG`, assume that the training cluster spec looks like: + ``` cluster = {"chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222", "host3:2222"], @@ -352,6 +355,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): ``` Example of `TF_CONFIG` for chief training worker (must have one and only one): + ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. @@ -371,6 +375,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): Example of `TF_CONFIG` for non-chief training worker (optional, could be multiple): + ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. @@ -387,6 +392,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): for non-chief training workers. Example of `TF_CONFIG` for parameter server, aka ps (could be multiple): + ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. @@ -405,6 +411,7 @@ def train_and_evaluate(estimator, train_spec, eval_spec): Example of `TF_CONFIG` for evaluator task. Evaluator is a special task that is not part of the training cluster. There could be only one. It is used for model evaluation. + ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. -- GitLab From c8090fa6acac1f9724671407964662137911921f Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Tue, 5 Jun 2018 10:19:49 -0700 Subject: [PATCH 534/902] Internal change. 
PiperOrigin-RevId: 199316885 --- .../lite/tools/benchmark/command_line_flags.cc | 2 +- .../lite/tools/benchmark/command_line_flags_test.cc | 13 +++++++++++++ tensorflow/core/BUILD | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc index 723bf67e03..8195fc44be 100644 --- a/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc +++ b/tensorflow/contrib/lite/tools/benchmark/command_line_flags.cc @@ -35,7 +35,7 @@ bool ParseFlag(const std::string& arg, const std::string& flag, if (arg.find(flag_prefix) != 0) { return false; } - bool has_value = (arg.size() >= flag_prefix.size() + 1); + bool has_value = arg.size() >= flag_prefix.size(); *value_parsing_ok = has_value; if (has_value) { *value_parsing_ok = parse_func(arg.substr(flag_prefix.size())); diff --git a/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc b/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc index 74cf59105b..9a931d5ddd 100644 --- a/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc +++ b/tensorflow/contrib/lite/tools/benchmark/command_line_flags_test.cc @@ -53,6 +53,19 @@ TEST(CommandLineFlagsTest, BasicUsage) { EXPECT_EQ(argc, 1); } +TEST(CommandLineFlagsTest, EmptyStringFlag) { + int argc = 2; + std::string some_string = "invalid"; + const char* argv_strings[] = {"program_name", "--some_string="}; + bool parsed_ok = + Flags::Parse(&argc, reinterpret_cast(argv_strings), + {Flag("some_string", &some_string, "some string")}); + + EXPECT_EQ(true, parsed_ok); + EXPECT_EQ(some_string, ""); + EXPECT_EQ(argc, 1); +} + TEST(CommandLineFlagsTest, BadIntValue) { int some_int = 10; int argc = 2; diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 6bde2a0a4a..f5cc6ef2a1 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1439,6 +1439,7 @@ filegroup( "lib/png/**/*", "lib/gif/**/*", 
"util/events_writer.*", + "util/stats_calculator.*", "util/reporter.*", "platform/**/cuda_libdevice_path.*", "platform/default/test_benchmark.*", @@ -1522,6 +1523,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":protos_all_cc_impl", + ":stats_calculator_portable", "//third_party/eigen3", "@double_conversion//:double-conversion", "@nsync//:nsync_cpp", -- GitLab From 13b3439fffad7057755dc88802064cbe4eec7bfa Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 5 Jun 2018 10:28:38 -0700 Subject: [PATCH 535/902] Change order of installations. --- tensorflow/tools/ci_build/install/install_pip_packages.sh | 7 ++++--- .../ci_build/install/install_python3.5_pip_packages.sh | 4 +++- .../ci_build/install/install_python3.6_pip_packages.sh | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index bd6c50bce9..dba2dfc490 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -21,9 +21,6 @@ set -e easy_install -U pip==9.0.3 easy_install3 -U pip==9.0.3 -pip2 install --upgrade setuptools==39.1.0 -pip3 install --upgrade setuptools==39.1.0 - # Install pip packages from whl files to avoid the time-consuming process of # building from source. @@ -57,6 +54,10 @@ pip3 install --upgrade markdown==2.6.8 pip2 install --upgrade protobuf==3.3.0 pip3 install --upgrade protobuf==3.3.0 +# Install last working version of setuptools. +pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 + # Remove obsolete version of six, which can sometimes confuse virtualenv. 
rm -rf /usr/lib/python3/dist-packages/six* diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index 0844c48980..e1978cd7d8 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -39,7 +39,6 @@ if [[ -z $pip35_version ]]; then fi set -e -pip3.5 install --upgrade setuptools==39.1.0 pip3.5 install --upgrade pip pip3.5 install --upgrade virtualenv @@ -51,6 +50,9 @@ pip3.5 install --upgrade six==1.10.0 # Install protobuf. pip3.5 install --upgrade protobuf==3.3.0 +# Install last working version of setuptools. +pip3.5 install --upgrade setuptools==39.1.0 + # Remove obsolete version of six, which can sometimes confuse virtualenv. rm -rf /usr/lib/python3/dist-packages/six* diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index fb183b0e4f..0ffb8e67a4 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -49,7 +49,6 @@ cd Python-3.6.1 make altinstall ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 -pip3 install --upgrade setuptools==39.1.0 pip3 install --upgrade pip pip3 install --upgrade virtualenv @@ -63,6 +62,9 @@ pip3 install --upgrade six==1.10.0 # Install protobuf. pip3 install --upgrade protobuf==3.3.0 +# Install last working version of setuptools. +pip3 install --upgrade setuptools==39.1.0 + # Remove obsolete version of six, which can sometimes confuse virtualenv. rm -rf /usr/lib/python3/dist-packages/six* -- GitLab From 23825b76e508ac3c110d295b63e4e07f2cebbcf8 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 5 Jun 2018 10:31:47 -0700 Subject: [PATCH 536/902] Making setuptools the last install to ensure it's accurate. 
--- tensorflow/tools/ci_build/install/install_pip_packages.sh | 8 ++++---- .../ci_build/install/install_python3.5_pip_packages.sh | 6 +++--- .../ci_build/install/install_python3.6_pip_packages.sh | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index dba2dfc490..b3d3f23ec8 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -54,10 +54,6 @@ pip3 install --upgrade markdown==2.6.8 pip2 install --upgrade protobuf==3.3.0 pip3 install --upgrade protobuf==3.3.0 -# Install last working version of setuptools. -pip2 install --upgrade setuptools==39.1.0 -pip3 install --upgrade setuptools==39.1.0 - # Remove obsolete version of six, which can sometimes confuse virtualenv. rm -rf /usr/lib/python3/dist-packages/six* @@ -113,3 +109,7 @@ pip2 install --upgrade gast pip3 install --upgrade gast pip2 install --upgrade termcolor pip3 install --upgrade termcolor + +# Install last working version of setuptools. +pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index e1978cd7d8..61d34c7304 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -50,9 +50,6 @@ pip3.5 install --upgrade six==1.10.0 # Install protobuf. pip3.5 install --upgrade protobuf==3.3.0 -# Install last working version of setuptools. -pip3.5 install --upgrade setuptools==39.1.0 - # Remove obsolete version of six, which can sometimes confuse virtualenv. 
rm -rf /usr/lib/python3/dist-packages/six* @@ -84,4 +81,7 @@ pip3.5 install --upgrade astor pip3.5 install --upgrade gast pip3.5 install --upgrade termcolor +# Install last working version of setuptools. +pip3.5 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 0ffb8e67a4..fe2d2cf11c 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -62,9 +62,6 @@ pip3 install --upgrade six==1.10.0 # Install protobuf. pip3 install --upgrade protobuf==3.3.0 -# Install last working version of setuptools. -pip3 install --upgrade setuptools==39.1.0 - # Remove obsolete version of six, which can sometimes confuse virtualenv. rm -rf /usr/lib/python3/dist-packages/six* @@ -100,4 +97,7 @@ pip3 install --upgrade astor pip3 install --upgrade gast pip3 install --upgrade termcolor +# Install last working version of setuptools. +pip3 install --upgrade setuptools==39.1.0 + # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh) -- GitLab From a7c026e08864417b35dbe3c9e4b246725ad6ba59 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 5 Jun 2018 10:36:12 -0700 Subject: [PATCH 537/902] Respect name scopes opened in tower mode when creating vars in cross tower mode. 
PiperOrigin-RevId: 199319758 --- .../distribute/python/mirrored_strategy.py | 35 +++++++--- .../python/mirrored_strategy_multigpu_test.py | 68 +++++++++++++++++++ 2 files changed, 93 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 6eadba976b..cef0a2907b 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -118,7 +118,10 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): if i > 0: # Give replicas meaningful distinct names: var0name = index[devices[0]].name.split(":")[0] - kwargs["name"] = "%s/replica_%d" % (var0name, i) + # We append a / to variable names created on towers with id > 0 to + # ensure that we ignore the name scope and instead use the given + # name as the absolute name of the variable. + kwargs["name"] = "%s/replica_%d/" % (var0name, i) # Initialize replicas with the same value: if context.executing_eagerly(): kwargs["initial_value"] = array_ops.identity( @@ -258,8 +261,15 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): {t.device: t.merge_args for t in threads}) merge_kwargs = values.regroup( {t.device: t.merge_kwargs for t in threads}) - merge_result = threads[0].merge_fn( - self, *merge_args, **merge_kwargs) + # We capture the name_scope of the MTT when we call merge_fn + # to ensure that if we have opened a name scope in the MTT, + # it will be respected when executing the merge function. We only + # capture the name_scope from the first MTT and assume it is + # the same for all other MTTs. 
+ mtt_captured_name_scope = threads[0].captured_name_scope + with ops.name_scope(mtt_captured_name_scope): + merge_result = threads[0].merge_fn( + self, *merge_args, **merge_kwargs) for t in threads: t.merge_result = values.select_device(t.device, merge_result) finally: @@ -428,6 +438,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): self.merge_args = None self.merge_kwargs = None self.merge_result = None + self.captured_name_scope = None # We use a thread.Event for the main thread to signal when this # thread should start running (`should_run`), and another for # this thread to transfer control back to the main thread @@ -451,13 +462,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): self._variable_creator_stack = self.graph._variable_creator_stack[:] self._captured_var_scope = variable_scope.get_variable_scope() # Adding a "/" at end lets us re-enter this scope later. - self._captured_name_scope = self.graph.get_name_scope() - if self._captured_name_scope: - self._captured_name_scope += "/" + self._name_scope = self.graph.get_name_scope() + if self._name_scope: + self._name_scope += "/" if self.tower_id > 0: - if not self._captured_name_scope: - self._captured_name_scope = "" - self._captured_name_scope += "tower_%d/" % self.tower_id + if not self._name_scope: + self._name_scope = "" + self._name_scope += "tower_%d/" % self.tower_id def run(self): # pylint: disable=protected-access @@ -473,7 +484,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): _enter_graph(self.graph), \ MirroredTowerContext(self.distribution, self.tower_id), \ ops.device(self.device), \ - ops.name_scope(self._captured_name_scope), \ + ops.name_scope(self._name_scope), \ variable_scope.variable_scope( self._captured_var_scope, reuse=self.tower_id > 0), \ variable_scope.variable_creator_scope(self.variable_creator_fn): @@ -499,6 +510,10 @@ class MirroredTowerContext(distribute_lib.TowerContext): t.merge_fn = fn t.merge_args = args t.merge_kwargs = 
kwargs + t.captured_name_scope = t.graph.get_name_scope() + # Adding a "/" at end lets us re-enter this scope later. + if t.captured_name_scope: + t.captured_name_scope += "/" t.has_paused.set() t.should_run.wait() t.should_run.clear() diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index 3f9a02b249..bccd278847 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -438,6 +438,74 @@ class MirroredStrategyVariableCreationTest(test.TestCase): self.assertEquals("foo/" + name + ":0", v0.name) self.assertEquals("tower_1/foo/" + name + ":0", v1.name) + # variable_scope.variable() respects name scopes when creating + # variables. On the other hand variable_scope.get_variable() ignores name + # scopes when creating variables. We test both methods of creating variables + # to make sure that we have the same variable names in both cases. 
+ def testNameScopeWithVariable(self): + def in_cross_tower(_): + c = variable_scope.variable(1.0, name="c") + return c + + def model_fn(): + b = variable_scope.variable(1.0, name="b") + with ops.name_scope("foo"): + c = distribute_lib.get_tower_context().merge_call(in_cross_tower) + return b, c + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with context.graph_mode(), dist.scope(): + with ops.name_scope("main"): + a = variable_scope.variable(1.0, name="a") + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + result_b = result[0] + result_c = result[1] + self.assertIsInstance(result_b, values.DistributedValues) + self.assertIsInstance(result_c, values.DistributedValues) + a0, a1 = dist.unwrap(a) + b0, b1 = dist.unwrap(result_b) + c0, c1 = dist.unwrap(result_c) + self.assertEquals("main/a:0", a0.name) + self.assertEquals("main/a/replica_1:0", a1.name) + self.assertEquals("main/b:0", b0.name) + self.assertEquals("main/b/replica_1:0", b1.name) + self.assertEquals("main/foo/c:0", c0.name) + self.assertEquals("main/foo/c/replica_1:0", c1.name) + + def testNameScopeWithGetVariable(self): + def in_cross_tower(_): + c = variable_scope.get_variable("c", [1]) + return c + + def model_fn(): + b = variable_scope.get_variable("b", [1]) + with ops.name_scope("foo"): + c = distribute_lib.get_tower_context().merge_call(in_cross_tower) + return b, c + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with context.graph_mode(), dist.scope(): + with ops.name_scope("main"): + a = variable_scope.get_variable("a", [1]) + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + result_b = result[0] + result_c = result[1] + self.assertIsInstance(result_b, values.DistributedValues) + self.assertIsInstance(result_c, values.DistributedValues) + a0, a1 = dist.unwrap(a) + b0, b1 = dist.unwrap(result_b) + c0, c1 = dist.unwrap(result_c) + self.assertEquals("a:0", a0.name) + 
self.assertEquals("a/replica_1:0", a1.name) + self.assertEquals("b:0", b0.name) + self.assertEquals("b/replica_1:0", b1.name) + self.assertEquals("c:0", c0.name) + self.assertEquals("c/replica_1:0", c1.name) + def testDynamicRnnVariables(self): def model_fn(): inputs = constant_op.constant(2 * [2 * [[0.0, 1.0, 2.0, 3.0, 4.0]]]) -- GitLab From b2e56707ecbc6dc4b130a50424f5b85956f58720 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 10:43:07 -0700 Subject: [PATCH 538/902] Do not enable tensor ops for cuDNN RNN unless explicitly specified. PiperOrigin-RevId: 199321021 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 55c1083a61..f6564df0d0 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -1031,7 +1031,15 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor { rnn_mode, direction_mode, num_layers)); #if CUDNN_VERSION >= 7000 - if (RnnTensorOpMathEnabled()) { + // Require explicit algorithm config to enable tensor cores. Some configs + // return CUDNN_NOT_SUPPORTED when tensor ops are enabled (which is against + // the idiom that enabling tensor ops is only a hint: see nvbugs/2172799). + // We can only reasonably expect the user to handle the subsequent failure + // in profile mode, which is run with algorithms returned from + // GetRnnAlgorithms() (which are non-default and explicitly set whether to + // use tensor ops). + if (RnnTensorOpMathEnabled() && + !algorithm_config.algorithm().is_default()) { cudnnMathType_t math_type = algorithm_config.algorithm().tensor_ops_enabled() ? CUDNN_TENSOR_OP_MATH -- GitLab From fdc085f021f98e7f4cba44e716f4f85cb9704447 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 5 Jun 2018 11:11:16 -0700 Subject: [PATCH 539/902] Fixing the adamax_test rtol to be more lenient. 
--- tensorflow/contrib/opt/python/training/adamax_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 21bf3f5313..a059aae130 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,8 +224,8 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), rtol=1e-2) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), rtol=1e-2) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) -- GitLab From 938d46df199720784555af6dddc339f250b10008 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 5 Jun 2018 11:31:55 -0700 Subject: [PATCH 540/902] Fixing line too long. 
--- tensorflow/contrib/opt/python/training/adamax_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index a059aae130..915e6504e1 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -224,8 +224,10 @@ class AdaMaxOptimizerTest(test.TestCase): var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), rtol=1e-2) - self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), rtol=1e-2) + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0), + rtol=1e-2) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1), + rtol=1e-2) if use_resource: self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) -- GitLab From e86d969c07c14f8790f364d0b48724848db48d4e Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 5 Jun 2018 11:51:24 -0700 Subject: [PATCH 541/902] Fix bug in which uncompiled tf.keras.Models cannot be saved This bug seems to be specific to tf.keras, i.e., it doesn't happen to keras. 
PiperOrigin-RevId: 199334073 --- tensorflow/python/keras/engine/saving.py | 2 +- tensorflow/python/keras/engine/saving_test.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/saving.py b/tensorflow/python/keras/engine/saving.py index 40b693efde..b9a2e1f25f 100644 --- a/tensorflow/python/keras/engine/saving.py +++ b/tensorflow/python/keras/engine/saving.py @@ -106,7 +106,7 @@ def save_model(model, filepath, overwrite=True, include_optimizer=True): model_layers = model.layers save_weights_to_hdf5_group(model_weights_group, model_layers) - if include_optimizer and hasattr(model, 'optimizer'): + if include_optimizer and model.optimizer: if isinstance(model.optimizer, optimizers.TFOptimizer): logging.warning( 'TensorFlow optimizers do not ' diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 5abca8a553..1470718a5e 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -288,6 +288,30 @@ class TestWholeModelSaving(test.TestCase): out2 = new_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) + def test_sequential_model_saving_without_compile(self): + if h5py is None: + self.skipTest('h5py required to run this test') + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + + x = np.random.random((1, 3)) + out = model.predict(x) + fd, fname = tempfile.mkstemp('.h5') + + # Save the model without any compilation or training. 
+ keras.models.save_model(model, fname) + + new_model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + def test_sequential_model_saving_2(self): if h5py is None: self.skipTest('h5py required to run this test') -- GitLab From b1fd2ef4d02719cd929fa574796b2c080a21a9ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 11:54:41 -0700 Subject: [PATCH 542/902] Add core/util/exec_on_stall.h a tool for debugging deadlocks with less logging. PiperOrigin-RevId: 199334548 --- tensorflow/core/BUILD | 31 ++++++-- tensorflow/core/util/exec_on_stall.h | 89 ++++++++++++++++++++++ tensorflow/core/util/exec_on_stall_test.cc | 47 ++++++++++++ 3 files changed, 160 insertions(+), 7 deletions(-) create mode 100644 tensorflow/core/util/exec_on_stall.h create mode 100644 tensorflow/core/util/exec_on_stall_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index f5cc6ef2a1..28af3ce4ea 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -72,24 +72,23 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", + "cc_header_only_library", "full_path", "if_android", - "if_not_android_mips_and_mips64", "if_ios", "if_linux_x86_64", "if_mobile", "if_not_mobile", - "if_windows", "if_not_windows", - "tf_copts", + "if_windows", "tf_cc_test", "tf_cc_tests", + "tf_copts", "tf_cuda_library", "tf_gen_op_libs", "tf_generate_proto_text_sources", "tf_genrule_cmd_append_to_srcs", "tf_opts_nortti_if_android", - "cc_header_only_library", ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_mkl") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") @@ -113,11 +112,11 @@ load( "tf_additional_human_readable_json_deps", "tf_additional_lib_defines", "tf_additional_lib_deps", + "tf_additional_lib_hdrs", + "tf_additional_lib_srcs", "tf_additional_libdevice_data", "tf_additional_libdevice_deps", "tf_additional_libdevice_srcs", - "tf_additional_lib_hdrs", 
- "tf_additional_lib_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_mpi_lib_defines", "tf_additional_proto_hdrs", @@ -141,8 +140,8 @@ load( ) load( "//tensorflow/core:platform/default/build_config_root.bzl", - "tf_cuda_tests_tags", "if_static", + "tf_cuda_tests_tags", ) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") @@ -887,6 +886,12 @@ cc_library( ], ) +cc_library( + name = "exec_on_stall", + hdrs = ["util/exec_on_stall.h"], + deps = [":framework_lite"], +) + cc_library( name = "ptr_util", hdrs = ["util/ptr_util.h"], @@ -3252,6 +3257,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "exec_on_stall_test", + size = "small", + srcs = ["util/exec_on_stall_test.cc"], + deps = [ + ":exec_on_stall", + ":framework_lite", + ":test", + ":test_main", + ], +) + tf_cc_test( name = "lib_jpeg_jpeg_mem_unittest", srcs = ["lib/jpeg/jpeg_mem_unittest.cc"], diff --git a/tensorflow/core/util/exec_on_stall.h b/tensorflow/core/util/exec_on_stall.h new file mode 100644 index 0000000000..5c8f9d2324 --- /dev/null +++ b/tensorflow/core/util/exec_on_stall.h @@ -0,0 +1,89 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_EXEC_ON_STALL_H_ +#define TENSORFLOW_CORE_UTIL_EXEC_ON_STALL_H_ + +#include + +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +// An object that executes a particular function only if it +// is not deleted within the allotted number of seconds. +// +// This can be useful in diagnosing deadlocks, stalls and memory leaks +// without logging too agressively. +class ExecuteOnStall { + public: + // delay_secs: If the object still exists after this many seconds, + // execute f. + // f: The function to be executed, for example a detailed log of the + // the state of an object to which this is attached. + // poll_microseconds: The spawned thread will wake and test whether + // the destructor has been invoked this frequently. + ExecuteOnStall(int delay_secs, std::function f, + int32 poll_microseconds = 100) + : disabled_(false), + joined_(false), + env_(Env::Default()), + f_(f), + poll_microseconds_(poll_microseconds) { + deadline_ = env_->NowMicros() + 1000000 * delay_secs; + env_->SchedClosure([this]() { + while (env_->NowMicros() < deadline_) { + { + mutex_lock l(mu_); + if (disabled_) { + break; + } + } + env_->SleepForMicroseconds(poll_microseconds_); + } + { + mutex_lock l(mu_); + if (!disabled_) { + f_(); + } + joined_ = true; + cond_var_.notify_all(); + } + }); + } + + ~ExecuteOnStall() { + // Wait for spawned thread to terminate. 
+ mutex_lock l(mu_); + disabled_ = true; + if (!joined_) { + cond_var_.wait(l); + } + } + + private: + mutex mu_; + condition_variable cond_var_; + bool disabled_ GUARDED_BY(mu_); + bool joined_ GUARDED_BY(mu_); + Env* env_; + std::function f_; + int64 deadline_; + int32 poll_microseconds_; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_CORE_UTIL_EXEC_ON_STALL_H_ diff --git a/tensorflow/core/util/exec_on_stall_test.cc b/tensorflow/core/util/exec_on_stall_test.cc new file mode 100644 index 0000000000..df8118d611 --- /dev/null +++ b/tensorflow/core/util/exec_on_stall_test.cc @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/exec_on_stall.h" + +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +struct Chunk { + std::unique_ptr stall_closure; +}; + +Chunk* NewChunk(int stall_seconds, std::function f) { + Chunk* c = new Chunk; + c->stall_closure.reset(new ExecuteOnStall(stall_seconds, std::move(f))); + return c; +} + +TEST(ExecuteOnStallTest, BothWays) { + bool a_triggered = false; + bool b_triggered = false; + Chunk* a = NewChunk(1, [&a_triggered]() { a_triggered = true; }); + Chunk* b = NewChunk(1, [&b_triggered]() { b_triggered = true; }); + delete a; + Env::Default()->SleepForMicroseconds(2000000); + EXPECT_FALSE(a_triggered); + EXPECT_TRUE(b_triggered); + delete b; +} + +} // namespace +} // namespace tensorflow -- GitLab From 62a70dd873bc8488b10df5ad55254119173a5d0c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 11:58:16 -0700 Subject: [PATCH 543/902] Extend and refactor reader_ops_test PiperOrigin-RevId: 199335030 --- .../python/kernel_tests/reader_ops_test.py | 352 ++++++++---------- 1 file changed, 163 insertions(+), 189 deletions(-) diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 82a27eebee..7be473a5e7 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -77,6 +77,69 @@ _TEXT = b"""Gaily bedight, """ +class TFCompressionTestCase(test.TestCase): + + def setUp(self): + super(TFCompressionTestCase, self).setUp() + self._num_files = 2 + self._num_records = 7 + + def _Record(self, f, r): + return compat.as_bytes("Record %d of file %d" % (r, f)) + + def _CreateFiles(self, options=None, prefix=""): + filenames = [] + for i in range(self._num_files): + name = prefix + "tfrecord.%d.txt" % i + records = [self._Record(i, j) 
for j in range(self._num_records)] + fn = self._WriteRecordsToFile(records, name, options) + filenames.append(fn) + return filenames + + def _WriteRecordsToFile(self, records, name="tfrecord", options=None): + fn = os.path.join(self.get_temp_dir(), name) + with tf_record.TFRecordWriter(fn, options=options) as writer: + for r in records: + writer.write(r) + return fn + + def _ZlibCompressFile(self, infile, name="tfrecord.z"): + # zlib compress the file and write compressed contents to file. + with open(infile, "rb") as f: + cdata = zlib.compress(f.read()) + + zfn = os.path.join(self.get_temp_dir(), name) + with open(zfn, "wb") as f: + f.write(cdata) + return zfn + + def _GzipCompressFile(self, infile, name="tfrecord.gz"): + # gzip compress the file and write compressed contents to file. + with open(infile, "rb") as f: + cdata = f.read() + + gzfn = os.path.join(self.get_temp_dir(), name) + with gzip.GzipFile(gzfn, "wb") as f: + f.write(cdata) + return gzfn + + def _ZlibDecompressFile(self, infile, name="tfrecord"): + with open(infile, "rb") as f: + cdata = zlib.decompress(f.read()) + fn = os.path.join(self.get_temp_dir(), name) + with open(fn, "wb") as f: + f.write(cdata) + return fn + + def _GzipDecompressFile(self, infile, name="tfrecord"): + with gzip.GzipFile(infile, "rb") as f: + cdata = f.read() + fn = os.path.join(self.get_temp_dir(), name) + with open(fn, "wb") as f: + f.write(cdata) + return fn + + class IdentityReaderTest(test.TestCase): def _ExpectRead(self, sess, key, value, expected): @@ -348,7 +411,7 @@ class TextLineReaderTest(test.TestCase): k, v = sess.run([key, value]) -class FixedLengthRecordReaderTest(test.TestCase): +class FixedLengthRecordReaderTest(TFCompressionTestCase): def setUp(self): super(FixedLengthRecordReaderTest, self).setUp() @@ -407,40 +470,18 @@ class FixedLengthRecordReaderTest(test.TestCase): # gap_bytes=hop_bytes-record_bytes def _CreateGzipFiles(self, num_records, gap_bytes): - filenames = [] - for i in range(self._num_files): 
- fn = os.path.join(self.get_temp_dir(), "fixed_length_record.%d.txt" % i) - filenames.append(fn) - with gzip.GzipFile(fn, "wb") as f: - f.write(b"H" * self._header_bytes) - if num_records > 0: - f.write(self._Record(i, 0)) - for j in range(1, num_records): - if gap_bytes > 0: - f.write(b"G" * gap_bytes) - f.write(self._Record(i, j)) - f.write(b"F" * self._footer_bytes) + filenames = self._CreateFiles(num_records, gap_bytes) + for fn in filenames: + # compress inplace. + self._GzipCompressFile(fn, fn) return filenames # gap_bytes=hop_bytes-record_bytes def _CreateZlibFiles(self, num_records, gap_bytes): - filenames = [] - for i in range(self._num_files): - fn = os.path.join(self.get_temp_dir(), "fixed_length_record.%d.txt" % i) - filenames.append(fn) - with open(fn + ".tmp", "wb") as f: - f.write(b"H" * self._header_bytes) - if num_records > 0: - f.write(self._Record(i, 0)) - for j in range(1, num_records): - if gap_bytes > 0: - f.write(b"G" * gap_bytes) - f.write(self._Record(i, j)) - f.write(b"F" * self._footer_bytes) - with open(fn + ".tmp", "rb") as f: - cdata = zlib.compress(f.read()) - with open(fn, "wb") as zf: - zf.write(cdata) + filenames = self._CreateFiles(num_records, gap_bytes) + for fn in filenames: + # compress inplace. 
+ self._ZlibCompressFile(fn, fn) return filenames def _CreateGzipOverlappedRecordFiles(self, num_overlapped_records): @@ -477,10 +518,7 @@ class FixedLengthRecordReaderTest(test.TestCase): ]) f.write(compat.as_bytes(all_records_str)) f.write(b"F" * self._footer_bytes) - with open(fn + ".tmp", "rb") as f: - cdata = zlib.compress(f.read()) - with open(fn, "wb") as zf: - zf.write(cdata) + self._ZlibCompressFile(fn + ".tmp", fn) return filenames # gap_bytes=hop_bytes-record_bytes @@ -529,7 +567,6 @@ class FixedLengthRecordReaderTest(test.TestCase): for i in range(self._num_files): for j in range(num_overlapped_records): k, v = sess.run([key, value]) - print(v) self.assertAllEqual("%s:%d" % (files[i], j), compat.as_text(k)) self.assertAllEqual(self._OverlappedRecord(i, j), v) @@ -579,25 +616,10 @@ class FixedLengthRecordReaderTest(test.TestCase): files, num_overlapped_records, encoding="ZLIB") -class TFRecordReaderTest(test.TestCase): +class TFRecordReaderTest(TFCompressionTestCase): def setUp(self): super(TFRecordReaderTest, self).setUp() - self._num_files = 2 - self._num_records = 7 - - def _Record(self, f, r): - return compat.as_bytes("Record %d of file %d" % (r, f)) - - def _CreateFiles(self): - filenames = [] - for i in range(self._num_files): - fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i) - filenames.append(fn) - writer = tf_record.TFRecordWriter(fn) - for j in range(self._num_records): - writer.write(self._Record(i, j)) - return filenames def testOneEpoch(self): files = self._CreateFiles() @@ -647,107 +669,106 @@ class TFRecordReaderTest(test.TestCase): self.assertEqual(self._num_files * self._num_records, num_v) def testReadZlibFiles(self): - files = self._CreateFiles() - zlib_files = [] - for i, fn in enumerate(files): - with open(fn, "rb") as f: - cdata = zlib.compress(f.read()) - - zfn = os.path.join(self.get_temp_dir(), "tfrecord_%s.z" % i) - with open(zfn, "wb") as f: - f.write(cdata) - zlib_files.append(zfn) + options = 
tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + files = self._CreateFiles(options) with self.test_session() as sess: - options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) - queue.enqueue_many([zlib_files]).run() + queue.enqueue_many([files]).run() queue.close().run() for i in range(self._num_files): for j in range(self._num_records): k, v = sess.run([key, value]) - self.assertTrue(compat.as_text(k).startswith("%s:" % zlib_files[i])) + self.assertTrue(compat.as_text(k).startswith("%s:" % files[i])) self.assertAllEqual(self._Record(i, j), v) def testReadGzipFiles(self): - files = self._CreateFiles() - gzip_files = [] - for i, fn in enumerate(files): - with open(fn, "rb") as f: - cdata = f.read() - - zfn = os.path.join(self.get_temp_dir(), "tfrecord_%s.gz" % i) - with gzip.GzipFile(zfn, "wb") as f: - f.write(cdata) - gzip_files.append(zfn) + options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP) + files = self._CreateFiles(options) with self.test_session() as sess: - options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP) reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) - queue.enqueue_many([gzip_files]).run() + queue.enqueue_many([files]).run() queue.close().run() for i in range(self._num_files): for j in range(self._num_records): k, v = sess.run([key, value]) - self.assertTrue(compat.as_text(k).startswith("%s:" % gzip_files[i])) + self.assertTrue(compat.as_text(k).startswith("%s:" % files[i])) self.assertAllEqual(self._Record(i, j), v) -class TFRecordWriterZlibTest(test.TestCase): +class TFRecordWriterTest(TFCompressionTestCase): def setUp(self): - super(TFRecordWriterZlibTest, self).setUp() - self._num_files = 2 - self._num_records = 7 + 
super(TFRecordWriterTest, self).setUp() + + def _AssertFilesEqual(self, a, b, equal): + for an, bn in zip(a, b): + with open(an, "rb") as af, open(bn, "rb") as bf: + if equal: + self.assertEqual(af.read(), bf.read()) + else: + self.assertNotEqual(af.read(), bf.read()) + + def testWriteReadZLibFiles(self): + # Write uncompressed then compress manually. + options = tf_record.TFRecordOptions(TFRecordCompressionType.NONE) + files = self._CreateFiles(options, prefix="uncompressed") + zlib_files = [ + self._ZlibCompressFile(fn, "tfrecord_%s.z" % i) + for i, fn in enumerate(files) + ] + self._AssertFilesEqual(files, zlib_files, False) - def _Record(self, f, r): - return compat.as_bytes("Record %d of file %d" % (r, f)) + # Now write compressd and verify same. + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + compressed_files = self._CreateFiles(options, prefix="compressed") + self._AssertFilesEqual(compressed_files, zlib_files, True) - def _CreateFiles(self): - filenames = [] - for i in range(self._num_files): - fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i) - filenames.append(fn) - options = tf_record.TFRecordOptions( - compression_type=TFRecordCompressionType.ZLIB) - writer = tf_record.TFRecordWriter(fn, options=options) - for j in range(self._num_records): - writer.write(self._Record(i, j)) - writer.close() - del writer + # Decompress compress and verify same. + uncompressed_files = [ + self._ZlibDecompressFile(fn, "tfrecord_%s.z" % i) + for i, fn in enumerate(compressed_files) + ] + self._AssertFilesEqual(uncompressed_files, files, True) + + def testWriteReadGzipFiles(self): + # Write uncompressed then compress manually. 
+ options = tf_record.TFRecordOptions(TFRecordCompressionType.NONE) + files = self._CreateFiles(options, prefix="uncompressed") + gzip_files = [ + self._GzipCompressFile(fn, "tfrecord_%s.gz" % i) + for i, fn in enumerate(files) + ] + self._AssertFilesEqual(files, gzip_files, False) - return filenames + # Now write compressd and verify same. + options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP) + compressed_files = self._CreateFiles(options, prefix="compressed") - def _WriteRecordsToFile(self, records, name="tf_record"): - fn = os.path.join(self.get_temp_dir(), name) - writer = tf_record.TFRecordWriter(fn, options=None) - for r in records: - writer.write(r) - writer.close() - del writer - return fn + # Note: Gzips written by TFRecordWriter add 'tfrecord_0' so + # compressed_files can't be compared with gzip_files - def _ZlibCompressFile(self, infile, name="tfrecord.z"): - # zlib compress the file and write compressed contents to file. - with open(infile, "rb") as f: - cdata = zlib.compress(f.read()) + # Decompress compress and verify same. 
+ uncompressed_files = [ + self._GzipDecompressFile(fn, "tfrecord_%s.gz" % i) + for i, fn in enumerate(compressed_files) + ] + self._AssertFilesEqual(uncompressed_files, files, True) - zfn = os.path.join(self.get_temp_dir(), name) - with open(zfn, "wb") as f: - f.write(cdata) - return zfn + +class TFRecordWriterZlibTest(TFCompressionTestCase): def testOneEpoch(self): - files = self._CreateFiles() + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + files = self._CreateFiles(options) with self.test_session() as sess: - options = tf_record.TFRecordOptions( - compression_type=TFRecordCompressionType.ZLIB) reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -788,8 +809,7 @@ class TFRecordWriterZlibTest(test.TestCase): h.write(output) with self.test_session() as sess: - options = tf_record.TFRecordOptions( - compression_type=TFRecordCompressionType.ZLIB) + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(1, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -808,9 +828,7 @@ class TFRecordWriterZlibTest(test.TestCase): # read the compressed contents and verify. actual = [] for r in tf_record.tf_record_iterator( - zfn, - options=tf_record.TFRecordOptions( - tf_record.TFRecordCompressionType.ZLIB)): + zfn, options=tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)): actual.append(r) self.assertEqual(actual, original) @@ -822,12 +840,9 @@ class TFRecordWriterZlibTest(test.TestCase): fn = self._WriteRecordsToFile(original, "zlib_read_write_large.tfrecord") zfn = self._ZlibCompressFile(fn, "zlib_read_write_large.tfrecord.z") - # read the compressed contents and verify. 
actual = [] for r in tf_record.tf_record_iterator( - zfn, - options=tf_record.TFRecordOptions( - tf_record.TFRecordCompressionType.ZLIB)): + zfn, options=tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)): actual.append(r) self.assertEqual(actual, original) @@ -835,13 +850,7 @@ class TFRecordWriterZlibTest(test.TestCase): """Verify that files produced are gzip compatible.""" original = [b"foo", b"bar"] fn = self._WriteRecordsToFile(original, "gzip_read_write.tfrecord") - - # gzip compress the file and write compressed contents to file. - with open(fn, "rb") as f: - cdata = f.read() - gzfn = os.path.join(self.get_temp_dir(), "tf_record.gz") - with gzip.GzipFile(gzfn, "wb") as f: - f.write(cdata) + gzfn = self._GzipCompressFile(fn, "tfrecord.gz") actual = [] for r in tf_record.tf_record_iterator( @@ -850,89 +859,54 @@ class TFRecordWriterZlibTest(test.TestCase): self.assertEqual(actual, original) -class TFRecordIteratorTest(test.TestCase): +class TFRecordIteratorTest(TFCompressionTestCase): def setUp(self): super(TFRecordIteratorTest, self).setUp() self._num_records = 7 - def _Record(self, r): - return compat.as_bytes("Record %d" % r) - - def _WriteCompressedRecordsToFile( - self, - records, - name="tfrecord.z", - compression_type=tf_record.TFRecordCompressionType.ZLIB): - fn = os.path.join(self.get_temp_dir(), name) - options = tf_record.TFRecordOptions(compression_type=compression_type) - writer = tf_record.TFRecordWriter(fn, options=options) - for r in records: - writer.write(r) - writer.close() - del writer - return fn - - def _ZlibDecompressFile(self, infile, name="tfrecord", wbits=zlib.MAX_WBITS): - with open(infile, "rb") as f: - cdata = zlib.decompress(f.read(), wbits) - zfn = os.path.join(self.get_temp_dir(), name) - with open(zfn, "wb") as f: - f.write(cdata) - return zfn - def testIterator(self): - fn = self._WriteCompressedRecordsToFile( - [self._Record(i) for i in range(self._num_records)], - "compressed_records") - options = 
tf_record.TFRecordOptions( - compression_type=TFRecordCompressionType.ZLIB) + records = [self._Record(0, i) for i in range(self._num_records)] + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + fn = self._WriteRecordsToFile(records, "compressed_records", options) + reader = tf_record.tf_record_iterator(fn, options) - for i in range(self._num_records): + for expected in records: record = next(reader) - self.assertAllEqual(self._Record(i), record) + self.assertAllEqual(expected, record) with self.assertRaises(StopIteration): record = next(reader) def testWriteZlibRead(self): """Verify compression with TFRecordWriter is zlib library compatible.""" original = [b"foo", b"bar"] - fn = self._WriteCompressedRecordsToFile(original, - "write_zlib_read.tfrecord.z") + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + fn = self._WriteRecordsToFile(original, "write_zlib_read.tfrecord.z", + options) + zfn = self._ZlibDecompressFile(fn, "write_zlib_read.tfrecord") - actual = [] - for r in tf_record.tf_record_iterator(zfn): - actual.append(r) + actual = list(tf_record.tf_record_iterator(zfn)) self.assertEqual(actual, original) def testWriteZlibReadLarge(self): """Verify compression for large records is zlib library compatible.""" # Make it large (about 5MB) original = [_TEXT * 10240] - fn = self._WriteCompressedRecordsToFile(original, - "write_zlib_read_large.tfrecord.z") - zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tf_record") - actual = [] - for r in tf_record.tf_record_iterator(zfn): - actual.append(r) + options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) + fn = self._WriteRecordsToFile(original, "write_zlib_read_large.tfrecord.z", + options) + zfn = self._ZlibDecompressFile(fn, "write_zlib_read_large.tfrecord") + actual = list(tf_record.tf_record_iterator(zfn)) self.assertEqual(actual, original) def testWriteGzipRead(self): original = [b"foo", b"bar"] - fn = self._WriteCompressedRecordsToFile( - original, - 
"write_gzip_read.tfrecord.gz", - compression_type=TFRecordCompressionType.GZIP) - - with gzip.GzipFile(fn, "rb") as f: - cdata = f.read() - zfn = os.path.join(self.get_temp_dir(), "tf_record") - with open(zfn, "wb") as f: - f.write(cdata) + options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP) + fn = self._WriteRecordsToFile(original, "write_gzip_read.tfrecord.gz", + options) - actual = [] - for r in tf_record.tf_record_iterator(zfn): - actual.append(r) + gzfn = self._GzipDecompressFile(fn, "write_gzip_read.tfrecord") + actual = list(tf_record.tf_record_iterator(gzfn)) self.assertEqual(actual, original) def testBadFile(self): -- GitLab From 920df27282b3f5d03d79f54ef05cea305c2a30d7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 12:11:17 -0700 Subject: [PATCH 544/902] Implementation of the symmetrically quantized LSTM TFLite Op. PiperOrigin-RevId: 199337082 --- .../lite/kernels/internal/kernel_utils.cc | 262 ++- .../lite/kernels/internal/kernel_utils.h | 83 + tensorflow/contrib/lite/kernels/lstm.cc | 454 ++++- tensorflow/contrib/lite/kernels/lstm_test.cc | 1769 ++++++++++------- 4 files changed, 1791 insertions(+), 777 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 67e3810479..6e62183975 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -63,6 +63,8 @@ void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr, // Quantize input from float to uint8 + quantization params (scaling // factor). float unused_min, unused_max; + // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function) + // whichever is faster. 
for (int b = 0; b < batch_size; ++b) { const int offset = b * input_size; tensor_utils::SymmetricQuantizeFloats( @@ -147,6 +149,7 @@ void LstmStep( input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, input_gate_scratch, /*result_stride=*/1); } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, forget_gate_scratch, /*result_stride=*/1); @@ -161,8 +164,7 @@ void LstmStep( if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, input_gate_scratch, - /*result_stride=*/1); + n_batch, input_gate_scratch, /*result_stride=*/1); } tensor_utils::MatrixBatchVectorMultiplyAccumulate( recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr, @@ -253,5 +255,261 @@ void LstmStep( output_state_ptr); } +// TODO(alanchiao): move this to tensor_utils. +void VectorMultiply(const int8_t* vector, const int v_size, const float scale, + float* result) { + for (int i = 0; i < v_size; ++i) { + *result++ = scale * *vector++; + } +} + +void LstmStep( + const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr, + float input_to_input_weights_scale, + const int8_t* input_to_forget_weights_ptr, + float input_to_forget_weights_scale, + const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale, + const int8_t* input_to_output_weights_ptr, + float input_to_output_weights_scale, + const int8_t* recurrent_to_input_weights_ptr, + float recurrent_to_input_weights_scale, + const int8_t* recurrent_to_forget_weights_ptr, + float recurrent_to_forget_weights_scale, + const int8_t* recurrent_to_cell_weights_ptr, + float recurrent_to_cell_weights_scale, + const int8_t* recurrent_to_output_weights_ptr, + float recurrent_to_output_weights_scale, + const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale, + const int8_t* cell_to_forget_weights_ptr, + float 
cell_to_forget_weights_scale, + const int8_t* cell_to_output_weights_ptr, + float cell_to_output_weights_scale, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr, + float projection_weights_scale, const float* projection_bias_ptr, + const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, + int n_output, float* input_gate_scratch, float* forget_gate_scratch, + float* cell_scratch, float* output_gate_scratch, float* scaling_factors, + float* product_scaling_factors, float* recovered_cell_weights, + int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr, + int8_t* quantized_cell_state_ptr, float* output_state_ptr, + float* cell_state_ptr, float* output_ptr_batch) { + // Since we have already checked that weights are all there or none, we can + // check the existence of only one to get the condition. + const bool use_cifg = (input_to_input_weights_ptr == nullptr); + const bool use_peephole = (cell_to_output_weights_ptr != nullptr); + // Initialize scratch buffers with bias. + if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, + input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, + forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, + cell_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, + output_gate_scratch); + + if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) { + // Save quantization and matmul computation for all zero input.
+ float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_input; + tensor_utils::SymmetricQuantizeFloats( + input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset, + &unused_min, &unused_max, &scaling_factors[b]); + } + // For each batch and cell: compute input_weight * input. + if (!use_cifg) { + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_input_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_input_weights_ptr, n_cell, n_input, + quantized_input_ptr_batch, product_scaling_factors, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_forget_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_forget_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, + product_scaling_factors, n_batch, forget_gate_scratch, + /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_cell_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, + product_scaling_factors, n_batch, cell_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_output_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, + product_scaling_factors, n_batch, output_gate_scratch, + /*result_stride=*/1); + } + + if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) { + // Save quantization and matmul computation for all zero input. 
+ float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_output; + tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output, + quantized_output_state_ptr + offset, + &unused_min, &unused_max, + &scaling_factors[b]); + } + // For each batch and cell: compute recurrent_weight * output_state. + if (!use_cifg) { + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_input_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_input_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_forget_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_forget_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + forget_gate_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_cell_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + cell_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_output_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_output_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + output_gate_scratch, /*result_stride=*/1); + } + + // Save quantization and matmul computation for all zero input. + const bool is_cell_state_all_zeros = + tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell); + + // For each batch and cell: update input gate. 
+ if (!use_cifg) { + if (use_peephole && !is_cell_state_all_zeros) { + VectorMultiply(cell_to_input_weights_ptr, n_cell, + 1. / cell_to_input_weights_scale, recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + input_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, + input_gate_scratch); + } + + // For each batch and cell: update forget gate. + if (use_peephole && !is_cell_state_all_zeros) { + VectorMultiply(cell_to_forget_weights_ptr, n_cell, + 1. / cell_to_forget_weights_scale, recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + forget_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, + forget_gate_scratch); + + // For each batch and cell: update the cell. + tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, + n_batch * n_cell, cell_state_ptr); + tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, + params->activation, cell_scratch); + if (use_cifg) { + tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, + forget_gate_scratch); + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); + } else { + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); + } + if (params->cell_clip > 0.0) { + tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, + params->cell_clip, cell_state_ptr); + } + + // For each batch and cell: update the output gate. + if (use_peephole && !is_cell_state_all_zeros) { + VectorMultiply(cell_to_output_weights_ptr, n_cell, + 1. 
/ cell_to_output_weights_scale, recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + output_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, + output_gate_scratch); + tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, + params->activation, cell_scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, + n_batch * n_cell, output_gate_scratch); + + // For each batch: update the projection and output_state. + const bool use_projection_weight = (projection_weights_ptr != nullptr); + const bool use_projection_bias = (projection_bias_ptr != nullptr); + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { + // Save quantization and matmul computation for all zero input. 
+ float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_cell; + tensor_utils::SymmetricQuantizeFloats( + output_gate_scratch + offset, n_cell, + quantized_cell_state_ptr + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * projection_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr, + product_scaling_factors, n_batch, output_ptr_batch, + /*result_stride=*/1); + } + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } + } else { + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); + } + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); +} + } // namespace kernel_utils } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h index f3f42f0840..2a11b37a60 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h @@ -92,6 +92,89 @@ void LstmStep( float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, float* output_ptr_batch); +// Same as above but with quantized weight matrices. 
In detail: +// Input of size 'n_batch * n_input': +// input_ptr_batch +// +// LSTM weights: +// Quantized input weights of size 'n_cell * n_input': +// input_to_input_weights - optional (can be nullptr) +// input_to_forget_weights +// input_to_cell_weights +// input_to_output_weights +// Quantized recurrent weights of size 'n_cell * n_output': +// recurrent_to_input_weights - optional +// recurrent_to_forget_weights +// recurrent_to_cell_weights +// recurrent_to_output_weights +// Quantized peephole weights of size 'n_cell', representing diagonal matrices. +// cell_to_input_weights - optional +// cell_to_forget_weights - optional +// cell_to_output_weights - optional +// Quantized projection weights of size 'n_output * n_cell' +// projection_weights_ptr - optional +// Weight scales (scalars) for each of the weights above. +// input_to_input_weights_scale - optional +// input_to_forget_weights_scale +// input_to_cell_weights_scale +// input_to_output_weights_scale +// recurrent_to_input_weights_scale - optional +// recurrent_to_forget_weights_scale +// recurrent_to_cell_weights_scale +// recurrent_to_output_weights_scale +// cell_to_input_weights_scale, +// cell_to_forget_weights_scale, +// cell_to_output_weights_scale, +// projection_weights_scale - optional +// Gate biases of size 'n_cell': +// input_gate_bias_ptr - optional +// forget_gate_bias_ptr +// cell_bias_ptr +// output_gate_bias_ptr +// +// Temporary pre-allocated storage for quantized values: +// quantized_input_ptr_batch (same size as input_ptr_batch) +// quantized_output_state_ptr (same size as output_state_ptr) +// quantized_cell_state_ptr (same size as cell_state_ptr) +// Temporary pre-allocated storage for recovered values: +// recovered_cell_weights (same size as cell_to_*_weights) +// +// Outputs: +// output_state_ptr - size 'n_batch * n_output' +// cell_state_ptr - size 'n_batch * n_cell' +// output_ptr_batch - size 'n_batch * n_output' +void LstmStep( + const float* input_ptr_batch, const
int8_t* input_to_input_weights_ptr, + float input_to_input_weights_scale, + const int8_t* input_to_forget_weights_ptr, + float input_to_forget_weights_scale, + const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale, + const int8_t* input_to_output_weights_ptr, + float input_to_output_weights_scale, + const int8_t* recurrent_to_input_weights_ptr, + float recurrent_to_input_weights_scale, + const int8_t* recurrent_to_forget_weights_ptr, + float recurrent_to_forget_weights_scale, + const int8_t* recurrent_to_cell_weights_ptr, + float recurrent_to_cell_weights_scale, + const int8_t* recurrent_to_output_weights_ptr, + float recurrent_to_output_weights_scale, + const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale, + const int8_t* cell_to_forget_weights_ptr, + float cell_to_forget_weights_scale, + const int8_t* cell_to_output_weights_ptr, + float cell_to_output_weights_scale, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr, + float projection_weights_scale, const float* projection_bias_ptr, + const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, + int n_output, float* input_gate_scratch, float* forget_gate_scratch, + float* cell_scratch, float* output_gate_scratch, float* scaling_factors, + float* product_scaling_factors, float* recovered_cell_weights, + int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr, + int8_t* quantized_cell_state_ptr, float* output_state_ptr, + float* cell_state_ptr, float* output_ptr_batch); + } // namespace kernel_utils } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 9aae3e571b..eb26a02455 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -86,7 +86,8 @@ 
constexpr int kOutputTensor = 2; void* Init(TfLiteContext* context, const char* buffer, size_t length) { auto* op_data = new OpData; op_data->kernel_type = kTfLiteLSTMFullKernel; - context->AddTensors(context, 1, &op_data->scratch_tensor_index); + context->AddTensors(context, /*tensors_to_add=*/7, + &op_data->scratch_tensor_index); return op_data; } @@ -94,7 +95,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TfLiteNode* node, int n_input, int n_output, int n_cell) { - auto* params = reinterpret_cast(node->builtin_data); + const auto* params = reinterpret_cast(node->builtin_data); // Making sure clipping parameters have valid values. // == 0 means no clipping @@ -104,7 +105,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); - if (input_to_input_weights) { + if (input_to_input_weights != nullptr) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); @@ -124,7 +125,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); - if (recurrent_to_input_weights) { + if (recurrent_to_input_weights != nullptr) { TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], n_cell); @@ -214,7 +215,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, kProjectionWeightsTensor); - if (projection_weights) { + if (projection_weights != nullptr) { TF_LITE_ENSURE_EQ(context, 
projection_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); @@ -222,7 +223,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, kProjectionBiasTensor); - if (projection_bias) { + if (projection_bias != nullptr) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); } @@ -252,6 +253,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Inferring batch size, number of outputs and number of cells from the // input tensors. const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); TF_LITE_ENSURE(context, input->dims->size > 1); const int n_batch = input->dims->data[0]; const int n_input = input->dims->data[1]; @@ -296,86 +298,148 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, cell_state, cell_size)); - // Create a scratch buffer tensor. + // Mark state tensors as persistent tensors. + output_state->allocation_type = kTfLiteArenaRwPersistent; + cell_state->allocation_type = kTfLiteArenaRwPersistent; + + // The weights are of consistent type, so it suffices to check one. + // TODO(mirkov): create a utility/macro for this check, so all Ops can use it. + const bool is_hybrid_op = (input_to_output_weights->type == kTfLiteUInt8 && + input->type == kTfLiteFloat32); + TfLiteIntArrayFree(node->temporaries); - node->temporaries = TfLiteIntArrayCreate(1); + if (is_hybrid_op) { + node->temporaries = TfLiteIntArrayCreate(7); + } else { + node->temporaries = TfLiteIntArrayCreate(1); + } node->temporaries->data[0] = op_data->scratch_tensor_index; + + // Create a scratch buffer tensor. 
TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/0); scratch_buffer->type = input->type; scratch_buffer->allocation_type = kTfLiteArenaRw; - // Mark state tensors as persistent tensors. - output_state->allocation_type = kTfLiteArenaRwPersistent; - cell_state->allocation_type = kTfLiteArenaRwPersistent; - const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); const bool use_cifg = (input_to_input_weights == nullptr); + TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(2); + scratch_buffer_size->data[0] = n_batch; if (use_cifg) { - TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(2); - scratch_buffer_size->data[0] = n_batch; // Reserving space for Cell, Forget, Output gates scratch_buffer_size->data[1] = n_cell * 3; - TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, - scratch_buffer_size)); } else { - TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(2); - scratch_buffer_size->data[0] = n_batch; // Reserving space for Input, Cell, Forget, Output gates scratch_buffer_size->data[1] = n_cell * 4; - TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, - scratch_buffer_size)); + } + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, + scratch_buffer_size)); + + if (is_hybrid_op) { + // Allocate temporary tensors to store quantized values of input, + // output_state and cell_state tensors. 
+ node->temporaries->data[1] = op_data->scratch_tensor_index + 1; + TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1); + input_quantized->type = kTfLiteUInt8; + input_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) { + TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims); + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized, + input_quantized_size)); + } + node->temporaries->data[2] = op_data->scratch_tensor_index + 2; + TfLiteTensor* output_state_quantized = + GetTemporary(context, node, /*index=*/2); + output_state_quantized->type = kTfLiteUInt8; + output_state_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(output_state_quantized->dims, + output_state->dims)) { + TfLiteIntArray* output_state_quantized_size = + TfLiteIntArrayCopy(output_state->dims); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output_state_quantized, + output_state_quantized_size)); + } + node->temporaries->data[3] = op_data->scratch_tensor_index + 3; + TfLiteTensor* cell_state_quantized = + GetTemporary(context, node, /*index=*/3); + cell_state_quantized->type = kTfLiteUInt8; + cell_state_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(cell_state_quantized->dims, cell_state->dims)) { + TfLiteIntArray* cell_state_quantized_size = + TfLiteIntArrayCopy(cell_state->dims); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, cell_state_quantized, + cell_state_quantized_size)); + } + + // Allocate temporary tensors to store scaling factors and product scaling + // factors. The latter is a convenience storage which allows to quantize + // a vector once (which produces the scaling factors) and multiply it with + // different matrices (which requires multiplying the scaling factors with + // the scaling factor of the matrix). 
+ node->temporaries->data[4] = op_data->scratch_tensor_index + 4; + TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4); + scaling_factors->type = kTfLiteFloat32; + scaling_factors->allocation_type = kTfLiteArenaRw; + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = n_batch; + if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, + scaling_factors_size)); + } + node->temporaries->data[5] = op_data->scratch_tensor_index + 5; + TfLiteTensor* prod_scaling_factors = + GetTemporary(context, node, /*index=*/5); + prod_scaling_factors->type = kTfLiteFloat32; + prod_scaling_factors->allocation_type = kTfLiteArenaRw; + TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); + prod_scaling_factors_size->data[0] = n_batch; + if (!TfLiteIntArrayEqual(prod_scaling_factors->dims, + prod_scaling_factors_size)) { + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, prod_scaling_factors, + prod_scaling_factors_size)); + } + + // Allocate a temporary tensor to store the recovered cell weights. Since + // this is used for diagonal matrices, only need to store n_cell values. + node->temporaries->data[6] = op_data->scratch_tensor_index + 6; + TfLiteTensor* recovered_cell_weights = + GetTemporary(context, node, /*index=*/6); + recovered_cell_weights->type = kTfLiteFloat32; + recovered_cell_weights->allocation_type = kTfLiteArenaRw; + TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); + recovered_cell_weights_size->data[0] = n_cell; + if (!TfLiteIntArrayEqual(recovered_cell_weights->dims, + recovered_cell_weights_size)) { + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, recovered_cell_weights, + recovered_cell_weights_size)); + } } return kTfLiteOk; } // The LSTM Op engine. 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - - const TfLiteTensor* input_to_input_weights = - GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); - const TfLiteTensor* input_to_forget_weights = - GetInput(context, node, kInputToForgetWeightsTensor); - const TfLiteTensor* input_to_cell_weights = - GetInput(context, node, kInputToCellWeightsTensor); - const TfLiteTensor* input_to_output_weights = - GetInput(context, node, kInputToOutputWeightsTensor); - - const TfLiteTensor* recurrent_to_input_weights = - GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); - const TfLiteTensor* recurrent_to_forget_weights = - GetInput(context, node, kRecurrentToForgetWeightsTensor); - const TfLiteTensor* recurrent_to_cell_weights = - GetInput(context, node, kRecurrentToCellWeightsTensor); - const TfLiteTensor* recurrent_to_output_weights = - GetInput(context, node, kRecurrentToOutputWeightsTensor); - - const TfLiteTensor* cell_to_input_weights = - GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); - const TfLiteTensor* cell_to_forget_weights = - GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); - const TfLiteTensor* cell_to_output_weights = - GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); - - const TfLiteTensor* input_gate_bias = - GetOptionalInputTensor(context, node, kInputGateBiasTensor); - const TfLiteTensor* forget_gate_bias = - GetInput(context, node, kForgetGateBiasTensor); - const TfLiteTensor* cell_bias = GetInput(context, node, kCellGateBiasTensor); - const TfLiteTensor* output_gate_bias = - GetInput(context, node, kOutputGateBiasTensor); - - const TfLiteTensor* projection_weights = - GetOptionalInputTensor(context, node, kProjectionWeightsTensor); - const TfLiteTensor* projection_bias = - GetOptionalInputTensor(context, node, 
kProjectionBiasTensor); - - TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); - TfLiteTensor* cell_state = GetOutput(context, node, kCellStateTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - +TfLiteStatus EvalFloat( + const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights, + const TfLiteTensor* input_to_forget_weights, + const TfLiteTensor* input_to_cell_weights, + const TfLiteTensor* input_to_output_weights, + const TfLiteTensor* recurrent_to_input_weights, + const TfLiteTensor* recurrent_to_forget_weights, + const TfLiteTensor* recurrent_to_cell_weights, + const TfLiteTensor* recurrent_to_output_weights, + const TfLiteTensor* cell_to_input_weights, + const TfLiteTensor* cell_to_forget_weights, + const TfLiteTensor* cell_to_output_weights, + const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias, + const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias, + const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias, + const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer, + TfLiteTensor* output_state, TfLiteTensor* cell_state, + TfLiteTensor* output) { const int n_batch = input->dims->data[0]; const int n_input = input->dims->data[1]; // n_cell and n_output will be the same size when there is no projection. @@ -387,9 +451,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); - // Index the scratch buffers pointers to the global scratch buffer. 
- TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/0); - float* input_gate_scratch = nullptr; float* cell_scratch = nullptr; float* forget_gate_scratch = nullptr; @@ -457,6 +518,259 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +TfLiteStatus EvalHybrid( + const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights, + const TfLiteTensor* input_to_forget_weights, + const TfLiteTensor* input_to_cell_weights, + const TfLiteTensor* input_to_output_weights, + const TfLiteTensor* recurrent_to_input_weights, + const TfLiteTensor* recurrent_to_forget_weights, + const TfLiteTensor* recurrent_to_cell_weights, + const TfLiteTensor* recurrent_to_output_weights, + const TfLiteTensor* cell_to_input_weights, + const TfLiteTensor* cell_to_forget_weights, + const TfLiteTensor* cell_to_output_weights, + const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias, + const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias, + const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias, + const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer, + TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors, + TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized, + TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized, + TfLiteTensor* output_state, TfLiteTensor* cell_state, + TfLiteTensor* output) { + const int n_batch = input->dims->data[0]; + const int n_input = input->dims->data[1]; + // n_cell and n_output will be the same size when there is no projection. + const int n_cell = input_to_output_weights->dims->data[0]; + const int n_output = recurrent_to_output_weights->dims->data[1]; + + // Since we have already checked that weights are all there or none, we can + // check the existence of only one to get the condition. 
+ const bool use_cifg = (input_to_input_weights == nullptr); + const bool use_peephole = (cell_to_output_weights != nullptr); + + float* input_gate_scratch = nullptr; + float* cell_scratch = nullptr; + float* forget_gate_scratch = nullptr; + float* output_gate_scratch = nullptr; + if (use_cifg) { + cell_scratch = scratch_buffer->data.f; + forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch; + output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch; + } else { + input_gate_scratch = scratch_buffer->data.f; + cell_scratch = scratch_buffer->data.f + n_cell * n_batch; + forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch; + output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; + } + + // Check optional tensors, the respective pointers can be null. + int8_t* input_to_input_weights_ptr = nullptr; + float input_to_input_weights_scale = 1.0f; + int8_t* recurrent_to_input_weights_ptr = nullptr; + float recurrent_to_input_weights_scale = 1.0f; + float* input_gate_bias_ptr = nullptr; + if (!use_cifg) { + input_to_input_weights_ptr = + reinterpret_cast(input_to_input_weights->data.uint8); + recurrent_to_input_weights_ptr = + reinterpret_cast(recurrent_to_input_weights->data.uint8); + input_gate_bias_ptr = input_gate_bias->data.f; + input_to_input_weights_scale = input_to_input_weights->params.scale; + recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale; + } + + int8_t* cell_to_input_weights_ptr = nullptr; + int8_t* cell_to_forget_weights_ptr = nullptr; + int8_t* cell_to_output_weights_ptr = nullptr; + float cell_to_input_weights_scale = 1.0f; + float cell_to_forget_weights_scale = 1.0f; + float cell_to_output_weights_scale = 1.0f; + if (use_peephole) { + if (!use_cifg) { + cell_to_input_weights_ptr = + reinterpret_cast(cell_to_input_weights->data.uint8); + cell_to_input_weights_scale = cell_to_input_weights->params.scale; + } + cell_to_forget_weights_ptr = + 
reinterpret_cast(cell_to_forget_weights->data.uint8); + cell_to_output_weights_ptr = + reinterpret_cast(cell_to_output_weights->data.uint8); + cell_to_forget_weights_scale = cell_to_forget_weights->params.scale; + cell_to_output_weights_scale = cell_to_output_weights->params.scale; + } + + const int8_t* projection_weights_ptr = + (projection_weights == nullptr) + ? nullptr + : reinterpret_cast(projection_weights->data.uint8); + const float projection_weights_scale = + (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale; + const float* projection_bias_ptr = + (projection_bias == nullptr) ? nullptr : projection_bias->data.f; + + // Required tensors, pointers are non-null. + const float* input_ptr_batch = input->data.f; + const int8_t* input_to_forget_weights_ptr = + reinterpret_cast(input_to_forget_weights->data.uint8); + const float input_to_forget_weights_scale = + input_to_forget_weights->params.scale; + const int8_t* input_to_cell_weights_ptr = + reinterpret_cast(input_to_cell_weights->data.uint8); + const float input_to_cell_weights_scale = input_to_cell_weights->params.scale; + const int8_t* input_to_output_weights_ptr = + reinterpret_cast(input_to_output_weights->data.uint8); + const float input_to_output_weights_scale = + input_to_output_weights->params.scale; + const int8_t* recurrent_to_forget_weights_ptr = + reinterpret_cast(recurrent_to_forget_weights->data.uint8); + const float recurrent_to_forget_weights_scale = + recurrent_to_forget_weights->params.scale; + const int8_t* recurrent_to_cell_weights_ptr = + reinterpret_cast(recurrent_to_cell_weights->data.uint8); + const float recurrent_to_cell_weights_scale = + recurrent_to_cell_weights->params.scale; + const int8_t* recurrent_to_output_weights_ptr = + reinterpret_cast(recurrent_to_output_weights->data.uint8); + const float recurrent_to_output_weights_scale = + recurrent_to_output_weights->params.scale; + const float* forget_gate_bias_ptr = forget_gate_bias->data.f; + const float* 
cell_bias_ptr = cell_bias->data.f; + const float* output_gate_bias_ptr = output_gate_bias->data.f; + + float* output_state_ptr = output_state->data.f; + float* cell_state_ptr = cell_state->data.f; + float* output_ptr_batch = output->data.f; + + // Temporary storage for quantized values and scaling factors. + int8_t* quantized_input_ptr = + reinterpret_cast(input_quantized->data.uint8); + int8_t* quantized_output_state_ptr = + reinterpret_cast(output_state_quantized->data.uint8); + int8_t* quantized_cell_state_ptr = + reinterpret_cast(cell_state_quantized->data.uint8); + float* scaling_factors_ptr = scaling_factors->data.f; + float* prod_scaling_factors_ptr = prod_scaling_factors->data.f; + float* recovered_cell_weights_ptr = recovered_cell_weights->data.f; + + kernel_utils::LstmStep( + input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale, + input_to_forget_weights_ptr, input_to_forget_weights_scale, + input_to_cell_weights_ptr, input_to_cell_weights_scale, + input_to_output_weights_ptr, input_to_output_weights_scale, + recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale, + recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale, + recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale, + recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale, + cell_to_input_weights_ptr, cell_to_input_weights_scale, + cell_to_forget_weights_ptr, cell_to_forget_weights_scale, + cell_to_output_weights_ptr, cell_to_output_weights_scale, + input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr, + output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale, + projection_bias_ptr, params, n_batch, n_cell, n_input, n_output, + input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr, + recovered_cell_weights_ptr, quantized_input_ptr, + quantized_output_state_ptr, quantized_cell_state_ptr, output_state_ptr, + cell_state_ptr, output_ptr_batch); + + 
return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const auto* params = reinterpret_cast(node->builtin_data); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + + const TfLiteTensor* input_to_input_weights = + GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); + const TfLiteTensor* input_to_forget_weights = + GetInput(context, node, kInputToForgetWeightsTensor); + const TfLiteTensor* input_to_cell_weights = + GetInput(context, node, kInputToCellWeightsTensor); + const TfLiteTensor* input_to_output_weights = + GetInput(context, node, kInputToOutputWeightsTensor); + + const TfLiteTensor* recurrent_to_input_weights = + GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); + const TfLiteTensor* recurrent_to_forget_weights = + GetInput(context, node, kRecurrentToForgetWeightsTensor); + const TfLiteTensor* recurrent_to_cell_weights = + GetInput(context, node, kRecurrentToCellWeightsTensor); + const TfLiteTensor* recurrent_to_output_weights = + GetInput(context, node, kRecurrentToOutputWeightsTensor); + + const TfLiteTensor* cell_to_input_weights = + GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); + const TfLiteTensor* cell_to_forget_weights = + GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); + const TfLiteTensor* cell_to_output_weights = + GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); + + const TfLiteTensor* input_gate_bias = + GetOptionalInputTensor(context, node, kInputGateBiasTensor); + const TfLiteTensor* forget_gate_bias = + GetInput(context, node, kForgetGateBiasTensor); + const TfLiteTensor* cell_bias = GetInput(context, node, kCellGateBiasTensor); + const TfLiteTensor* output_gate_bias = + GetInput(context, node, kOutputGateBiasTensor); + + const TfLiteTensor* projection_weights = + GetOptionalInputTensor(context, node, kProjectionWeightsTensor); + const TfLiteTensor* projection_bias = + 
GetOptionalInputTensor(context, node, kProjectionBiasTensor); + + // Index the scratch buffers pointers to the global scratch buffer. + TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/0); + + TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); + TfLiteTensor* cell_state = GetOutput(context, node, kCellStateTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + // TODO(mirkov): add a check that weights are all uint8s or all floats. + switch (input_to_output_weights->type) { + case kTfLiteFloat32: { + return EvalFloat(input, input_to_input_weights, input_to_forget_weights, + input_to_cell_weights, input_to_output_weights, + recurrent_to_input_weights, recurrent_to_forget_weights, + recurrent_to_cell_weights, recurrent_to_output_weights, + cell_to_input_weights, cell_to_forget_weights, + cell_to_output_weights, input_gate_bias, + forget_gate_bias, cell_bias, output_gate_bias, + projection_weights, projection_bias, params, + scratch_buffer, output_state, cell_state, output); + } + case kTfLiteUInt8: { + TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1); + TfLiteTensor* output_state_quantized = + GetTemporary(context, node, /*index=*/2); + TfLiteTensor* cell_state_quantized = + GetTemporary(context, node, /*index=*/3); + TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4); + TfLiteTensor* prod_scaling_factors = + GetTemporary(context, node, /*index=*/5); + TfLiteTensor* recovered_cell_weights = + GetTemporary(context, node, /*index=*/6); + return EvalHybrid( + input, input_to_input_weights, input_to_forget_weights, + input_to_cell_weights, input_to_output_weights, + recurrent_to_input_weights, recurrent_to_forget_weights, + recurrent_to_cell_weights, recurrent_to_output_weights, + cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, + input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, + projection_weights, projection_bias, 
params, scratch_buffer, + scaling_factors, prod_scaling_factors, recovered_cell_weights, + input_quantized, output_state_quantized, cell_state_quantized, + output_state, cell_state, output); + } + default: + context->ReportError(context, "Type %d is not currently supported.", + input_to_output_weights->type); + return kTfLiteError; + } + return kTfLiteOk; +} + } // namespace full // For basic kernel (5-inputs). @@ -491,7 +805,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, node->inputs->size == kInputNum); TF_LITE_ENSURE(context, node->outputs->size == kOutputNum); - // Only Float32 is supportted currently. + // Only Float32 is supported currently. // TODO(ycling): Implement quantize uint8 support. for (int index = 0; index < node->inputs->size; ++index) { TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc index d81220d8d3..6da29a4a92 100644 --- a/tensorflow/contrib/lite/kernels/lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/lstm_test.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ // Unit test for TFLite LSTM op. 
-#include #include #include @@ -35,7 +34,8 @@ class LSTMOpModel : public SingleOpModel { LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, bool use_peephole, bool use_projection_weights, bool use_projection_bias, float cell_clip, float proj_clip, - const std::vector>& input_shapes) + const std::vector>& input_shapes, + const TensorType& weight_type = TensorType_FLOAT32) : n_batch_(n_batch), n_input_(n_input), n_cell_(n_cell), @@ -45,31 +45,31 @@ class LSTMOpModel : public SingleOpModel { if (use_cifg) { input_to_input_weights_ = AddNullInput(); } else { - input_to_input_weights_ = AddInput(TensorType_FLOAT32); + input_to_input_weights_ = AddInput(weight_type); } - input_to_forget_weights_ = AddInput(TensorType_FLOAT32); - input_to_cell_weights_ = AddInput(TensorType_FLOAT32); - input_to_output_weights_ = AddInput(TensorType_FLOAT32); + input_to_forget_weights_ = AddInput(weight_type); + input_to_cell_weights_ = AddInput(weight_type); + input_to_output_weights_ = AddInput(weight_type); if (use_cifg) { recurrent_to_input_weights_ = AddNullInput(); } else { - recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_input_weights_ = AddInput(weight_type); } - recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32); - recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32); - recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32); + recurrent_to_forget_weights_ = AddInput(weight_type); + recurrent_to_cell_weights_ = AddInput(weight_type); + recurrent_to_output_weights_ = AddInput(weight_type); if (use_peephole) { if (use_cifg) { cell_to_input_weights_ = AddNullInput(); } else { - cell_to_input_weights_ = AddInput(TensorType_FLOAT32); + cell_to_input_weights_ = AddInput(weight_type); } - cell_to_forget_weights_ = AddInput(TensorType_FLOAT32); - cell_to_output_weights_ = AddInput(TensorType_FLOAT32); + cell_to_forget_weights_ = AddInput(weight_type); + cell_to_output_weights_ = AddInput(weight_type); } else { 
cell_to_input_weights_ = AddNullInput(); cell_to_forget_weights_ = AddNullInput(); @@ -86,7 +86,7 @@ class LSTMOpModel : public SingleOpModel { output_gate_bias_ = AddInput(TensorType_FLOAT32); if (use_projection_weights) { - projection_weights_ = AddInput(TensorType_FLOAT32); + projection_weights_ = AddInput(weight_type); if (use_projection_bias) { projection_bias_ = AddInput(TensorType_FLOAT32); } else { @@ -192,8 +192,9 @@ class LSTMOpModel : public SingleOpModel { zero_buffer.get() + zero_buffer_size); } - void SetInput(int offset, float* begin, float* end) { - PopulateTensor(input_, offset, begin, end); + void SetInput(int offset, const float* begin, const float* end) { + PopulateTensor(input_, offset, const_cast(begin), + const_cast(end)); } std::vector GetOutput() { return ExtractVector(output_); } @@ -203,7 +204,7 @@ class LSTMOpModel : public SingleOpModel { int num_cells() { return n_cell_; } int num_batches() { return n_batch_; } - private: + protected: int input_; int input_to_input_weights_; int input_to_forget_weights_; @@ -237,7 +238,182 @@ class LSTMOpModel : public SingleOpModel { int n_output_; }; -TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { +class HybridLSTMOpModel : public LSTMOpModel { + public: + HybridLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, + bool use_cifg, bool use_peephole, + bool use_projection_weights, bool use_projection_bias, + float cell_clip, float proj_clip, + const std::vector>& input_shapes) + : LSTMOpModel(n_batch, n_input, n_cell, n_output, use_cifg, use_peephole, + use_projection_weights, use_projection_bias, cell_clip, + proj_clip, input_shapes, TensorType_UINT8) {} + + void SetInputToInputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(input_to_input_weights_, f); + } + + void SetInputToForgetWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(input_to_forget_weights_, f); + } + + void SetInputToCellWeights(std::initializer_list f) { + 
SymmetricQuantizeAndPopulate(input_to_cell_weights_, f); + } + + void SetInputToOutputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(input_to_output_weights_, f); + } + + void SetRecurrentToInputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f); + } + + void SetRecurrentToForgetWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f); + } + + void SetRecurrentToCellWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f); + } + + void SetRecurrentToOutputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f); + } + + void SetCellToInputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(cell_to_input_weights_, f); + } + + void SetCellToForgetWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f); + } + + void SetCellToOutputWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(cell_to_output_weights_, f); + } + + void SetProjectionWeights(std::initializer_list f) { + SymmetricQuantizeAndPopulate(projection_weights_, f); + } +}; + +class BaseLstmTest : public ::testing::Test { + protected: + // Weights of the LSTM model. Some are optional. 
+ std::initializer_list input_to_input_weights_; + std::initializer_list input_to_cell_weights_; + std::initializer_list input_to_forget_weights_; + std::initializer_list input_to_output_weights_; + std::initializer_list input_gate_bias_; + std::initializer_list cell_gate_bias_; + std::initializer_list forget_gate_bias_; + std::initializer_list output_gate_bias_; + std::initializer_list recurrent_to_input_weights_; + std::initializer_list recurrent_to_cell_weights_; + std::initializer_list recurrent_to_forget_weights_; + std::initializer_list recurrent_to_output_weights_; + std::initializer_list cell_to_input_weights_; + std::initializer_list cell_to_forget_weights_; + std::initializer_list cell_to_output_weights_; + std::initializer_list projection_weights_; + + // LSTM input is stored as num_batch x num_inputs vector. + std::vector> lstm_input_; + // LSTM output is stored as num_batch x num_outputs vector. + std::vector> lstm_golden_output_; + + // Compares output up to tolerance to the result of the lstm given the input. 
+ void VerifyGoldens(const std::vector>& input, + const std::vector>& output, + LSTMOpModel* lstm, float tolerance = 1e-5) { + const int num_batches = input.size(); + EXPECT_GT(num_batches, 0); + const int num_inputs = lstm->num_inputs(); + EXPECT_GT(num_inputs, 0); + const int input_sequence_size = input[0].size() / num_inputs; + EXPECT_GT(input_sequence_size, 0); + for (int i = 0; i < input_sequence_size; ++i) { + for (int b = 0; b < num_batches; ++b) { + const float* batch_start = input[b].data() + i * num_inputs; + const float* batch_end = batch_start + num_inputs; + + lstm->SetInput(b * lstm->num_inputs(), batch_start, batch_end); + } + + lstm->Invoke(); + + const int num_outputs = lstm->num_outputs(); + std::vector expected; + for (int b = 0; b < num_batches; ++b) { + const float* golden_start_batch = output[b].data() + i * num_outputs; + const float* golden_end_batch = golden_start_batch + num_outputs; + expected.insert(expected.end(), golden_start_batch, golden_end_batch); + } + EXPECT_THAT(lstm->GetOutput(), + ElementsAreArray(ArrayFloatNear(expected, tolerance))); + for (int i = 0; i < num_outputs; ++i) { + std::cout << lstm->GetOutput()[i] << ", "; + } + std::cout << std::endl; + for (int i = 0; i < num_outputs; ++i) { + std::cout << expected[i] << ", "; + } + std::cout << std::endl; + } + } +}; + +class NoCifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { + void SetUp() override { + input_to_input_weights_ = {-0.45018822, -0.02338299, -0.0870589, + -0.34550029, 0.04266912, -0.15680569, + -0.34856534, 0.43890524}; + input_to_cell_weights_ = {-0.50013041, 0.1370284, 0.11810488, 0.2013163, + -0.20583314, 0.44344562, 0.22077113, -0.29909778}; + input_to_forget_weights_ = {0.09701663, 0.20334584, -0.50592935, + -0.31343272, -0.40032279, 0.44781327, + 0.01387155, -0.35593212}; + input_to_output_weights_ = {-0.25065863, -0.28290087, 0.04613829, + 0.40525138, 0.44272184, 0.03897077, + -0.1556896, 0.19487578}; + input_gate_bias_ = {0., 0., 0., 
0.}; + cell_gate_bias_ = {0., 0., 0., 0.}; + forget_gate_bias_ = {1., 1., 1., 1.}; + output_gate_bias_ = {0., 0., 0., 0.}; + + recurrent_to_input_weights_ = { + -0.0063535, -0.2042388, 0.31454784, -0.35746509, + 0.28902304, 0.08183324, -0.16555229, 0.02286911, + -0.13566875, 0.03034258, 0.48091322, -0.12528998, + 0.24077177, -0.51332325, -0.33502164, 0.10629296}; + + recurrent_to_cell_weights_ = { + -0.3407414, 0.24443203, -0.2078532, 0.26320225, + 0.05695659, -0.00123841, -0.4744786, -0.35869038, + -0.06418842, -0.13502428, -0.501764, 0.22830659, + -0.46367589, 0.26016325, -0.03894562, -0.16368064}; + + recurrent_to_forget_weights_ = { + -0.48684245, -0.06655136, 0.42224967, 0.2112639, + 0.27654213, 0.20864892, -0.07646349, 0.45877004, + 0.00141793, -0.14609534, 0.36447752, 0.09196436, + 0.28053468, 0.01560611, -0.20127171, -0.01140004}; + + recurrent_to_output_weights_ = { + 0.43385774, -0.17194885, 0.2718237, 0.09215671, + 0.24107647, -0.39835793, 0.18212086, 0.01301402, + 0.48572797, -0.50656658, 0.20047462, -0.20607421, + -0.51818722, -0.15390486, 0.0468148, 0.39922136}; + + lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; + lstm_golden_output_ = {{-0.02973187, 0.1229473, 0.20885126, -0.15358765, + -0.03716109, 0.12507336, 0.41193449, -0.20860538, + -0.15053082, 0.09120187, 0.24278517, -0.12222792}}; + } +}; + +TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. 
@@ -257,10 +433,10 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { {n_cell, n_input}, // input_to_cell_weight tensor {n_cell, n_input}, // input_to_output_weight tensor - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor + {n_cell, n_output}, // recurrent_to_input_weight_tensor + {n_cell, n_output}, // recurrent_to_forget_weight_tensor + {n_cell, n_output}, // recurrent_to_cell_weight_tensor + {n_cell, n_output}, // recurrent_to_output_weight_tensor {0}, // cell_to_input_weight tensor {0}, // cell_to_forget_weight tensor @@ -275,79 +451,137 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { {0}, // projection_bias tensor }); - lstm.SetInputToInputWeights({-0.45018822, -0.02338299, -0.0870589, - -0.34550029, 0.04266912, -0.15680569, - -0.34856534, 0.43890524}); - - lstm.SetInputToCellWeights({-0.50013041, 0.1370284, 0.11810488, 0.2013163, - -0.20583314, 0.44344562, 0.22077113, - -0.29909778}); - - lstm.SetInputToForgetWeights({0.09701663, 0.20334584, -0.50592935, - -0.31343272, -0.40032279, 0.44781327, - 0.01387155, -0.35593212}); - - lstm.SetInputToOutputWeights({-0.25065863, -0.28290087, 0.04613829, - 0.40525138, 0.44272184, 0.03897077, -0.1556896, - 0.19487578}); + lstm.SetInputToInputWeights(input_to_input_weights_); + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); - lstm.SetInputGateBias({0., 0., 0., 0.}); + lstm.SetInputGateBias(input_gate_bias_); + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); - lstm.SetCellBias({0., 0., 0., 0.}); + lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_); + 
lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); - lstm.SetForgetGateBias({1., 1., 1., 1.}); - - lstm.SetOutputGateBias({0., 0., 0., 0.}); - - lstm.SetRecurrentToInputWeights( - {-0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324, - -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322, - -0.12528998, 0.24077177, -0.51332325, -0.33502164, 0.10629296}); - - lstm.SetRecurrentToCellWeights( - {-0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841, - -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659, - -0.46367589, 0.26016325, -0.03894562, -0.16368064}); + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); - lstm.SetRecurrentToForgetWeights( - {-0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892, - -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436, - 0.28053468, 0.01560611, -0.20127171, -0.01140004}); + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); +} - lstm.SetRecurrentToOutputWeights( - {0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793, - 0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421, - -0.51818722, -0.15390486, 0.0468148, 0.39922136}); +TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) { + const int n_batch = 1; + const int n_input = 2; + // n_cell and n_output have the same size when there is no projection. 
+ const int n_cell = 4; + const int n_output = 4; - static float lstm_input[] = {2., 3., 3., 4., 1., 1.}; - static float lstm_golden_output[] = {-0.02973187, 0.1229473, 0.20885126, - -0.15358765, -0.03716109, 0.12507336, - 0.41193449, -0.20860538, -0.15053082, - 0.09120187, 0.24278517, -0.12222792}; + HybridLSTMOpModel lstm( + n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/false, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {n_cell, n_output}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {0}, // cell_to_input_weight tensor + {0}, // cell_to_forget_weight tensor + {0}, // cell_to_output_weight tensor + + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToInputWeights(input_to_input_weights_); + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); + + lstm.SetInputGateBias(input_gate_bias_); + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); + + lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_); + lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + 
lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); // Resetting cell_state and output_state lstm.ResetCellState(); lstm.ResetOutputState(); - const int input_sequence_size = - sizeof(lstm_input) / sizeof(float) / (lstm.num_inputs()); - for (int i = 0; i < input_sequence_size; i++) { - float* batch0_start = lstm_input + i * lstm.num_inputs(); - float* batch0_end = batch0_start + lstm.num_inputs(); + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, + /*tolerance=*/0.0157651); +} - lstm.SetInput(0, batch0_start, batch0_end); +class CifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { + void SetUp() override { + input_to_cell_weights_ = {-0.49770179, -0.27711356, -0.09624726, + 0.05100781, 0.04717243, 0.48944736, + -0.38535351, -0.17212132}; - lstm.Invoke(); + input_to_forget_weights_ = {-0.55291498, -0.42866567, 0.13056988, + -0.3633365, -0.22755712, 0.28253698, + 0.24407166, 0.33826375}; - float* golden_start = lstm_golden_output + i * lstm.num_outputs(); - float* golden_end = golden_start + lstm.num_outputs(); - std::vector expected; - expected.insert(expected.end(), golden_start, golden_end); - EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); + input_to_output_weights_ = {0.10725588, -0.02335852, -0.55932593, + -0.09426838, -0.44257352, 0.54939759, + 0.01533556, 0.42751634}; + cell_gate_bias_ = {0., 0., 0., 0.}; + forget_gate_bias_ = {1., 1., 1., 1.}; + output_gate_bias_ = {0., 0., 0., 0.}; + + recurrent_to_cell_weights_ = { + 0.54066205, -0.32668582, -0.43562764, -0.56094903, + 0.42957711, 0.01841056, -0.32764608, -0.33027974, + -0.10826075, 0.20675004, 0.19069612, -0.03026325, + -0.54532051, 0.33003211, 0.44901288, 0.21193194}; + + recurrent_to_forget_weights_ = { + -0.13832897, -0.0515101, -0.2359007, -0.16661474, + -0.14340827, 0.36986142, 0.23414481, 0.55899, + 0.10798943, -0.41174671, 0.17751795, -0.34484994, + -0.35874045, -0.11352962, 0.27268326, 0.54058349}; + + recurrent_to_output_weights_ = { + 
0.41613156, 0.42610586, -0.16495961, -0.5663873, + 0.30579174, -0.05115908, -0.33941799, 0.23364776, + 0.11178309, 0.09481031, -0.26424935, 0.46261835, + 0.50248802, 0.26114327, -0.43736315, 0.33149987}; + + cell_to_forget_weights_ = {0.47485286, -0.51955009, -0.24458408, + 0.31544167}; + cell_to_output_weights_ = {-0.17135078, 0.82760304, 0.85573703, + -0.77109635}; + + lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; + lstm_golden_output_ = {{-0.36444446, -0.00352185, 0.12886585, -0.05163646, + -0.42312205, -0.01218222, 0.24201041, -0.08124574, + -0.358325, -0.04621704, 0.21641694, -0.06471302}}; } -} +}; -TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) { +TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. @@ -385,74 +619,689 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) { {0}, // projection_bias tensor }); - lstm.SetInputToCellWeights({-0.49770179, -0.27711356, -0.09624726, 0.05100781, - 0.04717243, 0.48944736, -0.38535351, - -0.17212132}); - - lstm.SetInputToForgetWeights({-0.55291498, -0.42866567, 0.13056988, - -0.3633365, -0.22755712, 0.28253698, 0.24407166, - 0.33826375}); - - lstm.SetInputToOutputWeights({0.10725588, -0.02335852, -0.55932593, - -0.09426838, -0.44257352, 0.54939759, - 0.01533556, 0.42751634}); - - lstm.SetCellBias({0., 0., 0., 0.}); + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); - lstm.SetForgetGateBias({1., 1., 1., 1.}); + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); - lstm.SetOutputGateBias({0., 0., 0., 0.}); + lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + 
lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); - lstm.SetRecurrentToCellWeights( - {0.54066205, -0.32668582, -0.43562764, -0.56094903, 0.42957711, - 0.01841056, -0.32764608, -0.33027974, -0.10826075, 0.20675004, - 0.19069612, -0.03026325, -0.54532051, 0.33003211, 0.44901288, - 0.21193194}); + lstm.SetCellToForgetWeights(cell_to_forget_weights_); + lstm.SetCellToOutputWeights(cell_to_output_weights_); - lstm.SetRecurrentToForgetWeights( - {-0.13832897, -0.0515101, -0.2359007, -0.16661474, -0.14340827, - 0.36986142, 0.23414481, 0.55899, 0.10798943, -0.41174671, 0.17751795, - -0.34484994, -0.35874045, -0.11352962, 0.27268326, 0.54058349}); + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); - lstm.SetRecurrentToOutputWeights( - {0.41613156, 0.42610586, -0.16495961, -0.5663873, 0.30579174, -0.05115908, - -0.33941799, 0.23364776, 0.11178309, 0.09481031, -0.26424935, 0.46261835, - 0.50248802, 0.26114327, -0.43736315, 0.33149987}); + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); +} - lstm.SetCellToForgetWeights( - {0.47485286, -0.51955009, -0.24458408, 0.31544167}); - lstm.SetCellToOutputWeights( - {-0.17135078, 0.82760304, 0.85573703, -0.77109635}); +TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) { + const int n_batch = 1; + const int n_input = 2; + // n_cell and n_output have the same size when there is no projection. 
+ const int n_cell = 4; + const int n_output = 4; - static float lstm_input[] = {2., 3., 3., 4., 1., 1.}; - static float lstm_golden_output[] = {-0.36444446, -0.00352185, 0.12886585, - -0.05163646, -0.42312205, -0.01218222, - 0.24201041, -0.08124574, -0.358325, - -0.04621704, 0.21641694, -0.06471302}; + HybridLSTMOpModel lstm( + n_batch, n_input, n_cell, n_output, + /*use_cifg=*/true, /*use_peephole=*/true, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {0, 0}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {0, 0}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {0}, // cell_to_input_weight tensor + {n_cell}, // cell_to_forget_weight tensor + {n_cell}, // cell_to_output_weight tensor + + {0}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); + + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); + + lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); + + lstm.SetCellToForgetWeights(cell_to_forget_weights_); + lstm.SetCellToOutputWeights(cell_to_output_weights_); // Resetting cell_state and output_state 
lstm.ResetCellState(); lstm.ResetOutputState(); - const int input_sequence_size = - sizeof(lstm_input) / sizeof(float) / (lstm.num_inputs()); - for (int i = 0; i < input_sequence_size; i++) { - float* batch0_start = lstm_input + i * lstm.num_inputs(); - float* batch0_end = batch0_start + lstm.num_inputs(); - - lstm.SetInput(0, batch0_start, batch0_end); - - lstm.Invoke(); + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); +} - float* golden_start = lstm_golden_output + i * lstm.num_outputs(); - float* golden_end = golden_start + lstm.num_outputs(); - std::vector expected; - expected.insert(expected.end(), golden_start, golden_end); - EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); +class NoCifgPeepholeProjectionClippingLstmTest : public BaseLstmTest { + void SetUp() override { + input_to_input_weights_ = { + 0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, + 0.09171803, 0.14647801, 0.10797193, -0.0057968358, 0.0019193048, + -0.2726754, 0.10154029, -0.018539885, 0.080349885, -0.10262385, + -0.022599787, -0.09121155, -0.008675967, -0.045206103, -0.0821282, + -0.008045952, 0.015478081, 0.055217247, 0.038719587, 0.044153627, + -0.06453243, 0.05031825, -0.046935108, -0.008164439, 0.014574226, + -0.1671009, -0.15519552, -0.16819797, -0.13971269, -0.11953059, + 0.25005487, -0.22790983, 0.009855087, -0.028140958, -0.11200698, + 0.11295408, -0.0035217577, 0.054485075, 0.05184695, 0.064711206, + 0.10989193, 0.11674786, 0.03490607, 0.07727357, 0.11390585, + -0.1863375, -0.1034451, -0.13945189, -0.049401227, -0.18767063, + 0.042483903, 0.14233552, 0.13832581, 0.18350165, 0.14545603, + -0.028545704, 0.024939531, 0.050929718, 0.0076203286, -0.0029723682, + -0.042484224, -0.11827596, -0.09171104, -0.10808628, -0.16327988, + -0.2273378, -0.0993647, -0.017155107, 0.0023917493, 0.049272764, + 0.0038534778, 0.054764505, 0.089753784, 0.06947234, 0.08014476, + -0.04544234, -0.0497073, -0.07135631, -0.048929106, 
-0.004042012, + -0.009284026, 0.018042054, 0.0036860977, -0.07427302, -0.11434604, + -0.018995456, 0.031487543, 0.012834908, 0.019977754, 0.044256654, + -0.39292613, -0.18519334, -0.11651281, -0.06809892, 0.011373677}; + + input_to_forget_weights_ = { + -0.0018401089, -0.004852237, 0.03698424, 0.014181704, + 0.028273236, -0.016726194, -0.05249759, -0.10204261, + 0.00861066, -0.040979505, -0.009899187, 0.01923892, + -0.028177269, -0.08535103, -0.14585495, 0.10662567, + -0.01909731, -0.017883534, -0.0047269356, -0.045103323, + 0.0030784295, 0.076784775, 0.07463696, 0.094531395, + 0.0814421, -0.12257899, -0.033945758, -0.031303465, + 0.045630626, 0.06843887, -0.13492945, -0.012480007, + -0.0811829, -0.07224499, -0.09628791, 0.045100946, + 0.0012300825, 0.013964662, 0.099372394, 0.02543059, + 0.06958324, 0.034257296, 0.0482646, 0.06267997, + 0.052625068, 0.12784666, 0.07077897, 0.025725935, + 0.04165009, 0.07241905, 0.018668644, -0.037377294, + -0.06277783, -0.08833636, -0.040120605, -0.011405586, + -0.007808335, -0.010301386, -0.005102167, 0.027717464, + 0.05483423, 0.11449111, 0.11289652, 0.10939839, + 0.13396506, -0.08402166, -0.01901462, -0.044678304, + -0.07720565, 0.014350063, -0.11757958, -0.0652038, + -0.08185733, -0.076754324, -0.092614375, 0.10405491, + 0.052960336, 0.035755895, 0.035839386, -0.012540553, + 0.036881298, 0.02913376, 0.03420159, 0.05448447, + -0.054523353, 0.02582715, 0.02327355, -0.011857179, + -0.0011980024, -0.034641717, -0.026125094, -0.17582615, + -0.15923657, -0.27486774, -0.0006143371, 0.0001771948, + -8.470171e-05, 0.02651807, 0.045790765, 0.06956496}; + + input_to_cell_weights_ = { + -0.04580283, -0.09549462, -0.032418985, -0.06454633, + -0.043528453, 0.043018587, -0.049152344, -0.12418144, + -0.078985475, -0.07596889, 0.019484362, -0.11434962, + -0.0074034138, -0.06314844, -0.092981495, 0.0062155537, + -0.025034338, -0.0028890965, 0.048929527, 0.06235075, + 0.10665918, -0.032036792, -0.08505916, -0.10843358, + -0.13002433, 
-0.036816437, -0.02130134, -0.016518239, + 0.0047691227, -0.0025825808, 0.066017866, 0.029991534, + -0.10652836, -0.1037554, -0.13056071, -0.03266643, + -0.033702414, -0.006473424, -0.04611692, 0.014419339, + -0.025174323, 0.0396852, 0.081777506, 0.06157468, + 0.10210095, -0.009658194, 0.046511717, 0.03603906, + 0.0069369148, 0.015960095, -0.06507666, 0.09551598, + 0.053568836, 0.06408714, 0.12835667, -0.008714329, + -0.20211966, -0.12093674, 0.029450472, 0.2849013, + -0.029227901, 0.1164364, -0.08560263, 0.09941786, + -0.036999565, -0.028842626, -0.0033637602, -0.017012902, + -0.09720865, -0.11193351, -0.029155117, -0.017936034, + -0.009768936, -0.04223324, -0.036159635, 0.06505112, + -0.021742892, -0.023377212, -0.07221364, -0.06430552, + 0.05453865, 0.091149814, 0.06387331, 0.007518393, + 0.055960953, 0.069779344, 0.046411168, 0.10509911, + 0.07463894, 0.0075130584, 0.012850982, 0.04555431, + 0.056955688, 0.06555285, 0.050801456, -0.009862683, + 0.00826772, -0.026555609, -0.0073611983, -0.0014897042}; + + input_to_output_weights_ = { + -0.0998932, -0.07201956, -0.052803773, -0.15629593, -0.15001918, + -0.07650751, 0.02359855, -0.075155355, -0.08037709, -0.15093534, + 0.029517552, -0.04751393, 0.010350531, -0.02664851, -0.016839722, + -0.023121163, 0.0077019283, 0.012851257, -0.05040649, -0.0129761, + -0.021737747, -0.038305793, -0.06870586, -0.01481247, -0.001285394, + 0.10124236, 0.083122835, 0.053313006, -0.062235646, -0.075637154, + -0.027833903, 0.029774971, 0.1130802, 0.09218906, 0.09506135, + -0.086665764, -0.037162706, -0.038880914, -0.035832845, -0.014481564, + -0.09825003, -0.12048569, -0.097665586, -0.05287633, -0.0964047, + -0.11366429, 0.035777505, 0.13568819, 0.052451383, 0.050649304, + 0.05798951, -0.021852335, -0.099848844, 0.014740475, -0.078897946, + 0.04974699, 0.014160473, 0.06973932, 0.04964942, 0.033364646, + 0.08190124, 0.025535367, 0.050893165, 0.048514254, 0.06945813, + -0.078907564, -0.06707616, -0.11844508, -0.09986688, -0.07509403, + 
0.06263226, 0.14925587, 0.20188436, 0.12098451, 0.14639415, + 0.0015017595, -0.014267382, -0.03417257, 0.012711468, 0.0028300495, + -0.024758482, -0.05098548, -0.0821182, 0.014225672, 0.021544158, + 0.08949725, 0.07505268, -0.0020780868, 0.04908258, 0.06476295, + -0.022907063, 0.027562456, 0.040185735, 0.019567577, -0.015598739, + -0.049097303, -0.017121866, -0.083368234, -0.02332002, -0.0840956}; + + input_gate_bias_ = {0.02234832, 0.14757581, 0.18176508, 0.10380666, + 0.053110216, -0.06928846, -0.13942584, -0.11816189, + 0.19483899, 0.03652339, -0.10250295, 0.036714908, + -0.18426876, 0.036065217, 0.21810818, 0.02383196, + -0.043370757, 0.08690144, -0.04444982, 0.00030581196}; + + forget_gate_bias_ = {0.035185695, -0.042891346, -0.03032477, 0.23027696, + 0.11098921, 0.15378423, 0.09263801, 0.09790885, + 0.09508917, 0.061199076, 0.07665568, -0.015443159, + -0.03499149, 0.046190713, 0.08895977, 0.10899629, + 0.40694186, 0.06030037, 0.012413437, -0.06108739}; + + cell_gate_bias_ = {-0.024379363, 0.0055531194, 0.23377132, 0.033463873, + -0.1483596, -0.10639995, -0.091433935, 0.058573797, + -0.06809782, -0.07889636, -0.043246906, -0.09829136, + -0.4279842, 0.034901652, 0.18797937, 0.0075234566, + 0.016178843, 0.1749513, 0.13975595, 0.92058027}; + + output_gate_bias_ = {0.046159424, -0.0012809046, 0.03563469, 0.12648113, + 0.027195795, 0.35373217, -0.018957434, 0.008907322, + -0.0762701, 0.12018895, 0.04216877, 0.0022856654, + 0.040952638, 0.3147856, 0.08225149, -0.057416286, + -0.14995944, -0.008040261, 0.13208859, 0.029760877}; + + recurrent_to_input_weights_ = { + -0.001374326, -0.078856036, 0.10672688, 0.029162422, + -0.11585556, 0.02557986, -0.13446963, -0.035785314, + -0.01244275, 0.025961924, -0.02337298, -0.044228926, + -0.055839065, -0.046598054, -0.010546039, -0.06900766, + 0.027239809, 0.022582639, -0.013296484, -0.05459212, + 0.08981, -0.045407712, 0.08682226, -0.06867011, + -0.14390695, -0.02916037, 0.000996957, 0.091420636, + 0.14283475, -0.07390571, 
-0.06402044, 0.062524505, + -0.093129106, 0.04860203, -0.08364217, -0.08119002, + 0.009352075, 0.22920375, 0.0016303885, 0.11583097, + -0.13732095, 0.012405723, -0.07551853, 0.06343048, + 0.12162708, -0.031923793, -0.014335606, 0.01790974, + -0.10650317, -0.0724401, 0.08554849, -0.05727212, + 0.06556731, -0.042729504, -0.043227166, 0.011683251, + -0.013082158, -0.029302018, -0.010899579, -0.062036745, + -0.022509435, -0.00964907, -0.01567329, 0.04260106, + -0.07787477, -0.11576462, 0.017356863, 0.048673786, + -0.017577527, -0.05527947, -0.082487635, -0.040137455, + -0.10820036, -0.04666372, 0.022746278, -0.07851417, + 0.01068115, 0.032956902, 0.022433773, 0.0026891115, + 0.08944216, -0.0685835, 0.010513544, 0.07228705, + 0.02032331, -0.059686817, -0.0005566496, -0.086984694, + 0.040414046, -0.1380399, 0.094208956, -0.05722982, + 0.012092817, -0.04989123, -0.086576, -0.003399834, + -0.04696032, -0.045747425, 0.10091314, 0.048676282, + -0.029037097, 0.031399418, -0.0040285117, 0.047237843, + 0.09504992, 0.041799378, -0.049185462, -0.031518843, + -0.10516937, 0.026374253, 0.10058866, -0.0033195973, + -0.041975245, 0.0073591834, 0.0033782164, -0.004325073, + -0.10167381, 0.042500053, -0.01447153, 0.06464186, + -0.017142897, 0.03312627, 0.009205989, 0.024138335, + -0.011337001, 0.035530265, -0.010912711, 0.0706555, + -0.005894094, 0.051841937, -0.1401738, -0.02351249, + 0.0365468, 0.07590991, 0.08838724, 0.021681072, + -0.10086113, 0.019608743, -0.06195883, 0.077335775, + 0.023646897, -0.095322326, 0.02233014, 0.09756986, + -0.048691444, -0.009579111, 0.07595467, 0.11480546, + -0.09801813, 0.019894179, 0.08502348, 0.004032281, + 0.037211012, 0.068537936, -0.048005626, -0.091520436, + -0.028379958, -0.01556313, 0.06554592, -0.045599163, + -0.01672207, -0.020169014, -0.011877351, -0.20212261, + 0.010889619, 0.0047078193, 0.038385306, 0.08540671, + -0.017140968, -0.0035865551, 0.016678626, 0.005633034, + 0.015963363, 0.00871737, 0.060130805, 0.028611384, + 0.10109069, 
-0.015060172, -0.07894427, 0.06401885, + 0.011584063, -0.024466386, 0.0047652307, -0.09041358, + 0.030737216, -0.0046374933, 0.14215417, -0.11823516, + 0.019899689, 0.006106124, -0.027092824, 0.0786356, + 0.05052217, -0.058925, -0.011402121, -0.024987547, + -0.0013661642, -0.06832946, -0.015667673, -0.1083353, + -0.00096863037, -0.06988685, -0.053350925, -0.027275559, + -0.033664223, -0.07978348, -0.025200296, -0.017207067, + -0.058403496, -0.055697463, 0.005798788, 0.12965427, + -0.062582195, 0.0013350133, -0.10482091, 0.0379771, + 0.072521195, -0.0029455067, -0.13797039, -0.03628521, + 0.013806405, -0.017858358, -0.01008298, -0.07700066, + -0.017081132, 0.019358726, 0.0027079724, 0.004635139, + 0.062634714, -0.02338735, -0.039547626, -0.02050681, + 0.03385117, -0.083611414, 0.002862572, -0.09421313, + 0.058618143, -0.08598433, 0.00972939, 0.023867095, + -0.053934585, -0.023203006, 0.07452513, -0.048767887, + -0.07314807, -0.056307215, -0.10433547, -0.06440842, + 0.04328182, 0.04389765, -0.020006588, -0.09076438, + -0.11652589, -0.021705797, 0.03345259, -0.010329105, + -0.025767034, 0.013057034, -0.07316461, -0.10145612, + 0.06358255, 0.18531723, 0.07759293, 0.12006465, + 0.1305557, 0.058638252, -0.03393652, 0.09622831, + -0.16253184, -2.4580743e-06, 0.079869635, -0.070196845, + -0.005644518, 0.06857898, -0.12598175, -0.035084512, + 0.03156317, -0.12794146, -0.031963028, 0.04692781, + 0.030070418, 0.0071660685, -0.095516115, -0.004643372, + 0.040170413, -0.062104587, -0.0037324072, 0.0554317, + 0.08184801, -0.019164372, 0.06791302, 0.034257166, + -0.10307039, 0.021943003, 0.046745934, 0.0790918, + -0.0265588, -0.007824208, 0.042546265, -0.00977924, + -0.0002440307, -0.017384544, -0.017990116, 0.12252321, + -0.014512694, -0.08251313, 0.08861942, 0.13589665, + 0.026351685, 0.012641483, 0.07466548, 0.044301085, + -0.045414884, -0.051112458, 0.03444247, -0.08502782, + -0.04106223, -0.028126027, 0.028473156, 0.10467447}; + + recurrent_to_cell_weights_ = { + 
-0.037322544, 0.018592842, 0.0056175636, -0.06253426, + 0.055647098, -0.05713207, -0.05626563, 0.005559383, + 0.03375411, -0.025757805, -0.088049285, 0.06017052, + -0.06570978, 0.007384076, 0.035123326, -0.07920549, + 0.053676967, 0.044480428, -0.07663568, 0.0071805613, + 0.08089997, 0.05143358, 0.038261272, 0.03339287, + -0.027673481, 0.044746667, 0.028349208, 0.020090483, + -0.019443132, -0.030755889, -0.0040000007, 0.04465846, + -0.021585021, 0.0031670958, 0.0053199246, -0.056117613, + -0.10893326, 0.076739706, -0.08509834, -0.027997585, + 0.037871376, 0.01449768, -0.09002357, -0.06111149, + -0.046195522, 0.0422062, -0.005683705, -0.1253618, + -0.012925729, -0.04890792, 0.06985068, 0.037654128, + 0.03398274, -0.004781977, 0.007032333, -0.031787455, + 0.010868644, -0.031489216, 0.09525667, 0.013939797, + 0.0058680447, 0.0167067, 0.02668468, -0.04797466, + -0.048885044, -0.12722108, 0.035304096, 0.06554885, + 0.00972396, -0.039238118, -0.05159735, -0.11329045, + 0.1613692, -0.03750952, 0.06529313, -0.071974665, + -0.11769596, 0.015524369, -0.0013754242, -0.12446318, + 0.02786344, -0.014179351, 0.005264273, 0.14376344, + 0.015983658, 0.03406988, -0.06939408, 0.040699873, + 0.02111075, 0.09669095, 0.041345075, -0.08316494, + -0.07684199, -0.045768797, 0.032298047, -0.041805092, + 0.0119405, 0.0061010392, 0.12652606, 0.0064572375, + -0.024950314, 0.11574242, 0.04508852, -0.04335324, + 0.06760663, -0.027437469, 0.07216407, 0.06977076, + -0.05438599, 0.034033038, -0.028602652, 0.05346137, + 0.043184172, -0.037189785, 0.10420091, 0.00882477, + -0.054019816, -0.074273005, -0.030617684, -0.0028467078, + 0.024302477, -0.0038869337, 0.005332455, 0.0013399826, + 0.04361412, -0.007001822, 0.09631092, -0.06702025, + -0.042049985, -0.035070654, -0.04103342, -0.10273396, + 0.0544271, 0.037184782, -0.13150354, -0.0058036847, + -0.008264958, 0.042035464, 0.05891794, 0.029673764, + 0.0063542654, 0.044788733, 0.054816857, 0.062257513, + -0.00093483756, 0.048938446, -0.004952862, 
-0.007730018, + -0.04043371, -0.017094059, 0.07229206, -0.023670016, + -0.052195564, -0.025616996, -0.01520939, 0.045104615, + -0.007376126, 0.003533447, 0.006570588, 0.056037236, + 0.12436656, 0.051817212, 0.028532185, -0.08686856, + 0.11868599, 0.07663395, -0.07323171, 0.03463402, + -0.050708205, -0.04458982, -0.11590894, 0.021273347, + 0.1251325, -0.15313013, -0.12224372, 0.17228661, + 0.023029093, 0.086124025, 0.006445803, -0.03496501, + 0.028332196, 0.04449512, -0.042436164, -0.026587414, + -0.006041347, -0.09292539, -0.05678812, 0.03897832, + 0.09465633, 0.008115513, -0.02171956, 0.08304309, + 0.071401566, 0.019622514, 0.032163795, -0.004167056, + 0.02295182, 0.030739572, 0.056506045, 0.004612461, + 0.06524936, 0.059999723, 0.046395954, -0.0045512207, + -0.1335546, -0.030136576, 0.11584653, -0.014678886, + 0.0020118146, -0.09688814, -0.0790206, 0.039770417, + -0.0329582, 0.07922767, 0.029322514, 0.026405897, + 0.04207835, -0.07073373, 0.063781224, 0.0859677, + -0.10925287, -0.07011058, 0.048005477, 0.03438226, + -0.09606514, -0.006669445, -0.043381985, 0.04240257, + -0.06955775, -0.06769346, 0.043903265, -0.026784198, + -0.017840602, 0.024307009, -0.040079936, -0.019946516, + 0.045318738, -0.12233574, 0.026170589, 0.0074471775, + 0.15978073, 0.10185836, 0.10298046, -0.015476589, + -0.039390966, -0.072174534, 0.0739445, -0.1211869, + -0.0347889, -0.07943156, 0.014809798, -0.12412325, + -0.0030663363, 0.039695457, 0.0647603, -0.08291318, + -0.018529687, -0.004423833, 0.0037507233, 0.084633216, + -0.01514876, -0.056505352, -0.012800942, -0.06994386, + 0.012962922, -0.031234352, 0.07029052, 0.016418684, + 0.03618972, 0.055686004, -0.08663945, -0.017404709, + -0.054761406, 0.029065743, 0.052404847, 0.020238016, + 0.0048197987, -0.0214882, 0.07078733, 0.013016777, + 0.06262858, 0.009184685, 0.020785125, -0.043904778, + -0.0270329, -0.03299152, -0.060088247, -0.015162964, + -0.001828936, 0.12642565, -0.056757294, 0.013586685, + 0.09232601, -0.035886683, 0.06000002, 
0.05229691, + -0.052580316, -0.082029596, -0.010794592, 0.012947712, + -0.036429964, -0.085508935, -0.13127148, -0.017744139, + 0.031502828, 0.036232427, -0.031581745, 0.023051167, + -0.05325106, -0.03421577, 0.028793324, -0.034633752, + -0.009881397, -0.043551125, -0.018609839, 0.0019097115, + -0.008799762, 0.056595087, 0.0022273948, 0.055752404}; + + recurrent_to_forget_weights_ = { + -0.057784554, -0.026057621, -0.068447545, -0.022581743, + 0.14811787, 0.10826372, 0.09471067, 0.03987225, + -0.0039523416, 0.00030638507, 0.053185795, 0.10572994, + 0.08414449, -0.022036452, -0.00066928595, -0.09203576, + 0.032950465, -0.10985798, -0.023809856, 0.0021431844, + -0.02196096, -0.00326074, 0.00058621005, -0.074678116, + -0.06193199, 0.055729095, 0.03736828, 0.020123724, + 0.061878487, -0.04729229, 0.034919553, -0.07585433, + -0.04421272, -0.044019096, 0.085488975, 0.04058006, + -0.06890133, -0.030951202, -0.024628663, -0.07672815, + 0.034293607, 0.08556707, -0.05293577, -0.033561368, + -0.04899627, 0.0241671, 0.015736353, -0.095442444, + -0.029564252, 0.016493602, -0.035026584, 0.022337519, + -0.026871363, 0.004780428, 0.0077918363, -0.03601621, + 0.016435321, -0.03263031, -0.09543275, -0.047392778, + 0.013454138, 0.028934088, 0.01685226, -0.086110644, + -0.046250615, -0.01847454, 0.047608484, 0.07339695, + 0.034546845, -0.04881143, 0.009128804, -0.08802852, + 0.03761666, 0.008096139, -0.014454086, 0.014361001, + -0.023502491, -0.0011840804, -0.07607001, 0.001856849, + -0.06509276, -0.006021153, -0.08570962, -0.1451793, + 0.060212336, 0.055259194, 0.06974018, 0.049454916, + -0.027794661, -0.08077226, -0.016179763, 0.1169753, + 0.17213494, -0.0056326236, -0.053934924, -0.0124349, + -0.11520337, 0.05409887, 0.088759385, 0.0019655675, + 0.0042065294, 0.03881498, 0.019844765, 0.041858196, + -0.05695512, 0.047233116, 0.038937137, -0.06542224, + 0.014429736, -0.09719407, 0.13908425, -0.05379757, + 0.012321099, 0.082840554, -0.029899208, 0.044217527, + 0.059855383, 0.07711018, 
-0.045319796, 0.0948846, + -0.011724666, -0.0033288454, -0.033542685, -0.04764985, + -0.13873616, 0.040668588, 0.034832682, -0.015319203, + -0.018715994, 0.046002675, 0.0599172, -0.043107376, + 0.0294216, -0.002314414, -0.022424703, 0.0030315618, + 0.0014641669, 0.0029166266, -0.11878115, 0.013738511, + 0.12375372, -0.0006038222, 0.029104086, 0.087442465, + 0.052958444, 0.07558703, 0.04817258, 0.044462286, + -0.015213451, -0.08783778, -0.0561384, -0.003008196, + 0.047060397, -0.002058388, 0.03429439, -0.018839769, + 0.024734668, 0.024614193, -0.042046934, 0.09597743, + -0.0043254104, 0.04320769, 0.0064070094, -0.0019131786, + -0.02558259, -0.022822596, -0.023273505, -0.02464396, + -0.10991725, -0.006240552, 0.0074488563, 0.024044557, + 0.04383914, -0.046476185, 0.028658995, 0.060410924, + 0.050786525, 0.009452605, -0.0073054377, -0.024810238, + 0.0052906186, 0.0066939713, -0.0020913032, 0.014515517, + 0.015898481, 0.021362653, -0.030262267, 0.016587038, + -0.011442813, 0.041154444, -0.007631438, -0.03423484, + -0.010977775, 0.036152758, 0.0066366293, 0.11915515, + 0.02318443, -0.041350313, 0.021485701, -0.10906167, + -0.028218046, -0.00954771, 0.020531068, -0.11995105, + -0.03672871, 0.024019798, 0.014255957, -0.05221243, + -0.00661567, -0.04630967, 0.033188973, 0.10107534, + -0.014027541, 0.030796422, -0.10270911, -0.035999842, + 0.15443139, 0.07684145, 0.036571592, -0.035900835, + -0.0034699554, 0.06209149, 0.015920248, -0.031122351, + -0.03858649, 0.01849943, 0.13872518, 0.01503974, + 0.069941424, -0.06948533, -0.0088794185, 0.061282158, + -0.047401894, 0.03100163, -0.041533746, -0.10430945, + 0.044574402, -0.01425562, -0.024290353, 0.034563623, + 0.05866852, 0.023947537, -0.09445152, 0.035450947, + 0.02247216, -0.0042998926, 0.061146557, -0.10250651, + 0.020881841, -0.06747029, 0.10062043, -0.0023941975, + 0.03532124, -0.016341697, 0.09685456, -0.016764693, + 0.051808182, 0.05875331, -0.04536488, 0.001626336, + -0.028892258, -0.01048663, -0.009793449, 
-0.017093895, + 0.010987891, 0.02357273, -0.00010856845, 0.0099760275, + -0.001845119, -0.03551521, 0.0018358806, 0.05763657, + -0.01769146, 0.040995963, 0.02235177, -0.060430344, + 0.11475477, -0.023854522, 0.10071741, 0.0686208, + -0.014250481, 0.034261297, 0.047418304, 0.08562733, + -0.030519066, 0.0060542435, 0.014653856, -0.038836084, + 0.04096551, 0.032249358, -0.08355519, -0.026823482, + 0.056386515, -0.010401743, -0.028396193, 0.08507674, + 0.014410365, 0.020995233, 0.17040324, 0.11511526, + 0.02459721, 0.0066619175, 0.025853224, -0.023133837, + -0.081302024, 0.017264642, -0.009585969, 0.09491168, + -0.051313367, 0.054532815, -0.014298593, 0.10657464, + 0.007076659, 0.10964551, 0.0409152, 0.008275321, + -0.07283536, 0.07937492, 0.04192024, -0.1075027}; + + recurrent_to_output_weights_ = { + 0.025825322, -0.05813119, 0.09495884, -0.045984812, + -0.01255415, -0.0026479573, -0.08196161, -0.054914974, + -0.0046604523, -0.029587349, -0.044576716, -0.07480124, + -0.082868785, 0.023254942, 0.027502948, -0.0039728214, + -0.08683098, -0.08116779, -0.014675607, -0.037924774, + -0.023314456, -0.007401714, -0.09255757, 0.029460307, + -0.08829125, -0.005139627, -0.08989442, -0.0555066, + 0.13596267, -0.025062224, -0.048351806, -0.03850004, + 0.07266485, -0.022414139, 0.05940088, 0.075114764, + 0.09597592, -0.010211725, -0.0049794707, -0.011523867, + -0.025980417, 0.072999895, 0.11091378, -0.081685916, + 0.014416728, 0.043229222, 0.034178585, -0.07530371, + 0.035837382, -0.085607, -0.007721233, -0.03287832, + -0.043848954, -0.06404588, -0.06632928, -0.073643476, + 0.008214239, -0.045984086, 0.039764922, 0.03474462, + 0.060612556, -0.080590084, 0.049127717, 0.04151091, + -0.030063879, 0.008801774, -0.023021035, -0.019558564, + 0.05158114, -0.010947698, -0.011825728, 0.0075720972, + 0.0699727, -0.0039981045, 0.069350146, 0.08799282, + 0.016156472, 0.035502106, 0.11695009, 0.006217345, + 0.13392477, -0.037875112, 0.025745004, 0.08940699, + -0.00924166, 0.0046702605, 
-0.036598757, -0.08811812, + 0.10522024, -0.032441203, 0.008176899, -0.04454919, + 0.07058152, 0.0067963637, 0.039206743, 0.03259838, + 0.03725492, -0.09515802, 0.013326398, -0.052055415, + -0.025676316, 0.03198509, -0.015951829, -0.058556724, + 0.036879618, 0.043357447, 0.028362012, -0.05908629, + 0.0059240665, -0.04995891, -0.019187413, 0.0276265, + -0.01628143, 0.0025863599, 0.08800015, 0.035250366, + -0.022165963, -0.07328642, -0.009415526, -0.07455109, + 0.11690406, 0.0363299, 0.07411125, 0.042103454, + -0.009660886, 0.019076364, 0.018299393, -0.046004917, + 0.08891175, 0.0431396, -0.026327137, -0.051502608, + 0.08979574, -0.051670972, 0.04940282, -0.07491107, + -0.021240504, 0.022596184, -0.034280192, 0.060163025, + -0.058211457, -0.051837247, -0.01349775, -0.04639988, + -0.035936575, -0.011681591, 0.064818054, 0.0073146066, + -0.021745546, -0.043124277, -0.06471268, -0.07053354, + -0.029321948, -0.05330136, 0.016933719, -0.053782392, + 0.13747959, -0.1361751, -0.11569455, 0.0033329215, + 0.05693899, -0.053219706, 0.063698, 0.07977434, + -0.07924483, 0.06936997, 0.0034815092, -0.007305279, + -0.037325785, -0.07251102, -0.033633437, -0.08677009, + 0.091591336, -0.14165086, 0.021752775, 0.019683983, + 0.0011612234, -0.058154266, 0.049996935, 0.0288841, + -0.0024567875, -0.14345716, 0.010955264, -0.10234828, + 0.1183656, -0.0010731248, -0.023590032, -0.072285876, + -0.0724771, -0.026382286, -0.0014920527, 0.042667855, + 0.0018776858, 0.02986552, 0.009814309, 0.0733756, + 0.12289186, 0.018043943, -0.0458958, 0.049412545, + 0.033632483, 0.05495232, 0.036686596, -0.013781798, + -0.010036754, 0.02576849, -0.08307328, 0.010112348, + 0.042521734, -0.05869831, -0.071689695, 0.03876447, + -0.13275425, -0.0352966, -0.023077697, 0.10285965, + 0.084736146, 0.15568255, -0.00040734606, 0.027835453, + -0.10292561, -0.032401145, 0.10053256, -0.026142767, + -0.08271222, -0.0030240538, -0.016368777, 0.1070414, + 0.042672627, 0.013456989, -0.0437609, -0.022309763, + 0.11576483, 
0.04108048, 0.061026827, -0.0190714, + -0.0869359, 0.037901703, 0.0610107, 0.07202949, + 0.01675338, 0.086139716, -0.08795751, -0.014898893, + -0.023771819, -0.01965048, 0.007955471, -0.043740474, + 0.03346837, -0.10549954, 0.090567775, 0.042013682, + -0.03176985, 0.12569028, -0.02421228, -0.029526481, + 0.023851605, 0.031539805, 0.05292009, -0.02344001, + -0.07811758, -0.08834428, 0.10094801, 0.16594367, + -0.06861939, -0.021256343, -0.041093912, -0.06669611, + 0.035498552, 0.021757556, -0.09302526, -0.015403468, + -0.06614931, -0.051798206, -0.013874718, 0.03630673, + 0.010412845, -0.08077351, 0.046185967, 0.0035662893, + 0.03541868, -0.094149634, -0.034814864, 0.003128424, + -0.020674974, -0.03944324, -0.008110165, -0.11113267, + 0.08484226, 0.043586485, 0.040582247, 0.0968012, + -0.065249965, -0.028036479, 0.0050708856, 0.0017462453, + 0.0326779, 0.041296225, 0.09164146, -0.047743853, + -0.015952192, -0.034451712, 0.084197424, -0.05347844, + -0.11768019, 0.085926116, -0.08251791, -0.045081906, + 0.0948852, 0.068401024, 0.024856757, 0.06978981, + -0.057309967, -0.012775832, -0.0032452994, 0.01977615, + -0.041040014, -0.024264973, 0.063464895, 0.05431621, + }; + + cell_to_input_weights_ = { + 0.040369894, 0.030746894, 0.24704495, 0.018586371, -0.037586458, + -0.15312155, -0.11812848, -0.11465643, 0.20259799, 0.11418174, + -0.10116027, -0.011334949, 0.12411352, -0.076769054, -0.052169047, + 0.21198851, -0.38871562, -0.09061183, -0.09683246, -0.21929175}; + + cell_to_forget_weights_ = { + -0.01998659, -0.15568835, -0.24248174, -0.012770197, 0.041331276, + -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766, + -0.047248036, 0.021479502, 0.033189066, 0.11952997, -0.020432774, + 0.64658105, -0.06650122, -0.03467612, 0.095340036, 0.23647355}; + + cell_to_output_weights_ = { + 0.08286371, -0.08261836, -0.51210177, 0.002913762, 0.17764764, + -0.5495371, -0.08460716, -0.24552552, 0.030037103, 0.04123544, + -0.11940523, 0.007358328, 0.1890978, 0.4833202, 
-0.34441817, + 0.36312827, -0.26375428, 0.1457655, -0.19724406, 0.15548733}; + + projection_weights_ = { + -0.009802181, 0.09401916, 0.0717386, -0.13895074, + 0.09641832, 0.060420845, 0.08539281, 0.054285463, + 0.061395317, 0.034448683, -0.042991187, 0.019801661, + -0.16840284, -0.015726732, -0.23041931, -0.024478018, + -0.10959692, -0.013875541, 0.18600968, -0.061274476, + 0.0138165, -0.08160894, -0.07661644, 0.032372914, + 0.16169067, 0.22465782, -0.03993472, -0.004017731, + 0.08633481, -0.28869787, 0.08682067, 0.17240396, + 0.014975425, 0.056431185, 0.031037588, 0.16702051, + 0.0077946745, 0.15140012, 0.29405436, 0.120285, + -0.188994, -0.027265169, 0.043389652, -0.022061434, + 0.014777949, -0.20203483, 0.094781205, 0.19100232, + 0.13987629, -0.036132768, -0.06426278, -0.05108664, + 0.13221376, 0.009441198, -0.16715929, 0.15859416, + -0.040437475, 0.050779544, -0.022187516, 0.012166504, + 0.027685808, -0.07675938, -0.0055694645, -0.09444123, + 0.0046453946, 0.050794356, 0.10770313, -0.20790008, + -0.07149004, -0.11425117, 0.008225835, -0.035802525, + 0.14374903, 0.15262283, 0.048710253, 0.1847461, + -0.007487823, 0.11000021, -0.09542012, 0.22619456, + -0.029149994, 0.08527916, 0.009043713, 0.0042746216, + 0.016261552, 0.022461696, 0.12689082, -0.043589946, + -0.12035478, -0.08361797, -0.050666027, -0.1248618, + -0.1275799, -0.071875185, 0.07377272, 0.09944291, + -0.18897448, -0.1593054, -0.06526116, -0.040107165, + -0.004618631, -0.067624845, -0.007576253, 0.10727444, + 0.041546922, -0.20424393, 0.06907816, 0.050412357, + 0.00724631, 0.039827548, 0.12449835, 0.10747581, + 0.13708383, 0.09134148, -0.12617786, -0.06428341, + 0.09956831, 0.1208086, -0.14676677, -0.0727722, + 0.1126304, 0.010139365, 0.015571211, -0.038128063, + 0.022913318, -0.042050496, 0.16842307, -0.060597885, + 0.10531834, -0.06411776, -0.07451711, -0.03410368, + -0.13393489, 0.06534304, 0.003620307, 0.04490757, + 0.05970546, 0.05197996, 0.02839995, 0.10434969, + -0.013699693, -0.028353551, 
-0.07260381, 0.047201227, + -0.024575593, -0.036445823, 0.07155557, 0.009672501, + -0.02328883, 0.009533515, -0.03606021, -0.07421458, + -0.028082801, -0.2678904, -0.13221288, 0.18419984, + -0.13012612, -0.014588381, -0.035059117, -0.04824723, + 0.07830115, -0.056184657, 0.03277091, 0.025466874, + 0.14494097, -0.12522776, -0.098633975, -0.10766018, + -0.08317623, 0.08594209, 0.07749552, 0.039474737, + 0.1776665, -0.07409566, -0.0477268, 0.29323658, + 0.10801441, 0.1154011, 0.013952499, 0.10739139, + 0.10708251, -0.051456142, 0.0074137426, -0.10430189, + 0.10034707, 0.045594677, 0.0635285, -0.0715442, + -0.089667566, -0.10811871, 0.00026344223, 0.08298446, + -0.009525053, 0.006585689, -0.24567553, -0.09450807, + 0.09648481, 0.026996298, -0.06419476, -0.04752702, + -0.11063944, -0.23441927, -0.17608605, -0.052156363, + 0.067035615, 0.19271925, -0.0032889997, -0.043264326, + 0.09663576, -0.057112187, -0.10100678, 0.0628376, + 0.04447668, 0.017961001, -0.10094388, -0.10190601, + 0.18335468, 0.10494553, -0.052095775, -0.0026118709, + 0.10539724, -0.04383912, -0.042349473, 0.08438151, + -0.1947263, 0.02251204, 0.11216432, -0.10307853, + 0.17351969, -0.039091777, 0.08066188, -0.00561982, + 0.12633002, 0.11335965, -0.0088127935, -0.019777594, + 0.06864014, -0.059751723, 0.016233567, -0.06894641, + -0.28651384, -0.004228674, 0.019708522, -0.16305895, + -0.07468996, -0.0855457, 0.099339016, -0.07580735, + -0.13775392, 0.08434318, 0.08330512, -0.12131499, + 0.031935584, 0.09180414, -0.08876437, -0.08049874, + 0.008753825, 0.03498998, 0.030215185, 0.03907079, + 0.089751154, 0.029194152, -0.03337423, -0.019092513, + 0.04331237, 0.04299654, -0.036394123, -0.12915532, + 0.09793732, 0.07512415, -0.11319543, -0.032502122, + 0.15661901, 0.07671967, -0.005491124, -0.19379048, + -0.218606, 0.21448623, 0.017840758, 0.1416943, + -0.07051762, 0.19488361, 0.02664691, -0.18104725, + -0.09334311, 0.15026465, -0.15493552, -0.057762887, + -0.11604192, -0.262013, -0.01391798, 0.012185008, + 
0.11156489, -0.07483202, 0.06693364, -0.26151478, + 0.046425626, 0.036540434, -0.16435726, 0.17338543, + -0.21401681, -0.11385144, -0.08283257, -0.069031075, + 0.030635102, 0.010969227, 0.11109743, 0.010919218, + 0.027526086, 0.13519906, 0.01891392, -0.046839405, + -0.040167913, 0.017953383, -0.09700955, 0.0061885654, + -0.07000971, 0.026893595, -0.038844477, 0.14543656}; + + lstm_input_ = { + {// Batch0: 4 (input_sequence_size) * 5 (n_input) + 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, // step 0 + 0.596268, 0.998386, 0.568695, 0.864524, 0.571277, // step 1 + 0.073204, 0.296072, 0.743333, 0.069199, 0.045348, // step 2 + 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, // step 3 + + {// Batch1: 4 (input_sequence_size) * 5 (n_input) + 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, // step 0 + 0.642421, 0.524260, 0.134799, 0.003639, 0.162482, // step 1 + 0.640394, 0.930399, 0.050782, 0.432485, 0.988078, // step 2 + 0.082922, 0.563329, 0.865614, 0.333232, 0.259916} // step 3 + }; + + lstm_golden_output_ = { + {// Batch0: 4 (input_sequence_size) * 16 (n_output) + -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, + -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, + -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, + 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, + -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, + -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, + 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, + 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, + 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, + 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, + -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, + -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, + 0.0286833, 0.00824207, 0.0264887, 0.0305169}, + {// Batch1: 4 (input_sequence_size) * 16 (n_output) + -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, + -0.0186926, 0.0193662, 
-0.0115437, 0.00422612, -0.0345232, + 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, + 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, + -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, + -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, + 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, + 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, + 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, + 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, + -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, + -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, + 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; } -} +}; -TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) { +TEST_F(NoCifgPeepholeProjectionClippingLstmTest, LstmBlackBoxTest) { const int n_batch = 2; const int n_input = 5; const int n_cell = 20; @@ -489,588 +1338,98 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) { {0}, // projection_bias tensor }); - lstm.SetInputToInputWeights( - {0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, - 0.09171803, 0.14647801, 0.10797193, -0.0057968358, 0.0019193048, - -0.2726754, 0.10154029, -0.018539885, 0.080349885, -0.10262385, - -0.022599787, -0.09121155, -0.008675967, -0.045206103, -0.0821282, - -0.008045952, 0.015478081, 0.055217247, 0.038719587, 0.044153627, - -0.06453243, 0.05031825, -0.046935108, -0.008164439, 0.014574226, - -0.1671009, -0.15519552, -0.16819797, -0.13971269, -0.11953059, - 0.25005487, -0.22790983, 0.009855087, -0.028140958, -0.11200698, - 0.11295408, -0.0035217577, 0.054485075, 0.05184695, 0.064711206, - 0.10989193, 0.11674786, 0.03490607, 0.07727357, 0.11390585, - -0.1863375, -0.1034451, -0.13945189, -0.049401227, -0.18767063, - 0.042483903, 0.14233552, 0.13832581, 0.18350165, 0.14545603, - -0.028545704, 0.024939531, 0.050929718, 0.0076203286, -0.0029723682, - -0.042484224, -0.11827596, -0.09171104, -0.10808628, -0.16327988, - 
-0.2273378, -0.0993647, -0.017155107, 0.0023917493, 0.049272764, - 0.0038534778, 0.054764505, 0.089753784, 0.06947234, 0.08014476, - -0.04544234, -0.0497073, -0.07135631, -0.048929106, -0.004042012, - -0.009284026, 0.018042054, 0.0036860977, -0.07427302, -0.11434604, - -0.018995456, 0.031487543, 0.012834908, 0.019977754, 0.044256654, - -0.39292613, -0.18519334, -0.11651281, -0.06809892, 0.011373677}); - - lstm.SetInputToForgetWeights( - {-0.0018401089, -0.004852237, 0.03698424, 0.014181704, 0.028273236, - -0.016726194, -0.05249759, -0.10204261, 0.00861066, -0.040979505, - -0.009899187, 0.01923892, -0.028177269, -0.08535103, -0.14585495, - 0.10662567, -0.01909731, -0.017883534, -0.0047269356, -0.045103323, - 0.0030784295, 0.076784775, 0.07463696, 0.094531395, 0.0814421, - -0.12257899, -0.033945758, -0.031303465, 0.045630626, 0.06843887, - -0.13492945, -0.012480007, -0.0811829, -0.07224499, -0.09628791, - 0.045100946, 0.0012300825, 0.013964662, 0.099372394, 0.02543059, - 0.06958324, 0.034257296, 0.0482646, 0.06267997, 0.052625068, - 0.12784666, 0.07077897, 0.025725935, 0.04165009, 0.07241905, - 0.018668644, -0.037377294, -0.06277783, -0.08833636, -0.040120605, - -0.011405586, -0.007808335, -0.010301386, -0.005102167, 0.027717464, - 0.05483423, 0.11449111, 0.11289652, 0.10939839, 0.13396506, - -0.08402166, -0.01901462, -0.044678304, -0.07720565, 0.014350063, - -0.11757958, -0.0652038, -0.08185733, -0.076754324, -0.092614375, - 0.10405491, 0.052960336, 0.035755895, 0.035839386, -0.012540553, - 0.036881298, 0.02913376, 0.03420159, 0.05448447, -0.054523353, - 0.02582715, 0.02327355, -0.011857179, -0.0011980024, -0.034641717, - -0.026125094, -0.17582615, -0.15923657, -0.27486774, -0.0006143371, - 0.0001771948, -8.470171e-05, 0.02651807, 0.045790765, 0.06956496}); - - lstm.SetInputToCellWeights( - {-0.04580283, -0.09549462, -0.032418985, -0.06454633, - -0.043528453, 0.043018587, -0.049152344, -0.12418144, - -0.078985475, -0.07596889, 0.019484362, -0.11434962, - 
-0.0074034138, -0.06314844, -0.092981495, 0.0062155537, - -0.025034338, -0.0028890965, 0.048929527, 0.06235075, - 0.10665918, -0.032036792, -0.08505916, -0.10843358, - -0.13002433, -0.036816437, -0.02130134, -0.016518239, - 0.0047691227, -0.0025825808, 0.066017866, 0.029991534, - -0.10652836, -0.1037554, -0.13056071, -0.03266643, - -0.033702414, -0.006473424, -0.04611692, 0.014419339, - -0.025174323, 0.0396852, 0.081777506, 0.06157468, - 0.10210095, -0.009658194, 0.046511717, 0.03603906, - 0.0069369148, 0.015960095, -0.06507666, 0.09551598, - 0.053568836, 0.06408714, 0.12835667, -0.008714329, - -0.20211966, -0.12093674, 0.029450472, 0.2849013, - -0.029227901, 0.1164364, -0.08560263, 0.09941786, - -0.036999565, -0.028842626, -0.0033637602, -0.017012902, - -0.09720865, -0.11193351, -0.029155117, -0.017936034, - -0.009768936, -0.04223324, -0.036159635, 0.06505112, - -0.021742892, -0.023377212, -0.07221364, -0.06430552, - 0.05453865, 0.091149814, 0.06387331, 0.007518393, - 0.055960953, 0.069779344, 0.046411168, 0.10509911, - 0.07463894, 0.0075130584, 0.012850982, 0.04555431, - 0.056955688, 0.06555285, 0.050801456, -0.009862683, - 0.00826772, -0.026555609, -0.0073611983, -0.0014897042}); - - lstm.SetInputToOutputWeights( - {-0.0998932, -0.07201956, -0.052803773, -0.15629593, -0.15001918, - -0.07650751, 0.02359855, -0.075155355, -0.08037709, -0.15093534, - 0.029517552, -0.04751393, 0.010350531, -0.02664851, -0.016839722, - -0.023121163, 0.0077019283, 0.012851257, -0.05040649, -0.0129761, - -0.021737747, -0.038305793, -0.06870586, -0.01481247, -0.001285394, - 0.10124236, 0.083122835, 0.053313006, -0.062235646, -0.075637154, - -0.027833903, 0.029774971, 0.1130802, 0.09218906, 0.09506135, - -0.086665764, -0.037162706, -0.038880914, -0.035832845, -0.014481564, - -0.09825003, -0.12048569, -0.097665586, -0.05287633, -0.0964047, - -0.11366429, 0.035777505, 0.13568819, 0.052451383, 0.050649304, - 0.05798951, -0.021852335, -0.099848844, 0.014740475, -0.078897946, - 0.04974699, 
0.014160473, 0.06973932, 0.04964942, 0.033364646, - 0.08190124, 0.025535367, 0.050893165, 0.048514254, 0.06945813, - -0.078907564, -0.06707616, -0.11844508, -0.09986688, -0.07509403, - 0.06263226, 0.14925587, 0.20188436, 0.12098451, 0.14639415, - 0.0015017595, -0.014267382, -0.03417257, 0.012711468, 0.0028300495, - -0.024758482, -0.05098548, -0.0821182, 0.014225672, 0.021544158, - 0.08949725, 0.07505268, -0.0020780868, 0.04908258, 0.06476295, - -0.022907063, 0.027562456, 0.040185735, 0.019567577, -0.015598739, - -0.049097303, -0.017121866, -0.083368234, -0.02332002, -0.0840956}); - - lstm.SetInputGateBias( - {0.02234832, 0.14757581, 0.18176508, 0.10380666, 0.053110216, - -0.06928846, -0.13942584, -0.11816189, 0.19483899, 0.03652339, - -0.10250295, 0.036714908, -0.18426876, 0.036065217, 0.21810818, - 0.02383196, -0.043370757, 0.08690144, -0.04444982, 0.00030581196}); - - lstm.SetForgetGateBias({0.035185695, -0.042891346, -0.03032477, 0.23027696, - 0.11098921, 0.15378423, 0.09263801, 0.09790885, - 0.09508917, 0.061199076, 0.07665568, -0.015443159, - -0.03499149, 0.046190713, 0.08895977, 0.10899629, - 0.40694186, 0.06030037, 0.012413437, -0.06108739}); - - lstm.SetCellBias({-0.024379363, 0.0055531194, 0.23377132, 0.033463873, - -0.1483596, -0.10639995, -0.091433935, 0.058573797, - -0.06809782, -0.07889636, -0.043246906, -0.09829136, - -0.4279842, 0.034901652, 0.18797937, 0.0075234566, - 0.016178843, 0.1749513, 0.13975595, 0.92058027}); - - lstm.SetOutputGateBias( - {0.046159424, -0.0012809046, 0.03563469, 0.12648113, 0.027195795, - 0.35373217, -0.018957434, 0.008907322, -0.0762701, 0.12018895, - 0.04216877, 0.0022856654, 0.040952638, 0.3147856, 0.08225149, - -0.057416286, -0.14995944, -0.008040261, 0.13208859, 0.029760877}); - - lstm.SetRecurrentToInputWeights( - {-0.001374326, -0.078856036, 0.10672688, 0.029162422, - -0.11585556, 0.02557986, -0.13446963, -0.035785314, - -0.01244275, 0.025961924, -0.02337298, -0.044228926, - -0.055839065, -0.046598054, -0.010546039, 
-0.06900766, - 0.027239809, 0.022582639, -0.013296484, -0.05459212, - 0.08981, -0.045407712, 0.08682226, -0.06867011, - -0.14390695, -0.02916037, 0.000996957, 0.091420636, - 0.14283475, -0.07390571, -0.06402044, 0.062524505, - -0.093129106, 0.04860203, -0.08364217, -0.08119002, - 0.009352075, 0.22920375, 0.0016303885, 0.11583097, - -0.13732095, 0.012405723, -0.07551853, 0.06343048, - 0.12162708, -0.031923793, -0.014335606, 0.01790974, - -0.10650317, -0.0724401, 0.08554849, -0.05727212, - 0.06556731, -0.042729504, -0.043227166, 0.011683251, - -0.013082158, -0.029302018, -0.010899579, -0.062036745, - -0.022509435, -0.00964907, -0.01567329, 0.04260106, - -0.07787477, -0.11576462, 0.017356863, 0.048673786, - -0.017577527, -0.05527947, -0.082487635, -0.040137455, - -0.10820036, -0.04666372, 0.022746278, -0.07851417, - 0.01068115, 0.032956902, 0.022433773, 0.0026891115, - 0.08944216, -0.0685835, 0.010513544, 0.07228705, - 0.02032331, -0.059686817, -0.0005566496, -0.086984694, - 0.040414046, -0.1380399, 0.094208956, -0.05722982, - 0.012092817, -0.04989123, -0.086576, -0.003399834, - -0.04696032, -0.045747425, 0.10091314, 0.048676282, - -0.029037097, 0.031399418, -0.0040285117, 0.047237843, - 0.09504992, 0.041799378, -0.049185462, -0.031518843, - -0.10516937, 0.026374253, 0.10058866, -0.0033195973, - -0.041975245, 0.0073591834, 0.0033782164, -0.004325073, - -0.10167381, 0.042500053, -0.01447153, 0.06464186, - -0.017142897, 0.03312627, 0.009205989, 0.024138335, - -0.011337001, 0.035530265, -0.010912711, 0.0706555, - -0.005894094, 0.051841937, -0.1401738, -0.02351249, - 0.0365468, 0.07590991, 0.08838724, 0.021681072, - -0.10086113, 0.019608743, -0.06195883, 0.077335775, - 0.023646897, -0.095322326, 0.02233014, 0.09756986, - -0.048691444, -0.009579111, 0.07595467, 0.11480546, - -0.09801813, 0.019894179, 0.08502348, 0.004032281, - 0.037211012, 0.068537936, -0.048005626, -0.091520436, - -0.028379958, -0.01556313, 0.06554592, -0.045599163, - -0.01672207, -0.020169014, 
-0.011877351, -0.20212261, - 0.010889619, 0.0047078193, 0.038385306, 0.08540671, - -0.017140968, -0.0035865551, 0.016678626, 0.005633034, - 0.015963363, 0.00871737, 0.060130805, 0.028611384, - 0.10109069, -0.015060172, -0.07894427, 0.06401885, - 0.011584063, -0.024466386, 0.0047652307, -0.09041358, - 0.030737216, -0.0046374933, 0.14215417, -0.11823516, - 0.019899689, 0.006106124, -0.027092824, 0.0786356, - 0.05052217, -0.058925, -0.011402121, -0.024987547, - -0.0013661642, -0.06832946, -0.015667673, -0.1083353, - -0.00096863037, -0.06988685, -0.053350925, -0.027275559, - -0.033664223, -0.07978348, -0.025200296, -0.017207067, - -0.058403496, -0.055697463, 0.005798788, 0.12965427, - -0.062582195, 0.0013350133, -0.10482091, 0.0379771, - 0.072521195, -0.0029455067, -0.13797039, -0.03628521, - 0.013806405, -0.017858358, -0.01008298, -0.07700066, - -0.017081132, 0.019358726, 0.0027079724, 0.004635139, - 0.062634714, -0.02338735, -0.039547626, -0.02050681, - 0.03385117, -0.083611414, 0.002862572, -0.09421313, - 0.058618143, -0.08598433, 0.00972939, 0.023867095, - -0.053934585, -0.023203006, 0.07452513, -0.048767887, - -0.07314807, -0.056307215, -0.10433547, -0.06440842, - 0.04328182, 0.04389765, -0.020006588, -0.09076438, - -0.11652589, -0.021705797, 0.03345259, -0.010329105, - -0.025767034, 0.013057034, -0.07316461, -0.10145612, - 0.06358255, 0.18531723, 0.07759293, 0.12006465, - 0.1305557, 0.058638252, -0.03393652, 0.09622831, - -0.16253184, -2.4580743e-06, 0.079869635, -0.070196845, - -0.005644518, 0.06857898, -0.12598175, -0.035084512, - 0.03156317, -0.12794146, -0.031963028, 0.04692781, - 0.030070418, 0.0071660685, -0.095516115, -0.004643372, - 0.040170413, -0.062104587, -0.0037324072, 0.0554317, - 0.08184801, -0.019164372, 0.06791302, 0.034257166, - -0.10307039, 0.021943003, 0.046745934, 0.0790918, - -0.0265588, -0.007824208, 0.042546265, -0.00977924, - -0.0002440307, -0.017384544, -0.017990116, 0.12252321, - -0.014512694, -0.08251313, 0.08861942, 0.13589665, - 
0.026351685, 0.012641483, 0.07466548, 0.044301085, - -0.045414884, -0.051112458, 0.03444247, -0.08502782, - -0.04106223, -0.028126027, 0.028473156, 0.10467447}); - - lstm.SetRecurrentToForgetWeights( - {-0.057784554, -0.026057621, -0.068447545, -0.022581743, - 0.14811787, 0.10826372, 0.09471067, 0.03987225, - -0.0039523416, 0.00030638507, 0.053185795, 0.10572994, - 0.08414449, -0.022036452, -0.00066928595, -0.09203576, - 0.032950465, -0.10985798, -0.023809856, 0.0021431844, - -0.02196096, -0.00326074, 0.00058621005, -0.074678116, - -0.06193199, 0.055729095, 0.03736828, 0.020123724, - 0.061878487, -0.04729229, 0.034919553, -0.07585433, - -0.04421272, -0.044019096, 0.085488975, 0.04058006, - -0.06890133, -0.030951202, -0.024628663, -0.07672815, - 0.034293607, 0.08556707, -0.05293577, -0.033561368, - -0.04899627, 0.0241671, 0.015736353, -0.095442444, - -0.029564252, 0.016493602, -0.035026584, 0.022337519, - -0.026871363, 0.004780428, 0.0077918363, -0.03601621, - 0.016435321, -0.03263031, -0.09543275, -0.047392778, - 0.013454138, 0.028934088, 0.01685226, -0.086110644, - -0.046250615, -0.01847454, 0.047608484, 0.07339695, - 0.034546845, -0.04881143, 0.009128804, -0.08802852, - 0.03761666, 0.008096139, -0.014454086, 0.014361001, - -0.023502491, -0.0011840804, -0.07607001, 0.001856849, - -0.06509276, -0.006021153, -0.08570962, -0.1451793, - 0.060212336, 0.055259194, 0.06974018, 0.049454916, - -0.027794661, -0.08077226, -0.016179763, 0.1169753, - 0.17213494, -0.0056326236, -0.053934924, -0.0124349, - -0.11520337, 0.05409887, 0.088759385, 0.0019655675, - 0.0042065294, 0.03881498, 0.019844765, 0.041858196, - -0.05695512, 0.047233116, 0.038937137, -0.06542224, - 0.014429736, -0.09719407, 0.13908425, -0.05379757, - 0.012321099, 0.082840554, -0.029899208, 0.044217527, - 0.059855383, 0.07711018, -0.045319796, 0.0948846, - -0.011724666, -0.0033288454, -0.033542685, -0.04764985, - -0.13873616, 0.040668588, 0.034832682, -0.015319203, - -0.018715994, 0.046002675, 0.0599172, 
-0.043107376, - 0.0294216, -0.002314414, -0.022424703, 0.0030315618, - 0.0014641669, 0.0029166266, -0.11878115, 0.013738511, - 0.12375372, -0.0006038222, 0.029104086, 0.087442465, - 0.052958444, 0.07558703, 0.04817258, 0.044462286, - -0.015213451, -0.08783778, -0.0561384, -0.003008196, - 0.047060397, -0.002058388, 0.03429439, -0.018839769, - 0.024734668, 0.024614193, -0.042046934, 0.09597743, - -0.0043254104, 0.04320769, 0.0064070094, -0.0019131786, - -0.02558259, -0.022822596, -0.023273505, -0.02464396, - -0.10991725, -0.006240552, 0.0074488563, 0.024044557, - 0.04383914, -0.046476185, 0.028658995, 0.060410924, - 0.050786525, 0.009452605, -0.0073054377, -0.024810238, - 0.0052906186, 0.0066939713, -0.0020913032, 0.014515517, - 0.015898481, 0.021362653, -0.030262267, 0.016587038, - -0.011442813, 0.041154444, -0.007631438, -0.03423484, - -0.010977775, 0.036152758, 0.0066366293, 0.11915515, - 0.02318443, -0.041350313, 0.021485701, -0.10906167, - -0.028218046, -0.00954771, 0.020531068, -0.11995105, - -0.03672871, 0.024019798, 0.014255957, -0.05221243, - -0.00661567, -0.04630967, 0.033188973, 0.10107534, - -0.014027541, 0.030796422, -0.10270911, -0.035999842, - 0.15443139, 0.07684145, 0.036571592, -0.035900835, - -0.0034699554, 0.06209149, 0.015920248, -0.031122351, - -0.03858649, 0.01849943, 0.13872518, 0.01503974, - 0.069941424, -0.06948533, -0.0088794185, 0.061282158, - -0.047401894, 0.03100163, -0.041533746, -0.10430945, - 0.044574402, -0.01425562, -0.024290353, 0.034563623, - 0.05866852, 0.023947537, -0.09445152, 0.035450947, - 0.02247216, -0.0042998926, 0.061146557, -0.10250651, - 0.020881841, -0.06747029, 0.10062043, -0.0023941975, - 0.03532124, -0.016341697, 0.09685456, -0.016764693, - 0.051808182, 0.05875331, -0.04536488, 0.001626336, - -0.028892258, -0.01048663, -0.009793449, -0.017093895, - 0.010987891, 0.02357273, -0.00010856845, 0.0099760275, - -0.001845119, -0.03551521, 0.0018358806, 0.05763657, - -0.01769146, 0.040995963, 0.02235177, -0.060430344, - 
0.11475477, -0.023854522, 0.10071741, 0.0686208, - -0.014250481, 0.034261297, 0.047418304, 0.08562733, - -0.030519066, 0.0060542435, 0.014653856, -0.038836084, - 0.04096551, 0.032249358, -0.08355519, -0.026823482, - 0.056386515, -0.010401743, -0.028396193, 0.08507674, - 0.014410365, 0.020995233, 0.17040324, 0.11511526, - 0.02459721, 0.0066619175, 0.025853224, -0.023133837, - -0.081302024, 0.017264642, -0.009585969, 0.09491168, - -0.051313367, 0.054532815, -0.014298593, 0.10657464, - 0.007076659, 0.10964551, 0.0409152, 0.008275321, - -0.07283536, 0.07937492, 0.04192024, -0.1075027}); - - lstm.SetRecurrentToCellWeights( - {-0.037322544, 0.018592842, 0.0056175636, -0.06253426, - 0.055647098, -0.05713207, -0.05626563, 0.005559383, - 0.03375411, -0.025757805, -0.088049285, 0.06017052, - -0.06570978, 0.007384076, 0.035123326, -0.07920549, - 0.053676967, 0.044480428, -0.07663568, 0.0071805613, - 0.08089997, 0.05143358, 0.038261272, 0.03339287, - -0.027673481, 0.044746667, 0.028349208, 0.020090483, - -0.019443132, -0.030755889, -0.0040000007, 0.04465846, - -0.021585021, 0.0031670958, 0.0053199246, -0.056117613, - -0.10893326, 0.076739706, -0.08509834, -0.027997585, - 0.037871376, 0.01449768, -0.09002357, -0.06111149, - -0.046195522, 0.0422062, -0.005683705, -0.1253618, - -0.012925729, -0.04890792, 0.06985068, 0.037654128, - 0.03398274, -0.004781977, 0.007032333, -0.031787455, - 0.010868644, -0.031489216, 0.09525667, 0.013939797, - 0.0058680447, 0.0167067, 0.02668468, -0.04797466, - -0.048885044, -0.12722108, 0.035304096, 0.06554885, - 0.00972396, -0.039238118, -0.05159735, -0.11329045, - 0.1613692, -0.03750952, 0.06529313, -0.071974665, - -0.11769596, 0.015524369, -0.0013754242, -0.12446318, - 0.02786344, -0.014179351, 0.005264273, 0.14376344, - 0.015983658, 0.03406988, -0.06939408, 0.040699873, - 0.02111075, 0.09669095, 0.041345075, -0.08316494, - -0.07684199, -0.045768797, 0.032298047, -0.041805092, - 0.0119405, 0.0061010392, 0.12652606, 0.0064572375, - -0.024950314, 
0.11574242, 0.04508852, -0.04335324, - 0.06760663, -0.027437469, 0.07216407, 0.06977076, - -0.05438599, 0.034033038, -0.028602652, 0.05346137, - 0.043184172, -0.037189785, 0.10420091, 0.00882477, - -0.054019816, -0.074273005, -0.030617684, -0.0028467078, - 0.024302477, -0.0038869337, 0.005332455, 0.0013399826, - 0.04361412, -0.007001822, 0.09631092, -0.06702025, - -0.042049985, -0.035070654, -0.04103342, -0.10273396, - 0.0544271, 0.037184782, -0.13150354, -0.0058036847, - -0.008264958, 0.042035464, 0.05891794, 0.029673764, - 0.0063542654, 0.044788733, 0.054816857, 0.062257513, - -0.00093483756, 0.048938446, -0.004952862, -0.007730018, - -0.04043371, -0.017094059, 0.07229206, -0.023670016, - -0.052195564, -0.025616996, -0.01520939, 0.045104615, - -0.007376126, 0.003533447, 0.006570588, 0.056037236, - 0.12436656, 0.051817212, 0.028532185, -0.08686856, - 0.11868599, 0.07663395, -0.07323171, 0.03463402, - -0.050708205, -0.04458982, -0.11590894, 0.021273347, - 0.1251325, -0.15313013, -0.12224372, 0.17228661, - 0.023029093, 0.086124025, 0.006445803, -0.03496501, - 0.028332196, 0.04449512, -0.042436164, -0.026587414, - -0.006041347, -0.09292539, -0.05678812, 0.03897832, - 0.09465633, 0.008115513, -0.02171956, 0.08304309, - 0.071401566, 0.019622514, 0.032163795, -0.004167056, - 0.02295182, 0.030739572, 0.056506045, 0.004612461, - 0.06524936, 0.059999723, 0.046395954, -0.0045512207, - -0.1335546, -0.030136576, 0.11584653, -0.014678886, - 0.0020118146, -0.09688814, -0.0790206, 0.039770417, - -0.0329582, 0.07922767, 0.029322514, 0.026405897, - 0.04207835, -0.07073373, 0.063781224, 0.0859677, - -0.10925287, -0.07011058, 0.048005477, 0.03438226, - -0.09606514, -0.006669445, -0.043381985, 0.04240257, - -0.06955775, -0.06769346, 0.043903265, -0.026784198, - -0.017840602, 0.024307009, -0.040079936, -0.019946516, - 0.045318738, -0.12233574, 0.026170589, 0.0074471775, - 0.15978073, 0.10185836, 0.10298046, -0.015476589, - -0.039390966, -0.072174534, 0.0739445, -0.1211869, - 
-0.0347889, -0.07943156, 0.014809798, -0.12412325, - -0.0030663363, 0.039695457, 0.0647603, -0.08291318, - -0.018529687, -0.004423833, 0.0037507233, 0.084633216, - -0.01514876, -0.056505352, -0.012800942, -0.06994386, - 0.012962922, -0.031234352, 0.07029052, 0.016418684, - 0.03618972, 0.055686004, -0.08663945, -0.017404709, - -0.054761406, 0.029065743, 0.052404847, 0.020238016, - 0.0048197987, -0.0214882, 0.07078733, 0.013016777, - 0.06262858, 0.009184685, 0.020785125, -0.043904778, - -0.0270329, -0.03299152, -0.060088247, -0.015162964, - -0.001828936, 0.12642565, -0.056757294, 0.013586685, - 0.09232601, -0.035886683, 0.06000002, 0.05229691, - -0.052580316, -0.082029596, -0.010794592, 0.012947712, - -0.036429964, -0.085508935, -0.13127148, -0.017744139, - 0.031502828, 0.036232427, -0.031581745, 0.023051167, - -0.05325106, -0.03421577, 0.028793324, -0.034633752, - -0.009881397, -0.043551125, -0.018609839, 0.0019097115, - -0.008799762, 0.056595087, 0.0022273948, 0.055752404}); - - lstm.SetRecurrentToOutputWeights({ - 0.025825322, -0.05813119, 0.09495884, -0.045984812, -0.01255415, - -0.0026479573, -0.08196161, -0.054914974, -0.0046604523, -0.029587349, - -0.044576716, -0.07480124, -0.082868785, 0.023254942, 0.027502948, - -0.0039728214, -0.08683098, -0.08116779, -0.014675607, -0.037924774, - -0.023314456, -0.007401714, -0.09255757, 0.029460307, -0.08829125, - -0.005139627, -0.08989442, -0.0555066, 0.13596267, -0.025062224, - -0.048351806, -0.03850004, 0.07266485, -0.022414139, 0.05940088, - 0.075114764, 0.09597592, -0.010211725, -0.0049794707, -0.011523867, - -0.025980417, 0.072999895, 0.11091378, -0.081685916, 0.014416728, - 0.043229222, 0.034178585, -0.07530371, 0.035837382, -0.085607, - -0.007721233, -0.03287832, -0.043848954, -0.06404588, -0.06632928, - -0.073643476, 0.008214239, -0.045984086, 0.039764922, 0.03474462, - 0.060612556, -0.080590084, 0.049127717, 0.04151091, -0.030063879, - 0.008801774, -0.023021035, -0.019558564, 0.05158114, -0.010947698, - 
-0.011825728, 0.0075720972, 0.0699727, -0.0039981045, 0.069350146, - 0.08799282, 0.016156472, 0.035502106, 0.11695009, 0.006217345, - 0.13392477, -0.037875112, 0.025745004, 0.08940699, -0.00924166, - 0.0046702605, -0.036598757, -0.08811812, 0.10522024, -0.032441203, - 0.008176899, -0.04454919, 0.07058152, 0.0067963637, 0.039206743, - 0.03259838, 0.03725492, -0.09515802, 0.013326398, -0.052055415, - -0.025676316, 0.03198509, -0.015951829, -0.058556724, 0.036879618, - 0.043357447, 0.028362012, -0.05908629, 0.0059240665, -0.04995891, - -0.019187413, 0.0276265, -0.01628143, 0.0025863599, 0.08800015, - 0.035250366, -0.022165963, -0.07328642, -0.009415526, -0.07455109, - 0.11690406, 0.0363299, 0.07411125, 0.042103454, -0.009660886, - 0.019076364, 0.018299393, -0.046004917, 0.08891175, 0.0431396, - -0.026327137, -0.051502608, 0.08979574, -0.051670972, 0.04940282, - -0.07491107, -0.021240504, 0.022596184, -0.034280192, 0.060163025, - -0.058211457, -0.051837247, -0.01349775, -0.04639988, -0.035936575, - -0.011681591, 0.064818054, 0.0073146066, -0.021745546, -0.043124277, - -0.06471268, -0.07053354, -0.029321948, -0.05330136, 0.016933719, - -0.053782392, 0.13747959, -0.1361751, -0.11569455, 0.0033329215, - 0.05693899, -0.053219706, 0.063698, 0.07977434, -0.07924483, - 0.06936997, 0.0034815092, -0.007305279, -0.037325785, -0.07251102, - -0.033633437, -0.08677009, 0.091591336, -0.14165086, 0.021752775, - 0.019683983, 0.0011612234, -0.058154266, 0.049996935, 0.0288841, - -0.0024567875, -0.14345716, 0.010955264, -0.10234828, 0.1183656, - -0.0010731248, -0.023590032, -0.072285876, -0.0724771, -0.026382286, - -0.0014920527, 0.042667855, 0.0018776858, 0.02986552, 0.009814309, - 0.0733756, 0.12289186, 0.018043943, -0.0458958, 0.049412545, - 0.033632483, 0.05495232, 0.036686596, -0.013781798, -0.010036754, - 0.02576849, -0.08307328, 0.010112348, 0.042521734, -0.05869831, - -0.071689695, 0.03876447, -0.13275425, -0.0352966, -0.023077697, - 0.10285965, 0.084736146, 0.15568255, 
-0.00040734606, 0.027835453, - -0.10292561, -0.032401145, 0.10053256, -0.026142767, -0.08271222, - -0.0030240538, -0.016368777, 0.1070414, 0.042672627, 0.013456989, - -0.0437609, -0.022309763, 0.11576483, 0.04108048, 0.061026827, - -0.0190714, -0.0869359, 0.037901703, 0.0610107, 0.07202949, - 0.01675338, 0.086139716, -0.08795751, -0.014898893, -0.023771819, - -0.01965048, 0.007955471, -0.043740474, 0.03346837, -0.10549954, - 0.090567775, 0.042013682, -0.03176985, 0.12569028, -0.02421228, - -0.029526481, 0.023851605, 0.031539805, 0.05292009, -0.02344001, - -0.07811758, -0.08834428, 0.10094801, 0.16594367, -0.06861939, - -0.021256343, -0.041093912, -0.06669611, 0.035498552, 0.021757556, - -0.09302526, -0.015403468, -0.06614931, -0.051798206, -0.013874718, - 0.03630673, 0.010412845, -0.08077351, 0.046185967, 0.0035662893, - 0.03541868, -0.094149634, -0.034814864, 0.003128424, -0.020674974, - -0.03944324, -0.008110165, -0.11113267, 0.08484226, 0.043586485, - 0.040582247, 0.0968012, -0.065249965, -0.028036479, 0.0050708856, - 0.0017462453, 0.0326779, 0.041296225, 0.09164146, -0.047743853, - -0.015952192, -0.034451712, 0.084197424, -0.05347844, -0.11768019, - 0.085926116, -0.08251791, -0.045081906, 0.0948852, 0.068401024, - 0.024856757, 0.06978981, -0.057309967, -0.012775832, -0.0032452994, - 0.01977615, -0.041040014, -0.024264973, 0.063464895, 0.05431621, - }); - - lstm.SetCellToInputWeights( - {0.040369894, 0.030746894, 0.24704495, 0.018586371, -0.037586458, - -0.15312155, -0.11812848, -0.11465643, 0.20259799, 0.11418174, - -0.10116027, -0.011334949, 0.12411352, -0.076769054, -0.052169047, - 0.21198851, -0.38871562, -0.09061183, -0.09683246, -0.21929175}); - - lstm.SetCellToForgetWeights( - {-0.01998659, -0.15568835, -0.24248174, -0.012770197, 0.041331276, - -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766, - -0.047248036, 0.021479502, 0.033189066, 0.11952997, -0.020432774, - 0.64658105, -0.06650122, -0.03467612, 0.095340036, 0.23647355}); - - 
lstm.SetCellToOutputWeights( - {0.08286371, -0.08261836, -0.51210177, 0.002913762, 0.17764764, - -0.5495371, -0.08460716, -0.24552552, 0.030037103, 0.04123544, - -0.11940523, 0.007358328, 0.1890978, 0.4833202, -0.34441817, - 0.36312827, -0.26375428, 0.1457655, -0.19724406, 0.15548733}); - - lstm.SetProjectionWeights( - {-0.009802181, 0.09401916, 0.0717386, -0.13895074, 0.09641832, - 0.060420845, 0.08539281, 0.054285463, 0.061395317, 0.034448683, - -0.042991187, 0.019801661, -0.16840284, -0.015726732, -0.23041931, - -0.024478018, -0.10959692, -0.013875541, 0.18600968, -0.061274476, - 0.0138165, -0.08160894, -0.07661644, 0.032372914, 0.16169067, - 0.22465782, -0.03993472, -0.004017731, 0.08633481, -0.28869787, - 0.08682067, 0.17240396, 0.014975425, 0.056431185, 0.031037588, - 0.16702051, 0.0077946745, 0.15140012, 0.29405436, 0.120285, - -0.188994, -0.027265169, 0.043389652, -0.022061434, 0.014777949, - -0.20203483, 0.094781205, 0.19100232, 0.13987629, -0.036132768, - -0.06426278, -0.05108664, 0.13221376, 0.009441198, -0.16715929, - 0.15859416, -0.040437475, 0.050779544, -0.022187516, 0.012166504, - 0.027685808, -0.07675938, -0.0055694645, -0.09444123, 0.0046453946, - 0.050794356, 0.10770313, -0.20790008, -0.07149004, -0.11425117, - 0.008225835, -0.035802525, 0.14374903, 0.15262283, 0.048710253, - 0.1847461, -0.007487823, 0.11000021, -0.09542012, 0.22619456, - -0.029149994, 0.08527916, 0.009043713, 0.0042746216, 0.016261552, - 0.022461696, 0.12689082, -0.043589946, -0.12035478, -0.08361797, - -0.050666027, -0.1248618, -0.1275799, -0.071875185, 0.07377272, - 0.09944291, -0.18897448, -0.1593054, -0.06526116, -0.040107165, - -0.004618631, -0.067624845, -0.007576253, 0.10727444, 0.041546922, - -0.20424393, 0.06907816, 0.050412357, 0.00724631, 0.039827548, - 0.12449835, 0.10747581, 0.13708383, 0.09134148, -0.12617786, - -0.06428341, 0.09956831, 0.1208086, -0.14676677, -0.0727722, - 0.1126304, 0.010139365, 0.015571211, -0.038128063, 0.022913318, - -0.042050496, 0.16842307, 
-0.060597885, 0.10531834, -0.06411776, - -0.07451711, -0.03410368, -0.13393489, 0.06534304, 0.003620307, - 0.04490757, 0.05970546, 0.05197996, 0.02839995, 0.10434969, - -0.013699693, -0.028353551, -0.07260381, 0.047201227, -0.024575593, - -0.036445823, 0.07155557, 0.009672501, -0.02328883, 0.009533515, - -0.03606021, -0.07421458, -0.028082801, -0.2678904, -0.13221288, - 0.18419984, -0.13012612, -0.014588381, -0.035059117, -0.04824723, - 0.07830115, -0.056184657, 0.03277091, 0.025466874, 0.14494097, - -0.12522776, -0.098633975, -0.10766018, -0.08317623, 0.08594209, - 0.07749552, 0.039474737, 0.1776665, -0.07409566, -0.0477268, - 0.29323658, 0.10801441, 0.1154011, 0.013952499, 0.10739139, - 0.10708251, -0.051456142, 0.0074137426, -0.10430189, 0.10034707, - 0.045594677, 0.0635285, -0.0715442, -0.089667566, -0.10811871, - 0.00026344223, 0.08298446, -0.009525053, 0.006585689, -0.24567553, - -0.09450807, 0.09648481, 0.026996298, -0.06419476, -0.04752702, - -0.11063944, -0.23441927, -0.17608605, -0.052156363, 0.067035615, - 0.19271925, -0.0032889997, -0.043264326, 0.09663576, -0.057112187, - -0.10100678, 0.0628376, 0.04447668, 0.017961001, -0.10094388, - -0.10190601, 0.18335468, 0.10494553, -0.052095775, -0.0026118709, - 0.10539724, -0.04383912, -0.042349473, 0.08438151, -0.1947263, - 0.02251204, 0.11216432, -0.10307853, 0.17351969, -0.039091777, - 0.08066188, -0.00561982, 0.12633002, 0.11335965, -0.0088127935, - -0.019777594, 0.06864014, -0.059751723, 0.016233567, -0.06894641, - -0.28651384, -0.004228674, 0.019708522, -0.16305895, -0.07468996, - -0.0855457, 0.099339016, -0.07580735, -0.13775392, 0.08434318, - 0.08330512, -0.12131499, 0.031935584, 0.09180414, -0.08876437, - -0.08049874, 0.008753825, 0.03498998, 0.030215185, 0.03907079, - 0.089751154, 0.029194152, -0.03337423, -0.019092513, 0.04331237, - 0.04299654, -0.036394123, -0.12915532, 0.09793732, 0.07512415, - -0.11319543, -0.032502122, 0.15661901, 0.07671967, -0.005491124, - -0.19379048, -0.218606, 0.21448623, 
0.017840758, 0.1416943, - -0.07051762, 0.19488361, 0.02664691, -0.18104725, -0.09334311, - 0.15026465, -0.15493552, -0.057762887, -0.11604192, -0.262013, - -0.01391798, 0.012185008, 0.11156489, -0.07483202, 0.06693364, - -0.26151478, 0.046425626, 0.036540434, -0.16435726, 0.17338543, - -0.21401681, -0.11385144, -0.08283257, -0.069031075, 0.030635102, - 0.010969227, 0.11109743, 0.010919218, 0.027526086, 0.13519906, - 0.01891392, -0.046839405, -0.040167913, 0.017953383, -0.09700955, - 0.0061885654, -0.07000971, 0.026893595, -0.038844477, 0.14543656}); - - static float lstm_input[][20] = { - {// Batch0: 4 (input_sequence_size) * 5 (n_input) - 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, 0.596268, 0.998386, - 0.568695, 0.864524, 0.571277, 0.073204, 0.296072, 0.743333, 0.069199, - 0.045348, 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, - - {// Batch1: 4 (input_sequence_size) * 5 (n_input) - 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, 0.642421, 0.524260, - 0.134799, 0.003639, 0.162482, 0.640394, 0.930399, 0.050782, 0.432485, - 0.988078, 0.082922, 0.563329, 0.865614, 0.333232, 0.259916}}; - - static float lstm_golden_output[][64] = { - {// Batch0: 4 (input_sequence_size) * 16 (n_output) - -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, - -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, - -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, - 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, - -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, - -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, - 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, - 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, - 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, - 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, - -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, - -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, - 0.0286833, 0.00824207, 0.0264887, 0.0305169}, - {// 
Batch1: 4 (input_sequence_size) * 16 (n_output) - -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, - -0.0186926, 0.0193662, -0.0115437, 0.00422612, -0.0345232, - 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, - 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, - -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, - -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, - 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, - 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, - 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, - 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, - -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, - -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, - 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; + lstm.SetInputToInputWeights(input_to_input_weights_); + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); + + lstm.SetInputGateBias(input_gate_bias_); + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); + + lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_); + lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); + + lstm.SetCellToInputWeights(cell_to_input_weights_); + lstm.SetCellToForgetWeights(cell_to_forget_weights_); + lstm.SetCellToOutputWeights(cell_to_output_weights_); + + lstm.SetProjectionWeights(projection_weights_); // Resetting cell_state and output_state lstm.ResetCellState(); lstm.ResetOutputState(); - const int input_sequence_size = - sizeof(lstm_input[0]) / sizeof(float) / (lstm.num_inputs()); - for (int i = 0; i < input_sequence_size; i++) { - float* batch0_start = lstm_input[0] + i * lstm.num_inputs(); - float* 
batch0_end = batch0_start + lstm.num_inputs(); + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); +} - lstm.SetInput(0, batch0_start, batch0_end); +TEST_F(NoCifgPeepholeProjectionClippingLstmTest, HybridLstmBlackBoxTest) { + const int n_batch = 2; + const int n_input = 5; + const int n_cell = 20; + const int n_output = 16; - float* batch1_start = lstm_input[1] + i * lstm.num_inputs(); - float* batch1_end = batch1_start + lstm.num_inputs(); - lstm.SetInput(lstm.num_inputs(), batch1_start, batch1_end); + HybridLSTMOpModel lstm( + n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/true, + /*use_projection_weights=*/true, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {n_cell, n_output}, // recurrent_to_input_weight tensor + {n_cell, n_output}, // recurrent_to_forget_weight tensor + {n_cell, n_output}, // recurrent_to_cell_weight tensor + {n_cell, n_output}, // recurrent_to_output_weight tensor + + {n_cell}, // cell_to_input_weight tensor + {n_cell}, // cell_to_forget_weight tensor + {n_cell}, // cell_to_output_weight tensor + + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_bias tensor + {n_cell}, // output_gate_bias tensor + + {n_output, n_cell}, // projection_weight tensor + {0}, // projection_bias tensor + }); + + lstm.SetInputToInputWeights(input_to_input_weights_); + lstm.SetInputToCellWeights(input_to_cell_weights_); + lstm.SetInputToForgetWeights(input_to_forget_weights_); + lstm.SetInputToOutputWeights(input_to_output_weights_); + + lstm.SetInputGateBias(input_gate_bias_); + lstm.SetCellBias(cell_gate_bias_); + lstm.SetForgetGateBias(forget_gate_bias_); + lstm.SetOutputGateBias(output_gate_bias_); + + 
lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_); + lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_); + lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_); + lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_); + + lstm.SetCellToInputWeights(cell_to_input_weights_); + lstm.SetCellToForgetWeights(cell_to_forget_weights_); + lstm.SetCellToOutputWeights(cell_to_output_weights_); + + lstm.SetProjectionWeights(projection_weights_); - lstm.Invoke(); + // Resetting cell_state and output_state + lstm.ResetCellState(); + lstm.ResetOutputState(); - float* golden_start_batch0 = lstm_golden_output[0] + i * lstm.num_outputs(); - float* golden_end_batch0 = golden_start_batch0 + lstm.num_outputs(); - float* golden_start_batch1 = lstm_golden_output[1] + i * lstm.num_outputs(); - float* golden_end_batch1 = golden_start_batch1 + lstm.num_outputs(); - std::vector expected; - expected.insert(expected.end(), golden_start_batch0, golden_end_batch0); - expected.insert(expected.end(), golden_start_batch1, golden_end_batch1); - EXPECT_THAT(lstm.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); - } + VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467); } } // namespace -- GitLab From 2b5f598fbd822f911ad305ae1e57325aefd50826 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 5 Jun 2018 12:19:43 -0700 Subject: [PATCH 545/902] Move ReplaceMulWithSquare to a separate optimizer stage. 
PiperOrigin-RevId: 199338297 --- .../optimizers/arithmetic_optimizer.cc | 68 ++++++++++++------- .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 47 +++++++------ 3 files changed, 73 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 400af82627..561930f858 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -2079,6 +2079,49 @@ class FoldMultiplyIntoConv : public ArithmeticOptimizerStage { } }; +// Replace Mul node with identical inputs with a Square. +class ReplaceMulWithSquare : public ArithmeticOptimizerStage { + public: + explicit ReplaceMulWithSquare(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("ReplaceMulWithSquare", ctx, ctx_ext) {} + ~ReplaceMulWithSquare() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsMul(*node) && node->input(0) == node->input(1); + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { + const NodeScopeAndName mul = ParseNodeScopeAndName(node->name()); + const string optimized_node_name = OptimizedNodeName(mul); + if (ctx().node_map->NodeExists(optimized_node_name)) return Status::OK(); + + const DataType type = GetDataTypeFromAttr(*node, "T"); + bool is_complex = (type == DT_COMPLEX64) || (type == DT_COMPLEX128); + + string task; + string device; + bool is_on_cpu = + DeviceNameUtils::SplitDeviceName(node->device(), &task, &device) && + str_util::StrContains(device, DEVICE_CPU); + + if (!is_complex || is_on_cpu) { + NodeDef* new_square_node = AddCopyNode(optimized_node_name, node); + new_square_node->set_op("Square"); + for (int i = 1; i < new_square_node->input_size(); ++i) { + new_square_node->set_input(i - 1, new_square_node->input(i)); + } + 
new_square_node->mutable_input()->RemoveLast(); + for (const string& input : new_square_node->input()) { + ctx().node_map->AddOutput(NodeName(input), new_square_node->name()); + } + *simplified_node_name = new_square_node->name(); + } + + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -2331,29 +2374,6 @@ void ArithmeticOptimizer::ForwardControlDependencies( // ArithmeticOptimizerStage string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, SetVector* nodes_to_simplify) { - if (node->op() == "Mul" && node->input(0) == node->input(1) && - !OptimizedNodeExists(*node, "square")) { - const DataType type = GetDataTypeFromAttr(*node, "T"); - bool is_complex = (type == DT_COMPLEX64) || (type == DT_COMPLEX128); - string dontcare; - string device; - bool is_on_cpu = - DeviceNameUtils::SplitDeviceName(node->device(), &dontcare, &device) && - str_util::StrContains(device, DEVICE_CPU); - if (!is_complex || is_on_cpu) { - NodeDef* new_square_node = AddNode(*node, "square", /*copy_node=*/true); - new_square_node->set_op("Square"); - for (int i = 1; i < new_square_node->input_size(); ++i) { - new_square_node->set_input(i - 1, new_square_node->input(i)); - } - new_square_node->mutable_input()->RemoveLast(); - for (const string& input : new_square_node->input()) { - node_map_->AddOutput(NodeName(input), new_square_node->name()); - } - return new_square_node->name(); - } - } - if (IsAggregate(*node) && NumNonControlInputs(*node) > 0) { // Discard aggregate nodes with a single input and no control dependencies. 
if (node->input_size() == 1) { @@ -2528,6 +2548,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.remove_negation) pipeline.AddStage(ctx, ctx_ext); + if (options_.replace_mul_with_square) + pipeline.AddStage(ctx, ctx_ext); if (options_.remove_logical_not) pipeline.AddStage(ctx, ctx_ext); if (options_.reorder_cast_and_transpose) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index e6fc311929..8e00b83a70 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -74,6 +74,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool remove_redundant_cast = true; bool remove_redundant_reshape = true; bool reorder_cast_and_transpose = true; + bool replace_mul_with_square = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index b9fec0f860..f15cbfe407 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -139,6 +139,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { options.remove_negation = false; options.remove_logical_not = false; options.reorder_cast_and_transpose = false; + options.replace_mul_with_square = false; optimizer->options_ = options; } @@ -201,6 +202,11 @@ class ArithmeticOptimizerTest : public GrapplerTest { optimizer->options_.reorder_cast_and_transpose = true; } + void EnableOnlyReplaceMulWithSquare(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.replace_mul_with_square = true; + } + void EnableOnlyHoistCWiseUnaryChains(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); optimizer->options_.hoist_cwise_unary_chains = true; @@ -345,33 +351,36 @@ TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } -TEST_F(ArithmeticOptimizerTest, MulToSquare) { +TEST_F(ArithmeticOptimizerTest, ReplaceMulWithSquare) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output c = ops::Const(s.WithOpName("c"), {1.0f, 2.0f}, {1, 2}); Output d = ops::Const(s.WithOpName("d"), {3.0f, 4.0f}, {1, 2}); Output mul = ops::Mul(s.WithControlDependencies(d).WithOpName("mul"), c, c); Output id = ops::Identity(s.WithOpName("id"), mul); + GrapplerItem item; + item.fetch = {"id"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - std::vector fetch = {"id"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); EXPECT_EQ(1, tensors_expected.size()); - ArithmeticOptimizer optimizer; GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - 
TF_EXPECT_OK(status); + ArithmeticOptimizer optimizer; + EnableOnlyReplaceMulWithSquare(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); - EXPECT_EQ(5, output.node_size()); - EXPECT_EQ("id", output.node(3).name()); - EXPECT_EQ(OptimizedName("mul_square"), output.node(3).input(0)); - EXPECT_EQ("Square", output.node(4).op()); - EXPECT_EQ(OptimizedName("mul_square"), output.node(4).name()); - EXPECT_EQ(2, output.node(4).input_size()); - EXPECT_EQ("c", output.node(4).input(0)); - EXPECT_EQ("^d", output.node(4).input(1)); + EXPECT_EQ(4, output.node_size()); - auto tensors = EvaluateNodes(output, fetch); + NodeMap node_map(&output); + const string p = "ArithmeticOptimizer/ReplaceMulWithSquare"; + const NodeDef* square_node = node_map.GetNode(strings::StrCat(p, "_", "mul")); + + ASSERT_NE(square_node, nullptr); + EXPECT_EQ("Square", square_node->op()); + EXPECT_EQ("c", square_node->input(0)); + EXPECT_EQ("^d", square_node->input(1)); + + auto tensors = EvaluateNodes(output, item.fetch); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } @@ -386,12 +395,10 @@ TEST_F(ArithmeticOptimizerTest, RemoveInvolution_AdjacentNodes) { auto recip2 = ops::Reciprocal(s.WithOpName("recip2"), recip1); auto id = ops::Identity(s.WithOpName("id"), recip2); - std::vector fetch = {"id"}; - GrapplerItem item; - item.fetch = fetch; + item.fetch = {"id"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); EXPECT_EQ(1, tensors_expected.size()); GraphDef output; @@ -404,7 +411,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveInvolution_AdjacentNodes) { EXPECT_EQ("id", output.node(1).name()); EXPECT_EQ("c", output.node(1).input(0)); - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, item.fetch); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } -- GitLab From 
a1e258706972fb8c686434163b4f939010deab34 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 12:32:18 -0700 Subject: [PATCH 546/902] Fixing typo in Subtract Kernel. PiperOrigin-RevId: 199340127 --- tensorflow/contrib/lite/kernels/sub.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index d788159a8d..bdcaab8e2f 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -175,7 +175,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output); } else { context->ReportError( - context, "output type %d is not support, requires float|uint8 types.", + context, "output type %d is not supported, requires float|uint8 types.", output->type); return kTfLiteError; } -- GitLab From 397f04acb1faeff451691d7fdc0f754eeb547cc1 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Tue, 5 Jun 2018 12:41:22 -0700 Subject: [PATCH 547/902] Fix for Raspberry Pi build breakage (#19782) --- tensorflow/contrib/lite/toco/toco_port.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/toco/toco_port.cc b/tensorflow/contrib/lite/toco/toco_port.cc index 49a3302caf..3a5911c28d 100644 --- a/tensorflow/contrib/lite/toco/toco_port.cc +++ b/tensorflow/contrib/lite/toco/toco_port.cc @@ -18,12 +18,10 @@ limitations under the License. 
#include "tensorflow/contrib/lite/toco/toco_types.h" #include "tensorflow/core/platform/logging.h" -#ifdef __ARM_ARCH_7A__ +#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) namespace std { -double round(double x) { - return ::round(x); -} -} +double round(double x) { return ::round(x); } +} // namespace std #endif namespace toco { -- GitLab From b7928ac78d3cd688967bcf4e5253e384b355070f Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 5 Jun 2018 12:42:44 -0700 Subject: [PATCH 548/902] Clarifies how to pass training hooks to TPUEstimator in the docstring for TPUEstimator. PiperOrigin-RevId: 199341721 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 83 ++++++++++++++----- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index f63e9e8bda..64ae35dfc5 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -122,6 +122,33 @@ def _create_global_step(graph): def _create_or_get_iterations_per_loop(): + """Creates or gets the iterations_per_loop variable. + + In TPUEstimator, the user provided computation, the model_fn, is wrapped + inside a tf.while_loop for peak performance. The iterations of the loop are + specified by this variable, which adjusts its value on the CPU after each TPU + program execution and before the next TPU execution. + + The purpose of using a variable, rather then a constant, is to allow + TPUEstimator adapt the TPU training iterations according to the final steps + specified by users. For example, if the user sets the iterations_per_loop as 4 + in TPUConfig and steps as 10 in TPUEstimator.train(), the iterations_per_loop + variable will have the following value before each TPU training. 
+ + - 1-th TPU execution: iterations_per_loop = 4 + - 2-th TPU execution: iterations_per_loop = 4 + - 3-th TPU execution: iterations_per_loop = 2 + + As model_fn increases the global step once per train_op invocation, the global + step is 10 after all TPU executions, matching the steps=10 inputs passed in by + users. + + Returns: + A TF non-trainable resource variable. + + Raises: + RuntimeError: If multi iterations_per_loop variables were found. + """ graph = ops.get_default_graph() collection_name = '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR) iter_vars = graph.get_collection(collection_name) @@ -388,20 +415,21 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): return def _cancel_session(): - # Close the session to avoid the main thread from hanging. If input - # pipeline triggers any error, the infeed thread dies but the main thread - # for TPU computation waits for the infeed enqueue forever. Close the - # Session to cancel the main thread Session.run execution. - # - # We sleep for a few seconds before closing to give some time - # for the TPU compilation error, if any, propagating, from TPU to CPU - # host. Compilation errors should be reported by the main thread so that - # the program can be interrupted and users can take action. Due to a race - # condition, the infeed thread might see an error first. Closing the - # session here immediately would result in a session cancellation - # exception in the main thread, instead of the expected compile error. - # User code that depends on having the proper exception type will - # therefore be confused. + """Close the session to avoid the main thread from hanging. + + If input pipeline triggers any error, the infeed thread dies but the main + thread for TPU computation waits for the infeed enqueue forever. Close the + Session to cancel the main thread Session.run execution. 
+ + We sleep for a few seconds before closing to give some time for the TPU + compilation error, if any, propagating, from TPU to CPU host. Compilation + errors should be reported by the main thread so that the program can be + interrupted and users can take action. Due to a race condition, the + infeed thread might see an error first. Closing the session here + immediately would result in a session cancellation exception in the main + thread, instead of the expected compile error. User code that depends on + having the proper exception type will therefore be confused. + """ time.sleep(5) # If the main session is still running, the infeed/outfeed errors are @@ -721,6 +749,15 @@ def generate_per_host_enqueue_ops_fn_for_host( tpu_ordinal_function = None def enqueue_ops_fn(): + """A Fn returning the TPU infeed enqueue ops. + + By providing as a Fn, it can be invoked inside the tf.while_loop such that + the input pipeline for multiple iterations can be executed by one + Session.run call. + + Returns: + list of dict of ops. + """ with ops.device(device): num_of_replicas_per_host = ctx.num_of_replicas_per_host # Convert user input to features and labels. If the user returns a @@ -1095,10 +1132,16 @@ class _InputPipeline(object): return enqueue_ops, all_hooks, run_infeed_loop_on_coordinator def _validate_input_pipeline(self): - # Perform some sanity checks to log user friendly information. We should - # error out to give users better error message. But, if - # _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break - # user code, so, log a warning. + """Validates the input pipeline. + + Perform some sanity checks to log user friendly information. We should + error out to give users better error message. But, if + _WRAP_INPUT_FN_INTO_WHILE_LOOP is False (legacy behavior), we cannot break + user code, so, log a warning. + + Raises: + RuntimeError: If the validation failed. 
+ """ if ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): err_msg = ('Input pipeline contains one or more QueueRunners. ' 'It could be slow and not scalable. Please consider ' @@ -1837,7 +1880,8 @@ class TPUEstimator(estimator_lib.Estimator): Args: model_fn: Model function as required by `Estimator`. For training, the returned `EstimatorSpec` cannot have hooks as it is not supported in - `TPUEstimator`. + `TPUEstimator`. Instead, the user can pass the training hooks as + an argument to `TPUEstimator.train()`. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. If `None`, the model_dir in @@ -2898,6 +2942,7 @@ class _StopSignals(object): @staticmethod def should_stop(scalar_stopping_signal): + """Detects whether scalar_stopping_signal indicates stopping.""" if isinstance(scalar_stopping_signal, ops.Tensor): # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the TF # way to express the bool check whether scalar_stopping_signal is True. -- GitLab From c681be04ec15cdfc225bc61132420781bf23d298 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 5 Jun 2018 13:12:02 -0700 Subject: [PATCH 549/902] Move SimplifyAggregation to separate aggregation stage. 
PiperOrigin-RevId: 199346067 --- .../optimizers/arithmetic_optimizer.cc | 171 +++++++++++------- .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 68 +++++-- 3 files changed, 154 insertions(+), 86 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 561930f858..2408652c87 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -2122,6 +2122,109 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage { } }; +// Simplify aggregation (e.g. AddN) nodes: +// +// 1. Discard aggregate nodes with a single input and no control dependencies. +// +// 2. Try to rewrite aggregations of N >= 2 identical terms (possibly due to +// deduping or other rewrites) so we can get rid of the sum entirely. +// +// The expression (using AddN as an example of an aggregate op): +// AddN(x, x, x, ... ,x) +// <-- N terms --> +// can be rewritten to: +// Mul(Const(N), x)) +// +class SimplifyAggregation : public ArithmeticOptimizerStage { + public: + explicit SimplifyAggregation(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("SimplifyAggregation", ctx, ctx_ext) {} + ~SimplifyAggregation() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsAggregate(*node) && NumNonControlInputs(*node) > 0; + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { + // 1. Discard aggregate nodes with a single input and no control deps. + if (node->input_size() == 1) { + *simplified_node_name = node->input(0); + return Status::OK(); + } + + // 2. Rewrite aggregations of N >= 2 identical terms. + + // All non-control inputs must be identical. 
+ bool all_equal = true; + int num_inputs = 1; + for (int i = 1; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) break; + ++num_inputs; + if (node->input(i) != node->input(0)) { + all_equal = false; + break; + } + } + if (!all_equal) return Status::OK(); + + // And node should not be optimized earlier. + const NodeScopeAndName node_scope_and_name = + ParseNodeScopeAndName(node->name()); + const string optimized_const_name = + OptimizedNodeName(node_scope_and_name, "Const"); + const string optimized_mul_name = + OptimizedNodeName(node_scope_and_name, "Mul"); + + bool is_already_optimized = + ctx().node_map->NodeExists(optimized_const_name) || + ctx().node_map->NodeExists(optimized_mul_name); + + if (is_already_optimized) return Status::OK(); + + // At this point all preconditions are met, and we safely do the rewrite. + VLOG(3) << "Simplify aggregation with identical inputs: node=" + << node->name() << " num_inputs=" << num_inputs; + + // 1. Create constant node with value N. + const auto type = GetDataTypeFromAttr(*node, "T"); + Tensor t(type, TensorShape({})); + Status status = SetTensorValue(type, num_inputs, &t); + if (!status.ok()) { + return errors::Internal("Failed to create const node: ", + status.error_message()); + } + + TensorValue value(&t); + NodeDef* new_const_node = AddEmptyNode(optimized_const_name); + status = ConstantFolding::CreateNodeDef(new_const_node->name(), value, + new_const_node); + if (!status.ok()) { + return errors::Internal("Failed to create const node: ", + status.error_message()); + } + new_const_node->set_device(node->device()); + MaybeAddControlInput(NodeName(node->input(0)), new_const_node, + ctx().optimized_graph, ctx().node_map); + AddToOptimizationQueue(new_const_node); + + // 2. Replace the aggregate node with Mul(Const(N), x). 
+ NodeDef* new_mul_node = AddEmptyNode(optimized_mul_name); + new_mul_node->set_op("Mul"); + new_mul_node->set_device(node->device()); + SetDataTypeToAttr(type, "T", new_mul_node); + new_mul_node->add_input(new_const_node->name()); + ctx().node_map->AddOutput(new_const_node->name(), new_mul_node->name()); + new_mul_node->add_input(node->input(0)); + ctx().node_map->AddOutput(node->input(0), new_mul_node->name()); + + ForwardControlDependencies(new_mul_node, {node}); + *simplified_node_name = new_mul_node->name(); + + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -2374,72 +2477,6 @@ void ArithmeticOptimizer::ForwardControlDependencies( // ArithmeticOptimizerStage string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, SetVector* nodes_to_simplify) { - if (IsAggregate(*node) && NumNonControlInputs(*node) > 0) { - // Discard aggregate nodes with a single input and no control dependencies. - if (node->input_size() == 1) { - return node->input(0); - } - - // Try to rewrite aggregations of N >= 2 identical terms (possibly due - // to deduping or other rewrites) so we can get rid of the sum entirely. - // The expression (using AddN as an example of an aggregate op): - // AddN(x, x, x, ... ,x) - // <-- N terms --> - // can be rewritten to - // Mul(Const(N), x)) - // - bool all_equal = true; - int num_inputs = 1; - for (int i = 1; i < node->input_size(); ++i) { - if (IsControlInput(node->input(i))) { - break; - } - ++num_inputs; - if (node->input(i) != node->input(0)) { - all_equal = false; - break; - } - } - if (all_equal && !OptimizedNodeExists(*node, "const") && - !OptimizedNodeExists(*node, "mul")) { - // 1. Create constant node with value N. 
- const auto type = GetDataTypeFromAttr(*node, "T"); - Tensor t(type, TensorShape({})); - Status status = SetTensorValue(type, num_inputs, &t); - if (!status.ok()) { - LOG(WARNING) << "Failed to create const node: " - << status.error_message(); - return ""; - } - TensorValue value(&t); - NodeDef* new_const_node = AddNode(*node, "const", /*copy_node=*/false); - status = ConstantFolding::CreateNodeDef(new_const_node->name(), value, - new_const_node); - if (!status.ok()) { - LOG(WARNING) << "Failed to create const node: " - << status.error_message(); - return ""; - } - new_const_node->set_device(node->device()); - MaybeAddControlInput(NodeName(node->input(0)), new_const_node, - optimized_graph_, node_map_.get()); - nodes_to_simplify->PushBack(new_const_node); - - // 2. Replace the aggregate node with Mul(Const(N), x). - NodeDef* new_mul_node = AddNode(*node, "mul", /*copy_node=*/false); - new_mul_node->set_op("Mul"); - new_mul_node->set_device(node->device()); - SetDataTypeToAttr(type, "T", new_mul_node); - new_mul_node->add_input(new_const_node->name()); - node_map_->AddOutput(new_const_node->name(), new_mul_node->name()); - new_mul_node->add_input(node->input(0)); - node_map_->AddOutput(node->input(0), new_mul_node->name()); - - ForwardControlDependencies(new_mul_node, {node}); - return new_mul_node->name(); - } - } - // Fold Transpose into matrix multiplication. 
if ((node->op() == "MatMul" || node->op() == "SparseMatMul" || node->op() == "BatchMatMul") && @@ -2554,6 +2591,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.reorder_cast_and_transpose) pipeline.AddStage(ctx, ctx_ext); + if (options_.simplify_aggregation) + pipeline.AddStage(ctx, ctx_ext); if (options_.hoist_cwise_unary_chains) pipeline.AddStage(ctx, ctx_ext); if (options_.convert_sqrt_div_to_rsqrt_mul) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 8e00b83a70..549ea3fde5 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -75,6 +75,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool remove_redundant_reshape = true; bool reorder_cast_and_transpose = true; bool replace_mul_with_square = true; + bool simplify_aggregation = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index f15cbfe407..f79347cde6 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -40,21 +40,37 @@ constexpr char kHoistFactorOptimizerMul[] = constexpr char kHoistFactorOptimizerAdd[] = "ArithmeticOptimizer/HoistCommonFactor_Add_"; -// Optimized name of outer Mul node by HoistCommonFactorOutOfAggregation +constexpr char kSimplifyAggregationConst[] = + "ArithmeticOptimizer/SimplifyAggregation_Const_"; + +constexpr char kSimplifyAggregationMul[] = + "ArithmeticOptimizer/SimplifyAggregation_Mul_"; + +// Optimized name of outer Mul node by HoistCommonFactorOutOfAggregation. 
string HoistMulName(const string& name) { return AddPrefixToNodeName(name, kHoistFactorOptimizerMul, ""); } -// Optimized name of outer Div node by HoistCommonFactorOutOfAggregation +// Optimized name of outer Div node by HoistCommonFactorOutOfAggregation. string HoistDivName(const string& name) { return AddPrefixToNodeName(name, kHoistFactorOptimizerDiv, ""); } -// Optimized name of inner Add node by HoistCommonFactorOutOfAggregation +// Optimized name of inner Add node by HoistCommonFactorOutOfAggregation. string HoistAddName(const string& name) { return AddPrefixToNodeName(name, kHoistFactorOptimizerAdd, ""); } +// Optimized name of Const node by SimplifyAggregation. +string AggregationConstName(const string& name) { + return AddPrefixToNodeName(name, kSimplifyAggregationConst, ""); +} + +// Optimized name of Mul node by SimplifyAggregation. +string AggregationMulName(const string& name) { + return AddPrefixToNodeName(name, kSimplifyAggregationMul, ""); +} + string OptimizedName(const string& name) { return AddPrefixToNodeName(name, kArithmeticOptimizer); } @@ -140,6 +156,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { options.remove_logical_not = false; options.reorder_cast_and_transpose = false; options.replace_mul_with_square = false; + options.simplify_aggregation = false; optimizer->options_ = options; } @@ -226,6 +243,11 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.remove_logical_not = true; } + + void EnableOnlySimplifyAggregation(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.simplify_aggregation = true; + } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -500,10 +522,10 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { Output id = ops::Identity(s.WithOpName("id"), add); GrapplerItem item; + item.fetch = {"id"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - std::vector fetch = {"id"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); 
+ auto tensors_expected = EvaluateNodes(item.graph, item.fetch); EXPECT_EQ(1, tensors_expected.size()); ArithmeticOptimizer optimizer; @@ -513,22 +535,25 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) { EXPECT_EQ(5, output.node_size()); - const NodeDef* new_const = node_map.GetNode(OptimizedName("add_const")); + const string optimized_const_name = AggregationConstName("add"); + const string optimized_mul_name = AggregationMulName("add"); + + const NodeDef* new_const = node_map.GetNode(optimized_const_name); ASSERT_NE(new_const, nullptr); EXPECT_EQ("^x", new_const->input(0)); EXPECT_EQ(std::string("\0\0\0@", 4), new_const->attr().at("value").tensor().tensor_content()); - const NodeDef* new_mul = node_map.GetNode(OptimizedName("add_mul")); + const NodeDef* new_mul = node_map.GetNode(optimized_mul_name); ASSERT_NE(new_mul, nullptr); - EXPECT_EQ(OptimizedName("add_const"), new_mul->input(0)); + EXPECT_EQ(optimized_const_name, new_mul->input(0)); EXPECT_EQ("x", new_mul->input(1)); const NodeDef* new_id = node_map.GetNode("id"); ASSERT_NE(new_id, nullptr); - EXPECT_EQ(OptimizedName("add_mul"), new_id->input(0)); + EXPECT_EQ(optimized_mul_name, new_id->input(0)); - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, item.fetch); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } @@ -554,21 +579,24 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) { EXPECT_EQ(6, output.node_size()); - const NodeDef* new_const = node_map.GetNode(OptimizedName("add_const")); + const string optimized_const_name = AggregationConstName("add"); + const string optimized_mul_name = AggregationMulName("add"); + + const NodeDef* new_const = node_map.GetNode(optimized_const_name); ASSERT_NE(new_const, nullptr); EXPECT_EQ("^x", new_const->input(0)); EXPECT_EQ(std::string("\0\0\0@", 4), new_const->attr().at("value").tensor().tensor_content()); - const NodeDef* new_mul = 
node_map.GetNode(OptimizedName("add_mul")); + const NodeDef* new_mul = node_map.GetNode(optimized_mul_name); ASSERT_NE(new_mul, nullptr); - EXPECT_EQ(OptimizedName("add_const"), new_mul->input(0)); + EXPECT_EQ(optimized_const_name, new_mul->input(0)); EXPECT_EQ("x", new_mul->input(1)); EXPECT_EQ("^y", new_mul->input(2)); const NodeDef* new_id = node_map.GetNode("id"); ASSERT_NE(new_id, nullptr); - EXPECT_EQ(OptimizedName("add_mul"), new_id->input(0)); + EXPECT_EQ(optimized_mul_name, new_id->input(0)); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors.size()); @@ -633,24 +661,24 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { ASSERT_NE(add_4_node, nullptr); EXPECT_EQ("Add", add_4_node->op()); EXPECT_EQ(2, add_4_node->input_size()); - EXPECT_EQ(OptimizedName("Add_const"), add_4_node->input(0)); - EXPECT_EQ(OptimizedName("Add_1_const"), add_4_node->input(1)); + EXPECT_EQ(AggregationConstName("Add"), add_4_node->input(0)); + EXPECT_EQ(AggregationConstName("Add_1"), add_4_node->input(1)); const NodeDef* add_5_node = node_map.GetNode(HoistAddName("Add_5")); ASSERT_NE(add_5_node, nullptr); EXPECT_EQ("Add", add_5_node->op()); EXPECT_EQ(2, add_5_node->input_size()); - EXPECT_EQ(OptimizedName("Add_const"), add_5_node->input(0)); - EXPECT_EQ(OptimizedName("Add_1_const"), add_5_node->input(1)); + EXPECT_EQ(AggregationConstName("Add"), add_5_node->input(0)); + EXPECT_EQ(AggregationConstName("Add_1"), add_5_node->input(1)); - const NodeDef* add_const_node = node_map.GetNode(OptimizedName("Add_const")); + const NodeDef* add_const_node = node_map.GetNode(AggregationConstName("Add")); ASSERT_NE(add_const_node, nullptr); EXPECT_EQ("Const", add_const_node->op()); EXPECT_EQ(1, add_const_node->input_size()); EXPECT_EQ("^Placeholder", add_const_node->input(0)); const NodeDef* add_1_const_node = - node_map.GetNode(OptimizedName("Add_1_const")); + node_map.GetNode(AggregationConstName("Add_1")); ASSERT_NE(add_1_const_node, nullptr); EXPECT_EQ("Const", 
add_1_const_node->op()); EXPECT_EQ(1, add_1_const_node->input_size()); -- GitLab From 1bac6186e19353d9881584ce8ec51bf35d627842 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 5 Jun 2018 13:16:57 -0700 Subject: [PATCH 550/902] Introduce tf.contrib.control_flow.new_cond. new_cond is a new implementation of tf.cond. Instead of emitting control flow ops (i.e. Switch and Merge nodes), new_cond emits a single If op, which represents the conditional branches as TF functions. With this change, users can use new_cond and take its gradient. The idea is for new_cond to eventually replace tf.cond. There are several functional and performance gaps that must be addressed first, including: * Gradients won't work on imported graphs * Misc. limitations of TF functions (lack of collections, device scopes, etc.) PiperOrigin-RevId: 199346735 --- tensorflow/contrib/BUILD | 5 +- tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/cmake/python_modules.txt | 2 + tensorflow/contrib/control_flow/BUILD | 48 +++ tensorflow/contrib/control_flow/__init__.py | 31 ++ .../contrib/control_flow/python/cond_v2.py | 394 ++++++++++++++++++ .../control_flow/python/cond_v2_test.py | 113 +++++ .../api_def/base_api/api_def_FakeParam.pbtxt | 24 ++ .../python_api/api_def_FakeParam.pbtxt | 4 + tensorflow/core/kernels/functional_ops.cc | 19 + tensorflow/core/ops/functional_ops.cc | 17 + tensorflow/python/BUILD | 5 +- 12 files changed, 660 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/control_flow/BUILD create mode 100644 tensorflow/contrib/control_flow/__init__.py create mode 100644 tensorflow/contrib/control_flow/python/cond_v2.py create mode 100644 tensorflow/contrib/control_flow/python/cond_v2_test.py create mode 100644 tensorflow/core/api_def/base_api/api_def_FakeParam.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FakeParam.pbtxt diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 0f9c80404a..50b1ae5cc3 100644 --- 
a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -31,13 +31,15 @@ py_library( "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", "//tensorflow/contrib/coder:coder_py", "//tensorflow/contrib/compiler:compiler_py", + "//tensorflow/contrib/autograph", "//tensorflow/contrib/constrained_optimization", + "//tensorflow/contrib/control_flow", "//tensorflow/contrib/copy_graph:copy_graph_py", "//tensorflow/contrib/crf:crf_py", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_py", "//tensorflow/contrib/data", - "//tensorflow/contrib/distribute:distribute", "//tensorflow/contrib/deprecated:deprecated_py", + "//tensorflow/contrib/distribute:distribute", "//tensorflow/contrib/distributions:distributions_py", "//tensorflow/contrib/eager/python:tfe", "//tensorflow/contrib/estimator:estimator_py", @@ -83,7 +85,6 @@ py_library( "//tensorflow/contrib/proto", "//tensorflow/contrib/quantization:quantization_py", "//tensorflow/contrib/quantize:quantize_graph", - "//tensorflow/contrib/autograph", "//tensorflow/contrib/receptive_field:receptive_field_py", "//tensorflow/contrib/recurrent:recurrent_py", "//tensorflow/contrib/reduce_slice_ops:reduce_slice_ops_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 9aad772f0a..ad8c40395c 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -30,6 +30,7 @@ from tensorflow.contrib import cluster_resolver from tensorflow.contrib import coder from tensorflow.contrib import compiler from tensorflow.contrib import constrained_optimization +from tensorflow.contrib import control_flow from tensorflow.contrib import copy_graph from tensorflow.contrib import crf from tensorflow.contrib import cudnn_rnn diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index fece56c412..015cb73bbd 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -115,6 +115,8 @@ 
tensorflow/contrib/coder/python/ops tensorflow/contrib/compiler tensorflow/contrib/constrained_optimization tensorflow/contrib/constrained_optimization/python +tensorflow/contrib/control_flow +tensorflow/contrib/control_flow/python tensorflow/contrib/copy_graph tensorflow/contrib/copy_graph/python tensorflow/contrib/copy_graph/python/util diff --git a/tensorflow/contrib/control_flow/BUILD b/tensorflow/contrib/control_flow/BUILD new file mode 100644 index 0000000000..746b5b5b5e --- /dev/null +++ b/tensorflow/contrib/control_flow/BUILD @@ -0,0 +1,48 @@ +# New implementations of control flow ops + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +py_library( + name = "control_flow", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":cond_v2", + ], +) + +py_library( + name = "cond_v2", + srcs = ["python/cond_v2.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:c_api_util", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops_gen", + "//tensorflow/python:gradients", + "//tensorflow/python:pywrap_tensorflow", + ], +) + +tf_py_test( + name = "cond_v2_test", + size = "small", + srcs = ["python/cond_v2_test.py"], + additional_deps = [ + ":cond_v2", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework", + "//tensorflow/python:gradients", + ], + grpc_enabled = True, +) diff --git a/tensorflow/contrib/control_flow/__init__.py b/tensorflow/contrib/control_flow/__init__.py new file mode 100644 index 0000000000..582af2cf10 --- /dev/null +++ b/tensorflow/contrib/control_flow/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""New implementations of TF control flow ops. + +@@cond_v2 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from tensorflow.contrib.control_flow.python.cond_v2 import cond_v2 +# pylint: enable=unused-import + +from tensorflow.python.util.all_util import remove_undocumented + +remove_undocumented(__name__) diff --git a/tensorflow/contrib/control_flow/python/cond_v2.py b/tensorflow/contrib/control_flow/python/cond_v2.py new file mode 100644 index 0000000000..90c678d0f6 --- /dev/null +++ b/tensorflow/contrib/control_flow/python/cond_v2.py @@ -0,0 +1,394 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""cond_v2 and gradient. + +This is a version of cond that emits a single If op, as well as the gradient +function for If ops produced by cond_v2. This will eventually replace the +current tf.cond implementation once it reaches feature and performance parity. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python import pywrap_tensorflow as c_api +from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_functional_ops +from tensorflow.python.ops import gradients_impl + + +# NOTE(skyewm): TensorFlow uses protected class methods and fields to signify +# that they aren't part of the official public API. These protected members +# often need to be used by implementation code however. Rather than litter the +# code with pylint comments, we ignore protected access violations for +# readability. +# pylint: disable=protected-access + + +def cond_v2(pred, true_fn, false_fn, name="cond"): + """Like tf.cond, except emits a single If op.""" + with ops.name_scope(name) as scope: + true_graph = function.func_graph_from_py_func(true_fn, [], [], + name="%s_true" % scope) + false_graph = function.func_graph_from_py_func(false_fn, [], [], + name="%s_false" % scope) + _check_same_outputs(true_graph, false_graph) + + # Add inputs to true_graph and false_graph to make them match. Note that + # this modifies true_graph and false_graph. + cond_inputs = _make_inputs_match(true_graph, false_graph, + true_graph.extra_inputs, + false_graph.extra_inputs) + + # Add all intermediate tensors as function outputs so they're available for + # the gradient computation. 
+ + true_intermediates = _get_intermediates(true_graph) + false_intermediates = _get_intermediates(false_graph) + + # Save the original number of outputs to return to the caller. + num_cond_outputs = len(true_graph.outputs) + + # Make the number/type of new intermediate outputs match. + extra_true_outputs, extra_false_outputs = _pad_params( + true_graph, false_graph, true_intermediates, false_intermediates) + + true_graph.outputs.extend(extra_true_outputs) + false_graph.outputs.extend(extra_false_outputs) + + # Create the If op. + tensors = gen_functional_ops._if( + pred, cond_inputs, [t.dtype for t in true_graph.outputs], + _create_new_tf_function(true_graph), + _create_new_tf_function(false_graph), + name=scope) + + # TODO(b/79883549): if we could make Graphs from FunctionDefs, we wouldn't + # need this extra state. Requiring extra state also prevents the ability to + # take the gradient of deserialized If ops. + tensors[0].op._true_graph = true_graph + tensors[0].op._false_graph = false_graph + + return tensors[:num_cond_outputs] + + +@ops.RegisterGradient("If") +def _IfGrad(op, *grads): # pylint: disable=invalid-name + """The gradient of an If op produced by cond_v2.""" + true_graph = op._true_graph + false_graph = op._false_graph + + # Create grad functions that compute the gradient of the true/false forward + # graphs. These functions will capture tensors from the forward pass + # functions. + true_grad_graph = _create_grad_func( + true_graph, grads, "%sgrad" % true_graph.name) + false_grad_graph = _create_grad_func( + false_graph, grads, "%sgrad" % false_graph.name) + + assert ([t.dtype for t in true_grad_graph.outputs] == + [t.dtype for t in false_grad_graph.outputs]) + + # Match up the captured grad function inputs with outputs of 'op' and other + # external tensors. 
+ true_grad_inputs = _get_grad_inputs(op, true_graph, true_grad_graph) + false_grad_inputs = _get_grad_inputs(op, false_graph, false_grad_graph) + + # Make the inputs to true_grad_graph and false_grad_graph match. Note that + # this modifies true_grad_graph and false_grad_graph. + grad_inputs = _make_inputs_match(true_grad_graph, false_grad_graph, + true_grad_inputs, false_grad_inputs) + + # Add all intermediate tensors as function outputs so they're available for + # higher-order gradient computations. + + true_grad_intermediates = _get_intermediates(true_grad_graph) + false_grad_intermediates = _get_intermediates(false_grad_graph) + + # Save the original number of gradient outputs to return. + num_grad_outputs = len(true_grad_graph.outputs) + + # Make the number/type of new intermediate outputs match. + extra_true_grad_outputs, extra_false_grad_outputs = _pad_params( + true_grad_graph, false_grad_graph, + true_grad_intermediates, false_grad_intermediates) + + true_grad_graph.outputs.extend(extra_true_grad_outputs) + false_grad_graph.outputs.extend(extra_false_grad_outputs) + + # Create the gradient If op. + tensors = gen_functional_ops._if( + op.inputs[0], grad_inputs, [t.dtype for t in true_grad_graph.outputs], + _create_new_tf_function(true_grad_graph), + _create_new_tf_function(false_grad_graph)) + tensors[0].op._true_graph = true_grad_graph + tensors[0].op._false_graph = false_grad_graph + + # The predicate has no gradient. + return [None] + tensors[:num_grad_outputs] + + +def _grad_fn(func_graph, grads): + """The gradient function for each conditional branch. + + This function builds the gradient graph of the corresponding forward-pass + conditional branch in `func_graph`. This is done by differentiating + func_graph's outputs w.r.t. its inputs. + + Args: + func_graph: function._FuncGraph. The corresponding forward-pass function. + grads: The list of input gradient Tensors. + + Returns: + The output gradient Tensors. 
+ """ + # Filter out untrainable function outputs. + # NOTE(skyewm): If we don't do this, the untrainable tensors can sometimes + # cause _GradientsHelper to raise an exception (e.g. the implementation + # doesn't expect 'ys' to contain boolean tensors). + assert len(func_graph.outputs) == len(grads) + ys = [] + grad_ys = [] + for y, grad_y in zip(func_graph.outputs, grads): + if not gradients_impl._IsTrainable(y): + continue + ys.append(y) + grad_ys.append(grad_y) + + # Build the gradient graph. Note that this builds the gradient computation of + # func_graph in the current graph, which requires capturing tensors from + # func_graph. The captured func_graph tensors are resolved to external tensors + # in _get_grad_inputs. + result = gradients_impl._GradientsHelper( + ys, func_graph.inputs, grad_ys=grad_ys, + src_graph=func_graph) + + # Functions can't return None; replace Nones with zero tensors. + # TODO(b/80444525): don't return anything here and make _IfGrad return None if + # both branches have zero gradient. + for i in range(len(result)): + if result[i] is None: + result[i] = array_ops.zeros_like(func_graph.inputs[i]) + + return result + + +def _create_grad_func(func_graph, grads, name): + """Returns the _FuncGraph representation of _grad_fn.""" + return function.func_graph_from_py_func(lambda: _grad_fn(func_graph, grads), + [], [], name) + + +def _get_grad_inputs(if_op, cond_graph, grad_graph): + """Returns the tensors we should pass to grad_graph. + + This method handles tensors captured from cond_graph in grad_graph. It + converts these to suitable input tensors from the outer graph. + + Args: + if_op: Operation. The forward-pass If op that uses cond_graph. + cond_graph: function._FuncGraph. The forward-pass function. + grad_graph: function._FuncGraph. The gradients function. + + Returns: + A list of inputs tensors to be passed to grad_graph. + """ + inputs = [] + + # Maps placeholders in cond_graph -> input tensor in outer graph. 
+ forward_input_map = {v: k for k, v in cond_graph._captured.items()} + + for t in grad_graph.extra_inputs: + if t.graph == ops.get_default_graph(): + # t is in the outer graph (e.g. one of the input gradients). + inputs.append(t) + elif t in forward_input_map: + # t is an input placeholder in cond_graph. Get the corresponding input + # tensor in the outer graph. + assert t.graph == cond_graph + assert forward_input_map[t].graph == ops.get_default_graph() + inputs.append(forward_input_map[t]) + else: + # t is an intermediate value in cond_graph. Get the corresponding output + # of 'if_op' (note that all intermediate values are outputs). + assert t.graph == cond_graph + output_idx = cond_graph.outputs.index(t) + inputs.append(if_op.outputs[output_idx]) + + return inputs + + +def _create_new_tf_function(func_graph): + """Converts func_graph to a TF_Function and adds it to the current graph. + + Args: + func_graph: function._FuncGraph + + Returns: + The name of the new TF_Function. + """ + func_graph.name = "%s_" % func_graph.name + c_func = c_api.TF_GraphToFunction_wrapper( + func_graph._c_graph, + func_graph.name, + False, # append_hash_to_fn_name + None, # opers + [t._as_tf_output() for t in func_graph.inputs], + [t._as_tf_output() for t in func_graph.outputs], + [], + None, # opts + None) # description + c_func = c_api_util.ScopedTFFunction(c_func) + c_api.TF_GraphCopyFunction( + ops.get_default_graph()._c_graph, c_func.func, None) + return func_graph.name + + +def _get_intermediates(func_graph): + """Returns all tensors in `func_graph` that aren't inputs or outputs.""" + intermediates = [] + for op in func_graph.get_operations(): + for t in op.outputs: + if t in func_graph.inputs: continue + if t in func_graph.outputs: continue + intermediates.append(t) + return intermediates + + +def _separate_unique_inputs(true_inputs, false_inputs): + """Separates tensors appearing only in true_inputs or false_inputs, or both. 
+ + Args: + true_inputs: list of Tensors + false_inputs: list of Tensors + + Returns: + Three lists of Tensors: + 1. The tensors that appear in both true_inputs and false_inputs + 2. The tensors that only appear in true_inputs + 3. The tensors that only appear in false_inputs + """ + true_inputs = set(true_inputs) + false_inputs = set(false_inputs) + + shared_inputs = true_inputs.intersection(false_inputs) + true_only_inputs = true_inputs - false_inputs + false_only_inputs = false_inputs - true_inputs + + return list(shared_inputs), list(true_only_inputs), list(false_only_inputs) + + +def _pad_params(true_graph, false_graph, true_params, false_params): + """Returns new param lists that have matching signatures. + + This is done by mirroring each param list in the other using dummy params. + There is no merging of params. + + Args: + true_graph: function._FuncGraph + false_graph: function._FuncGraph + true_params: a list of Tensors from true_graph + false_params: a list of Tensors from false_graph + + Returns: + A new list of Tensors in true_graph and a new list of Tensors in + false_graph. The two lists have the same number of Tensors, with matching + types and shapes across the lists. + """ + new_true_params = (true_params + + _create_dummy_params(true_graph, false_params)) + new_false_inputs = (_create_dummy_params(false_graph, true_params) + + false_params) + return new_true_params, new_false_inputs + + +def _make_inputs_match(true_graph, false_graph, true_inputs, false_inputs): + """Modifies true_graph and false_graph so they have the same input signature. + + This method reorders and/or adds parameters to true_graph and false_graph so + they have the same input signature, and updates the 'inputs', 'extra_inputs', + and '_captured' fields of both graphs accordingly. It uses the input tensors + from the outer graph to avoid duplicating shared arguments. 
+ + Args: + true_graph: function._FuncGraph + false_graph: function._FuncGraph + true_inputs: a list of Tensors in the outer graph. The inputs for + true_graph. + false_inputs: a list of Tensors in the outer graph. The inputs for + false_graph. + + Returns: + A new list of Tensors from the outer graph that are the new inputs for both + true_graph and false_graph. This is a deduped version of true_inputs + + false_inputs. + """ + shared_inputs, true_only_inputs, false_only_inputs = _separate_unique_inputs( + true_inputs, false_inputs) + + new_inputs = shared_inputs + true_only_inputs + false_only_inputs + + true_input_to_param = dict(zip(true_inputs, true_graph.inputs)) + false_input_to_param = dict(zip(false_inputs, false_graph.inputs)) + + true_graph.inputs = ( + [true_input_to_param[t] for t in shared_inputs] + + [true_input_to_param[t] for t in true_only_inputs] + + _create_dummy_params(true_graph, false_only_inputs)) + + false_graph.inputs = ( + [false_input_to_param[t] for t in shared_inputs] + + _create_dummy_params(false_graph, true_only_inputs) + + [false_input_to_param[t] for t in false_only_inputs]) + + # Rewrite the _FuncGraphs' state to reflect the new inputs. + true_graph.extra_inputs = new_inputs + false_graph.extra_inputs = new_inputs + + true_graph._captured = dict(zip(new_inputs, true_graph.inputs)) + false_graph._captured = dict(zip(new_inputs, false_graph.inputs)) + + return new_inputs + + +def _create_dummy_params(func_graph, template_tensors): + """Creates tensors in func_graph to represent template_tensors. + + Args: + func_graph: function._FuncGraph. + template_tensors: a list of tensors in the outer graph. + + Returns: + A list of tensors in func_graph. 
+ """ + with func_graph.as_default(): + return [gen_functional_ops.fake_param(dtype=t.dtype, shape=t.shape) + for t in template_tensors] + + +def _check_same_outputs(true_graph, false_graph): + """Raises an error if true_graph and false_graph have different outputs.""" + true_output_types = [t.dtype for t in true_graph.outputs] + false_output_types = [t.dtype for t in false_graph.outputs] + if (len(true_graph.outputs) != len(false_graph.outputs) or + true_output_types != false_output_types): + raise ValueError( + "true_fn() and false_fn() must return the same number and type of " + "arguments, got:\n" + " true_fn: %s\n" + " false_fn: %s" % (true_output_types, false_output_types)) diff --git a/tensorflow/contrib/control_flow/python/cond_v2_test.py b/tensorflow/contrib/control_flow/python/cond_v2_test.py new file mode 100644 index 0000000000..c94f3a6584 --- /dev/null +++ b/tensorflow/contrib/control_flow/python/cond_v2_test.py @@ -0,0 +1,113 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for cond_v2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.control_flow.python import cond_v2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class NewCondTest(test.TestCase): + + def _testCond(self, true_fn, false_fn, train_vals): + pred = array_ops.placeholder(dtypes.bool, name="pred") + + expected = control_flow_ops.cond(pred, true_fn, false_fn, name="expected") + actual = cond_v2.cond_v2(pred, true_fn, false_fn, name="actual") + + expected_grad = gradients_impl.gradients(expected, train_vals) + actual_grad = gradients_impl.gradients(actual, train_vals) + + with self.test_session() as sess: + expected_val, actual_val, expected_grad_val, actual_grad_val = sess.run( + (expected, actual, expected_grad, actual_grad), {pred: True}) + self.assertEqual(expected_val, actual_val) + self.assertEqual(expected_grad_val, actual_grad_val) + + expected_val, actual_val, expected_grad_val, actual_grad_val = sess.run( + (expected, actual, expected_grad, actual_grad), {pred: False}) + self.assertEqual(expected_val, actual_val) + self.assertEqual(expected_grad_val, actual_grad_val) + + def testBasic(self): + x = constant_op.constant(1.0, name="x") + y = constant_op.constant(2.0, name="y") + + def true_fn(): + return x * 2.0 + + def false_fn(): + return y * 3.0 + + self._testCond(true_fn, false_fn, [x]) + self._testCond(true_fn, false_fn, [x, y]) + self._testCond(true_fn, false_fn, [y]) + + def testBasic2(self): + x = constant_op.constant(1.0, name="x") + y = constant_op.constant(2.0, name="y") + + def 
true_fn(): + return x * y * 2.0 + + def false_fn(): + return 2.0 + + self._testCond(true_fn, false_fn, [x]) + self._testCond(true_fn, false_fn, [x, y]) + self._testCond(true_fn, false_fn, [y]) + + def testSecondDerivative(self): + pred = array_ops.placeholder(dtypes.bool, name="pred") + x = constant_op.constant(3.0, name="x") + + def true_fn(): + return math_ops.pow(x, 3) + + def false_fn(): + return x + + cond = cond_v2.cond_v2(pred, true_fn, false_fn, name="cond") + cond_grad = gradients_impl.gradients(cond, [x]) + cond_grad_grad = gradients_impl.gradients(cond_grad, [x]) + + with self.test_session() as sess: + # d[x^3]/dx = 3x^2 + true_val = sess.run(cond_grad, {pred: True}) + self.assertEqual(true_val, [27.0]) + # d[x]/dx = 1 + false_val = sess.run(cond_grad, {pred: False}) + self.assertEqual(false_val, [1.0]) + + true_val = sess.run(cond_grad_grad, {pred: True}) + # d2[x^3]/dx2 = 6x + self.assertEqual(true_val, [18.0]) + false_val = sess.run(cond_grad_grad, {pred: False}) + # d2[x]/dx2 = 0 + self.assertEqual(false_val, [0.0]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/api_def/base_api/api_def_FakeParam.pbtxt b/tensorflow/core/api_def/base_api/api_def_FakeParam.pbtxt new file mode 100644 index 0000000000..d110aba42b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_FakeParam.pbtxt @@ -0,0 +1,24 @@ +op { + graph_op_name: "FakeParam" + visibility: SKIP + out_arg { + name: "output" + description: <