diff --git a/tools/bazel.rc b/.bazelrc similarity index 95% rename from tools/bazel.rc rename to .bazelrc index 02e5b843069d814c6d7fe2ba7c091084ba7db2ca..d5d20309df82498a552df759e3d200a914a4cfb7 100644 --- a/tools/bazel.rc +++ b/.bazelrc @@ -43,6 +43,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld' build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true + build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true @@ -81,3 +84,5 @@ build:dynamic_kernels --define=dynamic_loaded_kernels=true build --define=PREFIX=/usr build --define=LIBDIR=$(PREFIX)/lib build --define=INCLUDEDIR=$(PREFIX)/include + +# Do not commit the tf_configure.bazelrc line diff --git a/.gitignore b/.gitignore index 1ef4c297ee4f369775c13b32a46a55887de719e7..cb65f447d4a551266e237714a16d71b58bcfc51d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ .DS_Store .ipynb_checkpoints node_modules -/.bazelrc /.tf_configure.bazelrc /bazel-* /bazel_pip diff --git a/configure.py b/configure.py index 3fcaaa9d0ef51c57fb40fcafd8579977f37375ef..89dc79b6b6bb168339d05182fd9da47dfc90ce54 100644 --- a/configure.py +++ b/configure.py @@ -35,7 +35,6 @@ except ImportError: _DEFAULT_CUDA_VERSION = '9.0' _DEFAULT_CUDNN_VERSION = '7' -_DEFAULT_NCCL_VERSION = '2.2' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0' _DEFAULT_CUDA_PATH = '/usr/local/cuda' _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda' @@ -48,10 +47,13 @@ _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16] _DEFAULT_PROMPT_ASK_ATTEMPTS = 10 -_TF_WORKSPACE_ROOT = os.path.abspath(os.path.dirname(__file__)) _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' -_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) -_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') +_TF_WORKSPACE_ROOT = '' +_TF_BAZELRC = '' + +NCCL_LIB_PATHS = [ + 'lib64/', 'lib/powerpc64le-linux-gnu/', 'lib/x86_64-linux-gnu/', '' +] if platform.machine() == 'ppc64le': _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/' @@ -224,7 +226,7 @@ def setup_python(environ_cp): python_lib_path = default_python_lib_path environ_cp['PYTHON_LIB_PATH'] = python_lib_path - python_major_version = get_python_major_version(python_bin_path) + _ = get_python_major_version(python_bin_path) # Convert python path to Windows style before writing into bazel.rc if is_windows() or is_cygwin(): @@ -243,10 +245,10 @@ def setup_python(environ_cp): f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) -def reset_tf_configure_bazelrc(workspace_path): +def reset_tf_configure_bazelrc(): """Reset file that contains customized config settings.""" open(_TF_BAZELRC, 'w').close() - bazelrc_path = os.path.join(workspace_path, '.bazelrc') + bazelrc_path = os.path.join(_TF_WORKSPACE_ROOT, '.bazelrc') data = [] if os.path.exists(bazelrc_path): @@ -257,12 +259,7 @@ def reset_tf_configure_bazelrc(workspace_path): if _TF_BAZELRC_FILENAME in l: continue f.write('%s\n' % l) - if is_windows(): - tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/') - else: - tf_bazelrc_path = _TF_BAZELRC - f.write('import %s\n' % tf_bazelrc_path) - + f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME) def cleanup_makefile(): """Delete any leftover BUILD files from the Makefile build. @@ -386,7 +383,9 @@ def set_build_var(environ_cp, var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default))) environ_cp[var_name] = var if var == '1': - write_to_bazelrc('build --define %s=true' % option_name) + write_to_bazelrc( + 'build:%s --define %s=true' % (bazel_config_name, option_name)) + write_to_bazelrc('build --config=%s' % bazel_config_name) elif bazel_config_name is not None: # TODO(mikecase): Migrate all users of configure.py to use --config Bazel # options and not to set build configs through environment variables. @@ -885,7 +884,7 @@ def set_tf_cudnn_version(environ_cp): """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' - '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION + '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): tf_cudnn_version = get_from_env_or_user_or_default( @@ -1042,7 +1041,7 @@ def set_tf_tensorrt_install_path(environ_cp): for lib_file in possible_files: if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): matches = nvinfer_pattern.search(lib_file) - if len(matches.groups()) == 0: + if not matches.groups(): continue ver_str = matches.group(1) ver = convert_version_to_int(ver_str) if len(ver_str) else 0 @@ -1098,7 +1097,7 @@ def set_tf_tensorrt_install_path(environ_cp): def set_tf_nccl_install_path(environ_cp): - """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. + """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION. Args: environ_cp: copy of the os.environ. @@ -1111,59 +1110,119 @@ def set_tf_nccl_install_path(environ_cp): raise ValueError('Currently NCCL is only supported on Linux platforms.') ask_nccl_version = ( - 'Please specify the NCCL version you want to use. If NCCL %s is not ' - 'installed, then you can use version 1.3 that can be fetched ' - 'automatically but it may have worse performance with multiple GPUs. ' - '[Default is %s]: ') % (_DEFAULT_NCCL_VERSION, _DEFAULT_NCCL_VERSION) + 'Please specify the locally installed NCCL version you want to use. ' + '[Default is to use https://github.com/nvidia/nccl]: ') for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): tf_nccl_version = get_from_env_or_user_or_default( - environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION) - tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) + environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, '') - if tf_nccl_version == '1': - break # No need to get install path, NCCL 1 is a GitHub repo. + if not tf_nccl_version: + break # No need to get install path, building the open source code. + + tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) - # TODO(csigg): Look with ldconfig first if we can find the library in paths + # Look with ldconfig first if we can find the library in paths # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding # include directory. This is where the NCCL .deb packages install them. - # Then ask the user if we should use that. Instead of a single - # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to - # nccl_configure.bzl - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = (r'Please specify the location where NCCL %s library is ' - 'installed. Refer to README.md for more details. [Default ' - 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( - environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) - - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') - if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - break - # Reset and Retry - print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' + # First check to see if NCCL is in the ldconfig. + # If its found, use that location. + if is_linux(): + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) + nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)', + nccl2_path_from_ldconfig) + if nccl2_path_from_ldconfig: + nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)): + nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig) + print('NCCL libraries found in ' + nccl2_path_from_ldconfig) + + # Check if this is the main system lib location + if re.search('.*linux-gnu', nccl_install_path): + trunc_nccl_install_path = '/usr' + print('This looks like a system path.') + else: + trunc_nccl_install_path = nccl_install_path + '/..' + + # Look for header + nccl_hdr_path = trunc_nccl_install_path + '/include' + print('Assuming NCCL header path is ' + nccl_hdr_path) + if os.path.exists(nccl_hdr_path + '/nccl.h'): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path + write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path) + break + else: + print( + 'The header for NCCL2 cannot be found. Please install the libnccl-dev package.' + ) + else: + print('NCCL2 is listed by ldconfig but the library is not found. ' + 'Your ldconfig is out of date. Please run sudo ldconfig.') + else: + # NCCL is not found in ldconfig. Ask the user for the location. + default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = ( + r'Please specify the location where NCCL %s library is ' + 'installed. Refer to README.md for more details. [Default ' + 'is %s]:') % (tf_nccl_version, default_nccl_path) + nccl_install_path = get_from_env_or_user_or_default( + environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + nccl_install_path = os.path.realpath( + os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) + + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version + nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename) + if not os.path.exists(nccl_lpath): + for relative_path in NCCL_LIB_PATHS: + path = '%s/%s%s' % (nccl_install_path, relative_path, + nccl_lib_filename) + if os.path.exists(path): + print('NCCL found at ' + path) + nccl_lib_path = path + break + else: + nccl_lib_path = nccl_lpath + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join( + os.path.dirname(nccl_lib_path), '../include/nccl.h') + print('Assuming NCCL header path is ' + nccl_hdr_path) + if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path) + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', + os.path.dirname(nccl_lib_path)) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path) + write_action_env_to_bazelrc('NCCL_HDR_PATH', + os.path.dirname(nccl_hdr_path)) + break + + # Reset and Retry + print( + 'Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, nccl_hdr_path)) - environ_cp['TF_NCCL_VERSION'] = '' + environ_cp['TF_NCCL_VERSION'] = '' else: raise UserInputError('Invalid TF_NCCL setting was provided %d ' 'times in a row. Assuming to be a scripting mistake.' % @@ -1173,7 +1232,6 @@ def set_tf_nccl_install_path(environ_cp): environ_cp['TF_NCCL_VERSION'] = tf_nccl_version write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) - def get_native_cuda_compute_capabilities(environ_cp): """Get native cuda compute capabilities. @@ -1410,7 +1468,7 @@ def set_other_mpi_vars(environ_cp): def set_system_libs_flag(environ_cp): syslibs = environ_cp.get('TF_SYSTEM_LIBS', '') - if syslibs and syslibs != '': + if syslibs: if ',' in syslibs: syslibs = ','.join(sorted(syslibs.split(','))) else: @@ -1469,30 +1527,31 @@ def config_info_line(name, help_text): def main(): + global _TF_WORKSPACE_ROOT + global _TF_BAZELRC + parser = argparse.ArgumentParser() parser.add_argument( '--workspace', type=str, - default=_TF_WORKSPACE_ROOT, + default=os.path.abspath(os.path.dirname(__file__)), help='The absolute path to your active Bazel workspace.') args = parser.parse_args() + _TF_WORKSPACE_ROOT = args.workspace + _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) + # Make a copy of os.environ to be clear when functions and getting and setting # environment variables. environ_cp = dict(os.environ) check_bazel_version('0.15.0') - reset_tf_configure_bazelrc(args.workspace) + reset_tf_configure_bazelrc() cleanup_makefile() setup_python(environ_cp) if is_windows(): - environ_cp['TF_NEED_AWS'] = '0' - environ_cp['TF_NEED_GCP'] = '0' - environ_cp['TF_NEED_HDFS'] = '0' - environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_KAFKA'] = '0' environ_cp['TF_NEED_OPENCL_SYCL'] = '0' environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' @@ -1506,8 +1565,8 @@ def main(): environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0' if is_macos(): - environ_cp['TF_NEED_JEMALLOC'] = '0' environ_cp['TF_NEED_TENSORRT'] = '0' + environ_cp['TF_ENABLE_XLA'] = '0' # The numpy package on ppc64le uses OpenBLAS which has multi-threading # issues that lead to incorrect answers. Set OMP_NUM_THREADS=1 at @@ -1516,19 +1575,10 @@ def main(): if is_ppc64le(): write_action_env_to_bazelrc('OMP_NUM_THREADS', 1) - set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', - 'with_jemalloc', True) - set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', - 'with_gcp_support', True, 'gcp') - set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', - 'with_hdfs_support', True, 'hdfs') - set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', - 'with_aws_support', True, 'aws') - set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', True, 'kafka') + set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite', + 'with_ignite_support', True, 'ignite') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', - False, 'xla') - + True, 'xla') set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': @@ -1540,6 +1590,13 @@ def main(): else: set_trisycl_include_dir(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False) + if (environ_cp.get('TF_NEED_ROCM') == '1' and + 'LD_LIBRARY_PATH' in environ_cp and + environ_cp.get('LD_LIBRARY_PATH') != '1'): + write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): @@ -1580,6 +1637,19 @@ def main(): write_to_bazelrc('build --config=download_clang') write_to_bazelrc('test --config=download_clang') + # SYCL / ROCm / CUDA are mutually exclusive. + # At most 1 GPU platform can be configured. + gpu_platform_count = 0 + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_ROCM') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_CUDA') == '1': + gpu_platform_count += 1 + if gpu_platform_count >= 2: + raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. ' + 'At most 1 GPU platform can be configured.') + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) if environ_cp.get('TF_NEED_MPI') == '1': set_mpi_home(environ_cp) @@ -1605,15 +1675,14 @@ def main(): # TODO(pcloudy): remove the following if check when they make sense on Windows if not is_windows(): print('Preconfigured Bazel build configs. You can use any of the below by ' - 'adding "--config=<>" to your build command. See tools/bazel.rc for ' - 'more details.') + 'adding "--config=<>" to your build command. See .bazelrc for more ' + 'details.') config_info_line('mkl', 'Build with MKL support.') config_info_line('monolithic', 'Config for mostly static monolithic build.') config_info_line('gdr', 'Build with GDR support.') config_info_line('verbs', 'Build with libverbs support.') - config_info_line('ngraph', 'Build with Intel ngraph support.') + config_info_line('ngraph', 'Build with Intel nGraph support.') if __name__ == '__main__': main() - diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 3610eea42a58ab74940e059736dd692713d001f1..9b62a504525d5377d4836e92bdf0e46f7fc3ef38 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -203,21 +203,6 @@ config_setting( visibility = ["//visibility:public"], ) -# TODO(jhseu): Enable on other platforms other than Linux. -config_setting( - name = "with_jemalloc_linux_x86_64", - define_values = {"with_jemalloc": "true"}, - values = {"cpu": "k8"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_jemalloc_linux_ppc64le", - define_values = {"with_jemalloc": "true"}, - values = {"cpu": "ppc"}, - visibility = ["//visibility:public"], -) - config_setting( name = "with_default_optimizations", define_values = {"with_default_optimizations": "true"}, @@ -225,56 +210,8 @@ config_setting( ) config_setting( - name = "with_gcp_support", - define_values = {"with_gcp_support": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support", - define_values = {"with_hdfs_support": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support", - define_values = {"with_aws_support": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_kafka_support", - define_values = {"with_kafka_support": "true"}, - visibility = ["//visibility:public"], -) - -# Crosses between platforms and file system libraries not supported on those -# platforms due to limitations in nested select() statements. -config_setting( - name = "with_gcp_support_windows_override", - define_values = {"with_gcp_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_windows_override", - define_values = {"with_hdfs_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_windows_override", - define_values = {"with_aws_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_kafka_support_windows_override", - define_values = {"with_kafka_support": "true"}, - values = {"cpu": "x64_windows"}, + name = "with_ignite_support", + define_values = {"with_ignite_support": "true"}, visibility = ["//visibility:public"], ) @@ -285,48 +222,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "with_gcp_support_android_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_android_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_android_override", - define_values = {"with_aws_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_gcp_support_ios_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_ios_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_ios_override", - define_values = {"with_aws_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - config_setting( name = "with_xla_support", define_values = {"with_xla_support": "true"}, @@ -355,30 +250,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "with_jemalloc_linux_x86_64_dynamic", - define_values = { - "with_jemalloc": "true", - "framework_shared_object": "true", - }, - values = { - "cpu": "k8", - }, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_jemalloc_linux_ppc64le_dynamic", - define_values = { - "with_jemalloc": "true", - "framework_shared_object": "true", - }, - values = { - "cpu": "ppc", - }, - visibility = ["//visibility:public"], -) - config_setting( name = "using_cuda_clang", define_values = { diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index f316e4ba6735213ba2fbbc1f8c019ad235c0df1f..d4b78138e93624a7e41e917f8210281b500661bc 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -8738,35 +8738,7 @@ void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) { TF_DeleteStatus(status); } -TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) { - // Intentionally LOG into INFO below for ease of debugging. - VLOG(1) << "TFE_RunConstOp called"; - - auto* status = TF_NewStatus(); - auto* op = TFE_NewOp(ctx, "Const", status); - CheckOk(status); - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - - auto* tensor = - TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0, - TF_DataTypeSize(TF_FLOAT) * 1); - auto* ptr = reinterpret_cast(TF_TensorData(tensor)); - *reinterpret_cast(ptr) = 17.0; - - TFE_OpSetAttrTensor(op, "value", tensor, status); - CheckOk(status); - TF_DeleteTensor(tensor); - VLOG(1) << "New op created"; - - TFE_TensorHandle* retval; - int num_retvals = 1; - TFE_Execute(op, &retval, &num_retvals, status); - CheckOk(status); - CHECK_EQ(num_retvals, 1); - VLOG(1) << "Op executed"; - - TFE_DeleteOp(op); - TF_DeleteStatus(status); - - return retval; +TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status, + const char* errMsg) { + status->status = tensorflow::errors::Internal(errMsg); } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 950ad9aeed6f883fa22c2673fa8aa92839cd0fbc..d98d532e32e891e21f5b7ba360c74c3256fb1947 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -180,10 +180,8 @@ TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor( TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString( TFE_TensorHandle* handle); -// Returns a const scalar tensor. -// Caller owns both the input and the output tensor handles. -// TODO: Remove this API with hard-coded tensor computation. -TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx); +TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status, + const char* errMsg); #ifdef __cplusplus } /* end extern "C" */ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 0bf3d9542b72ecff916986ab809e8793b796d14c..3554ec0bf3202b54bfc38d67e51b89df19832302 100755 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -578,6 +578,14 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, op->operation.MutableAttrs()->Set(attr_name, attr_value); } +void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name, + const char* data, size_t length) { + tensorflow::AttrValue attr_value; + tensorflow::NameAttrList* func = attr_value.mutable_func(); + func->set_name(data, length); + op->operation.MutableAttrs()->Set(attr_name, attr_value); +} + void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor, TF_Status* status) { tensorflow::Tensor t; diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 6323f8a053197bb7069acf2d43214fb78c36f436..b2454d872207e26feb3764671474a5d87c01f84d 100755 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -313,6 +313,9 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, const TFE_Op* value); +TF_CAPI_EXPORT void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name, + const char* data, size_t length); + TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor, diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index 5607c9dcb0bbec72b2f86def3dd4e6590d73197b..008f088c2dcdd7d9114103516a4702e47a55c6de 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -99,8 +99,6 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { TFE_OpAddInput(op, b, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteStatus(status); - TFE_OpSetAttrBool(op, "transpose_a", 0); - TFE_OpSetAttrBool(op, "transpose_b", 0); TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a)); return op; diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 41b5b8ff36e16100e349cb909dc79d90fa4866b0..5ba55a203ff70cc64c07e96b5a869a1f11c9334e 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -130,7 +130,7 @@ class GradientTape { const string& op_type, std::vector& output_tensors, gtl::ArraySlice input_tensor_id, gtl::ArraySlice input_dtypes, - BackwardFunction* backward_function, + const std::function& backward_function_getter, const std::function& backward_function_deleter); void DeleteTrace(int64 tensor_id); @@ -206,10 +206,9 @@ void GradientTape::RecordOperation( const string& op_type, std::vector& output_tensors, gtl::ArraySlice input_tensor_id, gtl::ArraySlice input_dtypes, - BackwardFunction* backward_function, + const std::function& backward_function_getter, const std::function& backward_function_deleter) { if (!ShouldRecord(input_tensor_id, input_dtypes)) { - backward_function_deleter(backward_function); return; } std::vector ids; @@ -229,7 +228,7 @@ void GradientTape::RecordOperation( tensors.push_back(o); } op_tape_[op_id] = OpTapeEntry{ - op_type, std::move(tensors), ids, backward_function, + op_type, std::move(tensors), std::move(ids), backward_function_getter(), backward_function_deleter}; } diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index b587e63227708427e7fae47f8f4a7b524d963ed9..9d2208d84d7d0e96dc5b314f12a250395effdd73 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -411,6 +411,7 @@ tf_cc_test( srcs = ["gradients/nn_grad_test.cc"], deps = [ ":cc_ops", + ":cc_ops_internal", ":grad_op_registry", ":grad_testutil", ":gradient_checker", diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index a32d1b1eb50fc715084f5ee663a732770db1883c..39593370d1c243e84dc5b6091724d1d404c102b0 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -853,11 +853,7 @@ void OpInfo::WriteClassDecl(WritableFile* h) const { } } - strings::StrAppend(&class_decl, "\n"); - - if (output_types.empty()) { - strings::StrAppend(&class_decl, " Operation operation;\n"); - } + strings::StrAppend(&class_decl, "\n Operation operation;\n"); for (int i = 0; i < output_types.size(); ++i) { strings::StrAppend(&class_decl, " ", output_types[i], " ", output_names[i], ";\n"); @@ -878,9 +874,11 @@ void OpInfo::GetOutput(string* out) const { string return_on_error = strings::StrCat("if (!", scope_str, ".ok()) return;"); + strings::StrAppend(out, " this->operation = Operation(ret);\n"); + // No outputs. if (graph_op_def.output_arg_size() == 0) { - strings::StrAppend(out, " this->operation = Operation(ret);\n return;\n"); + strings::StrAppend(out, " return;\n"); return; } if (graph_op_def.output_arg_size() == 1) { diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc index 7f6ac4cae78d8d6e118837fce9ae5270336cdc89..6abc9e268e3ac97379954a34017ddffa010db67f 100644 --- a/tensorflow/cc/framework/scope.cc +++ b/tensorflow/cc/framework/scope.cc @@ -62,7 +62,7 @@ Scope::Impl::Impl(const std::shared_ptr& graph, refiner_(refiner), scope_used_(nullptr), colocation_constraints_(), - disable_shape_inference_(false) {} + disable_shape_inference_(refiner_ == nullptr) {} Scope Scope::NewRootScope() { Graph* graph = new Graph(OpRegistry::Global()); @@ -94,6 +94,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -110,6 +111,7 @@ Scope::Impl::Impl(const Scope& other, Tags::OpName, const string& name, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -132,6 +134,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ControlDeps, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -163,6 +166,7 @@ Scope::Impl::Impl(const Scope& other, Tags::SingleUseScope, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -178,6 +182,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ExitOnError) exit_on_error_(true), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -194,6 +199,7 @@ Scope::Impl::Impl(const Scope& other, Tags::KernelLabel, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(kernel_label), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} @@ -210,12 +216,30 @@ Scope::Impl::Impl(const Scope& other, Tags::Colocate, exit_on_error_(other.impl()->exit_on_error_), kernel_label_(other.impl()->kernel_label_), device_(other.impl()->device_), + assigned_device_(other.impl()->assigned_device_), colocation_constraints_( clear_colocations ? std::unordered_set() : other.impl()->GetColocationConstraints(colocate_with_op)), disable_shape_inference_(other.impl()->disable_shape_inference_) {} +Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice, + const string& assigned_device) + : graph_(other.impl()->graph_), + status_(other.impl()->status_), + name_map_(other.impl()->name_map_), + refiner_(other.impl()->refiner_), + scope_used_(other.impl()->scope_used_), + control_deps_(other.impl()->control_deps_), + name_(other.impl()->name_), + op_name_(other.impl()->op_name_), + exit_on_error_(other.impl()->exit_on_error_), + kernel_label_(other.impl()->kernel_label_), + device_(other.impl()->device_), + assigned_device_(assigned_device), + colocation_constraints_(other.impl()->colocation_constraints_), + disable_shape_inference_(other.impl()->disable_shape_inference_) {} + std::unordered_set Scope::Impl::GetColocationConstraints( const Operation& colocate_with_op) const { std::unordered_set current_constraints(colocation_constraints_); @@ -299,6 +323,9 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const { if (!impl()->device_.empty()) { builder->Device(impl()->device_); } + if (!impl()->assigned_device_.empty()) { + builder->AssignedDevice(impl()->assigned_device_); + } } string Scope::Impl::GetUniqueName(const string& prefix, @@ -394,6 +421,10 @@ Scope Scope::WithDevice(const string& device) const { return Scope(new Impl(*this, Impl::Tags::Device(), device)); } +Scope Scope::WithAssignedDevice(const string& assigned_device) const { + return Scope(new Impl(*this, Impl::Tags::AssignedDevice(), assigned_device)); +} + Scope Scope::ColocateWith(const Operation& op) const { return Scope(new Impl(*this, Impl::Tags::Colocate(), op, /* clear_colocations */ false)); diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h index 30c32bd44b0f22d6b29dd3836d431807d0216818..e307d8989b6647dfac8d2691ed2171c86b7f3a7c 100644 --- a/tensorflow/cc/framework/scope.h +++ b/tensorflow/cc/framework/scope.h @@ -133,6 +133,10 @@ class Scope { /// the device field set to 'device'. Scope WithDevice(const string& device) const; + /// Returns a new scope. All ops created within the returned scope will have + /// their assigned device set to `assigned_device`. + Scope WithAssignedDevice(const string& assigned_device) const; + /// Return a new scope. All ops created within the returned scope will be /// co-located on the device where op is placed. /// NOTE: This function is intended to be use internal libraries only for diff --git a/tensorflow/cc/framework/scope_internal.h b/tensorflow/cc/framework/scope_internal.h index 58adaef2e942a7fa6b0ce8d5534ac3e2fd380580..514e02e84146b6d95147d83182e5d9a07509cfa1 100644 --- a/tensorflow/cc/framework/scope_internal.h +++ b/tensorflow/cc/framework/scope_internal.h @@ -26,6 +26,8 @@ class ShapeRefiner; // graph, status, name_map, and refiner. // This is intended to enable the C API (which are used by other language // bindings) to create a Scope and access C++ functionality (i.e. gradients). +// +// Shape inference is disabled if `refiner` is nullptr. Scope NewInternalScope(Graph* graph, Status* status, ShapeRefiner* refiner); class Scope::Impl { @@ -58,6 +60,7 @@ class Scope::Impl { enum class ExitOnError; enum class KernelLabel; enum class Colocate; + enum class AssignedDevice; }; Impl(Graph* graph, Status* status, NameMap* name_map, ShapeRefiner* refiner, @@ -74,6 +77,7 @@ class Scope::Impl { Impl(const Scope& other, Tags::KernelLabel, const string& kernel_label); Impl(const Scope& other, Tags::Colocate, const Operation& colocate_with_op, bool clear_colocations); + Impl(const Scope& other, Tags::AssignedDevice, const string& assigned_device); std::unordered_set GetColocationConstraints( const Operation& colocate_with_op) const; @@ -107,6 +111,7 @@ class Scope::Impl { const bool exit_on_error_ = false; const string kernel_label_ = ""; const string device_ = ""; + const string assigned_device_ = ""; const std::unordered_set colocation_constraints_; // If true, Scope::DoShapeInference() always returns Status:OK(). diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 588e96cb196189780037f66266484962ba0385e4..2a32a2ed6f7862a29f4ce3d1aba5fdbc86adc670 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -143,6 +143,33 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper); +Status LeakyReluGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); + +Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index aa72cf7ba2a958f54d50b59f0edaefb27edf0e86..f5a09e09dcda3e06c71d44d5fa5a1b121a9ade58 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/cc/framework/gradient_checker.h" #include "tensorflow/cc/framework/testutil.h" #include "tensorflow/cc/gradients/grad_testutil.h" +#include "tensorflow/cc/ops/nn_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -160,6 +161,32 @@ TEST_F(NNGradTest, Relu6Grad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = ops::internal::LeakyRelu(scope_, x); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + RunTest(x, x_init_value, y, shape); +} + +TEST_F(NNGradTest, LeakyReluGradGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2}); + Tensor features = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + auto y = ops::internal::LeakyReluGrad(scope_, x, features); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py index de135d7a2346cdda13a8e35315929b17fa1ccbc1..64b861a73091642b03573543a5c55618bf33915d 100644 --- a/tensorflow/compiler/aot/tests/make_test_graphs.py +++ b/tensorflow/compiler/aot/tests/make_test_graphs.py @@ -47,7 +47,7 @@ def tfadd(_): def tfadd_with_ckpt(out_dir): x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = variables.Variable(constant_op.constant([0]), name='y_saved') + y = variables.VariableV1(constant_op.constant([0]), name='y_saved') math_ops.add(x, y, name='x_y_sum') init_op = variables.initialize_all_variables() @@ -62,7 +62,7 @@ def tfadd_with_ckpt(out_dir): def tfadd_with_ckpt_saver(out_dir): x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = variables.Variable(constant_op.constant([0]), name='y_saved') + y = variables.VariableV1(constant_op.constant([0]), name='y_saved') math_ops.add(x, y, name='x_y_sum') init_op = variables.initialize_all_variables() diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 4e184729efeb4a7f11810afe2f1c48bb75c33e4a..661b444a42eefadf52739d84483e8e26c07fadf5 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -258,6 +258,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:variable_ops", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -323,6 +324,8 @@ cc_library( "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -383,12 +386,16 @@ cc_library( ":shape_inference_helpers", ":union_find", ":xla_cluster_util", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:ops", + "//tensorflow/cc:scope_internal", "//tensorflow/compiler/jit/graphcycles", "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags", "//tensorflow/compiler/jit/ops:xla_ops", "//tensorflow/compiler/tf2xla:dump_graph", "//tensorflow/compiler/tf2xla:resource_operation_table", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/core:core_cpu", @@ -400,6 +407,8 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:bounds_check", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], @@ -471,6 +480,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -478,6 +488,7 @@ tf_cc_test( name = "compilation_passes_test", size = "small", srcs = [ + "build_xla_ops_pass_test.cc", "encapsulate_subgraphs_pass_test.cc", "encapsulate_xla_computations_pass_test.cc", "mark_for_compilation_pass_test.cc", @@ -486,6 +497,7 @@ tf_cc_test( deps = [ ":common", ":compilation_passes", + ":node_matchers", ":xla_cluster_util", ":xla_gpu_device", "//tensorflow/cc:cc_ops", @@ -507,6 +519,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/grappler/optimizers/data:graph_utils", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc index 13a518d0e8b97c920c5c720a34ab92abdf1908dd..5974696b7751d69eb27141173fdab14313925ee9 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc @@ -14,8 +14,12 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/jit/build_xla_ops_pass.h" +#include "absl/algorithm/container.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope_internal.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/function.h" @@ -31,139 +35,108 @@ limitations under the License. #include "tensorflow/core/public/version.h" namespace tensorflow { - -static Status BuildXlaCompileNode( - const string& nodename, const string& function_name, - const AttrValueMap& function_attr, const string& device_name, - const DataTypeVector& constant_dtypes, int num_resources, - const DataTypeVector& arg_dtypes, Graph* graph, Node** node) { - NodeDef def; - def.set_name(graph->NewName(nodename)); - def.set_op("_XlaCompile"); - def.set_device(device_name); - AddNodeAttr("Tconstants", constant_dtypes, &def); - AddNodeAttr("Targs", arg_dtypes, &def); - AddNodeAttr("Nresources", num_resources, &def); - NameAttrList function; - function.set_name(function_name); - *function.mutable_attr() = function_attr; - AddNodeAttr("function", function, &def); - - Status status; - *node = graph->AddNode(def, &status); - return status; +namespace { +void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) { + std::vector out_edges(old_node->out_edges().begin(), + old_node->out_edges().end()); + for (const Edge* edge : out_edges) { + // TODO(sanjoy): This does not update NodeDef inputs. To be able to update + // NodeDef inputs we first need to fix encapsulate_subgraphs_pass to fix up + // the NodeDef inputs to the function call nodes. + g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input()); + g->RemoveEdge(edge); + } } -static Status BuildXlaRunNode(const string& nodename, const string& device_name, - const DataTypeVector& arg_dtypes, - const DataTypeVector& result_dtypes, Graph* graph, - Node** node) { - NodeDef def; - def.set_name(graph->NewName(nodename)); - def.set_op("_XlaRun"); - def.set_device(device_name); - AddNodeAttr("Targs", arg_dtypes, &def); - AddNodeAttr("Tresults", result_dtypes, &def); +struct XlaClusterInfo { + std::vector constant_inputs; + std::vector non_constant_inputs; + std::vector resource_inputs; + NameAttrList function; +}; - Status status; - *node = graph->AddNode(def, &status); - return status; +Output IncomingEdgeAsOutput(const Edge* e) { + return Output(e->src(), e->src_output()); } -static Status GetXlaAttrs(Node* node, int* num_constant_args, - int* num_resource_args, DataTypeVector* const_dtypes, - DataTypeVector* arg_dtypes) { +Status GetXlaClusterInfo(Node* n, XlaClusterInfo* result) { + int num_constant_inputs, num_resource_inputs; TF_RETURN_IF_ERROR( - GetNodeAttr(node->attrs(), kXlaNumConstantArgsAttr, num_constant_args)); + GetNodeAttr(n->attrs(), kXlaNumConstantArgsAttr, &num_constant_inputs)); TF_RETURN_IF_ERROR( - GetNodeAttr(node->attrs(), kXlaNumResourceArgsAttr, num_resource_args)); + GetNodeAttr(n->attrs(), kXlaNumResourceArgsAttr, &num_resource_inputs)); - if (*num_constant_args < 0 || *num_resource_args < 0 || - *num_constant_args + *num_resource_args > node->num_inputs()) { + if (num_constant_inputs < 0 || num_resource_inputs < 0 || + num_constant_inputs + num_resource_inputs > n->num_inputs()) { return errors::InvalidArgument( "Invalid number of constant/resource arguments to XLA kernel."); } - const int num_nonconst_args = - node->num_inputs() - *num_constant_args - *num_resource_args; - - const DataTypeVector& input_types = node->input_types(); - std::copy(input_types.begin(), input_types.begin() + *num_constant_args, - std::back_inserter(*const_dtypes)); - std::copy(input_types.begin() + *num_constant_args, - input_types.begin() + *num_constant_args + num_nonconst_args, - std::back_inserter(*arg_dtypes)); - return Status::OK(); -} - -static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node, - int prefix_to_ignore) { - for (const Edge* edge : old_node->in_edges()) { - if (edge->IsControlEdge()) { - g->AddControlEdge(edge->src(), new_node); - } else if (edge->dst_input() >= prefix_to_ignore) { - g->AddEdge(edge->src(), edge->src_output(), new_node, - edge->dst_input() - prefix_to_ignore); - } - } -} - -static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) { - std::vector out_edges(old_node->out_edges().begin(), - old_node->out_edges().end()); - for (const Edge* edge : out_edges) { - Node* dst = edge->dst(); - int src_output = edge->src_output(); - int dst_input = edge->dst_input(); - g->RemoveEdge(edge); + int num_non_constant_inputs = + n->num_inputs() - num_constant_inputs - num_resource_inputs; - if (edge->IsControlEdge()) { - g->AddControlEdge(new_node, dst); - } else { - g->AddEdge(new_node, src_output, dst, dst_input); - } - } -} + std::vector input_edges_vector; + TF_RETURN_IF_ERROR(n->input_edges(&input_edges_vector)); + absl::Span input_edges(input_edges_vector); -static Status ReplaceNodeWithXlaCompileAndRun(Graph* g, Node* n) { - int num_constant_args, num_resource_args; - DataTypeVector const_dtypes; - DataTypeVector arg_dtypes; + absl::c_transform(input_edges.subspan(0, num_constant_inputs), + std::back_inserter(result->constant_inputs), + IncomingEdgeAsOutput); - TF_RETURN_IF_ERROR(GetXlaAttrs(n, &num_constant_args, &num_resource_args, - &const_dtypes, &arg_dtypes)); + absl::c_transform( + input_edges.subspan(num_constant_inputs, num_non_constant_inputs), + std::back_inserter(result->non_constant_inputs), IncomingEdgeAsOutput); - Node *compile_node, *run_node; + absl::c_transform( + input_edges.subspan(num_constant_inputs + num_non_constant_inputs, + num_resource_inputs), + std::back_inserter(result->resource_inputs), IncomingEdgeAsOutput); - TF_RETURN_IF_ERROR(BuildXlaCompileNode( - n->name(), n->type_string(), n->def().attr(), n->requested_device(), - const_dtypes, num_resource_args, arg_dtypes, g, &compile_node)); + result->function.set_name(n->type_string()); + *result->function.mutable_attr() = n->def().attr(); + return Status::OK(); +} - DataTypeVector arg_dtypes_with_resources = arg_dtypes; - for (int i = 0; i < num_resource_args; i++) { - arg_dtypes_with_resources.push_back(DT_RESOURCE); +Status CopyIncomingControlEdges(Graph* g, Node* from, Node* to) { + for (const Edge* e : from->in_edges()) { + if (e->IsControlEdge()) { + g->AddControlEdge(e->src(), to); + } } - TF_RETURN_IF_ERROR(BuildXlaRunNode(n->name(), n->requested_device(), - arg_dtypes_with_resources, - n->output_types(), g, &run_node)); - - compile_node->set_assigned_device_name(n->assigned_device_name()); - run_node->set_assigned_device_name(n->assigned_device_name()); + return Status::OK(); +} - CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node, - /*prefix_to_ignore=*/0); - CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node, - /*prefix_to_ignore=*/num_constant_args); +Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) { + Status status; + Scope root = NewInternalScope(g, &status, /*refiner=*/nullptr) + .NewSubScope(n->name()) + .WithDevice(n->requested_device()) + .WithAssignedDevice(n->assigned_device_name()); + + XlaClusterInfo cluster_info; + TF_RETURN_IF_ERROR(GetXlaClusterInfo(n, &cluster_info)); + + ops::_XlaCompile xla_compile(root.WithOpName("xla_compile"), + /*constants=*/cluster_info.constant_inputs, + /*args=*/cluster_info.non_constant_inputs, + /*resources=*/cluster_info.resource_inputs, + cluster_info.function); + TF_RETURN_IF_ERROR( + CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node())); - // The compilation_key output. - g->AddEdge(compile_node, 0, run_node, n->num_inputs() - num_constant_args); + std::vector xla_run_args = cluster_info.non_constant_inputs; + absl::c_copy(cluster_info.resource_inputs, std::back_inserter(xla_run_args)); + ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args, + xla_compile.key, n->output_types()); - MoveOutgoingEdges(g, /*old_node=*/n, /*new_node=*/run_node); + MoveOutgoingEdges(g, /*old_node=*/n, + /*new_node=*/xla_run.operation.node()); g->RemoveNode(n); return Status::OK(); } +} // namespace Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) { Graph* graph = options.graph->get(); @@ -177,7 +150,7 @@ Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) { // Only compile nodes that are marked for compilation by the // compilation-marking pass (via 'attr_name'). if (IsXlaCompiledKernel(*n)) { - TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndRun(graph, n)); + TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(graph, n)); } } diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..9d56db7b6bc12938b2de9df02b97ff0ca6a42e54 --- /dev/null +++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc @@ -0,0 +1,138 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/build_xla_ops_pass.h" + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/jit/node_matchers.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +using ::tensorflow::testing::FindNodeByName; +using ::tensorflow::testing::matchers::CtrlDeps; +using ::tensorflow::testing::matchers::NodeWith; +using ::tensorflow::testing::matchers::Op; + +Status BuildXlaOps(const Scope& s, std::unique_ptr* result) { + auto graph = absl::make_unique(OpRegistry::Global()); + TF_RETURN_IF_ERROR(s.ToGraph(graph.get())); + + // Assign all nodes to the CPU device. + static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0"; + for (Node* n : graph->nodes()) { + if (n->assigned_device_name().empty()) { + n->set_assigned_device_name(kCpuDevice); + } + } + + GraphOptimizationPassOptions opt_options; + opt_options.graph = &graph; + BuildXlaOpsPass pass; + TF_RETURN_IF_ERROR(pass.Run(opt_options)); + *result = std::move(graph); + return Status::OK(); +} + +Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name, + const string& node_name, int num_constant_args, + int num_resource_args, Node** result) { + NodeDef call_node; + call_node.set_name(node_name); + call_node.set_op(callee_name); + AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node); + AddNodeAttr(kXlaNumConstantArgsAttr, num_constant_args, &call_node); + AddNodeAttr(kXlaNumResourceArgsAttr, num_resource_args, &call_node); + Status s; + *result = graph->AddNode(call_node, &s); + return s; +} + +Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name, + const string& node_name, Node** result) { + return MakeXlaCompiledKernel(graph, callee_name, node_name, + /*num_constant_args=*/0, /*num_resource_args=*/0, + result); +} + +Node* MakeWrite(const Scope& scope, const string& id) { + Output var_handle = + ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({})); + Output value_to_write = + ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f); + ops::AssignVariableOp assign_op(scope.WithOpName("Assignee" + id), var_handle, + value_to_write); + return assign_op.operation.node(); +} + +FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) { + FunctionDefLibrary flib_def; + FunctionDef func = FunctionDefHelper::Create( + /*function_name=*/name, /*in_def=*/{}, /*out_def=*/{"out: float"}, + /*attr_def*/ + {}, /*node_def=*/{FunctionDefHelper::Const("one", 1.0f)}, + /*ret_def=*/{{"out", "out:output:0"}}); + *flib_def.add_function() = std::move(func); + return flib_def; +} + +TEST(BuildXlaOps, ControlDepsPreserved) { + Scope root = Scope::NewRootScope().ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction("cluster_0"); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + Node* call; + TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call)); + Node* write_op = MakeWrite(root, "write"); + root.graph()->AddControlEdge(call, write_op); + + std::unique_ptr graph; + TF_ASSERT_OK(BuildXlaOps(root, &graph)); + + Node* write_op_new = FindNodeByName(graph.get(), write_op->name()); + ASSERT_NE(write_op_new, nullptr); + EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun"))))); +} + +TEST(BuildXlaOps, CleanFailureOnBogusAttr) { + Scope root = Scope::NewRootScope().ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction("cluster_0"); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + Node* call; + TF_ASSERT_OK( + MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", 100, 100, &call)); + Node* write_op = MakeWrite(root, "write"); + root.graph()->AddControlEdge(call, write_op); + + std::unique_ptr graph; + Status failure_status = BuildXlaOps(root, &graph); + ASSERT_FALSE(failure_status.ok()); + EXPECT_EQ(failure_status.code(), error::INVALID_ARGUMENT); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc index 9128b48da3fe9dd3d85d146e16c153c1b3bebf4c..b7ae7fbeb3912882368dc828e8d6fcd50735b04e 100644 --- a/tensorflow/compiler/jit/deadness_analysis.cc +++ b/tensorflow/compiler/jit/deadness_analysis.cc @@ -14,11 +14,14 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/jit/deadness_analysis.h" +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/jit/deadness_analysis_internal.h" +#include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/tensor_id.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/hash/hash.h" // ALGORITHM OVERVIEW @@ -296,7 +299,7 @@ class SymbolPredicate : public Predicate { template /*static*/ void Predicate::Visit(Predicate* p, const FunctionTy& func) { - gtl::FlatSet visited; + absl::flat_hash_set visited; std::vector stack; stack.push_back(p); @@ -383,6 +386,8 @@ class PredicateFactory { } Predicate* MakeAndOrImpl(absl::Span operands, bool is_and); + Predicate* MakeInternedAndOr(std::vector simplified_ops, + Predicate::Kind pred_kind); // Predicate instances are interned, meaning that there is only a single // instance of a Predicate object with a given content. This makes checking @@ -417,24 +422,53 @@ class PredicateFactory { } }; - gtl::FlatMap, - HashSignatureForAndOr> + absl::flat_hash_map, + HashSignatureForAndOr> interned_and_or_instances_; - gtl::FlatMap> + absl::flat_hash_map> interned_not_instances_; - gtl::FlatMap> + absl::flat_hash_map> interned_and_rec_instances_; - gtl::FlatMap, - HashSignatureForSymbol> + absl::flat_hash_map, + HashSignatureForSymbol> interned_symbol_instances_; }; +Predicate* PredicateFactory::MakeInternedAndOr( + std::vector simplified_ops, Predicate::Kind pred_kind) { + std::stable_sort( + simplified_ops.begin(), simplified_ops.end(), + [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); }); + + auto it = interned_and_or_instances_.find({pred_kind, simplified_ops}); + if (it != interned_and_or_instances_.end()) { + return it->second.get(); + } + + simplified_ops.shrink_to_fit(); + // NB! Because we'll use a non-owning reference to simplified_ops in the + // key for interned_and_or_instances_ we need to be careful to std::move() + // it all the way through. + absl::Span operands_slice = simplified_ops; + std::unique_ptr new_pred = + pred_kind == Predicate::Kind::kAnd + ? Make(std::move(simplified_ops)) + : Make(std::move(simplified_ops)); + + Predicate* new_pred_ptr = new_pred.get(); + interned_and_or_instances_.emplace( + SignatureForAndOr(pred_kind, operands_slice), std::move(new_pred)); + return new_pred_ptr; +} + // Common code to create AndPredicate or OrPredicate instances. Predicate* PredicateFactory::MakeAndOrImpl( absl::Span operands, bool is_and) { Predicate::Kind pred_kind = is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr; - gtl::FlatSet simplified_ops_set; + Predicate::Kind other_pred_kind = + is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd; + absl::flat_hash_set simplified_ops_set; std::vector simplified_ops; for (Predicate* op : operands) { // Simplify A&A => A and A|A => A. @@ -459,7 +493,7 @@ Predicate* PredicateFactory::MakeAndOrImpl( } // Simplify "A&~A=>False" and "A|~A=>True". - gtl::FlatSet negated_ops; + absl::flat_hash_set negated_ops; for (Predicate* op : simplified_ops) { if (op->kind() == Predicate::Kind::kNot) { negated_ops.insert(dynamic_cast(*op).operand()); @@ -472,30 +506,63 @@ Predicate* PredicateFactory::MakeAndOrImpl( } } - std::stable_sort( - simplified_ops.begin(), simplified_ops.end(), - [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); }); + // If all ops contain the same subop, then factor it out thanks to the + // distributive property. Such as: + // - (A & B) | (A & C) | (A & D) => A & (B | C | D) + // - (A | B) & (A | C) & (A | D) => A | (B & C & D) + // + // First find any predicates contained in all subops. + std::vector common_inner_operands; + absl::flat_hash_set common_inner_operands_set; + for (Predicate* op : simplified_ops) { + if (op->kind() != other_pred_kind) { + common_inner_operands.clear(); + break; + } - auto it = interned_and_or_instances_.find({pred_kind, simplified_ops}); - if (it == interned_and_or_instances_.end()) { - simplified_ops.shrink_to_fit(); - // NB! Because we'll use a non-owning reference to simplified_ops in the - // key for interned_and_or_instances_ we need to be careful to std::move() - // it all the way through. - absl::Span operands_slice = simplified_ops; - std::unique_ptr new_pred = - is_and ? Make(std::move(simplified_ops)) - : Make(std::move(simplified_ops)); + if (common_inner_operands.empty()) { + common_inner_operands.insert(common_inner_operands.end(), + op->GetOperands().begin(), + op->GetOperands().end()); + } else { + std::vector sub_ops_intersection; + common_inner_operands.clear(); + absl::c_copy_if(op->GetOperands(), + std::back_inserter(common_inner_operands), + [&](Predicate* sub_op) { + return common_inner_operands_set.count(sub_op) == 1; + }); + } + if (common_inner_operands.empty()) break; + common_inner_operands_set.clear(); + common_inner_operands_set.insert(common_inner_operands.begin(), + common_inner_operands.end()); + } - Predicate* new_pred_ptr = new_pred.get(); - CHECK(interned_and_or_instances_ - .emplace(SignatureForAndOr(pred_kind, operands_slice), - std::move(new_pred)) - .second); - return new_pred_ptr; - } else { - return it->second.get(); + if (common_inner_operands.empty()) { + return MakeInternedAndOr(std::move(simplified_ops), pred_kind); } + + // For all predicates that can be factored out, remove them and recreate the + // subops. + std::vector factored_ops; + for (Predicate* op : simplified_ops) { + std::vector new_sub_op_ops; + absl::c_copy_if(op->GetOperands(), std::back_inserter(new_sub_op_ops), + [&](Predicate* sub_op) { + return std::find(common_inner_operands.begin(), + common_inner_operands.end(), + sub_op) == common_inner_operands.end(); + }); + factored_ops.push_back(MakeAndOrImpl(new_sub_op_ops, !is_and)); + } + + Predicate* new_inner_op = MakeAndOrImpl(factored_ops, is_and); + std::vector outer_ops; + outer_ops.push_back(new_inner_op); + outer_ops.insert(outer_ops.end(), common_inner_operands.begin(), + common_inner_operands.end()); + return MakeAndOrImpl(outer_ops, !is_and); } class DeadnessAnalysisImpl : public DeadnessAnalysis { @@ -507,12 +574,14 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis { Status PopulateWithReversePostOrder(absl::Span rpo); bool HasInputsWithMismatchingDeadness(const Node& node) override; void Print() const override; - gtl::FlatMap PredicateMapAsString() const; + absl::flat_hash_map PredicateMapAsString() + const; private: enum class EdgeKind { kDataAndControl, kDataOnly, kControlOnly }; - std::vector GetIncomingPreds(Node* n, EdgeKind edge_kind); + Status GetInputPreds(Node* n, EdgeKind edge_kind, + std::vector* result); // Sets the predicate for output `output_idx` of `n` to `pred`. Sets the i'th // bit of `should_revisit` if `pred` is different from the current predicate @@ -549,7 +618,7 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis { Status HandleNode(Node* n, std::vector* should_revisit); const Graph& graph_; - gtl::FlatMap predicate_map_; + absl::flat_hash_map predicate_map_; PredicateFactory predicate_factory_; bool vlog_; }; @@ -558,9 +627,10 @@ TensorId InputEdgeToTensorId(const Edge* e) { return TensorId(e->src()->name(), e->src_output()); } -std::vector DeadnessAnalysisImpl::GetIncomingPreds( - Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind) { - std::vector incoming_preds; +Status DeadnessAnalysisImpl::GetInputPreds( + Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind, + std::vector* result) { + result->clear(); for (const Edge* in_edge : n->in_edges()) { bool should_process = edge_kind == EdgeKind::kDataAndControl || @@ -569,17 +639,27 @@ std::vector DeadnessAnalysisImpl::GetIncomingPreds( if (should_process) { auto it = predicate_map_.find(InputEdgeToTensorId(in_edge)); - CHECK(it != predicate_map_.end()) << n->name(); - incoming_preds.push_back(it->second); + if (it == predicate_map_.end()) { + GraphCycles graph_cycles; + TF_RETURN_IF_ERROR(CreateCycleDetectionGraph(&graph_, &graph_cycles)); + + // If we didn't return with an error above then the graph is probably + // fine and we have a bug in deadness analysis. + return errors::Internal("Could not find input ", in_edge->DebugString(), + " to ", n->name(), + " when visiting the graph in post-order. Most " + "likely indicates a bug in deadness analysis."); + } + result->push_back(it->second); } } - return incoming_preds; + return Status::OK(); } Status DeadnessAnalysisImpl::HandleSwitch(Node* n, std::vector* should_revisit) { - std::vector input_preds = - GetIncomingPreds(n, EdgeKind::kDataAndControl); + std::vector input_preds; + TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds)); const Edge* pred_edge; TF_RETURN_IF_ERROR(n->input_edge(1, &pred_edge)); Predicate* true_switch = predicate_factory_.MakeSymbolPredicate( @@ -608,17 +688,31 @@ Status DeadnessAnalysisImpl::HandleSwitch(Node* n, } namespace { -const Edge* FindUniqueBackedge(Node* merge) { +Status CreateMultipleNextIterationInputsError(Node* merge) { + std::vector backedges; + for (const Edge* backedge : merge->in_edges()) { + if (backedge->src()->IsNextIteration()) { + backedges.push_back(absl::StrCat(" ", SummarizeNode(*backedge->src()))); + } + } + return errors::InvalidArgument( + "Multiple NextIteration inputs to merge node ", SummarizeNode(*merge), + ": \n", absl::StrJoin(backedges, "\n"), + "\nMerge nodes can have at most one incoming NextIteration edge."); +} + +Status FindUniqueBackedge(Node* merge, const Edge** result) { + *result = nullptr; CHECK(merge->IsMerge()); - const Edge* result = nullptr; for (const Edge* e : merge->in_edges()) { if (e->src()->IsNextIteration()) { - CHECK_EQ(result, nullptr) - << "Multiple backedges to " << merge->DebugString(); - result = e; + if (*result != nullptr) { + return CreateMultipleNextIterationInputsError(merge); + } + *result = e; } } - return result; + return Status::OK(); } // If `backedge_predicate` is equal to `symbolic_predicate` & Step where Step @@ -697,9 +791,12 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n, return Status::OK(); } + std::vector input_preds; + TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataOnly, &input_preds)); + // We're visiting this merge for the first time and it is a acyclic merge. - Predicate* input_data_pred = predicate_factory_.MakeOrPredicate( - GetIncomingPreds(n, EdgeKind::kDataOnly)); + Predicate* input_data_pred = + predicate_factory_.MakeOrPredicate(input_preds); SetPredicate(n, {0, 1, Graph::kControlSlot}, input_data_pred, should_revisit); return Status::OK(); @@ -710,7 +807,9 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n, // of an unvisited backedge. Try to pattern match the predicate expression // for that backedge (which should be visited now) into an and recurrence // for the merge node. - if (const Edge* unique_backedge = FindUniqueBackedge(n)) { + const Edge* unique_backedge; + TF_RETURN_IF_ERROR(FindUniqueBackedge(n, &unique_backedge)); + if (unique_backedge) { if (Predicate* step = DeduceStepPredicate( &predicate_factory_, it->second, predicate_map_[InputEdgeToTensorId(unique_backedge)])) { @@ -741,8 +840,8 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n, std::vector* should_revisit) { // In addition to being alive or dead based on the inputs, a _Recv can also // acquire a dead signal from a _Send. - std::vector input_preds = - GetIncomingPreds(n, EdgeKind::kDataAndControl); + std::vector input_preds; + TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds)); input_preds.push_back(predicate_factory_.MakeSymbolPredicate( TensorId(n->name(), 0), /*must_be_true=*/false)); SetPredicate(n, {0, Graph::kControlSlot}, @@ -754,8 +853,9 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n, Status DeadnessAnalysisImpl::HandleGeneric(Node* n, std::vector* should_revisit) { // Generally nodes are alive iff all their inputs are alive. - Predicate* pred = predicate_factory_.MakeAndPredicate( - GetIncomingPreds(n, EdgeKind::kDataAndControl)); + std::vector input_preds; + TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds)); + Predicate* pred = predicate_factory_.MakeAndPredicate(input_preds); for (int output_idx = 0; output_idx < n->num_outputs(); output_idx++) { SetPredicate(n, output_idx, pred, should_revisit); } @@ -912,9 +1012,9 @@ DeadnessAnalysis::~DeadnessAnalysis() {} return Status::OK(); } -gtl::FlatMap +absl::flat_hash_map DeadnessAnalysisImpl::PredicateMapAsString() const { - gtl::FlatMap result; + absl::flat_hash_map result; std::vector tensor_ids; for (const auto& kv_pair : predicate_map_) { CHECK(result.insert({kv_pair.first, kv_pair.second->ToString()}).second); diff --git a/tensorflow/compiler/jit/deadness_analysis_internal.h b/tensorflow/compiler/jit/deadness_analysis_internal.h index 3df2679c629ce801fc6c9006415dcd27b40c078e..354782374ad070a3d19ddd68bfb986d5a8285e51 100644 --- a/tensorflow/compiler/jit/deadness_analysis_internal.h +++ b/tensorflow/compiler/jit/deadness_analysis_internal.h @@ -16,15 +16,15 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_ #define TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_ +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/graph/tensor_id.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace tensorflow { namespace deadness_analysis_internal { // Returns a map describing the predicate each Tensor was mapped to. For // testing purposes only. -using PredicateMapTy = gtl::FlatMap; +using PredicateMapTy = absl::flat_hash_map; Status ComputePredicates(const Graph& graph, PredicateMapTy* out_predicate_map); // Returns a map describing the predicate each Tensor was mapped to. For diff --git a/tensorflow/compiler/jit/deadness_analysis_test.cc b/tensorflow/compiler/jit/deadness_analysis_test.cc index 28a56044d5e3795fc3ecf5d1092491b87cb90f01..617e31488c7daeb714c0ff7056b786e4eaf7873f 100644 --- a/tensorflow/compiler/jit/deadness_analysis_test.cc +++ b/tensorflow/compiler/jit/deadness_analysis_test.cc @@ -384,10 +384,31 @@ TEST(DeadnessAnalysisTest, OrOfAnd) { EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add2.node())); } -TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) { - // This demonstrates one of the weaknesses in the current approach -- since we - // only do some basic simplifications we can't see that "(A|B)&C" == - // "(A&C)|(B&C)". +TEST(DeadnessAnalysisTest, AndOrDistributiveSimplified) { + // (*A | (~*A & ((~*B & ~*A) | (~*A & *B)))) == #true + Scope root = Scope::NewRootScope().ExitOnError(); + + ops::Switch sw_0 = CreateSwitch(root, "A"); + ops::Switch sw_1 = CreateSwitch(root, "B"); + Output add0 = + ops::Add(root.WithOpName("and0"), sw_0.output_false, sw_1.output_true); + Output add1 = + ops::Add(root.WithOpName("and1"), sw_0.output_false, sw_1.output_false); + ops::Merge or2(root.WithOpName("or2"), {add0, add1}); + Output add3 = + ops::Add(root.WithOpName("and3"), or2.output, sw_0.output_false); + ops::Merge or4(root.WithOpName("or4"), {add3, sw_0.output_true}); + + std::unique_ptr result; + TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result)); + + PredicateMapTy predicate_map; + TF_ASSERT_OK(ComputePredicates(*root.graph(), &predicate_map)); + EXPECT_EQ(predicate_map[ControlOutputFor(or4.output)], "#true"); +} + +TEST(DeadnessAnalysisTest, AndOrDistributive) { + // (A|B)&C == (A&C)|(B&C) Scope root = Scope::NewRootScope().ExitOnError(); ops::Switch sw_0 = CreateSwitch(root, "0"); @@ -408,7 +429,7 @@ TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) { std::unique_ptr result; TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result)); - EXPECT_TRUE(result->HasInputsWithMismatchingDeadness(*add2.node())); + EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add3.node())); } TEST(DeadnessAnalysisTest, Ternary) { diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index e0632ff7e48ccea99d469f62ec9d0a3fe8295024..da27f837e88fc3f57f865211929ec9cb1a1af779 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" @@ -44,7 +45,6 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_def_builder.h" #include "tensorflow/core/graph/tensor_id.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/public/session_options.h" @@ -78,7 +78,8 @@ void SortControlInputs(GraphDef* gdef) { namespace { bool AreAllParentsGuaranteedConst( - const Node& n, const gtl::FlatSet& runtime_const_nodes) { + const Node& n, + const absl::flat_hash_set& runtime_const_nodes) { if (n.type_string() == "GuaranteeConst") { // If the current node is itself a cast-to-const, no need // to look at the incoming edges. @@ -101,7 +102,7 @@ bool AreAllParentsGuaranteedConst( void MarkGuaranteedConstants( const Graph& graph, const std::vector>& src_arg_pairs) { - gtl::FlatSet guaranteed_const_nodes; + absl::flat_hash_set guaranteed_const_nodes; std::vector srcs; srcs.reserve(src_arg_pairs.size()); for (const auto& src_arg : src_arg_pairs) { @@ -748,6 +749,12 @@ Node* Encapsulator::Subgraph::MakeNodeImage(const Graph* graph_in, Node* node) { graph_->set_versions(graph_in->versions()); } + // TODO(b/116981129): Enhance how the device for the encapsulated subgraph is + // determined. In case of hard placement, ensure all the encapsulated nodes + // have the same requested device, which in turn will be the requested device + // for the entire encapsulated subgraph. In case of soft placement, use a + // deterministic approach to fill in the requested device. Handle co-location + // constraints similarly if they exist. if (device_.empty()) { device_ = node->assigned_device_name().empty() ? node->requested_device() @@ -1357,28 +1364,31 @@ void Encapsulator::Subgraph::GetOutsideCompilationSubgraphNames( Status Encapsulator::GetFunctionNameAttr( Node const* node, string* attr, string* outside_compilation_attr) const { - Status s = GetNodeAttr(node->attrs(), group_attribute_, attr); - if (s.code() == error::Code::NOT_FOUND) { - // Return empty attr if there's no group_attribute. - attr->clear(); - } else { - TF_RETURN_IF_ERROR(s); - } - bool has_group_attr = s.ok(); - s = GetNodeAttr(node->attrs(), outside_compilation_attribute_, - outside_compilation_attr); - if (s.code() == error::Code::NOT_FOUND) { - // Return empty attr if there's no outside_compilation attribute. - outside_compilation_attr->clear(); - } else { - TF_RETURN_IF_ERROR(s); - if (!has_group_attr) { - return errors::InvalidArgument( - "Node ", node->name(), " has ", outside_compilation_attribute_, - " attribute but no ", group_attribute_, " attribute."); + AttrSlice attrs = node->attrs(); + attr->clear(); + outside_compilation_attr->clear(); + bool found_group_attribute = false; + bool found_outside_compilation_attribute = false; + for (const auto& node_attr : attrs) { + if (node_attr.first == group_attribute_) { + TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string")); + *attr = node_attr.second.s(); + found_group_attribute = true; + } else if (node_attr.first == outside_compilation_attribute_) { + TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string")); + *outside_compilation_attr = node_attr.second.s(); + found_outside_compilation_attribute = true; } + if (found_group_attribute && found_outside_compilation_attribute) break; + } + + if (found_outside_compilation_attribute && !found_group_attribute) { + return errors::InvalidArgument( + "Node ", node->name(), " has ", outside_compilation_attribute_, + " attribute but no ", group_attribute_, " attribute."); + } else { + return Status::OK(); } - return Status::OK(); } bool IsInSubgraph(const string& func_id, const string& outside_compilation_id) { diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc index 97ef8cd3cb3fba54259fc413e0a3d3e75a89c431..2ce6fa73fc448ca83fa392aa909cb385453eb8b6 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc @@ -15,13 +15,13 @@ limitations under the License. #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -62,7 +62,7 @@ DataType EdgeType(const Edge* edge) { } // Adds the control inputs of `node` to `*deps`. -void AddControlInputs(const Node& node, gtl::FlatSet* deps) { +void AddControlInputs(const Node& node, absl::flat_hash_set* deps) { for (const Edge* edge : node.in_edges()) { if (edge->IsControlEdge()) { deps->insert(edge->src()); @@ -71,7 +71,7 @@ void AddControlInputs(const Node& node, gtl::FlatSet* deps) { } // Adds the control outputs of `node` to `*deps`. -void AddControlOutputs(const Node& node, gtl::FlatSet* deps) { +void AddControlOutputs(const Node& node, absl::flat_hash_set* deps) { for (const Edge* edge : node.out_edges()) { if (edge->IsControlEdge()) { deps->insert(edge->dst()); @@ -246,7 +246,7 @@ Status RewriteSubgraph(const std::vector& arg_source_tensors, // Data and control inputs to the new XlaLaunch node. std::vector> data_inputs(num_inputs); - gtl::FlatSet control_inputs; + absl::flat_hash_set control_inputs; DataTypeVector arg_types(num_args); AddControlInputs(*launch, &control_inputs); @@ -266,7 +266,7 @@ Status RewriteSubgraph(const std::vector& arg_source_tensors, // Outputs. const int num_outputs = launch->output_types().size(); - gtl::FlatSet control_outputs; + absl::flat_hash_set control_outputs; std::vector>> data_outputs(num_outputs); DataTypeVector output_types(num_outputs); @@ -297,7 +297,9 @@ Status RewriteSubgraph(const std::vector& arg_source_tensors, // Target the XLA CPU/GPU backends. VLOG(2) << "Replacing with XlaLaunch"; + VLOG(2) << "Device is " << launch->requested_device(); def.set_op("XlaLaunch"); + def.set_device(launch->requested_device()); AddNodeAttr("Tconstants", DataTypeVector{}, &def); AddNodeAttr("Targs", arg_types, &def); AddNodeAttr("Nresources", num_variables, &def); diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc index f643fb0cfe136caba42272d72f3972ec63a94bf3..22531a4acea3f130175c7cb2e03fcb7570926094 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" -#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h" +#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h" #include "tensorflow/compiler/tf2xla/test_util.h" #include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -55,6 +55,7 @@ static std::unique_ptr MakeOuterGraph( .Input(u.node()->name(), 0, DT_RESOURCE) .Input(v.node()->name(), 0, DT_RESOURCE) .Input(w.node()->name(), 0, DT_RESOURCE) + .Device("/gpu:0") .Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0") .Attr("_variable_start_index", 4) .Finalize(&def)); @@ -107,10 +108,11 @@ static std::unique_ptr MakeBodyGraph() { auto add_attrs = [](Node* node) { node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0"); + node->set_requested_device("/gpu:0"); }; auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1); - + add_attrs(b_identity.node()); auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT); add_attrs(read_u.node()); auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT); @@ -215,6 +217,7 @@ TEST(EncapsulateXlaComputations, Encapsulate) { auto add_attrs = [](Node* node) { node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0"); + node->set_requested_device("/gpu:0"); }; auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b); @@ -317,8 +320,8 @@ TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) { NameAttrList function; function.set_name("launch0"); auto launch = ops::XlaLaunch( - scope.WithOpName("launch0"), std::initializer_list{}, - std::initializer_list{a, b, c, d}, + scope.WithOpName("launch0").WithDevice("/gpu:0"), + std::initializer_list{}, std::initializer_list{a, b, c, d}, std::initializer_list{u, v, w}, DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function); diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD index 0839f1cb3dafd9af533631c73a37a1df7172ac0b..26cb3af9d69ba1877c67853cde28d2477d394efc 100644 --- a/tensorflow/compiler/jit/kernels/BUILD +++ b/tensorflow/compiler/jit/kernels/BUILD @@ -26,6 +26,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/kernels:variable_ops", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", ], alwayslink = 1, diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index a85006eb0378688dffd634c13a392b02e379c7f2..accc86a86d9d3eca741994ee502bd7580ce49b2e 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/jit/kernels/xla_ops.h" +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/tf2xla/shape_util.h" @@ -163,7 +164,7 @@ class XlaExecutableClosureStore { private: mutex mutex_; int64 key_counter_ GUARDED_BY(mutex_); - gtl::FlatMap closures_ GUARDED_BY(mutex_); + absl::flat_hash_map closures_ GUARDED_BY(mutex_); TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore); }; @@ -276,7 +277,7 @@ static Status CompileToLocalExecutable( compile_options.always_return_tuple = false; return cache->Compile(options, function, constant_args, *variables, ctx, - kernel, executable, compile_options); + compile_options, kernel, executable); } void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index 133d9823609efa688d8a8f7a066ccbfefc75c15b..4f0c370e65159c89c91ea58733f20f852d9acc99 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/jit/deadness_analysis.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" @@ -42,7 +43,6 @@ limitations under the License. #include "tensorflow/core/graph/control_flow.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/public/version.h" @@ -371,7 +371,7 @@ bool IsXlaFusable(const NodeDef& node) { Status FindCompilationCandidates( const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env, const std::function& is_compilable_fn, - OrderedNodeSet* candidates, gtl::FlatSet* isolated_nodes) { + OrderedNodeSet* candidates, absl::flat_hash_set* isolated_nodes) { OptimizerOptions opts; std::unique_ptr pflr( new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION, @@ -849,7 +849,7 @@ Status MarkForCompilationPass::RunImpl( Graph* graph = options.graph->get(); OrderedNodeSet compilation_candidates; - gtl::FlatSet isolated_nodes; + absl::flat_hash_set isolated_nodes; TF_RETURN_IF_ERROR(FindCompilationCandidates( *graph, options.flib_def, (options.session_options != nullptr) ? options.session_options->env diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 4f9145b4799d5fbaaae2bafd47edec7fa6e463a3..2a80c745e3fcebf97bcccb03551feb3d6fb9f831 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h" +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/strings/match.h" #include "tensorflow/cc/framework/ops.h" @@ -61,10 +62,10 @@ std::unordered_map GetClusters(const Graph& graph) { return ids; } -gtl::FlatMap> GetClusterSets( +absl::flat_hash_map> GetClusterSets( const Graph& g, std::vector* cluster_names = nullptr) { CHECK(cluster_names == nullptr || cluster_names->empty()); - gtl::FlatMap> cluster_sets; + absl::flat_hash_map> cluster_sets; for (const auto& p : GetClusters(g)) { cluster_sets[p.second].push_back(p.first); } @@ -566,7 +567,7 @@ TEST(XlaCompilationTest, ResourcesClusteringAllowed) { std::unique_ptr graph(new Graph(OpRegistry::Global())); TF_EXPECT_OK(root.ToGraph(graph.get())); TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph)); - gtl::FlatMap> cluster_sets = + absl::flat_hash_map> cluster_sets = GetClusterSets(*graph); ASSERT_EQ(cluster_sets.size(), 1); std::vector expected_clustered_nodes = {"AssignmentW", "ReadR", @@ -586,7 +587,7 @@ TEST(XlaCompilationTest, ResourcesClusteringDisallowed) { std::unique_ptr graph(new Graph(OpRegistry::Global())); TF_EXPECT_OK(root.ToGraph(graph.get())); TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph)); - gtl::FlatMap> cluster_sets = + absl::flat_hash_map> cluster_sets = GetClusterSets(*graph); ASSERT_EQ(cluster_sets.size(), 1); std::vector expected_clustered_nodes = {"AssignmentW", @@ -616,7 +617,7 @@ TEST(XlaCompilationTest, ChainOfOps) { TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph)); std::vector cluster_names; - gtl::FlatMap> cluster_sets = + absl::flat_hash_map> cluster_sets = GetClusterSets(*graph, &cluster_names); ASSERT_EQ(cluster_sets.size(), 2); diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc index 10fc9e85d927ffe2416d6d9e6dfd24b286fbf1a0..b1f9e9088f391cb8813d2c82395ffcc0b2081cae 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass.cc @@ -15,17 +15,18 @@ limitations under the License. #include "tensorflow/compiler/jit/partially_decluster_pass.h" #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/memory_types.h" #include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace tensorflow { namespace { -Status FindNodesToDecluster(const Graph& graph, gtl::FlatSet* result, +Status FindNodesToDecluster(const Graph& graph, + absl::flat_hash_set* result, absl::Span post_order) { // Find nodes that have at least one user outside their cluster that expects // hostmem output. These nodes should be cloned to outside the cluster to @@ -171,7 +172,7 @@ Status PartiallyDeclusterToRemoveDeviceToHostCopies(Graph* graph) { GetPostOrder(*graph, &post_order, /*stable_comparator=*/NodeComparatorName(), /*edge_filter=*/NotBackedge); - gtl::FlatSet nodes_to_partially_decluster; + absl::flat_hash_set nodes_to_partially_decluster; TF_RETURN_IF_ERROR( FindNodesToDecluster(*graph, &nodes_to_partially_decluster, post_order)); diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc index 56e35c0059124015266ffabdf583c8724c8e0908..e039d46ec863920eb7deb5bc20525fdab866415c 100644 --- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc +++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc @@ -82,6 +82,7 @@ limitations under the License. #include "tensorflow/compiler/jit/resource_operation_safety_analysis.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_join.h" #include "absl/types/optional.h" @@ -89,8 +90,6 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/tensor_id.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/util/ptr_util.h" @@ -177,7 +176,7 @@ string ResourceOpToString(const ResourceOp& resource_op) { // point. class ResourceOpSet { private: - using Impl = gtl::FlatSet; + using Impl = absl::flat_hash_set; public: ResourceOpSet() = default; diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 3aa9e9c7ed2dd3b7480f40e868c6b07192b68294..0471995015bb080016b523305c90a3e42163a039 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -228,37 +228,38 @@ Status XlaCompilationCache::Compile( const XlaCompiler::Options& options, const NameAttrList& function, const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options) { + const XlaCompiler::CompileOptions& compile_options, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable) { return CompileImpl(options, function, constant_args, variable_args, ctx, - compilation_result, executable, compile_options, false); + compile_options, /*compile_single_op=*/false, + out_compilation_result, out_executable); } Status XlaCompilationCache::CompileSingleOp( const XlaCompiler::Options& options, const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options) { + const XlaCompiler::CompileOptions& compile_options, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable) { const NodeDef& def = ctx->op_kernel().def(); NameAttrList name; name.set_name(def.op()); *name.mutable_attr() = def.attr(); - return CompileImpl(options, name, constant_args, variable_args, ctx, - compilation_result, executable, compile_options, true); + return CompileImpl( + options, name, constant_args, variable_args, ctx, compile_options, + /*compile_single_op=*/true, out_compilation_result, out_executable); } Status XlaCompilationCache::CompileImpl( const XlaCompiler::Options& options, const NameAttrList& function, const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options, - bool compile_single_op) { - CHECK_NE(executable, nullptr); + const XlaCompiler::CompileOptions& compile_options, bool compile_single_op, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable) { + DCHECK_NE(out_executable, nullptr); VLOG(2) << "XlaCompilationCache::Compile " << DebugString(); if (VLOG_IS_ON(2)) { @@ -357,8 +358,8 @@ Status XlaCompilationCache::CompileImpl( } } TF_RETURN_IF_ERROR(entry->compilation_status); - *compilation_result = &entry->compilation_result; - *executable = entry->executable.get(); + *out_compilation_result = &entry->compilation_result; + *out_executable = entry->executable.get(); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h index 10ad87e38cc4d614e869782329f84351bc3b1f0b..75c7758f730f9f2f8251c02e7fac1a01f8cc9c2b 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.h +++ b/tensorflow/compiler/jit/xla_compilation_cache.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_ #define TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_ +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/xla/client/local_client.h" @@ -24,7 +25,6 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" @@ -68,9 +68,9 @@ class XlaCompilationCache : public ResourceBase { const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options); + const XlaCompiler::CompileOptions& compile_options, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable); // As above, but calls XlaCompiler::CompileSingleOp instead of // XlaCompiler::CompileFunction. @@ -78,9 +78,9 @@ class XlaCompilationCache : public ResourceBase { const XlaCompiler::Options& options, const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options); + const XlaCompiler::CompileOptions& compile_options, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable); xla::LocalClient* client() const { return client_; } const DeviceType& device_type() const { return device_type_; } @@ -89,15 +89,14 @@ class XlaCompilationCache : public ResourceBase { private: // Common implementation of Compile and CompileSingleOp. - Status CompileImpl(const XlaCompiler::Options& options, - const NameAttrList& function, - const std::map& constant_args, - const std::map& variable_args, - OpKernelContext* ctx, - const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable, - const XlaCompiler::CompileOptions& compile_options, - bool compile_single_op); + Status CompileImpl( + const XlaCompiler::Options& options, const NameAttrList& function, + const std::map& constant_args, + const std::map& variable_args, OpKernelContext* ctx, + const XlaCompiler::CompileOptions& compile_options, + bool compile_single_op, + const XlaCompiler::CompilationResult** out_compilation_result, + xla::LocalExecutable** out_executable); // Takes `result` which has been compiled from a Tensorflow subgraph to a // XLA computation already, and generates an XLA LocalExecutable `executable`. @@ -152,7 +151,7 @@ class XlaCompilationCache : public ResourceBase { }; mutex compile_cache_mu_; - gtl::FlatMap, Signature::Hash> cache_ + absl::flat_hash_map, Signature::Hash> cache_ GUARDED_BY(compile_cache_mu_); struct CompileStats { @@ -165,7 +164,7 @@ class XlaCompilationCache : public ResourceBase { mutex compile_stats_mu_; // Maps cluster names to compilation statistics for said cluster. - gtl::FlatMap compile_stats_ + absl::flat_hash_map compile_stats_ GUARDED_BY(compile_stats_mu_); TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache); diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index b98c0cb028ff069278dceda21f4588c0da9086e5..79976c85dff200ce993ebb06e7a20a15b71f6085 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -180,7 +180,7 @@ Status XlaCompileOnDemandOp::Compile( std::map variable_args = GetVariables(ctx); return cache->CompileSingleOp(options, constant_arguments, variable_args, ctx, - result, executable, compile_options); + compile_options, result, executable); } void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) { diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 3cf74fa7880c96198f9072ab7488a1cec15c9e5c..ba2401ed2628beeba2be3bf59a067c3d87ca3f9f 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -894,6 +894,22 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "tensor_list_ops_test", + size = "small", + srcs = ["tensor_list_ops_test.py"], + # TensorList ops are not implemented in the on-demand compilation model yet. + disabled_backends = "cpu_ondemand", + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework", + "//tensorflow/python:list_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python/eager:function", + ], +) + tf_xla_py_test( name = "ternary_ops_test", size = "small", @@ -1028,6 +1044,19 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "permute_test", + size = "small", + srcs = ["permute_test.py"], + deps = [ + "//tensorflow/compiler/tests:xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:nn_ops", + ], +) + tf_xla_py_test( name = "xla_device_test", size = "small", @@ -1105,6 +1134,7 @@ cc_library( "//tensorflow/core:test", "//tensorflow/core:testlib", "//tensorflow/core/kernels:ops_util", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/compiler/tests/lstm.py b/tensorflow/compiler/tests/lstm.py index 43c469d0320645cdad6ddc67f3e8cb1374b8e9e5..73b3638e801e7389e83953f6662bcfc78ad86203 100644 --- a/tensorflow/compiler/tests/lstm.py +++ b/tensorflow/compiler/tests/lstm.py @@ -117,7 +117,7 @@ def LSTMLayer(cell_name, weights, m, c, x_seq, pad_seq): def RandomVar(shape, name=None): """Returns a variable of the given shape initialized to random values.""" - return variables.Variable( + return variables.VariableV1( random_ops.random_uniform(shape), dtype=dtypes.float32, name=name) diff --git a/tensorflow/compiler/tests/nullary_ops_test.py b/tensorflow/compiler/tests/nullary_ops_test.py index f985c5d2d96e06fc0117f3935d61b19c9e8562b1..38cb2f83efc48ffcdf5403a23e666963b2ea4da1 100644 --- a/tensorflow/compiler/tests/nullary_ops_test.py +++ b/tensorflow/compiler/tests/nullary_ops_test.py @@ -43,18 +43,37 @@ class NullaryOpsTest(xla_test.XLATestCase): output.run() def testConstants(self): - constants = [ - np.float32(42), - np.array([], dtype=np.float32), - np.array([1, 2], dtype=np.float32), - np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), - np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]], - dtype=np.float32), - np.array([[[]], [[]]], dtype=np.float32), - np.array([[[[1]]]], dtype=np.float32), - ] - for c in constants: - self._testNullary(lambda c=c: constant_op.constant(c), expected=c) + for dtype in self.numeric_types: + constants = [ + dtype(42), + np.array([], dtype=dtype), + np.array([1, 2], dtype=dtype), + np.array([7, 7, 7, 7, 7], dtype=dtype), + np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype), + np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]], + dtype=dtype), + np.array([[[]], [[]]], dtype=dtype), + np.array([[[[1]]]], dtype=dtype), + ] + for c in constants: + self._testNullary(lambda c=c: constant_op.constant(c), expected=c) + + def testComplexConstants(self): + for dtype in self.complex_types: + constants = [ + dtype(42 + 3j), + np.array([], dtype=dtype), + np.ones([50], dtype=dtype) * (3 + 4j), + np.array([1j, 2 + 1j], dtype=dtype), + np.array([[1, 2j, 7j], [4, 5, 6]], dtype=dtype), + np.array([[[1, 2], [3, 4 + 6j], [5, 6]], + [[10 + 7j, 20], [30, 40], [50, 60]]], + dtype=dtype), + np.array([[[]], [[]]], dtype=dtype), + np.array([[[[1 + 3j]]]], dtype=dtype), + ] + for c in constants: + self._testNullary(lambda c=c: constant_op.constant(c), expected=c) if __name__ == "__main__": diff --git a/tensorflow/compiler/tests/permute_test.py b/tensorflow/compiler/tests/permute_test.py new file mode 100644 index 0000000000000000000000000000000000000000..dbb9274df4f579fbc6076bf55c9307e4d1cb7768 --- /dev/null +++ b/tensorflow/compiler/tests/permute_test.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the DataFormatVecPermute operator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests import xla_test +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test + + +class XlaPermuteOpTest(xla_test.XLATestCase): + + def _runPermuteAndCompare(self, x, src_format, dst_format, expected): + with self.cached_session() as session: + with self.test_scope(): + placeholder = array_ops.placeholder(dtypes.as_dtype(x.dtype), x.shape) + param = {placeholder: x} + output = nn_ops.data_format_vec_permute( + placeholder, src_format=src_format, dst_format=dst_format) + result = session.run(output, param) + self.assertAllEqual(result, expected) + + def testNHWCToNCHW(self): + x = np.array([7, 4, 9, 3], dtype=np.int32) + self._runPermuteAndCompare(x, "NHWC", "NCHW", [7, 3, 4, 9]) + + def testNCHWToNHWC(self): + x = np.array([7, 4, 9, 3], dtype=np.int32) + self._runPermuteAndCompare(x, "NCHW", "NHWC", [7, 9, 3, 4]) + + def testNHWCToHWNC(self): + x = np.array([7, 4, 9, 3], dtype=np.int32) + self._runPermuteAndCompare(x, "NHWC", "HWNC", [4, 9, 7, 3]) + + def testHWNCToNHWC(self): + x = np.array([7, 4, 9, 3], dtype=np.int32) + self._runPermuteAndCompare(x, "HWNC", "NHWC", [9, 7, 4, 3]) + + def testNHWCToNCHW2D(self): + x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32) + self._runPermuteAndCompare(x, "NHWC", "NCHW", + [[7, 4], [5, 1], [9, 3], [4, 5]]) + + def testNHWCToHWNC2D(self): + x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32) + self._runPermuteAndCompare(x, "NHWC", "HWNC", + [[9, 3], [4, 5], [7, 4], [5, 1]]) + + def testHWNCToNHWC2D(self): + x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32) + self._runPermuteAndCompare(x, "HWNC", "NHWC", + [[4, 5], [7, 4], [9, 3], [5, 1]]) + + def testNCHWToNHWC2D(self): + x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32) + self._runPermuteAndCompare(x, "NCHW", "NHWC", + [[7, 4], [4, 5], [5, 1], [9, 3]]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index bddda6f30245d4b8281a77783ec9922d61bd3883..dc119fb0f8a41a3772a8c9508bf2db657f57de88 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -45,6 +45,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/jit/defs.h" @@ -63,7 +64,6 @@ limitations under the License. #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/public/session_options.h" @@ -457,7 +457,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, Tensor tensor(dtype, TensorShape(shape)); switch (dtype) { case DT_FLOAT: { - gtl::FlatSet already_generated; + absl::flat_hash_set already_generated; std::uniform_real_distribution distribution(-1.0f, 1.0f); test::FillFn(&tensor, [&](int i) -> float { float generated; @@ -470,7 +470,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, break; } case DT_DOUBLE: { - gtl::FlatSet already_generated; + absl::flat_hash_set already_generated; std::uniform_real_distribution distribution(-1.0, 1.0); test::FillFn(&tensor, [&](int i) -> double { double generated; @@ -483,7 +483,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, break; } case DT_COMPLEX64: { - gtl::FlatSet> already_generated; + absl::flat_hash_set> already_generated; std::uniform_real_distribution distribution(-1.0f, 1.0f); test::FillFn(&tensor, [&](int i) { complex64 generated; @@ -500,7 +500,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, break; } case DT_INT32: { - gtl::FlatSet already_generated; + absl::flat_hash_set already_generated; std::uniform_int_distribution distribution(-(1 << 20), 1 << 20); test::FillFn(&tensor, [&](int i) -> int32 { int32 generated; @@ -513,7 +513,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, break; } case DT_INT64: { - gtl::FlatSet already_generated; + absl::flat_hash_set already_generated; std::uniform_int_distribution distribution(-(1LL << 40), 1LL << 40); test::FillFn(&tensor, [&](int i) -> int64 { @@ -527,7 +527,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values, break; } case DT_BOOL: { - gtl::FlatSet already_generated; + absl::flat_hash_set already_generated; std::bernoulli_distribution distribution; test::FillFn(&tensor, [&](int i) -> bool { bool generated; @@ -1820,7 +1820,7 @@ TEST_F(OpTest, Diag) { do { dims = RandomDims(1); size = TensorShape(dims).num_elements(); - } while (size * size < tf_xla_max_tensor_size); + } while (size * size > tf_xla_max_tensor_size); return ExpectTfAndXlaOutputsAreClose( OpTestBuilder("Diag").RandomInput(type, dims).Attr("T", type)); }); diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py index dbf4beb693ec1766e6b7b5daaed4be4e1d874fba..57f0ab7a9eae16ab3de61af9760dfba1ab355b46 100644 --- a/tensorflow/compiler/tests/sort_ops_test.py +++ b/tensorflow/compiler/tests/sort_ops_test.py @@ -48,13 +48,29 @@ class XlaSortOpTest(xla_test.XLATestCase): self.assertAllClose(v, result, rtol=1e-3) def testSort(self): - supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32]) + supported_types = set( + [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32]) for dtype in supported_types.intersection(self.numeric_types): x = np.arange(101, dtype=dtype) np.random.shuffle(x) self._assertOpOutputMatchesExpected( xla.sort, [x], expected=[np.arange(101, dtype=dtype)]) + def testKeyValueSort(self): + supported_types = set( + [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32]) + for key_type in supported_types.intersection(self.numeric_types): + for value_type in supported_types.intersection(self.numeric_types): + x = np.arange(101, dtype=key_type) + np.random.shuffle(x) + y = (-x).astype(value_type) + self._assertOpOutputMatchesExpected( + xla.key_value_sort, [x, y], + expected=[ + np.arange(101, dtype=key_type), + -np.arange(101, dtype=value_type) + ]) + def testTopK(self): supported_types = set( [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32]) diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py index f3861043b27ebb131554ff49af8c986229fc15ac..e8741bc468585ff9fb049dcd87700f8048d74026 100644 --- a/tensorflow/compiler/tests/stateless_random_ops_test.py +++ b/tensorflow/compiler/tests/stateless_random_ops_test.py @@ -91,7 +91,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase): with self.cached_session() as sess, self.test_scope(): for dtype in self._random_types(): seed_t = array_ops.placeholder(dtypes.int32, shape=[2]) - x = stateless.stateless_random_uniform( + x = stateless.stateless_random_normal( shape=[10000], seed=seed_t, dtype=dtype) y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]}) self.assertTrue(np.all(np.isfinite(y))) diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5c079d595c440cac644f5461154509abe7b1d1ed --- /dev/null +++ b/tensorflow/compiler/tests/tensor_list_ops_test.py @@ -0,0 +1,96 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for ops which manipulate lists of tensors via bridge.""" + +# pylint: disable=g-bad-name +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import numpy as np +from tensorflow.compiler.tests import xla_test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import list_ops +from tensorflow.python.platform import test + + +def scalar_shape(): + return ops.convert_to_tensor([], dtype=dtypes.int32) + + +class ListOpsTest(xla_test.XLATestCase): + + def testElementShape(self): + with self.cached_session() as sess, self.test_scope(): + dim = array_ops.placeholder(dtypes.int32) + l = list_ops.tensor_list_reserve( + element_shape=(dim, 15), num_elements=20, + element_dtype=dtypes.float32) + e32 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int32) + e64 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int64) + self.assertAllEqual(sess.run(e32, {dim: 10}), (10, 15)) + self.assertAllEqual(sess.run(e64, {dim: 7}), (7, 15)) + + def testPushPop(self): + with self.cached_session() as sess, self.test_scope(): + num = array_ops.placeholder(dtypes.int32) + l = list_ops.tensor_list_reserve( + element_shape=(7, 15), num_elements=num, element_dtype=dtypes.float32) + l = list_ops.tensor_list_push_back( + l, constant_op.constant(1.0, shape=(7, 15))) + l = list_ops.tensor_list_push_back( + l, constant_op.constant(2.0, shape=(7, 15))) + l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + self.assertAllEqual(sess.run(e2, {num: 10}), 2.0 * np.ones((7, 15))) + self.assertAllEqual(sess.run(e1, {num: 10}), 1.0 * np.ones((7, 15))) + + def testPushPopSeparateLists(self): + with self.cached_session() as sess, self.test_scope(): + num = array_ops.placeholder(dtypes.int32) + l = list_ops.tensor_list_reserve( + element_shape=scalar_shape(), + num_elements=num, + element_dtype=dtypes.float32) + l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) + l2 = list_ops.tensor_list_push_back(l, constant_op.constant(2.0)) + l3 = list_ops.tensor_list_push_back(l, constant_op.constant(3.0)) + _, e11 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + l2, e21 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32) + l2, e22 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32) + l3, e31 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32) + l3, e32 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32) + result = sess.run([e11, [e21, e22], [e31, e32]], {num: 20}) + self.assertEqual(result, [1.0, [2.0, 1.0], [3.0, 1.0]]) + + def testEmptyTensorList(self): + dim = 7 + with self.cached_session() as sess, self.test_scope(): + p = array_ops.placeholder(dtypes.int32) + l = list_ops.empty_tensor_list( + element_shape=(p, 15), element_dtype=dtypes.float32) + l = list_ops.tensor_list_push_back( + l, constant_op.constant(1.0, shape=(dim, 15))) + _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Use TensorListReserve instead"): + self.assertEqual(sess.run(e, {p: dim}), 1.0 * np.ones((dim, 15))) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index ba1e3b2b4fdbb73e98105ace6571783ef780adf5..3f631f91ec442c149b3ea4df3826d98b0419a76f 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -635,6 +635,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:ops", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], ) @@ -649,6 +650,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD index ea8d1b3d14939d4f4fba598318200f71c2eb0270..adcdb6c8f762cb7ea68485167bf7fc8ccb343a51 100644 --- a/tensorflow/compiler/tf2xla/cc/BUILD +++ b/tensorflow/compiler/tf2xla/cc/BUILD @@ -30,14 +30,15 @@ cc_library( tf_gen_op_wrapper_cc( name = "xla_jit_op_gen", - out_ops_file = "ops/xla_jit_op", + include_internal_ops = 1, + out_ops_file = "ops/xla_jit_ops", deps = ["//tensorflow/compiler/jit/ops:xla_ops"], ) cc_library( name = "xla_jit_ops", - srcs = ["ops/xla_jit_op.cc"], - hdrs = ["ops/xla_jit_op.h"], + srcs = ["ops/xla_jit_ops.cc"], + hdrs = ["ops/xla_jit_ops.h"], deps = [ "//tensorflow/cc:const_op", "//tensorflow/cc:ops", diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 2d45507796a39d029665c709bb6ad27e7697d544..0362682bd6a8d0977bb09854ef448075fba99273 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction( const string& func_name, const string& new_func_name, const protobuf::Map& attrs, FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr, - std::map* canonicalized_name_to_new_name) { + std::map>* canonicalized_name_to_new_name, + bool* modified) { + *modified = false; + // Convert the function to Graph. FunctionLibraryRuntime::Handle handle; TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle)); @@ -91,6 +94,20 @@ Status FunctionalizeControlFlowForFunction( } }); const FunctionBody* body = flr->GetFunctionBody(handle); + Graph* g = body->graph; + + // Check if the graph has Switch or Merge node. + bool has_switch_or_merge = false; + for (Node* n : body->graph->nodes()) { + if (n->type_string() == "Switch" || n->type_string() == "Merge") { + has_switch_or_merge = true; + break; + } + } + // We cannot return here directly if the graph has no Switch/Merge. + // It might contain function call nodes, or If/While nodes with Switch/Merge + // in function body. We still need to rewrite those functions and modify + // corresponding nodes. // If any node has associated functions, functionalize them first. // Gather nodes with associated functions first, because rewriting those nodes @@ -98,7 +115,7 @@ Status FunctionalizeControlFlowForFunction( // it. std::vector>> nodes_to_associated_functions; - for (auto* n : body->graph->nodes()) { + for (auto* n : g->nodes()) { auto associated_functions = GetAssociatedFunctions(*n, flr); if (!associated_functions.empty()) { nodes_to_associated_functions.push_back({n, associated_functions}); @@ -113,83 +130,82 @@ Status FunctionalizeControlFlowForFunction( Canonicalize(name, AttrSlice(&associated_function.attrs())); auto iter = canonicalized_name_to_new_name->find(canonicalized_name); string new_name; + bool function_modified; if (iter != canonicalized_name_to_new_name->end()) { - // If we already functionalized this function, skip functionalization - // but still rewrite the node. - new_name = iter->second; + // If we already processed this function, check if it was rewritten. If + // the function was rewritten, the entry will be non-empty. Otherwise + // the entry will be empty. + function_modified = iter->second.has_value(); + if (function_modified) { + new_name = iter->second.value(); + } } else { - new_name = fld->UniqueFunctionName(absl::StrCat(name, "_f15n_")); + if (associated_function.type() == + AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) { + // For SymbolicGradient, `name` is always "SymbolicGradient", + // which is not very informative. Use node name instead. + new_name = fld->UniqueFunctionName(absl::StrCat(n->name(), "_f15n_")); + } else { + new_name = fld->UniqueFunctionName(absl::StrCat(name, "_f15n_")); + } TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction( name, new_name, associated_function.attrs(), fld, flr, - canonicalized_name_to_new_name)); - (*canonicalized_name_to_new_name)[canonicalized_name] = new_name; + canonicalized_name_to_new_name, &function_modified)); + if (function_modified) { + // If the function was rewritten, add an non-empty entry. So later we + // know we have processed this function, and it was rewritten into + // another function. + (*canonicalized_name_to_new_name)[canonicalized_name] = new_name; + } else { + // If the function was not rewritten, add an empty entry. So later + // we know we have processed this function, and it does not need to be + // rewritten. + (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt; + } + } + if (function_modified) { + *modified = true; + + // Notice that if "n" is a function call, RewriteAssociatedFunction() + // will delete it and create a new node instead, making "n" an invalid + // pointer. That's fine because in that case, associated_functions will + // only have one member and the loop will only run once. + TF_RETURN_IF_ERROR(RewriteAssociatedFunction( + g, n, fld, associated_function, new_name)); } - // Notice that if "n" is a function call, RewriteAssociatedFunction() will - // delete it and create a new node instead, making "n" an invalid pointer. - // That's fine because in that case, associated_functions will only have - // one member and the loop will only run once. - TF_RETURN_IF_ERROR(RewriteAssociatedFunction( - body->graph, n, fld, associated_function, new_name)); } } - // Call graph optimizer. The most important optimization we need is constant - // folding, which will replace ops like Shape/BroadcastGradientArgs with - // constant shape input. Without this optimization, those ops might become - // dynamic input for then/else body function and XLA will complain that input - // is not compile time constant. We enable function inlining as well, because - // otherwise we won't be able to infer shape for any node depending on - // function call nodes. - if (VLOG_IS_ON(4)) { - dump_graph::DumpGraphToFile( - absl::StrCat("functionalize_control_flow_before_opt_", func_name), - *body->graph, fld); - } - // Optimizer accepts std::unique_ptr* as input and might change - // underlying pointer, thus we create a new Graph and copy from body->graph. - std::unique_ptr optimized_graph(new Graph(fld)); - CopyGraph(*body->graph, optimized_graph.get()); - OptimizerOptions opts; - opts.set_opt_level(OptimizerOptions::L0); - opts.set_do_function_inlining(true); - opts.set_do_constant_folding(true); - GraphOptimizer optimizer(opts); - auto cf_consider_fn = [](const Node* n) { - // Skip SymbolicGradient op when doing constant folding. - // Enabling SymbolicGradient op in constant folding requires - // flr->device() to be non-null, and here we have not constructed - // proper Device object yet (it will be constructed in XlaCompiler). - return n->type_string() != FunctionLibraryDefinition::kGradientOp; - }; - optimizer.Optimize(flr, flr->env(), - /*device=*/nullptr, &optimized_graph, - /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr, - cf_consider_fn); - - // Functionalize the function body. - if (VLOG_IS_ON(4)) { - dump_graph::DumpGraphToFile( - absl::StrCat("functionalize_control_flow_before_fdef_", func_name), - *optimized_graph, fld); - } - TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld)); - if (VLOG_IS_ON(4)) { - dump_graph::DumpGraphToFile( - absl::StrCat("functionalize_control_flow_after_fdef_", func_name), - *optimized_graph, fld); + if (has_switch_or_merge) { + *modified = true; + + // Functionalize the function body. + if (VLOG_IS_ON(4)) { + dump_graph::DumpGraphToFile( + absl::StrCat("functionalize_control_flow_before_fdef_", func_name), + *g, fld); + } + TF_RETURN_IF_ERROR(FunctionalizeControlFlow(g, fld)); + if (VLOG_IS_ON(4)) { + dump_graph::DumpGraphToFile( + absl::StrCat("functionalize_control_flow_after_fdef_", func_name), *g, + fld); + } } - FunctionDef functionalized_fdef; - TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name, - &functionalized_fdef)); - // Add rewritten FunctionDef into library. - if (func_name == new_func_name) { - VLOG(2) << "Replacing function " << func_name; + if (*modified) { + // Add rewritten FunctionDef into library. + FunctionDef functionalized_fdef; TF_RETURN_IF_ERROR( - fld->ReplaceFunction(new_func_name, functionalized_fdef)); - } else { - VLOG(2) << "Adding function " << new_func_name; - TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef)); + GraphToFunctionDef(*g, new_func_name, &functionalized_fdef)); + if (func_name == new_func_name) { + VLOG(2) << "Replacing function " << func_name; + TF_RETURN_IF_ERROR( + fld->ReplaceFunction(new_func_name, functionalized_fdef)); + } else { + VLOG(2) << "Adding function " << new_func_name; + TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef)); + } } return ret_status; @@ -215,7 +231,7 @@ Status FunctionalizeControlFlowPass::Run( {"TPUCompile", "function"}, {"XlaLaunch", "function"}, }; - std::map canonicalized_name_to_new_name; + std::map> canonicalized_name_to_new_name; for (Node* n : graph->nodes()) { auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string()); if (it == kNodeTypeToFunctionAttrMapping->end()) { @@ -230,12 +246,15 @@ Status FunctionalizeControlFlowPass::Run( << ". Corresponding function: " << func.name(); string new_func_name = options.flib_def->UniqueFunctionName( absl::StrCat(func.name(), "_f15n_")); + bool modified; TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction( func.name(), new_func_name, func.attr(), options.flib_def, flr, - &canonicalized_name_to_new_name)); - n->ClearAttr(func_attr); - func.set_name(new_func_name); - n->AddAttr(func_attr, func); + &canonicalized_name_to_new_name, &modified)); + if (modified) { + n->ClearAttr(func_attr); + func.set_name(new_func_name); + n->AddAttr(func_attr, func); + } } } diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 3e823254d3d52e88552712b4f53fa4449586cd20..224e5ea123b4905bcfe0947722dbaf4a703f9893 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -62,6 +62,7 @@ tf_kernel_library( "one_hot_op.cc", "pack_op.cc", "pad_op.cc", + "permute_op.cc", "pooling_ops.cc", "qr_op.cc", "quantize_and_dequantize_op.cc", @@ -94,6 +95,7 @@ tf_kernel_library( "stateless_random_ops.cc", "strided_slice_op.cc", "tensor_array_ops.cc", + "tensor_list_ops.cc", "tile_ops.cc", "topk_op.cc", "training_ops.cc", @@ -119,6 +121,7 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/lib:batch_dot", + "//tensorflow/compiler/tf2xla/lib:broadcast", "//tensorflow/compiler/tf2xla/lib:cholesky", "//tensorflow/compiler/tf2xla/lib:qr", "//tensorflow/compiler/tf2xla/lib:random", @@ -157,6 +160,7 @@ tf_kernel_library( "//tensorflow/core/kernels:control_flow_ops", "//tensorflow/core/kernels:conv_ops", "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:list_kernels", "//tensorflow/core/kernels:no_op", "//tensorflow/core/kernels:ops_util", "//tensorflow/core/kernels:pooling_ops", diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index a988d3c33ed808b022f67882c8ae5100b7e7a305..47e517a6576d3a848bc41ceb703df2bd778c4a35 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -64,7 +64,7 @@ XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions)); // } static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { - std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto y_equals_0 = xla::Eq(y, zero); auto zeros = xla::ZerosLike(x); @@ -84,7 +84,7 @@ XLA_MAKE_BINARY(DivNoNan, // } static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { - std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); if (DataTypeIsUnsigned(dtype)) { return xla::Div(x, y); } @@ -105,7 +105,7 @@ XLA_MAKE_BINARY(FloorDiv, static xla::XlaOp XlogyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { - std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto is_zero = xla::Eq(x, zero); return xla::Select(is_zero, zero, xla::Mul(x, xla::Log(y))); @@ -114,7 +114,7 @@ XLA_MAKE_BINARY(Xlogy, XlogyImpl(b, input_type(0), lhs, rhs, broadcast_helper)); static xla::XlaOp XdivyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { - std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto is_zero = xla::Eq(x, zero); return xla::Select(is_zero, zero, xla::Div(x, y)); @@ -126,7 +126,7 @@ XLA_MAKE_BINARY(Xdivy, XdivyImpl(b, input_type(0), lhs, rhs, broadcast_helper)); // return (x < T(0)) == (y < T(0)) ? trunc_mod : std::fmod(trunc_mod + y, y); static xla::XlaOp FloorModImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { - std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper); + std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); auto zero = XlaHelpers::Zero(b, dtype); auto same_sign = xla::Eq(xla::Lt(x, zero), xla::Lt(y, zero)); auto trunc_mod = xla::Rem(x, y); diff --git a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc index 696c1c39befd5aa2972afb6cfa64905b57a5ab72..9bb11fb67e3e4ddc48d68631c60f96c60b921094 100644 --- a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc @@ -13,16 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "absl/algorithm/container.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/lib/broadcast.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/lib/constants.h" -#include "tensorflow/compiler/xla/literal.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/bcast.h" namespace tensorflow { namespace { @@ -37,59 +32,9 @@ class BroadcastToOp : public XlaOpKernel { TensorShape output_shape; OP_REQUIRES_OK(context, context->ConstantInputAsShape(1, &output_shape)); - OP_REQUIRES(context, input_shape.dims() <= output_shape.dims(), - errors::InvalidArgument( - "Input rank (", input_shape.dims(), - ") must be less than or equal to the output rank (", - output_shape.dims(), ")")); - - auto input_dims = input_shape.dim_sizes(); - auto output_dims = output_shape.dim_sizes(); - - // Broadcasting is done right-to-left on right-aligned dimensions; reverse - // the two vectors so elements to be broadcast are aligned. - absl::c_reverse(input_dims); - absl::c_reverse(output_dims); - - std::vector broadcast_dims; - std::vector broadcast_shape; - for (int i = 0; i < output_shape.dims(); ++i) { - if (i < input_shape.dims()) { - OP_REQUIRES( - context, - (output_dims[i] == 0 && input_dims[i] == 0) || - (input_dims[i] != 0 && output_dims[i] % input_dims[i] == 0), - errors::InvalidArgument("invalid shape to broadcast from ", - input_shape.DebugString(), " to ", - output_shape.DebugString())); - - broadcast_dims.push_back(broadcast_shape.size()); - if (output_dims[i] == input_dims[i]) { - broadcast_shape.push_back(output_dims[i]); - } else if (output_dims[i] != input_dims[i]) { - // Add dimensions [I, O/I], which we will later flatten to just - // [O]. We must do this in two phases since XLA broadcasting does not - // support tiling. - broadcast_shape.push_back(input_dims[i]); - broadcast_shape.push_back(output_dims[i] / input_dims[i]); - } - } else { - broadcast_shape.push_back(output_dims[i]); - } - } - absl::c_reverse(broadcast_dims); - int broadcast_shape_size = broadcast_shape.size(); - for (int64& broadcast_dim : broadcast_dims) { - broadcast_dim = broadcast_shape_size - broadcast_dim - 1; - } - absl::c_reverse(broadcast_shape); - xla::XlaOp output = xla::Reshape( - xla::BroadcastInDim(context->Input(0), - xla::ShapeUtil::MakeShape( - context->input_xla_type(0), broadcast_shape), - broadcast_dims), - output_shape.dim_sizes()); - context->SetOutput(0, output); + auto output = BroadcastTo(context->Input(0), output_shape.dim_sizes()); + OP_REQUIRES_OK(context, output.status()); + context->SetOutput(0, output.ValueOrDie()); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/const_op.cc b/tensorflow/compiler/tf2xla/kernels/const_op.cc index da8cf3fc6fa694f592280f8c249d317827d9cd09..2628ef8e2454976aeff3859fa5dc1d8e106f32e1 100644 --- a/tensorflow/compiler/tf2xla/kernels/const_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/const_op.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/types.pb.h" namespace tensorflow { namespace { @@ -76,6 +77,17 @@ class ConstOp : public XlaOpKernel { return; } break; + case DT_COMPLEX64: + if (proto_.scomplex_val_size() == 2) { + ctx->SetOutput( + 0, + xla::Broadcast(xla::ConstantR0( + b, xla::complex64(proto_.scomplex_val(0), + proto_.scomplex_val(1))), + shape.dim_sizes())); + return; + } + break; case DT_INT32: if (proto_.int_val_size() == 1) { ctx->SetOutput( diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc index ef1015552d181a183d412f9c269dd5ec608b388f..234f7b4a019c9aac4bac4f906ddbae166ecd9a80 100644 --- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h" +#include "tensorflow/compiler/tf2xla/lib/broadcast.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" @@ -39,7 +40,8 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) { // compute valid broadcast shapes, but rely below on XLA to // automatically perform the broadcast assuming its valid shapes are // a superset of TensorFlow's valid shapes. - BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape)); + BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape), + /*fewer_dims_optimization=*/false); if (!bcast.IsValid()) { ctx->SetStatus(errors::InvalidArgument("Incompatible shapes: ", lhs_shape.DebugString(), " vs. ", @@ -86,51 +88,18 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) { } /* static */ std::pair XlaBinaryOp::Broadcast( - xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs, - const BCast& broadcast_helper) { - // Manually construct the broadcasting since MapN does not do - // automatic broadcasting. The bcast helper ensures that - // lhs.reshape(bcast.x_reshape()).broadcast(bcast.x_bcast()) and - // rhs.reshape(bcast.y_reshape()).broadcast(bcast.y_bcast()) have - // the same shape, so can be operated on by MapN. - - // First reshape the inputs, which should be a metadata-only - // operation since we are flattening the dimensions in order. - auto lhs_shaped = xla::Reshape(lhs, broadcast_helper.x_reshape()); - auto rhs_shaped = xla::Reshape(rhs, broadcast_helper.y_reshape()); - - // Next broadcast the necessary input dimensions. We rely on the - // XLA optimizer to be smart about the fact that we are asking - // it to broadcast size 1 on some of these dimensions, to avoid - // adding complexity to this code. - auto lhs_broadcast = xla::Broadcast(lhs_shaped, broadcast_helper.x_bcast()); - int lhs_size = broadcast_helper.x_bcast().size(); - auto rhs_broadcast = xla::Broadcast(rhs_shaped, broadcast_helper.y_bcast()); - int rhs_size = broadcast_helper.y_bcast().size(); - - // Now reshape them to the correct output shape. After the - // broadcast each side is twice as wide as it should be, since the - // broadcast dimensions were prepended to the shape. Reshape - // flattening each original dimension with the prepended broadcast - // dimension. E.g. if we started out with lhs_shaped with shape - // [5,2,3] and x_bcast was [2,1,7] then lhs_broadcast would have - // shape [2,1,7,5,2,3] and we want to reshape it to [10,2,21]. - std::vector lhs_reorder; - for (int i = 0; i < lhs_size; ++i) { - lhs_reorder.push_back(i); - lhs_reorder.push_back(i + lhs_size); + xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper) { + auto lhs_output = BroadcastTo(lhs, broadcast_helper.output_shape()); + if (!lhs_output.ok()) { + xla::XlaOp error = lhs.builder()->ReportError(lhs_output.status()); + return {error, error}; } - auto lhs_output = - xla::Reshape(lhs_broadcast, lhs_reorder, broadcast_helper.output_shape()); - std::vector rhs_reorder; - for (int i = 0; i < rhs_size; ++i) { - rhs_reorder.push_back(i); - rhs_reorder.push_back(i + rhs_size); + auto rhs_output = BroadcastTo(rhs, broadcast_helper.output_shape()); + if (!rhs_output.ok()) { + xla::XlaOp error = rhs.builder()->ReportError(rhs_output.status()); + return {error, error}; } - auto rhs_output = - xla::Reshape(rhs_broadcast, rhs_reorder, broadcast_helper.output_shape()); - - return {lhs_output, rhs_output}; + return {lhs_output.ValueOrDie(), rhs_output.ValueOrDie()}; } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h index 6653944a911588b7bc88d67b8cdd2c17850530f0..516ead4bfe89b4ddeee11dcc6410a838d04f28a9 100644 --- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h +++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h @@ -67,8 +67,7 @@ class XlaBinaryOp : public XlaOpKernel { // 'broadcast_helper', yielding arguments 'lhs' and 'rhs' that have the same // shape. static std::pair Broadcast( - xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs, - const BCast& broadcast_helper); + xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper); }; } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc index 3d81ae9eb89a80e5b89b180ad77521c5ed15e79d..f210bfbd886e48b8d7972393ed1899491486646c 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc @@ -88,20 +88,30 @@ class ArgMaxCustomCallOp : public XlaOpKernel { xla::ConstantLiteral(&b, xla::LiteralUtil::CreateR0(dim))); } - xla::Shape xla_shape = - xla::ShapeUtil::MakeShape(xla::S64, output_shape.dim_sizes()); + // The argmax function expects row-major layout. + xla::Shape xla_shape = xla::ShapeUtil::MakeShapeWithDescendingLayout( + xla::S64, output_shape.dim_sizes()); + std::vector arg_shapes; + for (const xla::XlaOp& arg : args) { + auto shape_status = b.GetShape(arg); + OP_REQUIRES_OK(ctx, shape_status.status()); + xla::Shape arg_shape = shape_status.ConsumeValueOrDie(); + *arg_shape.mutable_layout() = xla::LayoutUtil::MakeDescendingLayout( + xla::ShapeUtil::Rank(arg_shape)); + arg_shapes.push_back(std::move(arg_shape)); + } // Tell XLA to call the custom code, defined in // index_ops_kernel_argmax_float_1d.cc. xla::XlaOp output; switch (input_shape.dims()) { case 1: - output = - xla::CustomCall(&b, "argmax_float_1d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_1d_xla_impl", args, + xla_shape, arg_shapes); break; case 2: - output = - xla::CustomCall(&b, "argmax_float_2d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_2d_xla_impl", args, + xla_shape, arg_shapes); break; default: OP_REQUIRES(ctx, false, diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0764e5503db583351e92a144b2c361e8875161d3 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace tensorflow { +namespace { + +class DataFormatVecPermuteOp : public XlaOpKernel { + public: + explicit DataFormatVecPermuteOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("src_format", &src_format_)); + OP_REQUIRES( + ctx, src_format_.size() == 4, + errors::InvalidArgument("Data format should have 4 characters")); + TensorFormat data_format; + OP_REQUIRES(ctx, FormatFromString(src_format_, &data_format), + errors::InvalidArgument("Invalid data format")); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dst_format", &dst_format_)); + OP_REQUIRES( + ctx, dst_format_.size() == 4, + errors::InvalidArgument("Data format should have 4 characters")); + OP_REQUIRES(ctx, FormatFromString(dst_format_, &data_format), + errors::InvalidArgument("Invalid data format")); + } + void Compile(XlaOpKernelContext* ctx) override { + auto builder = ctx->builder(); + const TensorShape input_tensor_shape = ctx->InputShape(0); + int input_rank = input_tensor_shape.dims(); + OP_REQUIRES(ctx, input_rank == 1 || input_rank == 2, + errors::InvalidArgument( + "Input must be a vector or matrix, but got shape ", + input_tensor_shape.DebugString())); + OP_REQUIRES( + ctx, input_tensor_shape.dim_size(0) == 4, + errors::InvalidArgument( + "First dimension of input must be of size 4, but got shape ", + input_tensor_shape.DebugString())); + if (input_rank == 2) { + OP_REQUIRES( + ctx, input_tensor_shape.dim_size(1) == 2, + errors::InvalidArgument( + "Second dimension of 2D input must be of size 2, but got shape ", + input_tensor_shape.DebugString())); + } + std::vector dst_indices(4, 0); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + if (src_format_[i] == dst_format_[j]) { + dst_indices[i] = j; + break; + } + } + } + auto keys = xla::ConstantR1(builder, absl::Span(dst_indices)); + if (input_rank == 2) { + keys = xla::BroadcastInDim( + keys, xla::ShapeUtil::MakeShape(xla::S32, {4, 2}), {0}); + } + auto sorted = xla::Sort(keys, ctx->Input(0), 0); + auto output = xla::GetTupleElement(sorted, 1); + ctx->SetOutput(0, output); + } + + private: + string src_format_; + string dst_format_; + + TF_DISALLOW_COPY_AND_ASSIGN(DataFormatVecPermuteOp); +}; + +// TODO(b/115384656): Support DT_INT64. +REGISTER_XLA_OP(Name("DataFormatVecPermute").TypeConstraint("T", DT_INT32), + DataFormatVecPermuteOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc index 8102faad28db71075fb8da269c55edbdb667193e..8eee5b12991fb377203d780cecd8916952bd699a 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc @@ -40,10 +40,16 @@ class ReduceWindowOp : public XlaOpKernel { std::vector window_dimensions; std::vector window_strides; + std::vector base_dilations; + std::vector window_dilations; OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( "window_dimensions", &window_dimensions)); OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("window_strides", &window_strides)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("base_dilations", + &base_dilations)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( + "window_dilations", &window_dilations)); const int rank = input_shape.dims(); OP_REQUIRES(context, rank == window_dimensions.size(), @@ -56,6 +62,16 @@ class ReduceWindowOp : public XlaOpKernel { "The size of window_strides must be equal to the input " "rank (", window_strides.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == base_dilations.size(), + errors::InvalidArgument( + "The size of base_dilations must be equal to the input " + "rank (", + base_dilations.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == window_dilations.size(), + errors::InvalidArgument( + "The size of window_dilations must be equal to the input " + "rank (", + window_dilations.size(), " vs. ", rank, ")")); // Build the reducer function. XlaCompiler::Argument reducer_arg; @@ -102,7 +118,8 @@ class ReduceWindowOp : public XlaOpKernel { xla::XlaOp output = xla::ReduceWindowWithGeneralPadding( context->Input(0), context->Input(1), *reducer.computation, - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); context->SetOutput(0, output); } @@ -115,6 +132,8 @@ class ReduceWindowOp : public XlaOpKernel { REGISTER_XLA_OP(Name("XlaReduceWindow") .CompileTimeConstInput("window_dimensions") .CompileTimeConstInput("window_strides") + .CompileTimeConstInput("base_dilations") + .CompileTimeConstInput("window_dilations") .CompileTimeConstInput("padding"), ReduceWindowOp); diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index ab094d7dd1ce9856a3c2854fd2776827d6c4b76f..57afd608de820573821d605cadcc8779474b5fd6 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -104,7 +104,8 @@ class ScanOp : public XlaOpKernel { } auto output = xla::ReduceWindowWithGeneralPadding( XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init, - *reducer, window_dims, window_strides, padding); + *reducer, window_dims, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding); output = XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0)); diff --git a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc index 25a5bcbe1dd27d741ce3b74125ba9ce425ee78f3..0c32b8def0f7b741c93e803f8359b6504087e257 100644 --- a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc @@ -18,7 +18,9 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/numeric.h" #include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -55,10 +57,10 @@ Status GetIntValue(int index, XlaOpKernelContext* ctx, int64* value) { // The type-specific part of the implementation of Range. template -Status CreateRangeTensor(const xla::LiteralSlice& start_literal, - const xla::LiteralSlice& limit_literal, - const xla::LiteralSlice& delta_literal, - Tensor* output) { +xla::StatusOr CreateRangeTensor( + const xla::LiteralSlice& start_literal, + const xla::LiteralSlice& limit_literal, + const xla::LiteralSlice& delta_literal, xla::XlaBuilder* builder) { T start = start_literal.Get({}); T limit = limit_literal.Get({}); T delta = delta_literal.Get({}); @@ -82,14 +84,10 @@ Status CreateRangeTensor(const xla::LiteralSlice& start_literal, ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta)) : std::ceil(std::abs((limit - start) / delta))); - *output = Tensor(DataTypeToEnum::v(), TensorShape({size})); - auto flat = output->flat(); - T val = start; - for (int64 i = 0; i < size; ++i) { - flat(i) = val; - val += delta; - } - return Status::OK(); + return xla::ConstantR0(builder, start) + + xla::ConstantR0(builder, delta) * + xla::Iota(builder, xla::primitive_util::NativeToPrimitiveType(), + size); } class RangeOp : public XlaOpKernel { @@ -115,27 +113,26 @@ class RangeOp : public XlaOpKernel { OP_REQUIRES_OK(ctx, ctx->ConstantInput(2, &delta)); DataType type = input_type(0); - Tensor output; - Status status; + xla::StatusOr output; switch (type) { case DT_INT32: - status = CreateRangeTensor(start, limit, delta, &output); + output = CreateRangeTensor(start, limit, delta, ctx->builder()); break; case DT_INT64: - status = CreateRangeTensor(start, limit, delta, &output); + output = CreateRangeTensor(start, limit, delta, ctx->builder()); break; case DT_FLOAT: - status = CreateRangeTensor(start, limit, delta, &output); + output = CreateRangeTensor(start, limit, delta, ctx->builder()); break; case DT_DOUBLE: - status = CreateRangeTensor(start, limit, delta, &output); + output = CreateRangeTensor(start, limit, delta, ctx->builder()); break; default: - status = errors::InvalidArgument("Invalid type for Range ", + output = errors::InvalidArgument("Invalid type for Range ", DataTypeString(type)); } - OP_REQUIRES_OK(ctx, status); - ctx->SetConstantOutput(0, output); + OP_REQUIRES_OK(ctx, output.status()); + ctx->SetOutput(0, output.ValueOrDie()); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc index aaeeae01ccb303091a6d37d1aeb4b2a3377dc638..45f03d8c2175fc8b425b329b90893bb54d7f1d87 100644 --- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc @@ -25,11 +25,26 @@ class XlaSortOp : public XlaOpKernel { explicit XlaSortOp(OpKernelConstruction* context) : XlaOpKernel(context) {} void Compile(XlaOpKernelContext* context) override { - context->SetOutput(0, xla::Sort(context->Input(0))); + context->SetOutput(0, xla::Sort(context->Input("input"))); } }; REGISTER_XLA_OP(Name("XlaSort"), XlaSortOp); +class XlaKeyValueSortOp : public XlaOpKernel { + public: + explicit XlaKeyValueSortOp(OpKernelConstruction* context) + : XlaOpKernel(context) {} + + void Compile(XlaOpKernelContext* context) override { + xla::XlaOp result = + xla::Sort(context->Input("keys"), context->Input("values")); + context->SetOutput(0, xla::GetTupleElement(result, 0)); + context->SetOutput(1, xla::GetTupleElement(result, 1)); + } +}; + +REGISTER_XLA_OP(Name("XlaKeyValueSort"), XlaKeyValueSortOp); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..74d4fcc425bdadb70a7bedf2487deaf6c4a4f7b9 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc @@ -0,0 +1,226 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// XLA TensorList operators. + +#include +#include + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/concat_lib.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace { + +Status GetTensorListShape(xla::XlaBuilder* builder, xla::XlaOp op, + TensorShape* tensor_list_shape) { + auto shape_or_status = builder->GetShape(op); + if (!shape_or_status.ok()) { + return shape_or_status.status(); + } + xla::Shape shape = shape_or_status.ValueOrDie(); + TF_RET_CHECK(xla::ShapeUtil::IsTuple(shape)); + return XLAShapeToTensorShape(xla::ShapeUtil::GetTupleElementShape(shape, 0), + tensor_list_shape); +} + +class TensorListReserveOp : public XlaOpKernel { + public: + explicit TensorListReserveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + TensorShape element_shape; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape)); + int64 num_elements; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_elements)); + + TensorShape tensor_shape; + tensor_shape.AddDim(num_elements); + tensor_shape.AppendShape(element_shape); + + xla::XlaBuilder* b = ctx->builder(); + ctx->SetOutput(0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_), + tensor_shape.dim_sizes()), + xla::ConstantR0(b, 0)})); + } + + private: + DataType dtype_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorListReserveOp); +}; + +REGISTER_XLA_OP(Name("TensorListReserve") + .CompileTimeConstInput("element_shape") + .CompileTimeConstInput("num_elements"), + TensorListReserveOp); + +class EmptyTensorListOp : public XlaOpKernel { + public: + explicit EmptyTensorListOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + ctx->CtxFailure( + errors::InvalidArgument("XLA compilation requires a fixed tensor list " + "size. Use TensorListReserve instead.")); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(EmptyTensorListOp); +}; + +REGISTER_XLA_OP(Name("EmptyTensorList"), EmptyTensorListOp); + +class TensorListElementShapeOp : public XlaOpKernel { + public: + explicit TensorListElementShapeOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape_type", &shape_type_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + TensorShape shape; + OP_REQUIRES_OK(ctx, GetTensorListShape(b, ctx->Input(0), &shape)); + shape.RemoveDim(0); + + switch (shape_type_) { + case DT_INT64: + ctx->SetOutput(0, xla::ConstantR1(b, shape.dim_sizes())); + break; + case DT_INT32: { + std::vector size; + for (int64 s : shape.dim_sizes()) { + size.push_back(s); + } + ctx->SetOutput(0, xla::ConstantR1(b, size)); + break; + } + default: + ctx->CtxFailure( + errors::InvalidArgument("Unsupported shape type requested")); + return; + } + } + + private: + DataType shape_type_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorListElementShapeOp); +}; + +REGISTER_XLA_OP(Name("TensorListElementShape"), TensorListElementShapeOp); + +class TensorListPushBackOp : public XlaOpKernel { + public: + explicit TensorListPushBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp list = ctx->Input(0); + TensorShape elem_shape = ctx->InputShape(1); + + xla::XlaOp ta = xla::GetTupleElement(list, 0); + xla::XlaOp index = xla::GetTupleElement(list, 1); + xla::XlaOp value = ctx->Input(1); + + // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0]. + auto start_indices = + xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0(b, 0), + xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}})); + + TensorShape slice_shape = elem_shape; + slice_shape.InsertDim(0, 1LL); + auto update = xla::Reshape(value, slice_shape.dim_sizes()); + + // TODO(phawkins): We don't check the index is in bounds --- there is no + // error mechanism in XLA. + ctx->SetOutput( + 0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices), + index + xla::ConstantR0(b, 1)})); + } + + private: + DataType dtype_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorListPushBackOp); +}; + +REGISTER_XLA_OP(Name("TensorListPushBack"), TensorListPushBackOp); + +class TensorListPopBackOp : public XlaOpKernel { + public: + explicit TensorListPopBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + xla::XlaOp state = ctx->Input(0); + + TensorShape shape; + OP_REQUIRES_OK(ctx, GetTensorListShape(b, state, &shape)); + + xla::XlaOp ta = xla::GetTupleElement(state, 0); + xla::XlaOp index = xla::GetTupleElement(state, 1); + + index = index - xla::ConstantR0(b, 1); + + // start_indices of the DynamicSlice are [index, 0, 0, ..., 0]. + auto start_indices = + xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0(b, 0), + xla::MakeEdgePaddingConfig({{0, shape.dims() - 1}})); + + auto slice_shape = shape.dim_sizes(); + slice_shape[0] = 1LL; + + // TODO(phawkins): We don't check the index is in bounds --- there is no + // error mechanism in XLA. + xla::XlaOp read = xla::DynamicSlice(ta, start_indices, slice_shape); + // Remove the leading '1' dimension. + std::vector value_shape(slice_shape.begin() + 1, slice_shape.end()); + + ctx->SetOutput(0, xla::Tuple(b, {ta, index})); + ctx->SetOutput(1, xla::Reshape(read, value_shape)); + } + + private: + DataType dtype_; + + TF_DISALLOW_COPY_AND_ASSIGN(TensorListPopBackOp); +}; + +REGISTER_XLA_OP(Name("TensorListPopBack"), TensorListPopBackOp); + +} // anonymous namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index 8597e7f139d8d32b7e08782e70a4ee44d02618f2..1ce3930fd1cd91f8e8dfb765b49be2dc969d1bd7 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -31,6 +31,22 @@ cc_library( ], ) +cc_library( + name = "broadcast", + srcs = ["broadcast.cc"], + hdrs = ["broadcast.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client:xla_builder", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + ], +) + cc_library( name = "cholesky", srcs = ["cholesky.cc"], diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.cc b/tensorflow/compiler/tf2xla/lib/broadcast.cc new file mode 100644 index 0000000000000000000000000000000000000000..3e402ef855cd7c114332d84032bc869232404fc8 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/broadcast.cc @@ -0,0 +1,93 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/broadcast.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" + +namespace tensorflow { + +xla::StatusOr BroadcastTo(xla::XlaOp input, + absl::Span output_dims) { + xla::XlaBuilder* builder = input.builder(); + TF_ASSIGN_OR_RETURN(xla::Shape input_shape, builder->GetShape(input)); + absl::Span input_dims = + xla::AsInt64Slice(input_shape.dimensions()); + + if (input_dims == output_dims) { + return input; + } + + if (input_dims.size() > output_dims.size()) { + return errors::InvalidArgument( + "Input shape (", xla::ShapeUtil::HumanString(input_shape), + ") must have rank less than or equal to the output shape [", + absl::StrJoin(output_dims, ","), "]"); + } + + std::vector broadcast_dims; + std::vector broadcast_shape; + auto input_it = input_dims.rbegin(); + for (auto output_it = output_dims.rbegin(); output_it != output_dims.rend(); + ++output_it) { + if (input_it != input_dims.rend()) { + if (!(*output_it == 0 && *input_it == 0) && + !(*input_it != 0 && *output_it % *input_it == 0)) { + return errors::InvalidArgument("Invalid shape broadcast from ", + xla::ShapeUtil::HumanString(input_shape), + " to [", absl::StrJoin(output_dims, ","), + "]"); + } + + broadcast_dims.push_back(broadcast_shape.size()); + if (*output_it == *input_it) { + broadcast_shape.push_back(*output_it); + } else if (*output_it != *input_it) { + // Add dimensions [I, O/I], which we will later flatten to just + // [O]. We must do this in two phases since XLA broadcasting does not + // support tiling. + broadcast_shape.push_back(*input_it); + broadcast_shape.push_back(*output_it / *input_it); + } + ++input_it; + } else { + broadcast_shape.push_back(*output_it); + } + } + TF_RET_CHECK(input_it == input_dims.rend()); + + absl::c_reverse(broadcast_dims); + int broadcast_shape_size = broadcast_shape.size(); + for (int64& broadcast_dim : broadcast_dims) { + broadcast_dim = broadcast_shape_size - broadcast_dim - 1; + } + absl::c_reverse(broadcast_shape); + xla::XlaOp output = xla::BroadcastInDim( + input, + xla::ShapeUtil::MakeShape(input_shape.element_type(), broadcast_shape), + broadcast_dims); + if (broadcast_shape != output_dims) { + output = xla::Reshape(output, output_dims); + } + return output; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.h b/tensorflow/compiler/tf2xla/lib/broadcast.h new file mode 100644 index 0000000000000000000000000000000000000000..591e696f06b994a7fdea58bc95ba785f683ce7d1 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/broadcast.h @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_ + +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace tensorflow { + +// Broadcasts 'input' up to shape 'output_dims', using TensorFlow broadcasting +// rules. Supports broadcasting a dimension of size x to size x*y, i.e., tiling. +xla::StatusOr BroadcastTo(xla::XlaOp input, + absl::Span output_dims); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_ diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc index 38dfde165df47ca78a25a068a901cd1071aa55e2..2b1c2ced925d9fee7392986015a6e716a94d356f 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.cc +++ b/tensorflow/compiler/tf2xla/lib/scatter.cc @@ -38,12 +38,10 @@ xla::StatusOr XlaScatter( combiner, xla::XlaBuilder* builder) { TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer)); - TF_RETURN_IF_ERROR(builder->GetShape(updates).status()); + TF_ASSIGN_OR_RETURN(xla::Shape updates_shape, builder->GetShape(updates)); TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices)); absl::Span indices_dims = xla::AsInt64Slice(indices_shape.dimensions()); - absl::Span buffer_dims = - xla::AsInt64Slice(buffer_shape.dimensions()); // If the indices are N-dimensional, the minor dimension of indices contains // the indices to update. Otherwise the indices are all scalars. @@ -81,104 +79,129 @@ xla::StatusOr XlaScatter( } } - // Shape of the non-indexed dimensions of the buffer. - std::vector buffer_shape_post_axes( - buffer_dims.begin() + num_index_dims, buffer_dims.end()); - - // Flatten the major dimensions of indices and updates into a single dimension - // for ease of iteration. - std::vector flat_indices_shape({num_indices}); - if (indices_are_vectors) { - flat_indices_shape.push_back(num_index_dims); + // Example of a 1-D scatter that updates two [3,1] tensors in a tensor of + // shape [3,3]: + // NOTE: ***This case will not be generated by any of the tf.scatter ops.*** + // + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // updates = s32[3,2] parameter(2) + // scatter = s32[3,3] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={0}, + // inserted_window_dims={1}, + // scatter_dims_to_operand_dims={1}, + // index_vector_dim=1 + // + // + // Example of a 1-D scatter that updates two [1,3] tensors in a tensor of + // shape [3,3]: + // + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // updates = s32[2,3] parameter(2) + // scatter = s32[3,3] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={1}, + // inserted_window_dims={0}, + // scatter_dims_to_operand_dims={0}, + // index_vector_dim=1 + // + // + // Example of an N-D scatter updating slices of shape [1,1,2] in a tensor of + // shape [3,3,2] + // + // operand = s32[3,3,2] parameter(0) + // indices = s32[2,2] parameter(1) + // updates = s32[2,2] parameter(2) + // scatter = s32[3,3,2] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={1}, + // inserted_window_dims={0,1}, + // scatter_dims_to_operand_dims={0,1}, + // index_vector_dim=1 + // + // + // Example of a scatter updating slices of shape [] in a tensor of shape [1,1] + // + // operand = s32[1,1] parameter(0) + // indices = s32[1] parameter(1) + // updates = s32[1] parameter(2) + // scatter = s32[1,1] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={}, + // inserted_window_dims={0,1}, + // scatter_dims_to_operand_dims={0}, + // index_vector_dim=1 + // Note that updates operand would be broadcasted into [1] in this case. + // + + xla::ScatterDimensionNumbers dim_numbers; + dim_numbers.set_index_vector_dim(indices_are_vectors + ? indices_shape.dimensions_size() - 1 + : indices_shape.dimensions_size()); + + int64 updates_rank = xla::ShapeUtil::Rank(updates_shape); + int64 buffer_rank = xla::ShapeUtil::Rank(buffer_shape); + int64 num_window_dims_in_updates = buffer_rank - num_index_dims; + + // If the rank of `updates` is 0 and does not match the expected rank of + // updates, broadcast `updates` to the expected shape of updates. + auto new_updates = updates; + std::vector expected_updates_dims(indices_dims.begin(), + indices_dims.end()); + for (int64 dim = num_index_dims; dim < buffer_rank; ++dim) { + expected_updates_dims.push_back(buffer_shape.dimensions(dim)); + } + int64 expected_updates_rank = expected_updates_dims.size(); + if (updates_rank == 0 && expected_updates_rank != 0) { + new_updates = xla::Broadcast(updates, expected_updates_dims); + TF_ASSIGN_OR_RETURN(updates_shape, builder->GetShape(new_updates)); + updates_rank = xla::ShapeUtil::Rank(updates_shape); } - std::vector flat_updates_shape({num_indices}); - flat_updates_shape.insert(flat_updates_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - - // Construct the initial values of the loop-carried Tensors. - auto flat_indices = xla::Reshape(indices, flat_indices_shape); - auto flat_updates = xla::Reshape(updates, flat_updates_shape); - auto init = {flat_indices, flat_updates, buffer}; - - // Constructs the loop body. The implementation of scatter is essentially: - // for i in range(num_indices): - // index = dynamic-slice(indices, i) - // update = dynamic-slice(updates, i) - // buffer = dynamic-update-slice(buffer, update, index) - auto body_fn = [&](xla::XlaOp i, absl::Span loop_vars, - xla::XlaBuilder* body_builder) { - auto indices = loop_vars[0]; - auto updates = loop_vars[1]; - auto buffer = loop_vars[2]; - - auto zero_index = xla::ConstantLiteral( - body_builder, xla::LiteralUtil::Zero(indices_shape.element_type())); - - // Slice the i-th index from the indices array. - xla::XlaOp index; - auto indices_offset = xla::Reshape(i, {1}); - if (indices_are_vectors) { - indices_offset = xla::Pad(indices_offset, zero_index, - xla::MakeEdgePaddingConfig({{0, 1}})); - - index = xla::DynamicSlice(indices, indices_offset, {1, num_index_dims}); - index = xla::Collapse(index, {0, 1}); - } else { - index = xla::DynamicSlice(indices, indices_offset, {1}); + if (updates_rank > 0) { + for (int64 i = (updates_rank - num_window_dims_in_updates); + i < updates_rank; ++i) { + dim_numbers.add_update_window_dims(i); } + } - // Discard updates with negative indices, since some users expect this. - auto index_in_range = xla::ReduceAll( - xla::Le(zero_index, index), xla::ConstantR0(body_builder, true), - xla::CreateScalarAndComputation(xla::PRED, body_builder)); - - // Make the index in bounds to prevent implementation defined behavior. - index = xla::Max(index, zero_index); - index = xla::Pad( - index, zero_index, - xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); - - // Slice the i-th index from the updates array. - auto updates_offset = xla::Reshape(i, {1}); - updates_offset = xla::Pad( - updates_offset, zero_index, - xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); - std::vector flat_updates_slice_shape({1}); - flat_updates_slice_shape.insert(flat_updates_slice_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - auto update = - xla::DynamicSlice(updates, updates_offset, flat_updates_slice_shape); - - // Unflatten the major (iteration) dimensions of the slice to their - // original shape. - std::vector updates_slice_shape(num_index_dims, 1); - updates_slice_shape.insert(updates_slice_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - update = xla::Reshape(update, updates_slice_shape); - - // Apply the update to the buffer. If there is a combiner, use it to merge - // the current values with the update. - auto current_value = xla::DynamicSlice(buffer, index, updates_slice_shape); + for (int64 i = 0; i < num_index_dims; ++i) { + dim_numbers.add_inserted_window_dims(i); + dim_numbers.add_scatter_dims_to_operand_dims(i); + } + + // Build the combiner computation. + xla::XlaComputation combiner_computation; + { + xla::XlaBuilder cb("scatter-combiner"); + auto xla_scalar_shape = + xla::ShapeUtil::MakeShape(buffer_shape.element_type(), {}); + auto p0 = xla::Parameter(&cb, 0, xla_scalar_shape, "p0"); + auto p1 = xla::Parameter(&cb, 1, xla_scalar_shape, "p1"); if (combiner) { - update = combiner(current_value, update, body_builder); + combiner(p0, p1, &cb); } - // Use the current value instead of the update if the index is out of - // bounds. - update = xla::Select(index_in_range, update, current_value); - // Apply the update. - buffer = xla::DynamicUpdateSlice(buffer, update, index); - - return std::vector{indices, updates, buffer}; - }; - - TF_ASSIGN_OR_RETURN(auto outputs, - XlaForEachIndex(num_indices, indices_shape.element_type(), - body_fn, init, "scatter", builder)); - return outputs[2]; + combiner_computation = cb.Build().ConsumeValueOrDie(); + } + + VLOG(3) << "Scatter op:"; + VLOG(3) << " Input: " << xla::ShapeUtil::HumanString(buffer_shape); + VLOG(3) << " Indices: " << xla::ShapeUtil::HumanString(indices_shape); + VLOG(3) << " Updates: " << xla::ShapeUtil::HumanString(updates_shape); + VLOG(3) << " Scatter Dimension Numbers: "; + VLOG(3) << " index_vector_dim: " << dim_numbers.index_vector_dim(); + VLOG(3) << " update_window_dims: [" + << absl::StrJoin(dim_numbers.update_window_dims(), ",") << "]"; + VLOG(3) << " inserted_window_dims: [" + << absl::StrJoin(dim_numbers.inserted_window_dims(), ",") << "]"; + VLOG(3) << " scatter_dims_to_operand_dims: [" + << absl::StrJoin(dim_numbers.scatter_dims_to_operand_dims(), ",") + << "]"; + + return xla::Scatter(buffer, indices, new_updates, combiner_computation, + dim_numbers); } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h index 13a5f1b850a612bddeeac39bef431c19925351ca..4cf478c4b9b4316f1cf43f45d1bf90afa648fb11 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.h +++ b/tensorflow/compiler/tf2xla/lib/scatter.h @@ -34,7 +34,11 @@ namespace tensorflow { // Otherwise, `indices_are_vectors`, then indices are multidimensional and the // minor dimension of `indices` represents a vector of indices. // -// If any indices are negative, the corresponding update is discarded. +// If `updates` is a scalar, then it will be broadcasted into the expected shape +// of updates. +// +// If any part of the update region is out-of-bounds, the corresponding update +// is discarded. // // If a `combiner` is provided, updates are combined with the existing values in // the buffer using the combiner function. Otherwise, the updates replace the diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 733eeed3c661c9ed683f0fb7fd90f7f997b8dc2b..bd2c0a5ee88869ba60701c0a7ace05857452eed9 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -283,6 +283,8 @@ REGISTER_OP("XlaReduceWindow") .Input("init_value: T") .Input("window_dimensions: Tindices") .Input("window_strides: Tindices") + .Input("base_dilations: Tindices") + .Input("window_dilations: Tindices") .Input("padding: Tindices") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") @@ -354,12 +356,33 @@ Wraps the XLA Sort operator, documented at https://www.tensorflow.org/performance/xla/operation_semantics#sort . -Sorts a tensor. Currently only rank 1 sorts in ascending order are supported. +Sorts a tensor. Currently only sorts in ascending order are supported. input: A `Tensor` of type T. output: A `Tensor` of type T. )doc"); +REGISTER_OP("XlaKeyValueSort") + .Input("keys: K") + .Input("values: V") + .Output("sorted_keys: K") + .Output("sorted_values: V") + .Attr("K: realnumbertype") + .Attr("V: type") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Wraps the XLA Sort operator, documented at + https://www.tensorflow.org/performance/xla/operation_semantics#sort +. + +Sorts a tensor. Currently only sorts in ascending order are supported. + +keys: A `Tensor` of type K. +values: A `Tensor` of type V. +sorted_keys: A `Tensor` of type K. +sorted_values: A `Tensor` of type V. +)doc"); + // TODO(b/37549631) setting the While Op to always be stateful is too // conservative. REGISTER_OP("XlaWhile") diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index 27dd18a9bbd5aceece41aaf61eb185acb537b3b6..5e86b5d8ec0a2690f004bc67decea09185d9cbb6 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -212,9 +212,9 @@ bitcast_convert_type = array_ops.bitcast def broadcast(x, dims, name=None): x = ops.convert_to_tensor(x) - shape = array_ops.concat( - [constant_op.constant(dims), - array_ops.shape(x)], axis=0) + shape = array_ops.concat([constant_op.constant(dims), + array_ops.shape(x)], + axis=0) return array_ops.broadcast_to(x, shape, name=name) @@ -320,6 +320,8 @@ def reduce_window(operand, reducer, window_dimensions, window_strides=None, + base_dilations=None, + window_dilations=None, padding=None, name=None): """Wraps the XLA ReduceWindow operator. @@ -332,22 +334,27 @@ def reduce_window(operand, init: a scalar tensor representing the initial value for the reduction reducer: a reduction function that combines a pair of scalars. window_dimensions: shape of the window, as a list of integers - window_strides: inter-window strides, as a list of integers. Optional; - if omitted, defaults to strides of 1. + window_strides: inter-window strides, as a list of integers. Optional; if + omitted, defaults to strides of 1. padding: padding to apply to 'operand'. List of (low, high) pairs of integers that specify the padding to apply before and after each dimension. Optional; if omitted, defaults to no padding. name: the operator name, or None. + Returns: A tensor that represents the output of the reduce_window operator. """ window_strides = window_strides or [1] * len(window_dimensions) + base_dilations = base_dilations or [1] * len(window_dimensions) + window_dilations = window_dilations or [1] * len(window_dimensions) padding = padding or [(0, 0)] * len(window_dimensions) return gen_xla_ops.xla_reduce_window( input=operand, init_value=init, window_dimensions=window_dimensions, window_strides=window_strides, + base_dilations=base_dilations, + window_dilations=window_dilations, padding=padding, computation=reducer, name=name) @@ -377,4 +384,5 @@ def slice(x, start_dims, limit_dims, strides): sort = gen_xla_ops.xla_sort +key_value_sort = gen_xla_ops.xla_key_value_sort while_loop = gen_xla_ops.xla_while diff --git a/tensorflow/compiler/tf2xla/resource_operation_table.cc b/tensorflow/compiler/tf2xla/resource_operation_table.cc index 20f2ce2919701731ef6e90d368b67545af95e8f9..72b240996fb4d9dcb5f5dfd919da618cbae08c16 100644 --- a/tensorflow/compiler/tf2xla/resource_operation_table.cc +++ b/tensorflow/compiler/tf2xla/resource_operation_table.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/resource_operation_table.h" #include "absl/algorithm/container.h" -#include "tensorflow/core/lib/gtl/flatmap.h" +#include "absl/container/flat_hash_map.h" namespace tensorflow { /*static*/ absl::string_view XlaResourceOpInfo::XlaResourceOpKindToString( @@ -30,9 +30,9 @@ namespace tensorflow { } } -static gtl::FlatMap* +static absl::flat_hash_map* CreateResourceOpInfoMap() { - auto* result = new gtl::FlatMap; + auto* result = new absl::flat_hash_map; auto add = [&](absl::string_view op, XlaResourceOpKind op_kind, XlaResourceKind resource_kind) { @@ -103,15 +103,15 @@ CreateResourceOpInfoMap() { return result; } -static const gtl::FlatMap& +static const absl::flat_hash_map& GetStaticResourceOpInfoMap() { - static gtl::FlatMap* op_info_map = - CreateResourceOpInfoMap(); + static absl::flat_hash_map* + op_info_map = CreateResourceOpInfoMap(); return *op_info_map; } const XlaResourceOpInfo* GetResourceOpInfoForOp(absl::string_view op) { - const gtl::FlatMap& op_infos = + const absl::flat_hash_map& op_infos = GetStaticResourceOpInfoMap(); auto it = op_infos.find(op); return it == op_infos.end() ? nullptr : &it->second; diff --git a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc index a85ef040a7b65c2f6e405c3444eaef3019137b4b..956f597301d28d781a9df7ab2086ed79d4c8bf9d 100644 --- a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc +++ b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/resource_operation_table.h" #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -33,7 +34,7 @@ bool HasResourceInputOrOutput(const OpDef& op_def) { } TEST(ResourceOperationTableTest, HaveAllResourceOps) { - gtl::FlatMap known_resource_ops; + absl::flat_hash_map known_resource_ops; for (absl::string_view known_resource_op : resource_op_table_internal::GetKnownResourceOps()) { ASSERT_TRUE( diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index d6f42bac86f1ef359531d67b652d43d851d7ac02..01dd3ba10fec85e6b1d411fbd32fbf9c58b5fe11 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -336,9 +336,9 @@ bool HasAssociatedFunction(const NodeDef& node_def, } if (node_def.op() == FunctionLibraryDefinition::kGradientOp) { - // Skip gradient op. Gradient op has "f" attr, which is set to the function - // we are getting gradient for. That function is not associated with the op. - return false; + // Gradient op has "f" attr, which is set to the function we are getting + // gradient for. We need to functionalize the gradient function. + return true; } for (const auto& iter : node_def.attr()) { @@ -357,17 +357,18 @@ std::vector GetAssociatedFunctions( if (flr->GetFunctionLibraryDefinition()->Contains(op)) { // This is a function call node. AttrValueMap attrs(node.attrs().begin(), node.attrs().end()); - results.emplace_back(AssociatedFunctionInfo(op, attrs)); + results.emplace_back(AssociatedFunctionInfo::FunctionCall(op, attrs)); } else if (node.type_string() == FunctionLibraryDefinition::kGradientOp) { - // Skip gradient op. Gradient op has "f" attr, which is set to the function - // we are getting gradient for. That function is not associated with the op. + // This is a SymbolicGradient op. + AttrValueMap attrs(node.attrs().begin(), node.attrs().end()); + results.emplace_back(AssociatedFunctionInfo::SymbolicGradient(op, attrs)); } else { // Collect all function attrs for the node. for (auto& iter : node.attrs()) { if (iter.second.has_func()) { VLOG(2) << "Found function attr for node " << node.name() << ": " << iter.first << " = " << iter.second.func().name(); - results.emplace_back(AssociatedFunctionInfo( + results.emplace_back(AssociatedFunctionInfo::FunctionAttr( iter.second.func().name(), iter.second.func().attr(), iter.first)); } } @@ -410,6 +411,21 @@ Status RewriteAssociatedFunction( graph->RemoveNode(node); break; } + case AssociatedFunctionInfo::kSymbolicGradient: { + NameAttrList func; + TF_RETURN_IF_ERROR(GetNodeAttr( + node->attrs(), FunctionLibraryDefinition::kFuncAttr, &func)); + GradientDef gradient_def; + gradient_def.set_function_name(func.name()); + gradient_def.set_gradient_func(rewritten_function_name); + string original_grad_func = fld->FindGradient(func.name()); + if (original_grad_func.empty()) { + TF_RETURN_IF_ERROR(fld->AddGradientDef(gradient_def)); + } else if (original_grad_func != rewritten_function_name) { + TF_RETURN_IF_ERROR(fld->ReplaceGradient(gradient_def)); + } + break; + } case AssociatedFunctionInfo::kFunctionAttr: { // Change function attr to rewritten functions. NameAttrList func; diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.h b/tensorflow/compiler/tf2xla/tf2xla_util.h index 6065d0bb9a3abd23b8911c5049914be8a5f23b99..53eab8b63e2fc8aa3dfb0bacfe065897ca775bd0 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.h +++ b/tensorflow/compiler/tf2xla/tf2xla_util.h @@ -65,21 +65,33 @@ uint32 GetXLARandomSeed(); class AssociatedFunctionInfo { public: enum AssociatedFunctionType { - kFunctionCallNode = 0, - kFunctionAttr = 1, + kFunctionAttr = 0, + kFunctionCallNode = 1, + kSymbolicGradient = 2, }; - // The node is a function call. - AssociatedFunctionInfo(const string& func_name, const AttrValueMap& attrs) - : type_(kFunctionCallNode), func_name_(func_name), attrs_(attrs) {} - // The function is an attr of the node. - AssociatedFunctionInfo(const string& func_name, const AttrValueMap& attrs, - const string& attr_name) - : type_(kFunctionAttr), - func_name_(func_name), - attrs_(attrs), - attr_name_(attr_name) {} + static AssociatedFunctionInfo FunctionAttr(const string& func_name, + const AttrValueMap& attrs, + const string& attr_name) { + return AssociatedFunctionInfo(kFunctionAttr, func_name, attrs, attr_name); + } + + // The node is a function call. + static AssociatedFunctionInfo FunctionCall(const string& func_name, + const AttrValueMap& attrs) { + // attr_name will not be used in this case. + return AssociatedFunctionInfo(kFunctionCallNode, func_name, attrs, + /*attr_name=*/""); + } + + // The node is a SymbolicGradient op. + static AssociatedFunctionInfo SymbolicGradient(const string& func_name, + const AttrValueMap& attrs) { + // attr_name will not be used in this case. + return AssociatedFunctionInfo(kSymbolicGradient, func_name, attrs, + /*attr_name=*/""); + } AssociatedFunctionType type() const { return type_; } @@ -90,6 +102,13 @@ class AssociatedFunctionInfo { const AttrValueMap& attrs() const { return attrs_; } private: + AssociatedFunctionInfo(AssociatedFunctionType type, const string& func_name, + const AttrValueMap& attrs, const string& attr_name) + : type_(type), + func_name_(func_name), + attrs_(attrs), + attr_name_(attr_name) {} + // Available for all instances. AssociatedFunctionType type_; string func_name_; @@ -105,14 +124,18 @@ bool HasAssociatedFunction(const NodeDef& node_def, // Gets functions associated with the node. Current cases: // 1. For function call node, its function name; -// 2. For nodes like XlaWhile/XlaIf, all their function attributes. +// 2. For SymbolicGradient op, returned func_name will be "SymbolicGradient", +// and returned attrs will be this node's attributes; +// 3. For nodes like XlaWhile/XlaIf, all their function attributes. std::vector GetAssociatedFunctions( const Node& node, FunctionLibraryRuntime* flr); // Changes associated functions for the node. Current cases: // 1. For function call node, creates a new node with the new function name and // remove the old node; -// 2. For nodes like XlaWhile/XlaIf, modify their function attributes. +// 2. For SymbolicGradient op, add or replace GradientDef in +// FunctionLibraryDefinition; +// 3. For nodes like XlaWhile/XlaIf, modify their function attributes. Status RewriteAssociatedFunction( Graph* graph, Node* node, FunctionLibraryDefinition* fld, const AssociatedFunctionInfo& associated_function, diff --git a/tensorflow/compiler/tf2xla/type_util.h b/tensorflow/compiler/tf2xla/type_util.h index bda667eb1f16b80da415c7c5205df96a4ae93e4c..6354216eee7978dc2b4a59f5792a70f67d530b9b 100644 --- a/tensorflow/compiler/tf2xla/type_util.h +++ b/tensorflow/compiler/tf2xla/type_util.h @@ -25,6 +25,14 @@ namespace tensorflow { // Converts a Tensorflow DataType to an XLA PrimitiveType. Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type); +// N.B.: there is intentionally no function to convert an XLA PrimitiveType to +// a TensorFlow DataType. The mapping from TF types to XLA types is not +// one-to-one: for example, both DT_INT8 and DT_QINT8 map to xla::S8. So the +// inverse would not be a well-defined function. If you find that you want the +// inverse mapping, then most likely you should be preserving the original +// TensorFlow type, rather than trying to convert an XLA type into a TensorFlow +// type. + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_TYPE_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index d5094e8ec5ed95b8cdbad63762a7fbc718ba5f30..b2c57e88803e0661a9a514f844dff97ff9edf2ea 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -194,6 +194,17 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options, std::unique_ptr graph = GetGraph(fbody); + // Clear the "_kernel" attribute if it is set to "host". This is used to + // indicate that a computation should happen on the host instead of the + // accelerator, but doesn't make sense in XLA. + const char* const kKernelAttr = "_kernel"; + for (Node* n : graph->nodes()) { + string value; + if (GetNodeAttrSimple(n->attrs(), kKernelAttr, &value) && value == "host") { + n->ClearAttr(kKernelAttr); + } + } + // _Arg and _Retval nodes don't exist in the stored subgraph for the function; // they are added by the function body looked up. Therefore, they don't have // core assignments here. diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 2a9eaeee146bf6d792e010df7e041f9986b2c77e..dd3498ef7aa242d3ad946cae5f60bc2c8853a342 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -455,23 +455,43 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, return Status::OK(); } +Status XlaOpKernelContext::allocate_output(int index, const xla::Shape& shape, + Tensor** output) { + // The step's default allocator is the dummy XlaCompilationAllocator which + // simply allocates a metadata buffer to hold the expression to which it + // corresponds. + if (expected_output_dtype(index) == DT_VARIANT) { + // tensor_data() is not supported for variant Tensor (i.e., + // DataTypeCanUseMemcpy is false for DT_VARIANT), and so storing the + // XlaExpression inside the Tensor's tensor_data() does not work for + // variant. Instead construct a uint8 tensor and store the expression in its + // value. + // TODO(jpienaar): This should be refactored to stop masquerading + // XlaExpressions as Tensors. + *output = new Tensor(); + TensorShape tensor_shape; + TF_RETURN_IF_ERROR( + context_->allocate_temp(DT_UINT8, tensor_shape, *output)); + context_->set_output(index, **output); + } else { + TensorShape tensor_shape; + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &tensor_shape)); + TF_RETURN_IF_ERROR(context_->allocate_output(index, tensor_shape, output)); + } + return Status::OK(); +} + void XlaOpKernelContext::SetOutput(int index, const xla::XlaOp& handle) { // Makes the host Tensor that will refer to the expression. Tensor* output = nullptr; - auto shape = builder()->GetShape(handle); - if (!shape.ok()) { - SetStatus(shape.status()); + auto shape_or = builder()->GetShape(handle); + if (!shape_or.ok()) { + SetStatus(shape_or.status()); return; } - // The step's default allocator is the dummy XlaCompilationAllocator which - // simply allocates a metadata buffer to hold the expression to which it - // corresponds. - TensorShape tensor_shape; - OP_REQUIRES_OK(context_, - XLAShapeToTensorShape(shape.ValueOrDie(), &tensor_shape)); OP_REQUIRES_OK(context_, - context_->allocate_output(index, tensor_shape, &output)); + allocate_output(index, shape_or.ValueOrDie(), &output)); // The expression is stored in the tensor's data buffer. Fill in the // fields now. diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index a3a0d10cc06cd4afceec728b7dbe287389099b9d..aa00a454968ad29495e34dc080e55b62bb0b5f7b 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -255,6 +255,11 @@ class XlaOpKernelContext { // Returns the tensor of input `name`. const Tensor& GetInputTensorByName(absl::string_view name); + // Wraps OpKernelContext's allocate_output method while providing special + // behavior for DT_VARIANT: a variant is treated as DT_UINT8 scalar as the + // type to allow mapping for variant to more generic types. + Status allocate_output(int index, const xla::Shape& shape, Tensor** output); + OpKernelContext* const context_; }; diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index f825f67b447514a416f3a49ac8aad9dcf505f5a7..dc097f3696e22d75d7dc72ec4877a9c8b5dda059 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -220,6 +220,8 @@ cc_library( "//tensorflow/compiler/xla/service:shape_inference", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 95ff6432a591f87845729b180397e33a85e5e9a5..e7cf9ae36389056c4732285dd9667f4dcd4115bd 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -33,7 +34,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/mutex.h" namespace xla { @@ -208,6 +208,9 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle, case HloOpcode::kWhile: // TODO(b/32495713): We aren't checking the condition and body // computations themselves. + case HloOpcode::kScatter: + // TODO(b/32495713): We aren't checking the embedded computation in + // Scatter. case HloOpcode::kSend: case HloOpcode::kRecv: case HloOpcode::kParameter: @@ -1276,9 +1279,10 @@ XlaOp XlaBuilder::AfterAll(absl::Span tokens) { }); } -XlaOp XlaBuilder::CustomCall(const string& call_target_name, - absl::Span operands, - const Shape& shape) { +XlaOp XlaBuilder::CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape, const string& opaque, + absl::optional> operand_shapes_with_layout) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; if (absl::StartsWith(call_target_name, "$")) { @@ -1289,6 +1293,32 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name, } *instr.mutable_shape() = shape; instr.set_custom_call_target(call_target_name); + instr.set_custom_call_opaque(opaque); + if (operand_shapes_with_layout.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return InvalidArgument( + "Result shape must have layout for custom call with constrained " + "layout."); + } + if (operands.size() != operand_shapes_with_layout->size()) { + return InvalidArgument( + "Must specify a shape with layout for each operand for custom call " + "with constrained layout; given %d shapes, expected %d", + operand_shapes_with_layout->size(), operands.size()); + } + instr.set_constrain_layout(true); + int64 operand_num = 0; + for (const Shape& operand_shape : *operand_shapes_with_layout) { + if (!LayoutUtil::HasLayout(operand_shape)) { + return InvalidArgument( + "No layout specified for operand %d for custom call with " + "constrained layout.", + operand_num); + } + *instr.add_operand_shapes_with_layout() = operand_shape; + ++operand_num; + } + } return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands); }); } @@ -1785,9 +1815,9 @@ XlaOp XlaBuilder::ReduceWindow(const XlaOp& operand, const XlaOp& init_value, std::vector> padding_values = MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions, window_strides, padding); - return ReduceWindowWithGeneralPadding(operand, init_value, computation, - window_dimensions, window_strides, - padding_values); + return ReduceWindowWithGeneralPadding( + operand, init_value, computation, window_dimensions, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding_values); }); } @@ -1796,6 +1826,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; @@ -1806,7 +1838,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( computation.GetProgramShape()); TF_ASSIGN_OR_RETURN(*instr.mutable_window(), MakeWindow(window_dimensions, window_strides, padding, - /*lhs_dilation=*/{}, /*rhs_dilation=*/{})); + /*lhs_dilation=*/base_dilations, + /*rhs_dilation=*/window_dilations)); TF_ASSIGN_OR_RETURN( *instr.mutable_shape(), ShapeInference::InferReduceWindowShape(operand_shape, init_shape, @@ -2289,7 +2322,7 @@ StatusOr XlaBuilder::BuildConstantSubGraph( // also a valid dependency order). The related ops will be added to the // subgraph in the same order. std::set related_ops; - tensorflow::gtl::FlatSet related_calls; // Related computations. + absl::flat_hash_set related_calls; // Related computations. std::queue worklist; worklist.push(root->id()); related_ops.insert(root->id()); @@ -2681,8 +2714,18 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, } XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape) { - return builder->CustomCall(call_target_name, operands, shape); + absl::Span operands, const Shape& shape, + const string& opaque) { + return builder->CustomCall(call_target_name, operands, shape, opaque, + /*operand_shapes_with_layout=*/absl::nullopt); +} + +XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape, + absl::Span operand_shapes_with_layout, + const string& opaque) { + return builder->CustomCall(call_target_name, operands, shape, opaque, + operand_shapes_with_layout); } XlaOp Complex(const XlaOp& real, const XlaOp& imag, @@ -2795,10 +2838,12 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return operand.builder()->ReduceWindowWithGeneralPadding( operand, init_value, computation, window_dimensions, window_strides, - padding); + base_dilations, window_dilations, padding); } XlaOp CrossReplicaSum(const XlaOp& operand, diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index d0c59fa6f27bc265c0868734ed95a196002fbd2e..9ceede7a795168716120e74f18a8053390e92801 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -21,6 +21,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/client/padding.h" @@ -34,8 +36,6 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stacktrace.h" #include "tensorflow/core/platform/types.h" @@ -577,11 +577,10 @@ class XlaBuilder { absl::Span operands); // Enqueues a custom call instruction onto the computation. - // During code generation, a call instruction is emitted which targets a - // symbol with the name |call_target_name|. The |operands| are passed to the - // call instruction. |shape| is the resultant shape. - XlaOp CustomCall(const string& call_target_name, - absl::Span operands, const Shape& shape); + XlaOp CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape_with_layout, const string& opaque, + absl::optional> operand_shapes_with_layout); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one @@ -673,6 +672,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. All @@ -1029,7 +1030,7 @@ class XlaBuilder { // A map from XlaOp::Handle to the index in the instructions_ vector where the // instruction is held. - tensorflow::gtl::FlatMap handle_to_index_; + absl::flat_hash_map handle_to_index_; // The embedded computations used by this computation. Each computation was // the entry computation of some XlaComputation, the key is the unique id of @@ -1037,7 +1038,7 @@ class XlaBuilder { std::map embedded_; // The unique parameter numbers. - tensorflow::gtl::FlatSet parameter_numbers_; + absl::flat_hash_set parameter_numbers_; // The metadata to attach to each op. This is structured as a "modal"-like // operation, in order to simplify client code (and not sprinkle this metadata @@ -1195,7 +1196,12 @@ class XlaBuilder { friend XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, absl::Span operands); friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape); + absl::Span operands, const Shape& shape, + const string& opaque); + friend XlaOp CustomCallWithLayout( + XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, const string& opaque); friend XlaOp Complex(const XlaOp& real, const XlaOp& imag, absl::Span broadcast_dimensions); friend XlaOp Conj(const XlaOp& operand); @@ -1246,6 +1252,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); friend XlaOp CrossReplicaSum(const XlaOp& operand, absl::Span replica_groups); @@ -1717,12 +1725,28 @@ XlaOp OutfeedWithToken(const XlaOp& operand, const XlaOp& token, XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, absl::Span operands); -// Enqueues a custom call instruction onto the computation. -// During code generation, a call instruction is emitted which targets a -// symbol with the name |call_target_name|. The |operands| are passed to the -// call instruction. |shape| is the resultant shape. +// Enqueues a custom call instruction onto the computation. A custom call +// invokes code external to XLA. The |operands| are passed to the external code, +// and the external code is expected to produce a result of the given +// |shape|. The exact mechanism is backend-specific. For example, in the CPU +// backend, a call instruction is emitted which targets a symbol with the name +// |call_target_name|. |call_target_name| and |opaque| can arbitrary strings, +// but |call_target_name| should be short as it may be used in labels. |opaque| +// can encode arbitrarily large amounts of information. XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape); + absl::Span operands, const Shape& shape, + const string& opaque = ""); + +// Overload which constructs a custom call with fixed layouts. The operands will +// have the layouts specified by |operand_shapes_with_layout| when provided to +// external code, and the external code is expected to produce a result with the +// layout specified by |shape_with_layout|. All shapes in |shape_with_layout| +// and |operand_shapes_with_layout| must have layouts. +XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, + const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, + const string& opaque = ""); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one @@ -1814,6 +1838,8 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. All diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index d310335618ded7b581e6ed632223218585bb791f..3c8db9aa451e3ede433547307f33f0d15d6cd5f1 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -65,6 +65,12 @@ void SetDefaultLayoutToContainer( return layout; } +/* static */ Layout LayoutUtil::MakeDescendingLayout(int64 rank) { + std::vector layout(rank); + std::iota(layout.rbegin(), layout.rend(), static_cast(0)); + return MakeLayout(layout); +} + /* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor( absl::Span major_to_minor) { Layout layout; diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index b78883c2d870043032306637730c4666665125a8..af032b1cae4c5645d6c7da55b779cd0a7336592e 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -40,6 +40,10 @@ class LayoutUtil { static Layout MakeLayoutFromMajorToMinor( absl::Span major_to_minor); + // Returns a layout with descending ((i.e. {n, n-1, ..., 0}) minor-to-major + // dimensions. + static Layout MakeDescendingLayout(int64 rank); + // Creates a sparse layout with the given maximum number of elements. (This is // a convenience function for protobuf construction.) static Layout MakeSparseLayout(int64 max_sparse_elements); diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 5035f4198890857fcafd0156d7eaeeb4bc164322..656ce720a13d5c9622e9dc05ae04ddcac8cbeee5 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -287,6 +287,8 @@ Status MutableLiteralBase::CopyElementFrom(const LiteralSlice& src_literal, return InvalidArgument("LiteralProto has no layout"); } + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape())); + Literal literal(proto.shape()); TF_RETURN_IF_ERROR(literal.root_piece_->ForEachMutableSubpieceWithStatus( @@ -725,16 +727,34 @@ Literal LiteralBase::Slice(absl::Span start_indices, ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions, LayoutUtil::MinorToMajor(shape())); switch (result_shape.element_type()) { - case F32: - return SliceInternal(result_shape, start_indices); + case PRED: + return SliceInternal(result_shape, start_indices); + case U8: + return SliceInternal(result_shape, start_indices); + case U16: + return SliceInternal(result_shape, start_indices); + case U32: + return SliceInternal(result_shape, start_indices); + case U64: + return SliceInternal(result_shape, start_indices); + case S8: + return SliceInternal(result_shape, start_indices); + case S16: + return SliceInternal(result_shape, start_indices); + case S32: + return SliceInternal(result_shape, start_indices); + case S64: + return SliceInternal(result_shape, start_indices); + case F16: + return SliceInternal(result_shape, start_indices); case BF16: return SliceInternal(result_shape, start_indices); + case F32: + return SliceInternal(result_shape, start_indices); + case F64: + return SliceInternal(result_shape, start_indices); case C64: return SliceInternal(result_shape, start_indices); - case S32: - return SliceInternal(result_shape, start_indices); - case U32: - return SliceInternal(result_shape, start_indices); default: LOG(FATAL) << "not yet implemented: " << PrimitiveType_Name(result_shape.element_type()); @@ -1850,6 +1870,24 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) { TF_RET_CHECK(LayoutUtil::HasLayout(proto.shape())); TF_RET_CHECK(ShapeUtil::Equal(proto.shape(), subshape())); + if (LayoutUtil::IsSparseArray(subshape())) { + // Compute the number of elements (indices) in the sparse shape and reserve + // the necessary space in spare_indices. + TF_RET_CHECK(ShapeUtil::Rank(subshape()) != 0) + << "Scalar shapes cannot be sparse"; + TF_RET_CHECK(proto.sparse_indices_size() % ShapeUtil::Rank(subshape()) == 0) + << "Unexpected number of indices in proto (" + << proto.sparse_indices_size() << ") for shape of rank " + << ShapeUtil::Rank(subshape()); + const int64 index_count = + proto.sparse_indices_size() / ShapeUtil::Rank(subshape()); + sparse_indices()->Resize(index_count); + + // Copy the indices from the proto into the SparseIndexArray object. + TF_RETURN_IF_ERROR(CopyFromRepeatedField(sparse_indices()->mutable_data(), + proto.sparse_indices())); + } + switch (subshape().element_type()) { case PRED: TF_RETURN_IF_ERROR(CopyFromRepeatedField(data(), proto.preds())); @@ -1907,11 +1945,11 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) { } } break; case TUPLE: - LOG(FATAL) << "Should not be called on tuple shapes: " - << ShapeUtil::HumanString(subshape()); - break; + return InvalidArgument("Should not be called on tuple shapes: %s", + ShapeUtil::HumanString(subshape())); default: - LOG(FATAL) << "Unhandled primitive type " << subshape().element_type(); + return InvalidArgument("Is called on unsupported shape: %s", + ShapeUtil::HumanString(subshape())); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index 7ad287c8973367fb04583e6911ff75e76bdf5f1e..dd5b54e4c99998f676419cf98a3da16593338829 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -224,6 +224,16 @@ TEST_F(LiteralUtilTest, CreateSparse) { absl::Span(expected_indices.data(), expected_indices.num_elements())); EXPECT_EQ(literal.data(), absl::Span(expected_values)); + + // Serialize then deserialize and verify the resulting literal. + TF_ASSERT_OK_AND_ASSIGN(Literal literal_from_proto, + Literal::CreateFromProto(literal.ToProto())); + + EXPECT_EQ(literal_from_proto.sparse_indices()->data(), + absl::Span(expected_indices.data(), + expected_indices.num_elements())); + EXPECT_EQ(literal_from_proto.data(), + absl::Span(expected_values)); } TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index cd5fd330298fb0ff158e232dac121f8ffb271218..ffa336f30417eab1e4b16e278a97130c5fd57f88 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -532,10 +532,13 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return xla::ReduceWindowWithGeneralPadding( operand.op(), init_value.op(), local_computation.computation(), - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); } LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu, diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index 2166bb6721ca380f3180a8802e4922f2e9e45945..43332e0abd410c08dc5a40f7de39dbc96d34a72c 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -278,6 +278,8 @@ class LocalComputationBuilder { const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span > padding); LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma, diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index bb303c5678a2cac9a9e78925e857ab25c0c6d9be..f8197488fb3bacb312cc7fbf149b773851992b8a 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -995,7 +995,30 @@ class ComputationBuilder(object): window_strides) return self._client.ReduceWindowWithGeneralPadding( operand, init_value, computation_to_apply.c_local_computation, - window_dimensions, window_strides, pads) + window_dimensions, window_strides, (), (), pads) + + def ReduceWindowWithGeneralPadding( + self, operand, init_value, computation_to_apply, window_dimensions, + window_strides, base_dilations, window_dilations, padding): + """Enqueues a windowed reduction operation onto the computation. + + Args: + operand: reduction operand (LocalOp). + init_value: reduction initial value (LocalOp). + computation_to_apply: a binary reduction function (Computation). + window_dimensions: dimensions of window (sequence of integers). + window_strides: strides for window (sequence of integers). + base_dilations: dilations for the base (sequence of integers). + window_dilations: dilations for window (sequence of integers). + padding: length-N array-like of pairs of integers of (low, high) padding. + + Returns: + A LocalOp representing the added ReduceWindow op. + """ + return self._client.ReduceWindowWithGeneralPadding( + operand, init_value, computation_to_apply.c_local_computation, + window_dimensions, window_strides, base_dilations, window_dilations, + padding) def RngNormal(self, mu, sigma, dims): """Enqueues an RngNormal operation onto the computation. diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index e800cf470cfd129f93c2a1be586e03bebcaec987..26ebb88e965dbaacd4bc11ce23b2a5701973e161 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -146,6 +146,8 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -182,6 +184,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", @@ -251,6 +254,7 @@ cc_library( "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -290,12 +294,14 @@ cc_library( srcs = [ "dfs_hlo_visitor.cc", "hlo_computation.cc", + "hlo_input_output_alias_config.cc", "hlo_instruction.cc", "hlo_instructions.cc", "hlo_module.cc", "hlo_opcode.cc", "hlo_schedule.cc", "hlo_sharding.cc", + "hlo_sharding_metadata.cc", ], hdrs = [ "dfs_hlo_visitor.h", @@ -303,12 +309,14 @@ cc_library( "hlo_clone_context.h", "hlo_computation.h", "hlo_domain_metadata.h", + "hlo_input_output_alias_config.h", "hlo_instruction.h", "hlo_instructions.h", "hlo_module.h", "hlo_opcode.h", "hlo_schedule.h", "hlo_sharding.h", + "hlo_sharding_metadata.h", ], deps = [ ":hlo_casting_utils", @@ -333,6 +341,8 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -395,6 +405,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/types:span", ], ) @@ -485,6 +496,8 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -776,6 +789,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -903,6 +917,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], @@ -952,6 +967,8 @@ cc_library( deps = [ "//tensorflow/compiler/xla:types", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -987,6 +1004,8 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], @@ -1034,6 +1053,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -1087,6 +1108,7 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], @@ -1125,6 +1147,8 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", ], ) @@ -1146,6 +1170,7 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", ], ) @@ -1196,6 +1221,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:optional", ], @@ -1216,6 +1242,8 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", @@ -1242,6 +1270,25 @@ tf_cc_test( ], ) +tf_cc_test( + name = "hlo_input_output_alias_config_test", + srcs = ["hlo_input_output_alias_config_test.cc"], + deps = [ + ":hlo", + ":hlo_dce", + ":hlo_memory_scheduler", + ":hlo_ordering", + ":hlo_parser", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + "@com_google_absl//absl/algorithm:container", + ], +) + cc_library( name = "hlo_memory_scheduler", srcs = ["hlo_memory_scheduler.cc"], @@ -1260,6 +1307,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -1280,6 +1329,7 @@ tf_cc_test( "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -1294,16 +1344,26 @@ cc_library( ], ) +cc_library( + name = "fusion_queue", + hdrs = ["fusion_queue.h"], + deps = [ + ":hlo", + ], +) + cc_library( name = "instruction_fusion", srcs = ["instruction_fusion.cc"], hdrs = ["instruction_fusion.h"], deps = [ + ":fusion_queue", ":hlo", ":hlo_pass", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", ], ) @@ -1330,6 +1390,8 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -1385,6 +1447,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], @@ -1640,6 +1703,8 @@ cc_library( ":while_loop_analysis", "//tensorflow/compiler/xla:statusor", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], @@ -1671,6 +1736,7 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -1796,42 +1862,6 @@ tf_cc_test( ], ) -cc_library( - name = "inliner", - srcs = ["inliner.cc"], - hdrs = ["inliner.h"], - deps = [ - ":hlo", - ":hlo_pass", - ":hlo_query", - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:types", - "//tensorflow/core:lib", - "@com_google_absl//absl/types:span", - ], -) - -tf_cc_test( - name = "inliner_test", - srcs = ["inliner_test.cc"], - deps = [ - ":cpu_plugin", - ":hlo", - ":hlo_matchers", - ":inliner", - "//tensorflow/compiler/xla:literal", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:hlo_verified_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/compiler/xla/tests:xla_internal_test_main", - "@com_google_absl//absl/memory", - ], -) - cc_library( name = "computation_placer", srcs = ["computation_placer.cc"], @@ -2043,6 +2073,7 @@ cc_library( ":logical_buffer", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -2078,6 +2109,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", @@ -2099,6 +2131,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -2182,6 +2215,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -2203,6 +2237,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", ], @@ -2263,6 +2299,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -2319,6 +2357,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -2345,6 +2385,8 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -2416,6 +2458,7 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], @@ -2428,6 +2471,7 @@ tf_cc_test( ":hlo", ":hlo_parser", ":hlo_verifier", + ":layout_assignment", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", @@ -2460,6 +2504,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -2588,6 +2634,8 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -2627,6 +2675,7 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", ], ) @@ -2701,26 +2750,12 @@ cc_library( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", ], ) -cc_library( - name = "hlo_sharding_metadata", - srcs = ["hlo_sharding_metadata.cc"], - hdrs = [ - "hlo_sharding_metadata.h", - ], - deps = [ - ":hlo", - "//tensorflow/compiler/xla:shape_tree", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/core:lib", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", - ], -) - cc_library( name = "hlo_domain_verifier", srcs = ["hlo_domain_verifier.cc"], @@ -2771,7 +2806,6 @@ tf_cc_test( ":hlo_domain_isolator", ":hlo_domain_remover", ":hlo_parser", - ":hlo_sharding_metadata", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", "//tensorflow/compiler/xla/tests:hlo_test_base", @@ -3147,6 +3181,7 @@ cc_library( ":hlo_pass_pipeline", "//tensorflow/compiler/xla:shape_util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -3269,6 +3304,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", ], ) @@ -3298,6 +3335,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", ], ) @@ -3354,6 +3392,8 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:ptr_util", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -3381,7 +3421,6 @@ cc_library( deps = [ ":hlo", ":hlo_lexer", - ":hlo_sharding_metadata", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", @@ -3439,6 +3478,39 @@ cc_library( deps = ["//tensorflow/core:lib"], ) +cc_library( + name = "map_inliner", + srcs = ["map_inliner.cc"], + hdrs = ["map_inliner.h"], + deps = [ + ":hlo", + ":hlo_pass", + ":hlo_query", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "@com_google_absl//absl/types:span", + ], +) + +tf_cc_test( + name = "map_inliner_test", + srcs = ["map_inliner_test.cc"], + deps = [ + ":hlo", + ":hlo_matchers", + ":map_inliner", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + "@com_google_absl//absl/memory", + ], +) + tf_cc_test( name = "hlo_casting_utils_test", srcs = ["hlo_casting_utils_test.cc"], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 75dae7a7141647d7b7b60b0e07e11c143621ea63..86d9dbea90dbf4ac1eb93e261f25e0e192aa5589 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2057,6 +2057,12 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( return Status::OK(); } + // Bail on dilation. + if (window_util::HasDilation(window)) { + VLOG(10) << "Not folding pad into reduce-window as there is dilation."; + return Status::OK(); + } + VLOG(10) << "Considering folding Pad: " << pad->ToString() << "\ninto reduce-window: " << reduce_window->ToString() << (convert != nullptr diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index a7d8927cf7e90d764ff8046df16c71922b11478e..43feccee3c67152c6f61098bb98d546379848b8c 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -110,7 +111,7 @@ class AllocationTracker { // A map from device memory opaque value to allocation. One such map is // maintained per device ordinal. - using AllocationMap = tensorflow::gtl::FlatMap; + using AllocationMap = absl::flat_hash_map; tensorflow::mutex mutex_; @@ -123,10 +124,7 @@ class AllocationTracker { int64 next_handle_ GUARDED_BY(mutex_); // A map from device ordinal to AllocationMap. - // - // This is not a TF FlatMap because (currently) FlatMap (and therefore - // AllocationMap) is not movable. - std::unordered_map opaque_to_allocation_map_ + absl::flat_hash_map opaque_to_allocation_map_ GUARDED_BY(mutex_); // A map from data handle to a vector of shaped buffers that represent the @@ -146,7 +144,7 @@ class AllocationTracker { // non-owning "view" into a tuple's sub-buffers. The sub-buffers are then // free'd when both the view *and* the original tuple are Unregistered. This // refcounting is managed in opaque_to_allocation_map_. - tensorflow::gtl::FlatMap>> + absl::flat_hash_map>> handle_to_shaped_buffers_ GUARDED_BY(mutex_); TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker); diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc index 30d33e0d3531bb5e931ebfa0b60c91523dd0cb44..f70f6ddfec69c0113a1afe2073a2392098f49456 100644 --- a/tensorflow/compiler/xla/service/batchnorm_expander.cc +++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc @@ -35,7 +35,6 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 58f78f8e24d0bc00a63e3583828cf8e01ae4531a..002be9c97098ef1f73446c458dae24bbc826a626 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/bfloat16_propagation.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -81,7 +82,7 @@ void BFloat16Propagation::RevertIfFusionInternalBF16Changes( }; auto root = fusion->fused_instructions_computation()->root_instruction(); - tensorflow::gtl::FlatSet changed_root_buffers; + absl::flat_hash_set changed_root_buffers; auto root_changes_it = changes_to_bf16_.find(root); if (root_changes_it != changes_to_bf16_.end()) { @@ -500,7 +501,7 @@ void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) { bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( HloComputation* computation, - tensorflow::gtl::FlatSet* visited_computations) { + absl::flat_hash_set* visited_computations) { bool parameter_changed = false; auto insts = computation->MakeInstructionPostOrder(); // Do the adjustment on each instruction in the computation in reverse @@ -560,7 +561,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( // another input parameter. A fixed point will be reached because the // parameters can only be changed from BF16 to F32, not the other way // around. - tensorflow::gtl::FlatSet visited_in_while; + absl::flat_hash_set visited_in_while; while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(), &visited_in_while) || ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(), @@ -587,7 +588,7 @@ void BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( HloModule* module) { const auto& computations_topological_order = module->MakeComputationPostOrder(); - tensorflow::gtl::FlatSet resolved; + absl::flat_hash_set resolved; for (auto comp_it = computations_topological_order.rbegin(); comp_it != computations_topological_order.rend(); ++comp_it) { if (ContainsKey(resolved, *comp_it)) { diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index 6a62439f8877634a065979d1e2fcda262ca83dc1..5fcaa15c8356107af02e9099874a293d8350c51a 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -21,6 +21,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/bfloat16_support.h" #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -81,7 +83,7 @@ class BFloat16Propagation : public HloModulePass { // The set of instructions to consider using bfloat16, computed in the forward // pass. - tensorflow::gtl::FlatSet consider_using_bfloat16_; + absl::flat_hash_set consider_using_bfloat16_; // *************************** // Functions called and state produced by the backward pass (from root to @@ -110,12 +112,12 @@ class BFloat16Propagation : public HloModulePass { // The set of HloInstructions that have been visited in the // opportunity-finding pass. - tensorflow::gtl::FlatSet + absl::flat_hash_set instructions_visited_in_backward_pass_; // The set of HloComputations that have been visited in the // opportunity-finding pass. - tensorflow::gtl::FlatSet + absl::flat_hash_set computations_visited_in_backward_pass_; // *************************** @@ -131,7 +133,7 @@ class BFloat16Propagation : public HloModulePass { // point is reached. bool ResolveInconsistencyOfAliasingBuffersHelper( HloComputation* computation, - tensorflow::gtl::FlatSet* visited_computations); + absl::flat_hash_set* visited_computations); // Makes the parameters of called computations match how they are called by // the given HLO. @@ -182,11 +184,11 @@ class BFloat16Propagation : public HloModulePass { PrimitiveType target_type); // The set of F32 HLO values that must be kept in F32. - tensorflow::gtl::FlatSet values_that_must_be_kept_as_f32_; + absl::flat_hash_set values_that_must_be_kept_as_f32_; // Mapping from each HloComputation to the number of callers to it in the // module. Populated at the beginning of this pass. - tensorflow::gtl::FlatMap caller_counts_; + absl::flat_hash_map caller_counts_; // We first store the potential F32-to-BF16 changes to changes_to_bf16_, which // are subject to further adjustment, then finally applied to the HLOs. This @@ -195,8 +197,7 @@ class BFloat16Propagation : public HloModulePass { // // For each HloInstruction, changes_to_bf16_ stores the affected buffers in // the output as a map from in-place pointers to subshapes to shape indices. - tensorflow::gtl::FlatMap> + absl::flat_hash_map> changes_to_bf16_; // Whether the last processed HLO module has been changed by this pass. diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index 23645346e6f491beb5171cc839c013ce5f83d789..5b48f10505e78c035608d4c575501e4623218987 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -78,8 +78,10 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( const HloInstruction& hlo, int64 operand_index) { switch (hlo.opcode()) { case HloOpcode::kAbs: + case HloOpcode::kAllToAll: case HloOpcode::kBroadcast: case HloOpcode::kClamp: + case HloOpcode::kCollectivePermute: case HloOpcode::kConcatenate: case HloOpcode::kConvert: case HloOpcode::kCopy: diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 34a7be0e9c079e9e42c28eef10af4079e99853b6..d5d6a044a81303425495202d8a98c6735b0b8b89 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -22,6 +22,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -41,10 +43,10 @@ limitations under the License. namespace xla { namespace { +using absl::flat_hash_map; +using absl::flat_hash_set; using absl::StrAppend; using absl::StrAppendFormat; -using ::tensorflow::gtl::FlatMap; -using ::tensorflow::gtl::FlatSet; using ::tensorflow::strings::HumanReadableNumBytes; template @@ -128,8 +130,8 @@ Status GatherComputationsByAllocationType( // Sets for quickly checking membership. Computations are returned in vectors // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; + flat_hash_set thread_local_set; + flat_hash_set global_set; while (!worklist.empty()) { auto worklist_front = worklist.front(); @@ -237,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice( void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size) { - VLOG(4) << "Trying to add " << buffer << " to " << this; + VLOG(4) << "Trying to add " << buffer << " to allocation #" << index(); CHECK(assigned_buffers_.count(&buffer) == 0) << "LogicalBuffer " << buffer << " already assigned to allocation " << index_; @@ -444,7 +446,7 @@ bool BufferAssignment::SharesSliceAtIndex( bool BufferAssignment::HaveDisjointSlices(const HloInstruction* hlo_a, const HloInstruction* hlo_b) const { using SliceSet = - FlatSet; + flat_hash_set; // Gets the slices all of instr's subshapes. If any subshape doesn't have an // assigned slice, returns the empty set. auto collect_slices = [&](const HloInstruction* instr) -> SliceSet { @@ -519,7 +521,8 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation, // BufferAllocation. void BufferAssignment::CombineTempAllocations() { VLOG(1) << "CombineTempAllocations()"; - FlatMap + flat_hash_map combined_allocation_map; // Move all temp allocations into a single run at the end of the allocations @@ -582,7 +585,8 @@ void BufferAssignment::CombineTempAllocations() { } // Update allocation indices to their new positions. - allocation_index_for_buffer_.clear_no_resize(); + allocation_index_for_buffer_.erase(allocation_index_for_buffer_.begin(), + allocation_index_for_buffer_.end()); for (size_t index = 0; index < allocations_.size(); ++index) { BufferAllocation* allocation = &allocations_[index]; allocation->set_index(index); @@ -780,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } } - if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - const HloComputation* entry_computation = - assignment->module_->entry_computation(); - for (auto param : entry_computation->parameter_instructions()) { - for (auto& param_buffer : - assignment->points_to_analysis().GetBuffersDefinedByInstruction( - param)) { - if (assignment->liveness().MayInterfere(*param_buffer, buffer)) { - VLOG(4) << "Can't assign: Parameter interference with result"; - return false; - } - } - } - } - // If the buffer is live out of the computation then it should only be // assigned a buffer which exactly fits the result to avoid wasting memory // (result buffers can have arbitrary lifetimes). @@ -812,9 +801,9 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, Status BufferAssigner::AssignBuffersForComputation( const HloComputation* computation, bool is_thread_local, - const FlatSet& colocated_buffers, - const FlatSet& colocated_allocations, - FlatMap>* + const flat_hash_set& colocated_buffers, + const flat_hash_set& colocated_allocations, + flat_hash_map>* buffers_to_assign_sequentially, BufferAssignment* assignment) { // Buffers are sorted and assigned to BufferAllocations in decreasing order of @@ -833,7 +822,7 @@ Status BufferAssigner::AssignBuffersForComputation( // Generate a post order sort of instructions for sorting of the // LogicalBuffers. - FlatMap post_order_position; + flat_hash_map post_order_position; int position = 0; for (auto* instruction : computation->MakeInstructionPostOrder()) { post_order_position.emplace(instruction, position); @@ -850,8 +839,8 @@ Status BufferAssigner::AssignBuffersForComputation( // buffers_to_assign_sequentially map, even if we end up with an empty set // of buffers. This ensures we can correctly determine whether to run // whole-module heap simulation. - buffers_to_assign_sequentially->emplace(computation, - FlatSet()); + buffers_to_assign_sequentially->emplace( + computation, flat_hash_set()); } // Sort the LogicalBuffers first by size. We assign the larger LogicalBuffers @@ -1043,12 +1032,12 @@ Status BufferAssigner::AssignBuffersForComputation( return Status::OK(); } -FlatMap, - LogicalBuffer::Color::Hasher> +flat_hash_map, + LogicalBuffer::Color::Hasher> BufferAssigner::SplitBuffersByColor( - const FlatSet& buffers) { - FlatMap, - LogicalBuffer::Color::Hasher> + const flat_hash_set& buffers) { + flat_hash_map, + LogicalBuffer::Color::Hasher> color_map; for (auto buffer : buffers) { color_map[buffer->color()].insert(buffer); @@ -1057,7 +1046,8 @@ BufferAssigner::SplitBuffersByColor( } Status BufferAssigner::AssignBuffersWithSequentialOrdering( - const FlatMap>& + const flat_hash_map>& buffers_to_assign_sequentially, bool run_whole_module_heap_simulation, BufferAssignment* assignment) { // Run the sequence of instructions through the heap simulator. The heuristic @@ -1083,10 +1073,11 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering( // only live for the duration of their calling instructions. VLOG(1) << "Running whole-module heap simulation"; HloSchedule schedule(&assignment->module()); - FlatSet all_buffers_to_assign; + flat_hash_set all_buffers_to_assign; for (const auto& pair : buffers_to_assign_sequentially) { const HloComputation* computation = pair.first; - const FlatSet& buffers_to_assign = pair.second; + const flat_hash_set& buffers_to_assign = + pair.second; const std::vector* instruction_sequence = hlo_ordering.SequentialOrder(*computation); CHECK(instruction_sequence != nullptr) << computation->name(); @@ -1120,7 +1111,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering( VLOG(1) << "Running per-computation heap simulation"; for (const auto& pair : buffers_to_assign_sequentially) { const HloComputation* computation = pair.first; - const FlatSet& buffers_to_assign = pair.second; + const flat_hash_set& buffers_to_assign = + pair.second; const std::vector* instruction_sequence = hlo_ordering.SequentialOrder(*computation); CHECK(instruction_sequence != nullptr) << computation->name(); @@ -1155,9 +1147,8 @@ std::vector ComputePeakMemoryLogicalBuffers( const BufferAllocation& allocation, const HeapSimulatorTrace& heap_trace) { // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical // buffers in this allocation. - tensorflow::gtl::FlatMap - id_to_buffer; - tensorflow::gtl::FlatMap buffer_sizes; + absl::flat_hash_map id_to_buffer; + absl::flat_hash_map buffer_sizes; for (const auto& pair : allocation.assigned_buffers()) { const LogicalBuffer* buffer = pair.first; const BufferAllocation::OffsetSize& offset_size = pair.second; @@ -1196,7 +1187,7 @@ std::vector ComputePeakMemoryLogicalBuffers( // Next gather the set of logical buffers live at the earliest point of // maximal live set size. - tensorflow::gtl::FlatSet live_buffers; + absl::flat_hash_set live_buffers; live_size = 0; for (const auto& event : heap_trace.events()) { const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); @@ -1428,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets( // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated // in the same allocation (currently just supports kWhile, kCall, and -// kConditional). +// kConditional and input output aliasing). void BufferAssigner::BuildColocatedBufferSets( const HloModule* module, const BufferLiveness& buffer_liveness, const LogicalBuffer::SizeFunction& buffer_size, std::vector* colocated_buffer_sets) { const TuplePointsToAnalysis& points_to_analysis = buffer_liveness.points_to_analysis(); + + // Set up colocated buffer set for input and output. + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + std::vector colocated_set; + AddBufferToColocatedSet(module->entry_computation()->root_instruction(), + output_index, points_to_analysis, + &colocated_set); + AddBufferToColocatedSet( + module->entry_computation()->parameter_instruction(param_number), + param_index, points_to_analysis, &colocated_set); + AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets); + }); + for (const HloComputation* computation : module->MakeComputationPostOrder()) { if (computation->IsFusionComputation()) { continue; @@ -1586,8 +1592,8 @@ void BufferAssigner::BuildColocatedBufferSets( void BufferAssigner::AssignColocatedBufferSets( const std::vector& colocated_buffer_sets, BufferAssignment* assignment, - FlatSet* colocated_buffers, - FlatSet* colocated_allocations) { + flat_hash_set* colocated_buffers, + flat_hash_set* colocated_allocations) { for (const ColocatedBufferSet& colocated_buffer_set : colocated_buffer_sets) { BufferAllocation* allocation = nullptr; // Set 'entry_parameter_number' and 'entry_parameter_shape_idx' if entry @@ -1660,8 +1666,8 @@ StatusOr> BufferAssigner::CreateAssignment( // Once b/32491382 enables module-level liveness analysis, we may be able // to assign colocated buffers (or at least reuse their allocation for // buffers outside of the set) in AssignBuffersForComputation. - FlatSet colocated_buffers; - FlatSet colocated_allocations; + flat_hash_set colocated_buffers; + flat_hash_set colocated_allocations; std::vector colocated_buffer_sets; BuildColocatedBufferSets(module, assignment->liveness(), assignment->buffer_size_, &colocated_buffer_sets); @@ -1679,7 +1685,7 @@ StatusOr> BufferAssigner::CreateAssignment( // First assign buffers for global computatations. Temporary buffers for // sequential computations are collected in 'buffers_to_assign_sequentially'. - FlatMap> + flat_hash_map> buffers_to_assign_sequentially; for (auto* computation : global_computations) { TF_RETURN_IF_ERROR(AssignBuffersForComputation( diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h index 24ba7c16f548c10f58f41d2b88488939ca2d8e4d..899cd36e1f98c9e7b8ba7e42c06ced5c3e8afcc8 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -22,6 +22,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/heap_simulator.h" @@ -33,8 +35,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -148,7 +148,7 @@ class BufferAllocation { // Access to the logical buffers assigned to this allocation, and their // associated logical offsets and sizes. - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& assigned_buffers() const { return assigned_buffers_; } @@ -323,7 +323,7 @@ class BufferAllocation { // Mapping from the set of buffers assigned to this allocation to their // logical offsets and sizes. - tensorflow::gtl::FlatMap assigned_buffers_; + absl::flat_hash_map assigned_buffers_; int64 fragmentation_bytes_ = 0; std::vector heap_traces_; @@ -500,7 +500,7 @@ class BufferAssignment { int64 temp_allocation_total_size_ = 0; // Maps Buffers to the index of the BufferAllocation which holds the buffer. - tensorflow::gtl::FlatMap + absl::flat_hash_map allocation_index_for_buffer_; const HloModule* module_; @@ -554,11 +554,10 @@ class BufferAssigner { // true. Status AssignBuffersForComputation( const HloComputation* computation, bool is_thread_local, - const tensorflow::gtl::FlatSet& colocated_buffers, - const tensorflow::gtl::FlatSet& - colocated_allocations, - tensorflow::gtl::FlatMap>* + const absl::flat_hash_set& colocated_buffers, + const absl::flat_hash_set& colocated_allocations, + absl::flat_hash_map>* buffers_to_assign_sequentially, BufferAssignment* assignment); @@ -568,9 +567,8 @@ class BufferAssigner { // 'run_whole_module_heap_simulation' is true, the heap simulation will be run // assuming all global computations are sequentially ordered. Status AssignBuffersWithSequentialOrdering( - const tensorflow::gtl::FlatMap< - const HloComputation*, - tensorflow::gtl::FlatSet>& + const absl::flat_hash_map>& buffers_to_assign_sequentially, bool run_whole_module_heap_simulation, BufferAssignment* assignment); @@ -590,7 +588,7 @@ class BufferAssigner { // alias. Explicitly handling these colocated buffers is necessary because // points-to analysis is computation level scope and does not recognize // aliasing across computations (b/32491382). - using ColocatedBufferSet = tensorflow::gtl::FlatSet; + using ColocatedBufferSet = absl::flat_hash_set; // Returns a vector of ColocatedBufferSet objects, where each // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module' @@ -605,8 +603,8 @@ class BufferAssigner { void AssignColocatedBufferSets( const std::vector& colocated_buffer_sets, BufferAssignment* assignment, - tensorflow::gtl::FlatSet* colocated_buffers, - tensorflow::gtl::FlatSet* colocated_allocations); + absl::flat_hash_set* colocated_buffers, + absl::flat_hash_set* colocated_allocations); // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining // the invariant that all sets in 'colocated_buffer_sets' are disjoint. @@ -624,11 +622,10 @@ class BufferAssigner { // Split a set of buffers into several sets, each of which contains buffers // colored with the same color. - tensorflow::gtl::FlatMap, - LogicalBuffer::Color::Hasher> - SplitBuffersByColor( - const tensorflow::gtl::FlatSet& buffers); + absl::flat_hash_map, + LogicalBuffer::Color::Hasher> + SplitBuffersByColor(const absl::flat_hash_set& buffers); // If true, buffer assignments assumes that input parameter buffers and output // buffers can be shared if their sizes match. diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h index cdd3cf4032ef6916086e1c2d148b575192503000..f939a426ead7c34092fc5234ef779ee857347a26 100644 --- a/tensorflow/compiler/xla/service/buffer_liveness.h +++ b/tensorflow/compiler/xla/service/buffer_liveness.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" @@ -27,8 +28,6 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -102,7 +101,7 @@ class BufferLiveness { // Set of LogicalBuffers which are aliased in the output of other // instructions. For example, a LogicalBuffer which is inserted into a tuple // is considered to be aliased and will be in this set. - tensorflow::gtl::FlatSet aliased_buffers_; + absl::flat_hash_set aliased_buffers_; // LogicalBuffers that may be live out of the entry computation. PointsToSet::BufferSet maybe_live_out_buffers_; diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 69b36463560a1fad4f62687e9014fb3fbe5bbd13..11d8abc5badf7b1a05239ed74a05be0c899e37a1 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -141,6 +141,9 @@ class BufferValue { // operator< is required for std::set. bool operator<(const BufferValue& other) const { return id_ < other.id_; } + bool operator==(const BufferValue& other) const { return id_ == other.id_; } + bool operator!=(const BufferValue& other) const { return id_ != other.id_; } + virtual string ToString() const = 0; // TODO(lauj) rename LogicalBufferProto to BufferValueProto. diff --git a/tensorflow/compiler/xla/service/buffer_value_containers.h b/tensorflow/compiler/xla/service/buffer_value_containers.h index 305914fca828f110bf54239bddb1590172562b16..cc46af5eeec623e19637cd6245915b3a3124a2cd 100644 --- a/tensorflow/compiler/xla/service/buffer_value_containers.h +++ b/tensorflow/compiler/xla/service/buffer_value_containers.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_ +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" #include "tensorflow/core/lib/gtl/compactptrset.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -38,7 +38,7 @@ BufferValueCompactPointerSet ToBufferValueCompactPointerSet( return output; } -using BufferValueFlatSet = tensorflow::gtl::FlatSet; +using BufferValueFlatSet = absl::flat_hash_set; template BufferValueFlatSet ToBufferValueFlatSet( const LogicalBufferContainerT& logical_buffer_container) { diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc index 23b2a327096dfdb3c756a4acc5476ec01dcac1b3..bdd5069632e84fe6c67ca129f726432479ac1b35 100644 --- a/tensorflow/compiler/xla/service/call_graph.cc +++ b/tensorflow/compiler/xla/service/call_graph.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -138,7 +139,7 @@ CallGraphNode& CallGraph::GetNode(const HloComputation* computation) { bool CallGraph::DominatesHelper( const HloComputation* a, const HloComputation* b, - tensorflow::gtl::FlatSet* visited) const { + absl::flat_hash_set* visited) const { if (a == b || ContainsKey(*visited, b)) { // The call graph is guaranteed to be acyclic so any previously visited node // we encounter was already determined to be dominated. @@ -163,7 +164,7 @@ bool CallGraph::DominatesHelper( bool CallGraph::Dominates(const HloComputation* a, const HloComputation* b) const { - tensorflow::gtl::FlatSet visited; + absl::flat_hash_set visited; return DominatesHelper(a, b, &visited); } @@ -277,7 +278,7 @@ std::unique_ptr CallGraph::Build(const HloModule* module) { Status CallGraph::VisitNodesInternal( const VisitorFunction& visitor_func, const CallGraphNode& node, - tensorflow::gtl::FlatSet* visited) const { + absl::flat_hash_set* visited) const { auto pair = visited->insert(&node); if (!pair.second) { // Node was not inserted. Node has already been visited. @@ -294,7 +295,7 @@ Status CallGraph::VisitNodesInternal( Status CallGraph::VisitNodes(const VisitorFunction& visitor_func, bool visit_unreachable_nodes) const { - tensorflow::gtl::FlatSet visited; + absl::flat_hash_set visited; if (visit_unreachable_nodes) { // Traverse from all roots in the call graph. for (const CallGraphNode& node : nodes()) { diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h index 3af2ab5edfd9faf4ac5193df4b823c21b55b2f7f..cb56f4789d06ac33acdaadc8b619b9e37f683d58 100644 --- a/tensorflow/compiler/xla/service/call_graph.h +++ b/tensorflow/compiler/xla/service/call_graph.h @@ -20,11 +20,11 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -145,19 +145,19 @@ class CallGraphNode { // The computations called by this computation. The vector is used for a // stable ordering and the set enables fast membership testing. std::vector callees_; - tensorflow::gtl::FlatSet callee_set_; + absl::flat_hash_set callee_set_; // The computations which call this computation. The vector is used for a // stable ordering and the set enables fast membership testing. std::vector callers_; - tensorflow::gtl::FlatSet caller_set_; + absl::flat_hash_set caller_set_; // The call sites in this computation std::vector callsites_; // The map from instruction to index in callsites_ for looking up the callsite // (if any) associated with a particular instruction in this computation. - tensorflow::gtl::FlatMap callsite_instructions_; + absl::flat_hash_map callsite_instructions_; // The call sites in other computations which call this computation. std::vector caller_callsites_; @@ -250,14 +250,14 @@ class CallGraph { // 'visited'. Status VisitNodesInternal( const VisitorFunction& visitor_func, const CallGraphNode& node, - tensorflow::gtl::FlatSet* visited) const; + absl::flat_hash_set* visited) const; // Recursive helper for computing whether 'a' dominates 'b' in the call // graph. 'b_ancestor' is the currently visited node (which starts at 'b'), // and 'visited' is the set of computations which have been visited. bool DominatesHelper( const HloComputation* a, const HloComputation* b, - tensorflow::gtl::FlatSet* visited) const; + absl::flat_hash_set* visited) const; // The HLO module represented by this call graph. const HloModule* module_ = nullptr; @@ -267,7 +267,7 @@ class CallGraph { // Map from HLO computation to the index of the corresponding call graph node // in nodes_. - tensorflow::gtl::FlatMap node_indices_; + absl::flat_hash_map node_indices_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index b65dfef9c9575b683b2656af2ccc151d87db2cd7..cfe025fdd102fe42ccdceac1a656d6a092462f8b 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/copy_insertion.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h" @@ -31,8 +33,6 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -40,10 +40,12 @@ namespace { using absl::StrAppend; -bool IsEntryParameterValue(const HloValue& value) { +bool IsReadonlyEntryParameterValue(const HloValue& value) { const HloComputation* computation = value.defining_instruction()->parent(); return value.defining_instruction()->opcode() == HloOpcode::kParameter && - computation == computation->parent()->entry_computation(); + computation == computation->parent()->entry_computation() && + !computation->parent()->input_output_alias_config().ParameterHasAlias( + value.defining_instruction()->parameter_number()); } bool IsConstantValue(const HloValue& value) { @@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) { } bool ValueIsReadOnly(const HloValue& value) { - return IsConstantValue(value) || IsEntryParameterValue(value); + return IsConstantValue(value) || IsReadonlyEntryParameterValue(value); } // Data structure describing the action which should be taken on parts of a @@ -332,6 +334,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis, return Status::OK(); } +// Conservatively adds copies before root instruction of entry computation and +// each aliased parameter to resolve interference of aliased input and output +// buffer. We later rely on the CopyRemover to drop the unnecessary ones. +Status AddCopiesForAliasedInputOutputs(HloModule* module) { + HloComputation* entry = module->entry_computation(); + HloInstruction* root = entry->root_instruction(); + + ShapeTree output_indices_to_copy(root->shape()); + std::vector> copied_parameters; + bool has_alias = false; + for (auto* param : entry->parameter_instructions()) { + bool param_has_alias = false; + ShapeTree param_indices_to_copy(param->shape()); + + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + if (param_number == param->parameter_number()) { + param_has_alias = true; + *(param_indices_to_copy.mutable_element(param_index)) = true; + *(output_indices_to_copy.mutable_element(output_index)) = true; + } + }); + + if (!param_has_alias) { + continue; + } + + has_alias = true; + // Store a snapshot of users before DeepCopyInstruction, as + // DeepCopyInstruction introduces new users of the instruction. + std::vector users = param->users(); + ShapeTree param_copy_tree(param->shape(), + /*init_value=*/nullptr); + TF_ASSIGN_OR_RETURN(HloInstruction * copied, + entry->DeepCopyInstruction( + param, ¶m_indices_to_copy, ¶m_copy_tree)); + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied)); + } + + copied_parameters.push_back(param_copy_tree); + } + + if (!has_alias) { + return Status::OK(); + } + + // Add copies before root instruction. + ShapeTree output_copy_tree(root->shape(), + /*init_value=*/nullptr); + + TF_ASSIGN_OR_RETURN(HloInstruction * root_copied, + root->parent()->DeepCopyInstruction( + root, &output_indices_to_copy, &output_copy_tree)); + + // Add control dependencies between the input/output copies. + TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& input_index) -> Status { + HloInstruction* from = + copied_parameters[param_number].element(input_index); + HloInstruction* to = output_copy_tree.element(output_index); + + TF_RET_CHECK(from != nullptr); + TF_RET_CHECK(to != nullptr); + TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to)); + return Status::OK(); + })); + + entry->set_root_instruction(root_copied); + + return Status::OK(); +} + // Removes any control dependencies to or from the given instruction. Status StripControlDependenciesFrom(HloInstruction* instruction) { while (!instruction->control_successors().empty()) { @@ -432,7 +509,7 @@ class CopyRemover { // Construct a list for each HLO buffer in the alias analysis. Maintain a // map from HloValue to the respective list element representing that // value. The map is used to construct the copy info map below. - tensorflow::gtl::FlatMap value_to_node; + absl::flat_hash_map value_to_node; for (const HloBuffer& buffer : alias_analysis.buffers()) { // Verify values contained in the buffer are strictly ordered. This // should always be the case after adding copies to eliminate @@ -480,7 +557,7 @@ class CopyRemover { // respective ValueNode representing that value. void AddValueList( absl::Span values, - tensorflow::gtl::FlatMap* value_to_node) { + absl::flat_hash_map* value_to_node) { ValueNode* tail = nullptr; ValueNode* head = nullptr; for (const HloValue* value : values) { @@ -516,8 +593,7 @@ class CopyRemover { // respective ValueNode. void CreateCopyMap( const HloModule& module, - const tensorflow::gtl::FlatMap& - value_to_node) { + const absl::flat_hash_map& value_to_node) { for (HloComputation* computation : module.computations()) { for (HloInstruction* instruction : computation->instructions()) { // Add copies with unambiguous source values to the map. Copies with @@ -905,7 +981,7 @@ class CopyRemover { // The heads of all the value lists. Each value list represents the HLO // values contained in a particular HLO buffer. The values in the list are // in dependency order. - tensorflow::gtl::FlatSet value_lists_; + absl::flat_hash_set value_lists_; // Copy removal requires fast access to the value list elements // corresponding to the source and destination values of the kCopy @@ -916,7 +992,7 @@ class CopyRemover { ValueNode* src = nullptr; ValueNode* dest = nullptr; }; - tensorflow::gtl::FlatMap copy_map_; + absl::flat_hash_map copy_map_; }; HloModule* module_; @@ -954,6 +1030,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) { } } } + + TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module)); return Status::OK(); } @@ -1010,7 +1088,7 @@ Status CopyInsertion::AddSpecialCaseCopies(const CallGraph& call_graph, HloInstruction* root = computation->root_instruction(); // Mark nondistinct/ambiguous indices. - tensorflow::gtl::FlatSet seen; + absl::flat_hash_set seen; ShapeUtil::ForEachSubshape( root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) { std::vector buffers_at_index = diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 892d0d7b547aaf1e7f1c55e4163d1e1fd9518def..3096206c345c07ce7a638c6c95f9815b08096bd1 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1351,6 +1351,189 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) { EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); } +TEST_F(CopyInsertionTest, CrossingParameters) { + // Test a case where two parameters' dataflow cross with each other while + // input and output are aliased with same index: + // + // (p0 , p1) + // | \ /| + // | \ / | + // alias X alias + // | / \ | + // | / \| + // (p1 , p0) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 4); +} + +TEST_F(CopyInsertionTest, ParametersAliasing) { + // Test a case where two parameters' dataflow don't interfere with each other + // while aliased. + // + // (p0 , p1) + // | | + // | | + // alias alias + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(param, 0)), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 2); +} + +TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. + // + // (p0 , p1) + // | | + // | | + // alias | + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(param, 0)), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 2); +} + +TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. + // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + +TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. + // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | Add----+ + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, negate0, negate1)); + builder.AddInstruction(HloInstruction::CreateTuple({add, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) { // Test a while instruction with a body which permutes its tuple parameter // elements and applies one operation to one of the elements. The addition of diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index b7103118ac5cbd47e060b170a8e432e2ec93c0fd..58abb330a6e31e9b7a8081cd7964cf89a5b64a09 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -94,6 +94,7 @@ cc_library( ":target_machine_features", "@com_google_absl//absl/types:span", "//tensorflow/compiler/tf2xla:cpu_function_runtime", + "//tensorflow/compiler/xla/service:map_inliner", "//tensorflow/compiler/xla/service:scatter_expander", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:protobuf_util", @@ -127,7 +128,6 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_subcomputation_unification", "//tensorflow/compiler/xla/service:hlo_verifier", "//tensorflow/compiler/xla/service:indexed_array_analysis", - "//tensorflow/compiler/xla/service:inliner", "//tensorflow/compiler/xla/service:llvm_compiler", "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/service:reshape_mover", @@ -290,6 +290,8 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", @@ -309,6 +311,7 @@ cc_library( deps = [ "//tensorflow/compiler/xla:shape_util", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@llvm//:analysis", "@llvm//:target", ], @@ -471,6 +474,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/core:lib", "//tensorflow/stream_executor", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", ], @@ -762,6 +766,7 @@ cc_library( "//tensorflow/compiler/xla/service:computation_layout", "//tensorflow/compiler/xla/service:layout_assignment", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", ], ) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 18fc144efe0023c0893adfcb16eda3341c0938d3..68c715a086af2a53acd510d51479b29e2eeac632 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -86,8 +86,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/service/indexed_array_analysis.h" -#include "tensorflow/compiler/xla/service/inliner.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/service/map_inliner.h" #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" #include "tensorflow/compiler/xla/service/scatter_expander.h" @@ -249,9 +249,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( &pipeline, module->config().debug_options(), ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION); - // TODO(b/35786417): Re-enable inliner pass after fixing the bug and deciding - // where we will take this pass in future. - // pipeline.AddPass(); + pipeline.AddPass(); // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner // pass. @@ -308,7 +306,8 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( ReducePrecisionInsertion::PassTiming::AFTER_FUSION); pipeline.AddPass( - module->mutable_entry_computation_layout(), target_machine_features); + module->mutable_entry_computation_layout(), + LayoutAssignment::InstructionCanChangeLayout, target_machine_features); return pipeline.Run(module).status(); } @@ -328,8 +327,13 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn( { auto& pass = pipeline.AddPass>( "simplification after layout assignement"); - pass.AddInvariantChecker(/*layout_sensitive=*/true, - /*allow_mixed_precision=*/false); + // TODO(b/117156505): When the bug is fixed, the CPU backend should not + // produce layout changing elementwise operations. We will then pass + // LayoutAssignment::InstructionCanChangeLayout to the HLO verifier to + // enable stricter verification. + pass.AddInvariantChecker( + /*layout_sensitive=*/true, + /*allow_mixed_precision=*/false); pass.AddPass>( /*is_layout_sensitive=*/true, [](const Shape&, const Shape&) { return true; }, diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc index bfecbd6e017893e4f6d3dcbc01d46c899e6060fa..c291bf2d1ba2eaff4192051840768c037bece86f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h" #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" @@ -38,7 +39,7 @@ using absl::nullopt; using absl::optional; using ShouldMakeOperandColMajorCache = - tensorflow::gtl::FlatMap; + absl::flat_hash_map; } // namespace static bool ShouldMakeAllUsersColMajor(const HloInstruction* instruction) { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h index 3c4fe68b830d9602f009b318d4e51e9a04a27e09..f4da35dd373f24d81323d198582048e2e6d36268 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h @@ -30,8 +30,11 @@ class CpuLayoutAssignment : public LayoutAssignment { public: explicit CpuLayoutAssignment( ComputationLayout* entry_computation_layout, + std::function + instruction_can_change_layout_func, const TargetMachineFeatures* target_machine_features) - : LayoutAssignment(entry_computation_layout), + : LayoutAssignment(entry_computation_layout, + std::move(instruction_can_change_layout_func)), target_machine_features_(*target_machine_features) {} ~CpuLayoutAssignment() override {} diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc index 4668f3872dad598edf4c7680e1b601622104ab3e..97659b88a7974d7caf91ab0d4741f3635e4dae4a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc @@ -54,8 +54,9 @@ class CpuLayoutAssignmentTest : public HloTestBase { [](int64 shape_size) { return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment; }); - cpu::CpuLayoutAssignment layout_assignment(entry_computation_layout, - &target_machine_features); + cpu::CpuLayoutAssignment layout_assignment( + entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout, + &target_machine_features); EXPECT_IS_OK(layout_assignment.Run(module).status()); } }; @@ -321,8 +322,9 @@ static StatusOr RunDotOutputFusion( [](int64 shape_size) { return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment; }); - cpu::CpuLayoutAssignment layout_assignment(&computation_layout, - &target_machine_features); + cpu::CpuLayoutAssignment layout_assignment( + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + &target_machine_features); TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something, layout_assignment.Run(module)); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 20cf8557354b161451cf5b7825ccfce57d96875a..a9febe891b5e9d1eb9e6b297952b50d1d26a3396 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/synchronization/mutex.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/core/platform/dynamic_annotations.h" @@ -30,8 +31,7 @@ namespace cpu { namespace runtime { XfeedManager* GetXfeedManager(int device_ordinal) { - static tensorflow::gtl::FlatMap* managers = - new tensorflow::gtl::FlatMap(); + static auto* managers = new absl::flat_hash_map(); static absl::Mutex* mutex = new absl::Mutex(); absl::MutexLock lock(mutex); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index c3e802078385d4724f0da26e8b6c16503e3662a1..b2abdb39a598871a7cc44760e464f48b9a200874 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -24,6 +24,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/logging.h" // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" @@ -67,8 +69,6 @@ limitations under the License. #include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -688,8 +688,25 @@ StatusOr IrEmitter::EmitTargetElementLoopBodyForReduceWindow( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* strided_index = NSWMul(index[i], b_.getInt64(window.dimensions(i).stride())); - input_index[i] = NSWSub(NSWAdd(strided_index, window_index[i]), - b_.getInt64(window.dimensions(i).padding_low())); + input_index[i] = NSWSub( + NSWAdd(strided_index, + NSWMul(window_index[i], + b_.getInt64(window.dimensions(i).window_dilation()))), + b_.getInt64(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())), + b_.getInt64(0)); + if (in_bounds_condition == nullptr) { + in_bounds_condition = dilation_condition; + } else { + in_bounds_condition = And(in_bounds_condition, dilation_condition); + } + + // Apply base dilation to the index. + input_index[i] = + SDiv(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())); // We need to check if 0 <= input_index[i] < bound, as otherwise we are in // the padding so that we can skip the computation. That is equivalent to @@ -728,12 +745,6 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) { /*operands=*/{reduce_window->operand(0)}, /*supported_types=*/{F32, BF16, S32, F16})); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(reduce_window->window())) { - return Unimplemented( - "Dilation for ReduceWindow is not implemented on CPU."); - } - // Pseudo code for reduce window: // // for (coordinates O in the output) @@ -1398,10 +1409,10 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) { // // So if we reduce f32[A,B,C,D] on dimensions 1 and 2, this map contains // [0->0, 3->1]. - gtl::FlatMap unreduced_dim_map; + absl::flat_hash_map unreduced_dim_map; - gtl::FlatSet reduced_dims(reduce.dimensions().begin(), - reduce.dimensions().end()); + absl::flat_hash_set reduced_dims(reduce.dimensions().begin(), + reduce.dimensions().end()); const Shape& operand_shape = reduce.operand(0)->shape(); const Shape& result_shape = reduce.shape(); @@ -1977,7 +1988,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) { // // * Implement the memcpy within the innermost loop. - gtl::FlatSet inner_dims; + absl::flat_hash_set inner_dims; for (int64 dim : LayoutUtil::MinorToMajor(layout)) { if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) { break; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index daafef4eb38f14679e025d8e75dd671e94198102..586f27b104ed706a3b128903c6a90abbf3667e59 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -23,6 +23,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "llvm/ADT/Triple.h" @@ -47,7 +48,6 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -427,7 +427,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, // Maps the buffer allocation slices for the parameters to the computation // being compiled to their parameter numbers. Only relevant for thread local // computations. - tensorflow::gtl::FlatMap + absl::flat_hash_map computation_parameter_allocations_; // Maps HLO instructions to their index into the profile counter array. @@ -567,11 +567,11 @@ class IrEmitter : public DfsHloVisitorWithDefault, } }; - tensorflow::gtl::FlatMap + absl::flat_hash_map emitted_literals_; - tensorflow::gtl::FlatMap + absl::flat_hash_map constant_buffer_to_global_; std::vector thread_local_computations_; diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc index a0cd8ee2d2be10bcee9c2e216e24908d949e2d7b..5cdac203af2e7a1f8f3aebda965447ba75e9934e 100644 --- a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc +++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/cpu/target_machine_features.h" +#include "tensorflow/core/platform/logging.h" namespace xla { namespace cpu { diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.h b/tensorflow/compiler/xla/service/cpu/target_machine_features.h index 8b00ae9e47eeed26ffe80707b89593b267e8dbb8..a383b4a4a00f9b8d49a88e8349793a3a90d8da7b 100644 --- a/tensorflow/compiler/xla/service/cpu/target_machine_features.h +++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_ +#include "absl/container/flat_hash_map.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Target/TargetMachine.h" #include "tensorflow/compiler/xla/primitive_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { namespace cpu { @@ -97,8 +97,7 @@ class LLVMTargetMachineFeatures : public TargetMachineFeatures { // This is mutated from within `GetTargetTransformInfoFor` which is // semantically a getter (and thus `const`); and is therefore declared // mutable. Making this mutable is okay because it has cache semantics. - mutable tensorflow::gtl::FlatMap + mutable absl::flat_hash_map target_transform_info_cache_; llvm::TargetMachine* target_machine_; }; diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc index 7af51db55af44ae1e437ea8e4de7427012cad82f..b35fd9dad877c319c3d0110c96a00aeefa78769e 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc @@ -121,7 +121,7 @@ TEST_F(CpuNoAliasTest, Concat) { CHECK: %read_concat2_array = load {{.*}} !alias.scope [[concat1_noalias]], !noalias [[concat1_scope]] CHECK-DAG: [[buf_size32:![0-9]+]] = !{!"buffer:{{.*}} size:32 CHECK-DAG: [[buf_size48:![0-9]+]] = !{!"buffer:{{.*}} size:48 - CHECK-DAG: [[param_x_noalias]] = !{[[buf_size32]], [[buf_size48]]} + CHECK-DAG: [[param_x_noalias]] = !{[[buf_size48]], [[buf_size32]]} CHECK-DAG: [[concat1_scope]] = !{[[buf_size32]]} CHECK-DAG: [[concat1_noalias]] = !{[[buf_size48]]} )"; diff --git a/tensorflow/compiler/xla/service/defuser.cc b/tensorflow/compiler/xla/service/defuser.cc index d124f74d19d83269be96ee34a6b4b2a8d00a978f..661539cccb4ef27a49a73f97a0a8b0d9dfc77061 100644 --- a/tensorflow/compiler/xla/service/defuser.cc +++ b/tensorflow/compiler/xla/service/defuser.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -48,7 +49,7 @@ Status Defuse(HloInstruction* fusion_instruction) { fusion_instruction->fused_instructions_computation(); // A map from fused instruction to its defused clone. - tensorflow::gtl::FlatMap + absl::flat_hash_map defused_instructions; // Initialize map to contain the fusion instruction parameters mapping // to the operands of the fusion instruction. diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 5761573791d90e45c65b55124a4bae3c5b929ef1..68d01d75a2ed3d7eaadb03a46ba3bd20f43a9ffc 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/compiler/xla/service/fusion_queue.h b/tensorflow/compiler/xla/service/fusion_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..1208a7dda87d7b2a6ad7113e2604e8b9a0fa045b --- /dev/null +++ b/tensorflow/compiler/xla/service/fusion_queue.h @@ -0,0 +1,53 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_ + +#include + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" + +namespace xla { + +// A queue interface that allows implementations to choose fusion candidates in +// custom order. +class FusionQueue { + public: + FusionQueue() = default; + virtual ~FusionQueue() = default; + + // Dequeues the next fusion candidates: a consumer and the list of producers + // as operand indices. + virtual std::pair> + DequeueNextInstructionAndOperandsToFuseInOrder() = 0; + + // A callback passed to the queue implementation right before the producer is + // fused into the consumer. + virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {} + + // A callback passed to the queue implementation right after the fusion is + // created. Note that original_producer could have been destroyed. + virtual void OnFusingInstruction(HloInstruction* fusion, + HloInstruction* original_producer, + HloInstruction* original_consumer) {} + + // A callback passed to the queue implementation to notify the removal of an + // instruction. + virtual void RemoveInstruction(HloInstruction* instruction) = 0; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_ diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 51968d13d492d6cb1d9731c9c18c7c8e4962c0d5..350fd325371b2252efb02391a28850289d50fb2d 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -91,6 +91,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_reachability", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", ], ) @@ -357,6 +358,7 @@ cc_library( "//tensorflow/core/platform/default/build_config:cufft_plugin", "//tensorflow/core/platform/default/build_config:stream_executor_cuda", # build_cleaner: keep "//tensorflow/stream_executor", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -402,6 +404,7 @@ cc_library( "//tensorflow/core:stream_executor_no_cuda", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", "@com_google_absl//absl/types:optional", ], ) @@ -474,6 +477,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:instruction_fusion", "//tensorflow/compiler/xla/service:pattern_matcher", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -506,6 +510,7 @@ cc_library( "//tensorflow/compiler/xla/service:multi_output_fusion", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -539,6 +544,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_dataflow_analysis", "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_set", ], ) @@ -713,6 +719,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:regexp_internal", "//tensorflow/core:stream_executor_no_cuda", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -774,7 +781,6 @@ cc_library( srcs = ["gpu_layout_assignment.cc"], hdrs = ["gpu_layout_assignment.h"], deps = [ - ":gpu_options", ":ir_emission_utils", ":stream_executor_util", "//tensorflow/compiler/xla:shape_util", @@ -875,16 +881,6 @@ cc_library( ], ) -cc_library( - name = "gpu_options", - srcs = ["gpu_options.cc"], - hdrs = ["gpu_options.h"], - deps = [ - "//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/core:lib_internal", - ], -) - cc_library( name = "stream_executor_util", srcs = ["stream_executor_util.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index 7125673887d28729287d67577bcfa06423f85611..6d4a72038fb3f6ed657ee36eee82c3e5261d27b8 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -145,7 +145,7 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { // cache misses and doing extra work. Overall, caching doesn't seem worth the // trouble, but we may want to revisit this if we ever find a model where // caching would speed up compilation a lot. -StatusOr> +StatusOr CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( HloCustomCallInstruction* instr) { // TODO(timshen): for now only check fp16. It can be expanded to other types, @@ -316,9 +316,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( << AlgorithmToString(best_result.algorithm()) << ", takes " << best_result.elapsed_time_in_ms() << "ms, and uses " << best_result_bytes_used << "B of scratch memory."; - return std::make_tuple(best_result.algorithm().algo_id(), - best_result.algorithm().tensor_ops_enabled(), - best_result_bytes_used); + return AutotuneResult{best_result.algorithm().algo_id(), + best_result.algorithm().tensor_ops_enabled(), + best_result_bytes_used, + absl::Milliseconds(best_result.elapsed_time_in_ms())}; } return InternalError( @@ -331,41 +332,37 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( HloInstruction* instr) { CHECK(IsCustomCallToDnnConvolution(*instr)); - StatusOr> alg_scratch_and_tc = + StatusOr best_algo_or = PickBestAlgorithm(Cast(instr)); - - if (!alg_scratch_and_tc.ok()) { - LOG(ERROR) << alg_scratch_and_tc.status(); + if (!best_algo_or.ok()) { + LOG(ERROR) << best_algo_or.status(); return false; } - int64 algorithm; - bool tensor_ops_enabled; - int64 scratch_bytes; - - std::tie(algorithm, tensor_ops_enabled, scratch_bytes) = - alg_scratch_and_tc.ConsumeValueOrDie(); - - VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and " - << NumBytesToString(scratch_bytes) + auto best_algo = std::move(best_algo_or).ValueOrDie(); + VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm + << " and " << NumBytesToString(best_algo.scratch_bytes) << " of scratch memory: " << instr->ToString() - << " tensor_ops_enabled: " << tensor_ops_enabled; + << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled; // Replace instr with a new CustomCall which has the correct algorithm, and // whose output shape has the appropriate amount of scratch memory. HloComputation* computation = instr->parent(); - Shape new_call_shape = - ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0), - ShapeUtil::MakeShape(U8, {scratch_bytes})}); + Shape new_call_shape = ShapeUtil::MakeTupleShape( + {instr->shape().tuple_shapes(0), + ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})}); TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config, instr->backend_config()); - backend_config.set_algorithm(algorithm); - backend_config.set_tensor_ops_enabled(tensor_ops_enabled); + backend_config.set_algorithm(best_algo.algorithm); + backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled); HloInstruction* new_call = computation->AddInstruction( instr->CloneWithNewOperands(new_call_shape, instr->operands())); + VLOG(1) << "Replacing convolution " << instr->ToString() << " with " + << new_call->ToString(); + TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config)); // Repackage new_call so it has the same shape as the original call, namely diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h index aeda2fc7f8b4d6169fc2baa8975119ba7bf68dd2..136c32210a4afbd60cf9b13863befdba9b712a9d 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#include "absl/time/time.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" @@ -47,10 +48,16 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass { StatusOr Run(HloModule* module) override; private: + struct AutotuneResult { + int64 algorithm; + bool tensor_ops_enabled; + int64 scratch_bytes; + absl::Duration runtime; + }; + StatusOr RunOnComputation(HloComputation* computation); StatusOr RunOnInstruction(HloInstruction* instr); - StatusOr> PickBestAlgorithm( - HloCustomCallInstruction* instr); + StatusOr PickBestAlgorithm(HloCustomCallInstruction* instr); se::StreamExecutor* stream_exec_; // never null DeviceMemoryAllocator* allocator_; // may be null diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc index ef292373018295f5100b91c343df274b626c2fa1..437d25727e20afb1600daccd6f925f84db642855 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc @@ -525,6 +525,9 @@ StatusOr RunOnInstruction(HloInstruction* conv) { TF_RETURN_IF_ERROR( custom_call->set_backend_config(GetDefaultBackendConfig())); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << custom_call->ToString(); + // The CustomCall returns a tuple (conv_result, scratch_memory). Extract out // the conv result and replace `conv` with it. TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction( diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc index 3761c19cfcab10e0c6faa17c2d1d535d706ff6c5..d508cbc2e1cf4cec071857ec5e048e8bd0be1015 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc @@ -234,7 +234,8 @@ StatusOr> TryRewriteToCudnnForwardRelu( config.set_side_input_scale(alpha_side_input); TF_RETURN_IF_ERROR(new_conv->set_backend_config(config)); - VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name(); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << new_conv->ToString(); return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0), new_conv, 0); } diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index c1aaa4bf04ddc31edf723c056805ae5aad994e55..6dcdaf1cfe06e446deed847aaf29088a7ed10e13 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -358,13 +358,6 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( const HloInstruction* operand = hlo->operand(0); const Window& window = hlo->window(); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(window)) { - return Unimplemented( - "Dilation for reduce-window not implemented on GPU. " - "See b/31410564."); - } - PrimitiveType operand_element_type = operand->shape().element_type(); llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), @@ -397,9 +390,24 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* stridden_index = NSWMul( index[i], index_typed_const(window.dimensions(i).stride())); + input_index[i] = NSWSub( + NSWAdd(stridden_index, + NSWMul(window_index[i], + index_typed_const( + window.dimensions(i).window_dilation()))), + index_typed_const(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())), + index_typed_const(0)); + in_bounds = And(in_bounds, dilation_condition); + + // Apply base dilation to the index. input_index[i] = - NSWSub(NSWAdd(stridden_index, window_index[i]), - index_typed_const(window.dimensions(i).padding_low())); + SDiv(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())); // We must check whether 0 ≤ input_index[i] < bound, as otherwise // we are in the pad and so can skip the computation. This diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc index 79c74e7e8bf3a1aa59243b81942d29180bb46e74..e2ab00ce41c9e23e91449f249620d61d0f7736ae 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" @@ -27,7 +28,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 31a9f9b1beb81da81a06f6dc8e7c13c105514092..57426327822d95a42f407ed7488f35acfd3623d2 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" @@ -197,7 +198,7 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) { } module_spec.AddCudaPtxInMemory(ptx().c_str()); - tensorflow::gtl::FlatMap globals; + absl::flat_hash_map globals; se::ModuleHandle module_handle; executor->LoadModule(module_spec, &module_handle); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 38b0f8f15bd28cf2659e4a53b6634e981545716b..0e276282e40fba0ae4881a51dad0c7c9e8d1c081 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "absl/types/span.h" @@ -35,7 +36,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -101,7 +101,7 @@ class GpuExecutable : public Executable { const PointsToSet& GetRootPointsToSet() const; using BufferAllocToDeviceMemoryMap = - tensorflow::gtl::FlatMap; + absl::flat_hash_map; // Loads the PTX or CUBIN for this executable into `executor` and resolves the // globals corresponding to constant buffers. Returns a map mapping buffer diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 74352f26aa9c3a2ca597da21735438df92f863ab..8c9a8adc614e748ffc431dd2dc7fa1de3d38cf40 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -125,14 +124,8 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall( DataLayout input; FilterLayout filter; DataLayout output; - if (ConvUseLayoutHeuristic(instr->GetModule()->config())) { - std::tie(input, filter, output) = - HeuristicLayoutAssignment(instr, stream_executor_); - } else { - input = DataLayout::kBatchDepthYX; - filter = FilterLayout::kOutputInputYX; - output = DataLayout::kBatchDepthYX; - } + std::tie(input, filter, output) = + HeuristicLayoutAssignment(instr, stream_executor_); TF_ASSIGN_OR_RETURN( std::tie(*input_shape->mutable_layout(), @@ -220,16 +213,6 @@ Status GpuLayoutAssignment::AddBackendConstraints( return Status::OK(); } -bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) { - // - Inputs to cudnn batchnorm custom calls don't need the major-first layout - // (i.e. {n, n-1, ...0}) -- we can handle any layout. - // - Inputs to cudnn convolution require custom layouts handled in - // AddBackendConstraints. - return !IsCustomCallToDnnBatchNorm(*instruction) && - !IsCustomCallToDnnConvolution(*instruction); -} - Status GpuLayoutAssignment::PropagateOperandConstraint( const OperandLayoutConstraint& layout_constraint, LayoutConstraints* constraints) { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h index e2b96a81d4de1337de2978a9d3c6c38c6e5fd0cd..6a48e55fd2e784f80a50f4565107db177fb43bfc 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h @@ -30,8 +30,11 @@ namespace gpu { class GpuLayoutAssignment : public LayoutAssignment { public: explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout, + std::function + instruction_can_change_layout_func, se::StreamExecutor* stream_executor) - : LayoutAssignment(entry_computation_layout), + : LayoutAssignment(entry_computation_layout, + std::move(instruction_can_change_layout_func)), stream_executor_(stream_executor) {} ~GpuLayoutAssignment() override {} @@ -43,8 +46,6 @@ class GpuLayoutAssignment : public LayoutAssignment { Status PropagateBufferConstraint( const BufferLayoutConstraint& buffer_constraint, LayoutConstraints* constraints) override; - bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) override; private: Status AddBackendConstraintsToDnnConvCustomCall( diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc index fbc8ddf599570b90e93eb463a1fd6c275b73711c..04681cfcec792d86eed95585262691932b07b269 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc @@ -75,7 +75,8 @@ TEST_F(LayoutAssignmentTest, Elementwise) { ShapeLayout(result_shape_with_layout); GpuLayoutAssignment layout_assignment( - &computation_layout, backend().default_stream_executor()); + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); for (const HloInstruction* operand : add->operands()) { @@ -163,7 +164,8 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) { } GpuLayoutAssignment layout_assignment( - &computation_layout, backend().default_stream_executor()); + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -233,7 +235,8 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) { } GpuLayoutAssignment layout_assignment( - &computation_layout, backend().default_stream_executor()); + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first operand to batchnorm should have the same layout as the @@ -314,7 +317,8 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) { } GpuLayoutAssignment layout_assignment( - &computation_layout, backend().default_stream_executor()); + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); // The first and fourth operands to the batchnorm call should have the @@ -348,8 +352,9 @@ TEST_F(LayoutAssignmentTest, DotLayout) { ComputationLayout computation_layout( module->entry_computation()->ComputeProgramShape()); - GpuLayoutAssignment layout_assignment(&computation_layout, - backend().default_stream_executor()); + GpuLayoutAssignment layout_assignment( + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); Shape expected_shape = diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 4d5d8e99f88149aabfd0a4aeafc7e6724d29418d..b61f0387392d2301109a484ca5c1f65f18882265 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" @@ -125,8 +126,8 @@ bool IsIEEEFloatingPointScalarConstant(const HloInstruction* constant) { } // Compute the precise number of operands to the new fusion. - tensorflow::gtl::FlatSet operands( - a->operands().begin(), a->operands().end()); + absl::flat_hash_set operands(a->operands().begin(), + a->operands().end()); operands.insert(b->operands().begin(), b->operands().end()); // If there's an edge between `a` and `b`, don't count it: We're fusing that // producer -> consumer relationship. diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc index c21f76f6eb1874bfa5a1d296c78ea0e3b9261eca..835924024b7b7de79624a369a69b07d72ac751ab 100644 --- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" @@ -31,7 +32,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -101,7 +101,7 @@ bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) { int64 GpuMultiOutputFusion::GetProfit(HloInstruction* instr1, HloInstruction* instr2) { - tensorflow::gtl::FlatSet in_list; + absl::flat_hash_set in_list; for (auto instr : instr1->operands()) { if (!IsProfitableOperand(instr)) { continue; @@ -148,7 +148,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() { bool changed = false; RecomputeReachability(); - tensorflow::gtl::FlatSet to_fuse; + absl::flat_hash_set to_fuse; // Keep a list of the instructions to fuse after making all the fusion // decisions. We first aggressively add instructions to potential_fusion_list, // then filter out instructions that will be no longer fusible because of diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 0b3b429710a1a3158ce57a393a09291c95a2ef7a..ac6c2c5565be3a469a539dde7a60e7fee3f09339 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -232,14 +232,17 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // a layout-sensitive verifier! HloPassPipeline pipeline("layout assignment"); pipeline.AddPass( - hlo_module->mutable_entry_computation_layout(), stream_exec); + hlo_module->mutable_entry_computation_layout(), + LayoutAssignment::InstructionCanChangeLayout, stream_exec); TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } { HloPassPipeline pipeline("post-layout_assignment"); - pipeline.AddInvariantChecker(/*layout_sensitive=*/true, - /*allow_mixed_precision=*/false); + pipeline.AddInvariantChecker( + /*layout_sensitive=*/true, + /*allow_mixed_precision=*/false, + LayoutAssignment::InstructionCanChangeLayout); // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. @@ -285,8 +288,10 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, { HloPassFix fusion("fusion"); - fusion.AddInvariantChecker(/*layout_sensitive=*/true, - /*allow_mixed_precision=*/false); + fusion.AddInvariantChecker( + /*layout_sensitive=*/true, + /*allow_mixed_precision=*/false, + LayoutAssignment::InstructionCanChangeLayout); fusion.AddPass(/*may_duplicate=*/false); fusion.AddPass(/*may_duplicate=*/true); fusion.AddPass(); @@ -298,7 +303,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, HloPassPipeline reduce_pipeline("reduce-precision"); reduce_pipeline.AddInvariantChecker( - /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false); + /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false, + LayoutAssignment::InstructionCanChangeLayout); ReducePrecisionInsertion::AddPasses( &reduce_pipeline, hlo_module->config().debug_options(), ReducePrecisionInsertion::PassTiming::AFTER_FUSION); @@ -324,8 +330,10 @@ Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) { // (b/27180329). Therefore, in that case, we set the output to be a copy of // the parameter. HloPassPipeline pipeline("GPU-ir-emit-prepare"); - pipeline.AddInvariantChecker(/*layout_sensitive=*/true, - /*allow_mixed_precision=*/false); + pipeline.AddInvariantChecker( + /*layout_sensitive=*/true, + /*allow_mixed_precision=*/false, + LayoutAssignment::InstructionCanChangeLayout); // Copy insertion should be performed immediately before IR emission to avoid // inserting unnecessary copies (later pass adds an instruction which @@ -400,7 +408,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) { "prefers >= 9.2.88). Compilation of XLA kernels below will likely " "fail.\n\nYou do not need to update CUDA; cherry-picking the ptxas " "binary is sufficient."; - } else if ((vmaj < 9 || vmin < 2 || vdot < 88)) { + } else if (std::make_tuple(vmaj, vmin, vdot) < std::make_tuple(9, 2, 88)) { LOG(WARNING) << "*** WARNING *** You are using ptxas " << vmaj << "." << vmin << "." << vdot diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h index 8e97774750344bfc141daa7d752300762c708613..c4a0b727cd3d9ae0af61c1752c1608cd4fb65d2d 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/node_hash_map.h" #include "absl/types/optional.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/executable.h" @@ -140,10 +141,10 @@ class NVPTXCompiler : public LLVMCompiler { tensorflow::condition_variable compilation_done_cv_; }; - // Don't even think about switching this to FlatMap; iterator stability is - // critical here. - std::unordered_map + // Don't even think about switching this to flat_hash_map; iterator stability + // is critical here. + absl::node_hash_map compilation_cache_ GUARDED_BY(mutex_); TF_DISALLOW_COPY_AND_ASSIGN(NVPTXCompiler); diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc index cf9f102d31305da15dabaf6247f23c5ca9a9e054..375f68a15957936151aee068582a714b62694af2 100644 --- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc @@ -62,13 +62,8 @@ LaunchDimensions CalculateLaunchDimensions( // // * = - auto threads_per_core = device_desc.threads_per_core_limit(); - auto blocks_per_core = device_desc.blocks_per_core_limit(); - int64 threads_per_block; - if (threads_per_core != 0 && blocks_per_core != 0) { - threads_per_block = device_desc.threads_per_core_limit() / - device_desc.blocks_per_core_limit(); - } else { + int64 threads_per_block = device_desc.threads_per_block_limit(); + if (threads_per_block == 0) { static std::atomic log_count{0}; if (log_count.fetch_add(1) < 8) { LOG(WARNING) << "Attempting to calculate launch dimensions for GPU " diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment.h b/tensorflow/compiler/xla/service/gpu/stream_assignment.h index c2df83aaa4347a9439798acc6cfc2ba0db995232..52d38b6f20e8d61e2d4966ad15a5583a9cd2e945 100644 --- a/tensorflow/compiler/xla/service/gpu/stream_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/stream_assignment.h @@ -16,9 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_ +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { namespace gpu { @@ -34,7 +34,7 @@ class StreamAssignment { private: int stream_count_ = 1; // At least the main stream. - tensorflow::gtl::FlatMap hlo_to_stream_number_; + absl::flat_hash_map hlo_to_stream_number_; }; // Assigns GPU streams to instructions in `module`. diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index 2bd04259c0e8193e6fde415df17a8232c701dec4..9220865867b770eebfb1ada8f31a5d24693a4b8d 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -18,14 +18,16 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/util.h" namespace xla { -using tensorflow::gtl::FlatMap; -using tensorflow::gtl::FlatSet; +using absl::flat_hash_map; +using absl::flat_hash_set; /*static*/ StatusOr HeapSimulator::MinimumMemoryForModule( @@ -56,7 +58,7 @@ StatusOr HeapSimulator::MinimumMemoryForComputation( const HloComputation& computation, const HloInstructionSequence& sequence, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation) { TF_ASSIGN_OR_RETURN( HeapSimulator::Result result, @@ -88,7 +90,7 @@ StatusOr HeapSimulator::Run( const HloInstructionSequence& instruction_sequence, const TuplePointsToAnalysis& points_to_analysis, const BufferValue::SizeFunction& size_fn, const Options& options, - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation) { HeapSimulator heap(std::move(algorithm), size_fn, options, /*schedule=*/nullptr, memory_by_computation); @@ -115,8 +117,10 @@ Status HeapSimulator::RunComputation( // 'used_buffers' is the reverse map - it tracks which buffers were used by an // instruction, so that we can remove the instructions from a buffer's live // set after they are visited. - FlatMap> live_buffers; - FlatMap> used_buffers; + flat_hash_map> + live_buffers; + flat_hash_map> + used_buffers; auto add_user_to_buffer = [this, &live_buffers, &used_buffers]( const HloInstruction* user, const BufferValue* buffer) { @@ -213,7 +217,7 @@ Status HeapSimulator::RunComputation( VLOG(4) << " Removing user " << instruction->name() << " from buffer " << operand_buffer->ToString(); auto it = live_buffers.find(operand_buffer); - FlatSet* live_set = &it->second; + flat_hash_set* live_set = &it->second; live_set->erase(instruction); if (live_set->empty()) { live_buffers.erase(it); @@ -235,7 +239,8 @@ Status HeapSimulator::RunComputation( // that we should assign. // Make sure each buffer get reused at most once. - FlatSet reused_buffers; + flat_hash_set reused_buffers; + int64 alloc_size_by_instruction = 0; for (const BufferValue* buffer : buffers_defined_by_instruction) { if (IgnoreBuffer(buffer)) { continue; @@ -268,14 +273,15 @@ Status HeapSimulator::RunComputation( if (!shared) { VLOG(3) << " Allocating: " << buffer->ToString(); + alloc_size_by_instruction += size_fn_(*buffer); Alloc(buffer, instruction); } } // Account for the memory used by subcomputations when estimating the // current heap size. if (memory_by_computation_ != nullptr) { - algorithm_->AccountForSubcomputationMemory(instruction, - *memory_by_computation_); + algorithm_->AccountForSubcomputationMemory( + instruction, alloc_size_by_instruction, *memory_by_computation_); } // If all computations in the module have been scheduled, we can save memory @@ -323,7 +329,7 @@ Status HeapSimulator::RunComputation( to_free.reserve(live_buffers.size()); for (const auto& buffer_pending : live_buffers) { const BufferValue* buffer = buffer_pending.first; - const FlatSet& pending = buffer_pending.second; + const flat_hash_set& pending = buffer_pending.second; CHECK_EQ(pending.size(), 1) << *buffer; CHECK(*pending.begin() == nullptr) << *buffer; to_free.push_back(buffer); @@ -345,7 +351,7 @@ HeapSimulator::HeapSimulator( std::unique_ptr algorithm, const BufferValue::SizeFunction& size_fn, const Options& options, const HloSchedule* schedule, - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation) : no_fragmentation_stats_(absl::make_unique()), algorithm_(std::move(algorithm)), @@ -381,10 +387,8 @@ void HeapSimulator::Alloc(const BufferValue* buffer, allocated_buffers_.insert(buffer); const int64 size = size_fn_(*buffer); - const HloInstruction* instruction_to_calc_aliasing = - memory_by_computation_ == nullptr ? nullptr : instruction; - algorithm_->Alloc(buffer, size, instruction_to_calc_aliasing); - no_fragmentation_stats_->Alloc(buffer, size, instruction_to_calc_aliasing); + algorithm_->Alloc(buffer, size); + no_fragmentation_stats_->Alloc(buffer, size); FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction, nullptr); } @@ -522,21 +526,9 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size) { } } -void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size, - const HloInstruction* instruction) { - // The output buffer of while/call/conditional is always aliased with the - // output buffer of the root instruction in the body. Don't double count. - if (instruction == nullptr || - (instruction->opcode() != HloOpcode::kWhile && - instruction->opcode() != HloOpcode::kCall && - instruction->opcode() != HloOpcode::kConditional)) { - Alloc(buffer, size); - } -} - void NoFragmentationStatsHeap::AccountForSubcomputationMemory( - const HloInstruction* instruction, - const tensorflow::gtl::FlatMap& + const HloInstruction* instruction, int64 alloc_size_by_instruction, + const absl::flat_hash_map& memory_by_computation) { // We only count the memory usage of the largest subcomputation, instead of // adding them all, because subcomputations won't execute in parallel. @@ -550,6 +542,14 @@ void NoFragmentationStatsHeap::AccountForSubcomputationMemory( } } } + if (max_subcomputation_bytes > 0 && + (instruction->opcode() == HloOpcode::kWhile || + instruction->opcode() == HloOpcode::kCall || + instruction->opcode() == HloOpcode::kConditional)) { + // The output buffer of while/call/conditional is always aliased with the + // output buffer of the root instruction in the body. Don't double count. + max_subcomputation_bytes -= alloc_size_by_instruction; + } max_heap_size_ = std::max(max_heap_size_, current_heap_size_ + max_subcomputation_bytes); } diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h index 7d6dcc0dc9436ea6bd30ae14ffe226c014f1ca68..dbbf43082f2c1d21f5ef42f53804bf0969903a58 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.h +++ b/tensorflow/compiler/xla/service/heap_simulator.h @@ -21,6 +21,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/buffer_value.h" #include "tensorflow/compiler/xla/service/buffer_value_containers.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" @@ -30,8 +32,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_schedule.h" #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -58,7 +58,7 @@ class HeapSimulator { // Result represents the result of the heap simulation. struct Result { // The assignment of buffers to chunks. - tensorflow::gtl::FlatMap chunk_map; + absl::flat_hash_map chunk_map; // The total size in bytes of the heap, containing all assigned chunks. int64 heap_size = 0; @@ -100,7 +100,7 @@ class HeapSimulator { const HloComputation& computation, const HloInstructionSequence& sequence, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation = nullptr); // Run the heap simulation with the given algorithm, assuming the given @@ -130,7 +130,7 @@ class HeapSimulator { const TuplePointsToAnalysis& points_to_analysis, const BufferValue::SizeFunction& size_fn, const Options& options = Options(), - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation = nullptr); private: @@ -140,7 +140,7 @@ class HeapSimulator { HeapSimulator(std::unique_ptr algorithm, const BufferValue::SizeFunction& size_fn, const Options& options, const HloSchedule* schedule = nullptr, - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation = nullptr); ~HeapSimulator(); @@ -172,7 +172,7 @@ class HeapSimulator { // handle subcomputations. It would be good to unify the handling of // subcomputations, but it's not clear how. const HloSchedule* schedule_; - const tensorflow::gtl::FlatMap* + const absl::flat_hash_map* memory_by_computation_; // In addition to Alloc and Free, the heap simulator exposes a concept of @@ -193,12 +193,12 @@ class HeapSimulator { const BufferValue* canonical = nullptr; int64 refcount = 0; }; - tensorflow::gtl::FlatMap> + absl::flat_hash_map> shared_buffers_; // Hold some sets for error-checking the sequence of Alloc and Free calls. - tensorflow::gtl::FlatSet allocated_buffers_; - tensorflow::gtl::FlatSet freed_buffers_; + absl::flat_hash_set allocated_buffers_; + absl::flat_hash_set freed_buffers_; // Debugging information filled in while the heap simulator runs. HeapSimulatorTrace debug_trace_; @@ -218,12 +218,6 @@ class HeapAlgorithm { // Alloc allocates a buffer of 'size' bytes. virtual void Alloc(const BufferValue* buffer, int64 size) = 0; - // NoFragmentationStatsHeap overrides this method. - virtual void Alloc(const BufferValue* buffer, int64 size, - const HloInstruction* instruction) { - Alloc(buffer, size); - } - // Takes memory usage of subcomputations into account when calculating the // memory usage of a computation. Currently, we don't handle buffer aliasing // between computations entirely correctly. We are careful to not double count @@ -235,7 +229,9 @@ class HeapAlgorithm { // analysis, it's not worth making major changes to HeapSimulator now. virtual void AccountForSubcomputationMemory( const HloInstruction* instruction, - const tensorflow::gtl::FlatMap& + // The total number of bytes allocated by instruction. + int64 alloc_size_by_instruction, + const absl::flat_hash_map& memory_by_computation) {} // Free de-allocates a previously allocated buffer. @@ -257,12 +253,9 @@ class NoFragmentationStatsHeap : public HeapAlgorithm { void Alloc(const BufferValue* buffer, int64 size) override; - void Alloc(const BufferValue* buffer, int64 size, - const HloInstruction* instruction) override; - void AccountForSubcomputationMemory( - const HloInstruction* instruction, - const tensorflow::gtl::FlatMap& + const HloInstruction* instruction, int64 alloc_size_by_instruction, + const absl::flat_hash_map& memory_by_computation) override; void Free(const BufferValue* buffer, int64 size) override; @@ -382,8 +375,7 @@ class GlobalDecreasingSizeBestFitHeap : public HeapAlgorithm { // Free time of the buffer. int64 end; }; - tensorflow::gtl::FlatMap - buffer_intervals_; + absl::flat_hash_map buffer_intervals_; }; // A heap algorithm that chooses the best results from other algorithms added to diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 191fbf8194ac65684cd7bfd48a6931d82c702186..e30e7667f3015bc7bfe67c65147a5016332780f7 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_value.h" @@ -31,7 +32,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { namespace { @@ -98,6 +98,124 @@ TEST_F(MinimumMemoryForSequenceTest, MultiComputation) { HeapSimulator::MinimumMemoryForModule(schedule, size_fn).ValueOrDie()); } +TEST_F(MinimumMemoryForSequenceTest, SubcomputationAccounting) { + // HloModule SubcomputationAccounting + + // %WhileBody (body_param: f32[4]) -> f32[4] { + // %body_param = f32[4]{0} parameter(0) + // %constant.1 = f32[4]{0} constant({1, 1, 1, 1}) + // ROOT %subtract = f32[4]{0} subtract(f32[4]{0} %body_param, f32[4]{0} + // %constant.1) + // } + + // %WhileCond (cond_param: f32[4]) -> pred[] { + // %cond_param = f32[4]{0} parameter(0) + // %slice = f32[1]{0} slice(f32[4]{0} %cond_param), slice={[0:1]} + // %reshape = f32[] reshape(f32[1]{0} %slice) + // %constant = f32[] constant(0) + // ROOT %not-equal-to = pred[] not-equal-to(f32[] %reshape, f32[] %constant) + // } + + // ENTRY %SubcomputationAccounting () -> f32[2,4] { + // %constant.3 = f32[2,4]{1,0} constant(f32[2,4] { { 1, 2, 3, 4 }, { 1, 2, + // 3, 4 } }) %transpose = f32[2,4]{1,0} transpose(f32[2,4]{1,0} + // %constant.3), dimensions={0,1} %constant.2 = f32[4]{0} constant({1, 1, 1, + // 1}) %while = f32[4]{0} while(f32[4]{0} %constant.2), + // condition=%WhileCond, body=%WhileBody %broadcast = f32[2,4]{1,0} + // broadcast(f32[4]{0} %while), dimensions={1} ROOT %add = f32[2,4]{1,0} + // add(f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast) + // } + + auto module = CreateNewVerifiedModule(); + const Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + const Shape r1f32 = ShapeUtil::MakeShape(F32, {4}); + const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4}); + + // reshape(slice(param)) != 0 + // Needs 5 bytes + auto cond_builder = HloComputation::Builder("WhileCond"); + HloInstruction* cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, r1f32, "cond_param")); + HloInstruction* slice = + cond_builder.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {1}), cond_param, {0}, {1}, {1})); + HloInstruction* reshape = + cond_builder.AddInstruction(HloInstruction::CreateReshape(r0f32, slice)); + HloInstruction* zero = cond_builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); + HloInstruction* cond_comparison = + cond_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, reshape, zero)); + auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build()); + + // param - 1 + // Needs 16 bytes + auto body_builder = HloComputation::Builder("WhileBody"); + HloInstruction* body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, r1f32, "body_param")); + HloInstruction* one_vector = + body_builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1({1, 1, 1, 1}))); + HloInstruction* subtract = + body_builder.AddInstruction(HloInstruction::CreateBinary( + r1f32, HloOpcode::kSubtract, body_param, one_vector)); + auto body_computation = module->AddEmbeddedComputation(body_builder.Build()); + + // transpose(matrix) + bcast(while) + auto builder = HloComputation::Builder(TestName()); + HloInstruction* while_init = + builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1({1, 1, 1, 1}))); + // Creates 16 bytes, ignoring subcomputations + HloInstruction* while_loop = + builder.AddInstruction(HloInstruction::CreateWhile( + r1f32, cond_computation, body_computation, while_init)); + + // Creates 32 bytes and frees 16 + HloInstruction* bcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(r2f32, while_loop, {1})); + + HloInstruction* matrix = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR2( + {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}}))); + // Creates 32 bytes + HloInstruction* transpose = builder.AddInstruction( + HloInstruction::CreateTranspose(r2f32, matrix, {0, 1})); + + // Creates 32 bytes and frees 64 + HloInstruction* add = builder.AddInstruction( + HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast)); + + auto entry_computation = module->AddEntryComputation(builder.Build()); + + HloSchedule schedule(module.get()); + std::vector cond_vec = {cond_param, slice, reshape, zero, + cond_comparison}; + std::vector while_body_vec = {body_param, one_vector, + subtract}; + std::vector entry_comp_vec = {while_init, while_loop, bcast, + matrix, transpose, add}; + schedule.set_sequence(cond_computation, cond_vec); + schedule.set_sequence(body_computation, while_body_vec); + schedule.set_sequence(entry_computation, entry_comp_vec); + + auto size_fn = [](const BufferValue& buffer) { + return ShapeUtil::ByteSizeOf(buffer.shape()); + }; + absl::flat_hash_map memory_by_computation; + memory_by_computation[cond_computation] = 5; + memory_by_computation[body_computation] = 16; + std::unique_ptr points_to_analysis = + TuplePointsToAnalysis::Run(module.get()).ValueOrDie(); + + // HeapSimulator accounts for subcomputations. The output buffer is aliased, + // so we don't double count. + EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation( + *entry_computation, schedule.sequence(entry_computation), + *points_to_analysis, size_fn, &memory_by_computation) + .ValueOrDie()); +} + const char kAlloc[] = "Alloc"; const char kFree[] = "Free"; const char kFinish[] = "Finish"; @@ -174,7 +292,7 @@ class HeapSimulatorTracker { // Construct the module sequence grouped by computation. HloSchedule schedule(module_.get()); - tensorflow::gtl::FlatMap reverse_position; + absl::flat_hash_map reverse_position; for (int i = 0; i < full_module_sequence.size(); ++i) { const HloInstruction* instruction = full_module_sequence[i]; schedule.GetOrCreateSequence(instruction->parent()) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index b19ec126382d143b6ded401f2fad56f950d04bbd..82c8fb1904cf141327eef0f0c7ae44bd4ce3a2b0 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 53 +// Next ID: 58 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -124,9 +124,13 @@ message HloInstructionProto { // The string representation of the infeed configuration. bytes infeed_config = 27; - // Name of a global symbol to call, only present for kCustomCall. + // Name of a external target (eg, global symbol) to call, only present for + // kCustomCall. string custom_call_target = 28; + // Opaque string, only present for kCustomCall. + string custom_call_opaque = 53; + // Shape of outfeed request. xla.Shape outfeed_shape = 29; @@ -176,6 +180,17 @@ message HloInstructionProto { // Collective permute field. repeated SourceTarget source_target_pairs = 52; + + // Sharding for kDomain instructions. + xla.OpSharding domain_entry_sharding = 54; + xla.OpSharding domain_exit_sharding = 55; + + // For custom call this indicates that the layouts are constrained. If + // constrain_layout is true then the 'shape' field must contain a layout, and + // 'operand_shapes_with_layout' must contain a shape with layout for each + // operand. + bool constrain_layout = 56; + repeated Shape operand_shapes_with_layout = 57; } // Serialization of HloComputation. @@ -210,6 +225,32 @@ message HloScheduleProto { map sequences = 1; } +message HloInputOutputAliasProto { + // The following proto describes a pair of aliased an input + // (described by parameter number and a ShapeIndex of the parameter) + // and an output (described by a ShapeIndex of the root + // instruction). For example: + // + // entry = { + // output_shape_index={1}, + // parameter_number=0, + // parameter_shape_index={1, 2}, + // } + // + // This entry indicates that the first paremter's {1, 2} element is + // aliased with the {1} element of the root instruction. + message AliasEntryProto { + // ShapeIndex of the root hlo. + repeated int64 output_shape_index = 1; + // Number of the parameter in entry computation. + int64 parameter_number = 2; + // ShapeIndex of the parameter instruction. + repeated int64 parameter_shape_index = 3; + } + + repeated AliasEntryProto entries = 1; +} + // Serialization of HloModule. message HloModuleProto { string name = 1; @@ -228,6 +269,9 @@ message HloModuleProto { // The schedule for this module. HloScheduleProto schedule = 7; + + // Describes alias information between inputs and outputs. + HloInputOutputAliasProto input_output_alias = 8; } // Serialization of LogicalBuffer. diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 0986da65cbd3d550ecfa01212364518aba651d86..cf8e6594cbe5ffd28ca75dd5006e8817f1e8581c 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/map_util.h" @@ -57,8 +59,9 @@ class BufferValueMap { // construction process. using BufferNumber = int64; - explicit BufferValueMap(const HloDataflowAnalysis& dataflow) - : dataflow_(dataflow) { + explicit BufferValueMap(HloModule* module, + const HloDataflowAnalysis& dataflow) + : module_(module), dataflow_(dataflow) { buffers_.reserve(dataflow_.values().size()); value_to_buffer_number_.reserve(dataflow_.values().size()); for (const HloValue* value : dataflow_.values()) { @@ -119,7 +122,7 @@ class BufferValueMap { } // Return a set of all the values in the given buffer. - const tensorflow::gtl::FlatSet& GetValuesInBuffer( + const absl::flat_hash_set& GetValuesInBuffer( BufferNumber buffer_number) const { return buffers_.at(buffer_number); } @@ -142,7 +145,7 @@ class BufferValueMap { // Move the given value into the given buffer. void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) { BufferNumber old_buffer_number = value_to_buffer_number_.at(&value); - tensorflow::gtl::FlatSet& old_value_set = + absl::flat_hash_set& old_value_set = buffers_.at(old_buffer_number); old_value_set.erase(&value); if (old_value_set.empty()) { @@ -169,6 +172,42 @@ class BufferValueMap { return value_to_buffer_number_.at(&value); } + void ComputeInputOutputAliasedBuffers( + const HloValue& value, std::vector* aliased_buffers) { + // Get parameter value from an aliased_input object. + const auto get_parameter_value = + [this](const std::pair& aliased_input) + -> const HloValue& { + int64 param_number = aliased_input.first; + const ShapeIndex& param_index = aliased_input.second; + return dataflow_.GetUniqueValueAt( + module_->entry_computation()->parameter_instruction(param_number), + param_index); + }; + + // If the value shows up in a root instruction, alias it with parameter + // intruction. + for (const HloPosition& pos : value.positions()) { + if (pos.instruction == module_->entry_computation()->root_instruction()) { + ShapeIndex output_index = pos.index; + + auto aliased_input = + module_->input_output_alias_config().GetAliasedParameter( + output_index); + if (aliased_input) { + aliased_buffers->push_back( + GetBufferForValue(get_parameter_value(*aliased_input))); + } + } + } + + // If the value is parameter instruction itself, alias it with itself. + if (value.instruction()->opcode() == HloOpcode::kParameter && + value.instruction()->parent() == module_->entry_computation()) { + aliased_buffers->push_back(GetBufferForValue(value)); + } + } + void ComputeWhileAliasedBuffers(const HloValue& value, std::vector* aliased_buffers) { VLOG(3) << "Compute kWhile aliases"; @@ -276,6 +315,7 @@ class BufferValueMap { VLOG(2) << "Use of value " << value.ToShortString() << ": " << use; } std::vector aliased_buffers; + ComputeInputOutputAliasedBuffers(value, &aliased_buffers); ComputeWhileAliasedBuffers(value, &aliased_buffers); ComputeConditionalAliasedBuffers(value, &aliased_buffers); // Uniquify aliased buffers. @@ -286,17 +326,17 @@ class BufferValueMap { return aliased_buffers; } + HloModule* module_; + // Dataflow analysis used to construct the buffer map. const HloDataflowAnalysis& dataflow_; // A map containing the set of values contained in each buffer. - tensorflow::gtl::FlatMap> + absl::flat_hash_map> buffers_; // A map indicating which buffer each value is contained in. - tensorflow::gtl::FlatMap - value_to_buffer_number_; + absl::flat_hash_map value_to_buffer_number_; // The buffer number of the next buffer to be created. BufferNumber next_buffer_number_ = 0; @@ -352,7 +392,7 @@ bool HloAliasAnalysis::InstructionBuffersAreAmbiguous( bool HloAliasAnalysis::InstructionBuffersAreDistinct( const HloInstruction* instruction) const { - tensorflow::gtl::FlatSet buffers_seen; + absl::flat_hash_set buffers_seen; for (const auto& pair : dataflow_analysis_->GetInstructionValueSet(instruction)) { const HloValueSet& value_set = pair.second; @@ -461,7 +501,7 @@ StatusOr> HloAliasAnalysis::Run( /*bitcast_defines_value=*/false, fusion_can_share_buffer)); - BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); + BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis()); buffer_map.MergeAliasedBuffers(); // Create a vector of HloBuffers, one for each set of values in the diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.h b/tensorflow/compiler/xla/service/hlo_alias_analysis.h index e345804537723f01e9ccb63e7d6ded1bd68f4196..372f99ff01c786a503e9fc2a1ba96fb4abf75b4c 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/hlo_buffer.h" #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" @@ -110,7 +111,7 @@ class HloAliasAnalysis { std::unique_ptr dataflow_analysis_; // A map indicating which buffer a value is contained in. - tensorflow::gtl::FlatMap value_to_buffer_; + absl::flat_hash_map value_to_buffer_; // A lazily constructed vector containing all HloBuffers sorted by // HloBuffer::Id. diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index 0cd0ab36fcf832af9a71ab5837c94f9b39bc4bf3..5c8d97b2d15e15d15cb8014a7d25b37437ce8aec 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) { EXPECT_FALSE(AnyValuesInSameBufferInterfere()); } +TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) { + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) { + // parameter 0 aliased with output 1 and parameter 1 aliased with output 0. + // + // (p0 , p1) + // \ / + // \ / + // alias X + // / \ + // / \ + // (p0 , p1) + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + // Every Ops in this graph are aliased with each other. + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) { + // Test a simple single while instruction can be aliased with input and output + // of the computation. + // + // body((F32[], F32[]) %tuple_param): + // %add = Add(%tuple_param{0}, %tuple_param{1}) + // return Tuple(%tuple_param{0}, %add) + // + // condition((F32[], F32[]) %tuple_param): + // return Constant(false) + // + // entry: + // %param1 = param1 + // %while = While(%param1, body, condition) + // %while_1 = GTE(%while, 0) + // %while_2 = GTE(%while, 1) + // %negate_1 = Negate(%while_1) + // %negate_2 = Negate(%while_2) + // return Tuple(negate_1, negate_2) + // + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Element 0 passes transparently through the body. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + auto add = body_builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1)); + auto body_tuple = body_builder.AddInstruction( + HloInstruction::CreateTuple({body_element_0, add})); + HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); + + // Condition computation trivially returns a constant "false". + auto cond_builder = HloComputation::Builder("condition"); + auto cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + cond_builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(false))); + HloComputation* condition = + module_->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(tuple_shape, condition, body, param)); + auto while_element_1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0)); + auto while_element_2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1)); + auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_1)); + auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_2)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_THAT( + GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), + UnorderedElementsAre(GetValueDefinedAt(param, {1}), + GetValueDefinedAt(xla_while, /*index=*/{1}), + GetValueDefinedAt(body_param, {1}), + GetValueDefinedAt(cond_param, {1}), + GetValueDefinedAt(add), + GetValueDefinedAt(negate_2))); + + EXPECT_THAT( + analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(), + UnorderedElementsAre( + HloPosition{param, {1}}, HloPosition{xla_while, {1}}, + HloPosition{while_element_2, {}}, HloPosition{body_param, {1}}, + HloPosition{body_element_1, {}}, HloPosition{add, {}}, + HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}}, + HloPosition{cond_param, {1}}, HloPosition{negate_2, {}})); + + EXPECT_FALSE(AnyValuesInSameBufferInterfere()); +} + TEST_F(HloAliasAnalysisTest, SingleCall) { // Test a single call of a subcomputation. The subcomputation adds its two // array-shaped parameters. diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc index 6c11a073b74c61e44dfe81a32261ae78ae7b46fb..9c3aa0e64d119c2560f4955d0bcb492519fa52a2 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.cc +++ b/tensorflow/compiler/xla/service/hlo_buffer.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/map_util.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/hlo_clone_context.h b/tensorflow/compiler/xla/service/hlo_clone_context.h index 658643b427a9625fac1166151a89cbd669f817d5..24910ca07bf7c991d31875704b5dd918ed04fe6f 100644 --- a/tensorflow/compiler/xla/service/hlo_clone_context.h +++ b/tensorflow/compiler/xla/service/hlo_clone_context.h @@ -18,8 +18,8 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/map_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -73,12 +73,12 @@ class HloCloneContext { return FindOrDie(computations_, old_computation); } - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& cloned_instructions() const { return instructions_; } - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& cloned_computations() const { return computations_; } @@ -86,10 +86,8 @@ class HloCloneContext { private: HloModule* module_; string suffix_; - tensorflow::gtl::FlatMap - instructions_; - tensorflow::gtl::FlatMap - computations_; + absl::flat_hash_map instructions_; + absl::flat_hash_map computations_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 0e5920af7a60966ace4ff52662cd23ea3141d477..c2041c466708fd8c88d34f14fbc0064905f594a9 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -24,6 +24,8 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" @@ -39,7 +41,6 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -122,30 +123,6 @@ HloInstruction* HloComputation::AddParameter( return instructions_.back().get(); } -namespace { - -// Returns the new name for a fusion parameter when we change its number. -// -// Fusion parameters are named foo.param_1, bar.param_2, etc. We are -// renumbering the parameters, so replace the final number in the name with -// the updated value. -string RenameFusionParameter(const string& original_name, int64 new_param_no) { - const string param_underscore = ".param_"; - size_t index = original_name.rfind(param_underscore); - if (index == string::npos) { - return original_name; - } - string after_param = original_name.substr(index + param_underscore.size()); - int64 numeric_suffix; - if (absl::SimpleAtoi(after_param, &numeric_suffix)) { - return StrCat(original_name.substr(0, index + param_underscore.size()), - new_param_no); - } - return original_name; -} - -} // namespace - Status HloComputation::RemoveParameter(int64 param_no) { CHECK_GE(param_no, 0); CHECK_LT(param_no, param_instructions_.size()); @@ -158,11 +135,9 @@ Status HloComputation::RemoveParameter(int64 param_no) { while (param_no < param_instructions_.size()) { param_instruction = param_instructions_[param_no]; - string param_name = - RenameFusionParameter(param_instruction->name(), param_no); HloInstruction* new_instr = AddInstructionInternal(HloInstruction::CreateParameter( - param_no, param_instruction->shape(), param_name)); + param_no, param_instruction->shape(), StrCat("param_", param_no))); TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr)); param_instructions_[param_no] = new_instr; TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction)); @@ -186,11 +161,9 @@ Status HloComputation::RemoveUnusedParameters() { if (removed > 0) { const int64 param_no = i - removed; - string param_name = - RenameFusionParameter(param_instruction->name(), param_no); - HloInstruction* new_instr = - AddInstructionInternal(HloInstruction::CreateParameter( - param_no, param_instruction->shape(), param_name)); + HloInstruction* new_instr = AddInstructionInternal( + HloInstruction::CreateParameter(param_no, param_instruction->shape(), + StrCat("param_", param_no))); TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr)); param_instructions_[param_no] = new_instr; TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction)); @@ -305,10 +278,9 @@ void HloComputation::set_root_instruction(HloInstruction* new_root_instruction, namespace { // Helper which builds a post order of the HLO call graph. -void ComputeComputationPostOrder( - HloComputation* computation, - tensorflow::gtl::FlatSet* visited, - std::vector* post_order) { +void ComputeComputationPostOrder(HloComputation* computation, + absl::flat_hash_set* visited, + std::vector* post_order) { if (visited->insert(computation).second) { for (auto* instruction : computation->instructions()) { for (HloComputation* called_computation : @@ -325,7 +297,7 @@ void ComputeComputationPostOrder( void HloComputation::ComputeInstructionPostOrder( const HloComputation::ChannelDependencyMap& channel_dependency_map, std::vector* post_order, HloInstruction* root, - tensorflow::gtl::FlatMap* visited) const { + absl::flat_hash_map* visited) const { std::vector dfs_stack; dfs_stack.push_back(root); while (!dfs_stack.empty()) { @@ -422,7 +394,7 @@ std::vector HloComputation::MakeInstructionPostOrder() const { std::vector post_order; post_order.reserve(instruction_count()); std::vector trace_instructions; - tensorflow::gtl::FlatMap visited; + absl::flat_hash_map visited; for (auto& instruction : instructions_) { if (instruction->opcode() == HloOpcode::kTrace) { // Trace instructions aren't handled by the DFS visitor. Add trace @@ -443,7 +415,7 @@ std::vector HloComputation::MakeInstructionPostOrder() const { std::vector HloComputation::MakeEmbeddedComputationsList() const { - tensorflow::gtl::FlatSet visited; + absl::flat_hash_set visited; std::vector post_order; // To avoid special handling of this computation, cast away const of @@ -533,9 +505,9 @@ HloComputationProto HloComputation::ToProto() const { /* static */ StatusOr> HloComputation::CreateFromProto( const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map) { - tensorflow::gtl::FlatMap instruction_map; - tensorflow::gtl::FlatMap to_proto_id; + const absl::flat_hash_map& computation_map) { + absl::flat_hash_map instruction_map; + absl::flat_hash_map to_proto_id; std::vector> instructions; int64 parameter_count = 0; for (const HloInstructionProto& instruction_proto : proto.instructions()) { @@ -563,6 +535,28 @@ HloComputation::CreateFromProto( return to_proto_id[a.get()] < to_proto_id[b.get()]; }); + TF_RETURN_IF_ERROR([&]() -> Status { + std::vector parameters_seen(parameter_count); + int parameters_seen_count = 0; + for (auto& instruction : instructions) { + if (instruction->opcode() == HloOpcode::kParameter) { + int64 param_no = instruction->parameter_number(); + TF_RET_CHECK(param_no >= 0 && param_no < parameter_count) + << "Invalid parameter number. Expected [0, " << parameter_count + << "), got " << param_no; + TF_RET_CHECK(!parameters_seen[param_no]) + << "Parameter number " << param_no + << " already allocated in this computation"; + parameters_seen[param_no] = true; + parameters_seen_count++; + } + } + TF_RET_CHECK(parameters_seen_count == parameter_count) + << "Not all parameters in range [0, " << parameter_count + << ") were referenced"; + return Status::OK(); + }()); + auto computation = absl::WrapUnique( new HloComputation(proto.name(), parameter_count, &instructions, root, /*fusion_instruction=*/nullptr)); diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 936a53bd7e9ad362d10f06ab807ddb8944fec93e..d87ab4bda162a74421e8906e07cfcb97e2128fe4 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -25,6 +25,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/iterator_util.h" #include "tensorflow/compiler/xla/map_util.h" @@ -40,8 +42,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -188,7 +188,7 @@ class HloComputation { // calls. static StatusOr> CreateFromProto( const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map); + const absl::flat_hash_map& computation_map); // Gets the instructions in this computation. // @@ -414,14 +414,14 @@ class HloComputation { // cross-replica-sum the union of the dependencies for all participating // instructions. using ChannelDependencyMap = - tensorflow::gtl::FlatMap>; + absl::flat_hash_map>; ChannelDependencyMap ComputeChannelDependencies() const; enum VisitState { kVisiting, kVisited }; void ComputeInstructionPostOrder( const HloComputation::ChannelDependencyMap& channel_dependency_map, std::vector* post_order, HloInstruction* root, - tensorflow::gtl::FlatMap* visited) const; + absl::flat_hash_map* visited) const; string name_; int64 unique_id_; @@ -439,7 +439,7 @@ class HloComputation { // instruction pointer to location in the list for fast lookup. using InstructionList = std::list>; InstructionList instructions_; - tensorflow::gtl::FlatMap + absl::flat_hash_map instruction_iterators_; std::vector param_instructions_; diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc index f837816cea78d78bb3d605dd91e81cac39036268..4f898ce61c3f36e83e4b13130a404dbb4a2c36c6 100644 --- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc +++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc @@ -76,6 +76,26 @@ StatusOr HloConstantFolding::Run(HloModule* module) { continue; } + // Don't constant fold unless it's a net positive or the output is small. + if (ShapeUtil::IsArray(instruction->shape())) { + int64 elements_in_removed_operands = 0; + for (HloInstruction* operand : instruction->operands()) { + if (operand->user_count() == 1 && + ShapeUtil::IsArray(operand->shape())) { + elements_in_removed_operands += + ShapeUtil::ElementsIn(operand->shape()); + } + } + int64 elements_in_constant = + ShapeUtil::ElementsIn(instruction->shape()); + + static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000; + if (elements_in_constant > elements_in_removed_operands && + elements_in_constant > kMaximumConstantSizeElements) { + continue; + } + } + Literal result; // Currently we skip unimplemented operations. // TODO(b/35975797): Fold constant computations for more operations. @@ -84,6 +104,7 @@ StatusOr HloConstantFolding::Run(HloModule* module) { << instruction->ToString(); continue; } + VLOG(4) << "Constant folded: " << instruction->ToString(); TF_RETURN_IF_ERROR(computation->ReplaceWithNewInstruction( instruction, HloInstruction::CreateConstant(std::move(result)))); diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc index 3e0def5d26a0033d954a776c1c32d6c35acfb505..e45f905f7152c37a9ab2b41d407310671310c2a3 100644 --- a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc @@ -242,5 +242,25 @@ TEST_F(HloConstantFoldingTest, ConstantFoldReduceNoLayout) { EXPECT_THAT(module().entry_computation()->root_instruction(), op::Reduce()); } +const char* const kConstantFoldLargePad = R"( + HloModule ConstantFoldLargePad + + ENTRY r { + a = f32[1,1,1] constant(f32[1,1,1]{{{7}}}) + b = f32[] constant(42) + ROOT pad = f32[2048,2048,128] pad(a, b), padding=1024_1023x1024_1023x64_63 + })"; + +TEST_F(HloConstantFoldingTest, DoesNotFoldLargePad) { + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(kConstantFoldLargePad)); + HloConstantFolding const_folder; + TF_ASSERT_OK_AND_ASSIGN(bool result, const_folder.Run(module.get())); + EXPECT_FALSE(result); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Pad(op::Constant(), op::Constant())); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc index b59c9ba3ed7990eb2a35abc83f87b25a1b1e7c60..e602107cbe64320a8e8e740168cb294ec6be9667 100644 --- a/tensorflow/compiler/xla/service/hlo_cse.cc +++ b/tensorflow/compiler/xla/service/hlo_cse.cc @@ -23,6 +23,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" @@ -34,7 +35,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/hash/hash.h" namespace xla { @@ -137,8 +137,8 @@ StatusOr HloCSE::Run(HloModule* module) { // HLO instructions are grouped into equivalency classes by using the // cse_equal predicate defined above. This set holds a representative // instruction for each class. - tensorflow::gtl::FlatSet + absl::flat_hash_set representatives(/*N=*/computation->instruction_count() + 1, &CseHash, cse_equal); for (auto instruction : computation->MakeInstructionPostOrder()) { diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 6a63681996bc57f4ef16b2405ffc8ce4f003e783..f401eac01628e41aa9433af87ff03cf3acc867cd 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -91,7 +92,7 @@ HloDataflowAnalysis::HloDataflowAnalysis( bool HloDataflowAnalysis::AreTransitiveUsesElementwiseOrTuple( const HloInstruction* inst) { - tensorflow::gtl::FlatSet visited; + absl::flat_hash_set visited; absl::InlinedVector stack; stack.push_back(inst); while (!stack.empty()) { @@ -125,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction, const HloValue& HloDataflowAnalysis::GetValueDefinedAt( const HloInstruction* instruction, const ShapeIndex& index) const { - CHECK(ValueIsDefinedAt(instruction, index)); + CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString(); return GetUniqueValueAt(instruction, index); } @@ -159,8 +160,8 @@ void HloDataflowAnalysis::MarkValueForDeletion(HloValue::Id value_id) { void HloDataflowAnalysis::DeleteMarkedValues() { #ifndef NDEBUG // Verify that no marked-for-deletion values are in any of the value sets. - tensorflow::gtl::FlatSet id_set(value_ids_to_delete_.begin(), - value_ids_to_delete_.end()); + absl::flat_hash_set id_set(value_ids_to_delete_.begin(), + value_ids_to_delete_.end()); for (const auto& pair : value_sets_) { const HloInstruction* instruction = pair.first; const InstructionValueSet& instruction_value_set = pair.second; @@ -355,23 +356,6 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { return false; } -bool HloDataflowAnalysis::UpdateSliceValueSet(HloInstruction* slice) { - CHECK_EQ(slice->opcode(), HloOpcode::kSlice); - if (!slice->IsInPlaceSlice()) { - return false; - } - // If this slice is lowered to an in-place version, then it forwards the - // operand value to the output. - const InstructionValueSet& operand_set = - GetInstructionValueSet(slice->operand(0)); - InstructionValueSet& slice_set = GetInstructionValueSet(slice); - if (operand_set != slice_set) { - slice_set = operand_set; - return true; - } - return false; -} - bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) { CHECK_EQ(send->opcode(), HloOpcode::kSend); bool changed = false; @@ -640,8 +624,6 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( switch (instruction->opcode()) { case HloOpcode::kBitcast: return UpdateBitcastValueSet(instruction); - case HloOpcode::kSlice: - return UpdateSliceValueSet(instruction); case HloOpcode::kDomain: return UpdateDomainValueSet(instruction); case HloOpcode::kCopy: @@ -673,7 +655,7 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( void HloDataflowAnalysis::Propagate() { std::queue worklist; - tensorflow::gtl::FlatSet workset; + absl::flat_hash_set workset; auto add_to_worklist = [&worklist, &workset](HloInstruction* instruction) { if (workset.insert(instruction).second) { worklist.push(instruction); @@ -813,11 +795,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { define_all_values(); } break; - case HloOpcode::kSlice: - if (!instruction->IsInPlaceSlice()) { - define_all_values(); - } - break; case HloOpcode::kWhile: case HloOpcode::kCall: case HloOpcode::kConditional: diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index e62c1c2ac81981e1f44f4c7e1479107979576e32..abac398c04fc4c418d8814a0097db4434bc1cd9c 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -182,7 +182,6 @@ class HloDataflowAnalysis { // Updates the value set for a particular instruction type. Returns whether // the instruction value set changed. bool UpdateBitcastValueSet(HloInstruction* bitcast); - bool UpdateSliceValueSet(HloInstruction* slice); bool UpdateCallValueSet(HloInstruction* call); bool UpdateConditionalValueSet(HloInstruction* conditional); bool UpdateCopyValueSet(HloInstruction* copy); diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc index 113fd18eae70f0a581e2ab3e44544c47fcab3361..c6d02f9f67bb599e496d20fc2acf2e627ed54438 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc @@ -17,6 +17,8 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" @@ -40,18 +42,19 @@ namespace xla { return std::move(domain_map); } -bool HloDomainMap::InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const { +bool HloDomainMap::InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const { int64 domain_id1 = GetDomainId(instruction1); int64 domain_id2 = GetDomainId(instruction2); return domain_id1 >= 0 && domain_id1 == domain_id2; } -int64 HloDomainMap::GetDomainId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainId(const HloInstruction* instruction) const { return FindOrDefault(instruction_to_domain_, instruction, -1); } -int64 HloDomainMap::GetDomainMetadataId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainMetadataId( + const HloInstruction* instruction) const { return FindOrDie(domain_metadata_id_, instruction); } @@ -106,8 +109,8 @@ Status HloDomainMap::PopulateDomainMetadataMap() { auto equal = [](const DomainMetadata* a, const DomainMetadata* b) { return a->Matches(*b); }; - tensorflow::gtl::FlatMap + absl::flat_hash_map domain_metadata(1024, hash, equal); for (auto& domain : instruction_domains_) { @@ -198,7 +201,8 @@ StatusOr> HloDomainMap::CreateDomain( return std::move(domain); } -bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const { +bool HloDomainMap::IsDomainInstruction( + const HloInstruction* instruction) const { if (instruction->opcode() != HloOpcode::kDomain) { return false; } @@ -216,7 +220,7 @@ bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const { /* static */ std::vector HloDomainMap::MakeNonDomainInstructions( - const tensorflow::gtl::FlatSet& instruction_set, + const absl::flat_hash_set& instruction_set, const InstructionOrderMap& instructions_order) { std::vector instructions; instructions.reserve(instruction_set.size()); diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h index 56b557d7cea424f63cd4891661ae446133ee5a37..bce7d1aa7cf1822ef1608674e7bf9483c628e4b5 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.h +++ b/tensorflow/compiler/xla/service/hlo_domain_map.h @@ -19,14 +19,14 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -58,27 +58,26 @@ class HloDomainMap { } // Checks whether two instructions are within the same domain. - bool InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const; + bool InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const; // Checks whether instruction is a kDomain instruction of the kind we are // currently processing. - bool IsDomainInstruction(HloInstruction* instruction) const; + bool IsDomainInstruction(const HloInstruction* instruction) const; // Retrieves the domain identifier of the instruction, or -1 in case // instruction is not found within any domain. - int64 GetDomainId(HloInstruction* instruction) const; + int64 GetDomainId(const HloInstruction* instruction) const; // Returns the unique id of the domain metadata for the domain the given // instruction belongs to. The given instruction must not be a kDomain // instruction since each domain instruction is associated with 2 domains. - int64 GetDomainMetadataId(HloInstruction* instruction) const; + int64 GetDomainMetadataId(const HloInstruction* instruction) const; private: // Map used for representing instruction ordering, i.e. // order_map[a] < order_map[b] means a must be ordered before b. - using InstructionOrderMap = - tensorflow::gtl::FlatMap; + using InstructionOrderMap = absl::flat_hash_map; HloDomainMap(string domain_kind) : domain_kind_(std::move(domain_kind)) {} @@ -111,7 +110,7 @@ class HloDomainMap { // Out of an instruction set, returns a vector of all the ones which are not // a kDomain kind. static std::vector MakeNonDomainInstructions( - const tensorflow::gtl::FlatSet& instruction_set, + const absl::flat_hash_set& instruction_set, const InstructionOrderMap& instructions_order); // Populates domain_metadata_id_ that maps each HloInstruction to the unique @@ -120,8 +119,8 @@ class HloDomainMap { string domain_kind_; std::vector> instruction_domains_; - tensorflow::gtl::FlatMap instruction_to_domain_; - tensorflow::gtl::FlatMap domain_metadata_id_; + absl::flat_hash_map instruction_to_domain_; + absl::flat_hash_map domain_metadata_id_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_domain_metadata.h b/tensorflow/compiler/xla/service/hlo_domain_metadata.h index 302807f816e4ab626af419023e7740fd6bde795f..d3c83c15ae3be67a64f3dc4bcb0312ae9fbc33e4 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_metadata.h +++ b/tensorflow/compiler/xla/service/hlo_domain_metadata.h @@ -20,11 +20,11 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { @@ -42,7 +42,7 @@ class DomainMetadata { // operand/user pathways, without crossing a kDomain instruction of a given // kind. The reach_set can contain kDomain instructions of other kinds, if // two domains of different kind intersect each other. - tensorflow::gtl::FlatSet reach_set; + absl::flat_hash_set reach_set; // The same instructions in reach_set, but purged from kDomain instructions // and ordered according to their computation graph post-order, i.e. @@ -55,8 +55,8 @@ class DomainMetadata { // whose dataflow enters the reach set (domain), while the exit_domains // contains the set of kDomain instructions whose dataflow exit the reach // set. - tensorflow::gtl::FlatSet enter_domains; - tensorflow::gtl::FlatSet exit_domains; + absl::flat_hash_set enter_domains; + absl::flat_hash_set exit_domains; }; virtual ~DomainMetadata() = default; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index d7c39b2778d57c1b2e9da0d87d9c2b91bb47e968..6cba46135c52c5b83afcfe08432adabfcd92acae 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/index_util.h" @@ -1279,7 +1280,9 @@ StatusOr EvaluateSortInternal(HloInstruction* sort, return SafeLess(a.first, b.first); }); std::vector result_keys; - std::vector result_values; + // We use a InlinedVector here because we need to convert it to an + // absl::Span later, and this would not work with std::vector. + absl::InlinedVector result_values; for (const auto& key_value : key_value_vector) { result_keys.push_back(key_value.first); result_values.push_back(key_value.second); @@ -1316,6 +1319,9 @@ StatusOr EvaluateSortCurried(HloInstruction* sort, const Literal& keys_literal, const Literal& values_literal) { switch (sort->operand(1)->shape().element_type()) { + case PRED: + return EvaluateSortInternal(sort, keys_literal, + values_literal); case F32: return EvaluateSortInternal(sort, keys_literal, values_literal); @@ -1378,7 +1384,7 @@ Status HloEvaluator::HandleReduce(HloInstruction* reduce) { "unsupported"); } } - return reduce->Visit(typed_visitors_.at(first_element_type).get()); + return reduce->Visit(typed_visitors_[first_element_type].get()); } } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 6c2662ebaeff5ff3ae21b19fac430c3490e22d36..07f8d0aad4af0b07303b4e485b3630cc75bcb519 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "absl/container/node_hash_map.h" #include "absl/memory/memory.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/macros.h" namespace xla { @@ -134,7 +134,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // Wraps around instruction handling to infer types before dispatching to // the corresponding typed Visitor. Status DefaultAction(HloInstruction* hlo) override { - return hlo->Visit(typed_visitors_.at(hlo->shape().element_type()).get()); + return hlo->Visit(typed_visitors_[hlo->shape().element_type()].get()); } Status Preprocess(HloInstruction* hlo) override; @@ -210,8 +210,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // post-orderring. // Must be cleared for each evaluation. // Storing Literal in place require the container to have pointer stability so - // we cannot use FlatMap any more. - std::unordered_map evaluated_; + // we cannot use flat_hash_map any more. + absl::node_hash_map evaluated_; private: template @@ -241,12 +241,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { } // Map from a primitive type to its associated (templated) DfsHloVisitor. - // Note: the hash function here is only needed because current gcc std::hash - // does not specialize for enum types. This should however be fixed in the - // future: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60970#c5 - tensorflow::gtl::FlatMap, - std::hash> - typed_visitors_; + std::unique_ptr typed_visitors_[PrimitiveType_ARRAYSIZE]; // Caches pointers to input literals, assuming they are in post-order. // Literals are not owned by this class, and they must outlive the lifetime of diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index cee11a8a2166f96ae801095b6364921ed05d0000..608a42bb60702aa075daca39535ca1672dcc5467 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1463,6 +1463,58 @@ TEST_P(HloEvaluatorTest, ReduceWindowMax) { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } +TEST_P(HloEvaluatorTest, ReduceWindowMaxWindowDilation) { + HloComputation::Builder b(TestName()); + + // arg: + // f32[3,3] { + // { 1, 2, 3 }, + // { 5, 6, 7 }, + // { 9, 10, 11 }, + // } + auto arg_array = absl::make_unique>(3, 3); + arg_array->FillUnique(1.0f); + auto arg_literal = LiteralUtil::CreateR2FromArray2D(*arg_array); + + HloInstruction* arg_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal))); + + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0.f))); + + HloComputation::Builder max_computation("max"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + max_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs)); + auto max_func = module().AddEmbeddedComputation(max_computation.Build()); + + Window window; + WindowDimension dim; + dim.set_size(2); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(2); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + Shape shape = ShapeUtil::MakeShape(F32, {1, 1}); + b.AddInstruction(HloInstruction::CreateReduceWindow( + shape, arg_instruction, init_value, window, max_func)); + + module().AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + auto expected = LiteralUtil::CreateR2({{11}}); + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); +} + TEST_P(HloEvaluatorTest, ReduceWindowAdd) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index b2d12c94b848e4fd8ae473fdc0e4a9f5fecf6286..84fbbd3e0c3ddb704b8db601897f3b199dc99626 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1072,66 +1072,66 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Convolve input feature with kernel. do { + // Find corresponding spatial dimension index for input (lhs). + int64 lhs_linear_spatial_index = 0; + int64 rhs_linear_spatial_index = 0; + for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { + // Spatial dimension number for input (lhs) and output. + const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); + const int64 output_spatial_dim = dnums.output_spatial_dimensions(ki); + + // Calculate lhs (input) index without taking base dilation into + // account. + const auto& window_dim = window.dimensions(ki); + const int64 undilated_index = + out_index[output_spatial_dim] * window_dim.stride() - + window_dim.padding_low() + + rhs_spatial_index[ki] * window_dim.window_dilation(); + // Skip if the lhs (input) index is to be dilated. As an + // optimization, skip this mod if there's no dilation. + if (window_dim.base_dilation() > 1 && + undilated_index % window_dim.base_dilation() != 0) { + goto cnt; + } + + // Calculate the actual lhs (input) index after dilation. As an + // optimization, skip this integer divide if there's no dilation. + int64 lhs_spatial_index; + if (window_dim.base_dilation() > 1) { + lhs_spatial_index = undilated_index / window_dim.base_dilation(); + } else { + lhs_spatial_index = undilated_index; + } + + // Skip if input index is not in bounds. + if (!(lhs_spatial_index >= 0 && + lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) { + goto cnt; + } + + lhs_linear_spatial_index += + lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; + rhs_linear_spatial_index += + (window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]) * + rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; + } + for (int64 rhs_iz = 0; rhs_iz < input_feature_group_size; ++rhs_iz) { const int64 iz = feature_group_index * input_feature_group_size + rhs_iz; - int64 lhs_linear_index = 0; + int64 lhs_linear_index = lhs_linear_spatial_index; lhs_linear_index += out_index[output_batch_dim] * lhs_dim_multipliers[input_batch_dim]; lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; - int64 rhs_linear_index = 0; + int64 rhs_linear_index = rhs_linear_spatial_index; rhs_linear_index += out_index[output_z_dim] * rhs_dim_multipliers[kernel_output_z_dim]; rhs_linear_index += rhs_iz * rhs_dim_multipliers[kernel_input_z_dim]; - // Find corresponding spatial dimension index for input (lhs). - for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { - // Spatial dimension number for input (lhs) and output. - const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); - const int64 output_spatial_dim = - dnums.output_spatial_dimensions(ki); - - // Calculate lhs (input) index without taking base dilation into - // account. - const auto& window_dim = window.dimensions(ki); - const int64 undilated_index = - out_index[output_spatial_dim] * window_dim.stride() - - window_dim.padding_low() + - rhs_spatial_index[ki] * window_dim.window_dilation(); - // Skip if the lhs (input) index is to be dilated. As an - // optimization, skip this mod if there's no dilation. - if (window_dim.base_dilation() > 1 && - undilated_index % window_dim.base_dilation() != 0) { - goto cnt; - } - - // Calculate the actual lhs (input) index after dilation. As an - // optimization, skip this integer divide if there's no dilation. - int64 lhs_spatial_index; - if (window_dim.base_dilation() > 1) { - lhs_spatial_index = undilated_index / window_dim.base_dilation(); - } else { - lhs_spatial_index = undilated_index; - } - lhs_linear_index += - lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; - - // Skip if input index is not in bounds. - if (!(lhs_spatial_index >= 0 && - lhs_spatial_index < - lhs_shape.dimensions(input_spatial_dim))) { - goto cnt; - } - - rhs_linear_index += - (window_dim.window_reversal() - ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) - : rhs_spatial_index[ki]) * - rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; - } - result_val += static_cast(lhs_literal_data[lhs_linear_index]) * static_cast(rhs_literal_data[rhs_linear_index]); @@ -2613,8 +2613,17 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { std::vector base_index(rank); bool out_of_bound = false; for (int64 i = 0; i < rank; ++i) { - base_index[i] = window_count_index[i] * window.dimensions(i).stride() + - window_index[i] - window.dimensions(i).padding_low(); + base_index[i] = + window_count_index[i] * window.dimensions(i).stride() + + window_index[i] * window.dimensions(i).window_dilation() - + window.dimensions(i).padding_low(); + // We are not in the base area if the dilation placed us out of bounds. + if (base_index[i] % window.dimensions(i).base_dilation() != 0) { + out_of_bound = true; + break; + } + // Apply the dilation to the base area. + base_index[i] /= window.dimensions(i).base_dilation(); if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) { out_of_bound = true; break; diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index de3d7a167752f0de790585e50874dd6d2904bd37..ce4cad42355ec5881f2ae14f4dd52a0588d51cf7 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -90,8 +90,9 @@ std::unique_ptr CreateHloProfilePrinterData( HloInstructionInfo* instruction_info = computation_info->add_instruction_infos(); instruction_info->set_long_name(hlo->ToString()); - instruction_info->set_short_name( - hlo->ToString(HloPrintOptions().set_compact_operands(true))); + instruction_info->set_short_name(hlo->ToString( + HloPrintOptions().set_compact_operands(true).set_print_operand_names( + false))); instruction_info->set_category(hlo->ToCategory()); instruction_info->set_flop_count(cost_analysis.flop_count(*hlo)); instruction_info->set_transcendental_count( diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc new file mode 100644 index 0000000000000000000000000000000000000000..9ad98e50386c5ffd6f85292c270892eb9c91f14d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -0,0 +1,172 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" + +namespace xla { +Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) { + // Output can't be aliased with multiple parameters. + TF_RET_CHECK(!alias_.element(output_index)); + (*alias_.mutable_element(output_index)) = + std::make_pair(param_number, param_index); + return Status::OK(); +} + +HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { + HloInputOutputAliasProto result; + alias_.ForEachElement( + [&](const ShapeIndex& index, + const absl::optional>& data) { + if (data) { + HloInputOutputAliasProto::AliasEntryProto entry; + for (int64 i : index) { + entry.add_output_shape_index(i); + } + entry.set_parameter_number(data->first); + for (int64 i : data->second) { + entry.add_parameter_shape_index(i); + } + result.add_entries()->Swap(&entry); + } + }); + return result; +} + +StatusOr HloInputOutputAliasConfig::CreateFromProto( + const HloModule* module, const HloInputOutputAliasProto& proto) { + HloInputOutputAliasConfig result( + module->entry_computation()->root_instruction()->shape()); + for (const HloInputOutputAliasProto::AliasEntryProto& entry : + proto.entries()) { + ShapeIndex output_index(entry.output_shape_index().begin(), + entry.output_shape_index().end()); + + int64 param_number = entry.parameter_number(); + ShapeIndex param_index(entry.parameter_shape_index().begin(), + entry.parameter_shape_index().end()); + TF_RETURN_IF_ERROR( + result.SetUpAlias(output_index, param_number, param_index)); + } + + return result; +} + +string HloInputOutputAliasConfig::ToString() const { + std::vector pieces; + pieces.push_back("HloInputOutputAliasConfig"); + + ForEachAlias([&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + pieces.push_back(absl::StrFormat( + " OutputIndex %s is aliased with parameter %lld at %s:", + output_index.ToString(), param_number, param_index.ToString())); + }); + + return absl::StrJoin(pieces, "\n"); +} + +bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const { + bool output = false; + alias_.ForEachElement( + [&](const xla::ShapeIndex&, + absl::optional> alias) { + if (alias && alias->first == param_number) { + output = true; + } + }); + return output; +} + +absl::optional HloInputOutputAliasConfig::GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const { + absl::optional output; + alias_.ForEachElement( + [&](const xla::ShapeIndex& output_index, + absl::optional> alias) { + if (alias && alias->first == param_number && + alias->second == param_index) { + output = output_index; + } + }); + return output; +} + +absl::optional> +HloInputOutputAliasConfig::GetAliasedParameter( + const ShapeIndex& output_index) const { + CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)); + return alias_.element(output_index); +} + +void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const { + alias_.ForEachElement( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + fn(output_index, aliased->first, aliased->second); + } + }); +} + +Status HloInputOutputAliasConfig::ForEachAliasWithStatus( + AliasFnWithStatus fn) const { + return alias_.ForEachElementWithStatus( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second)); + } + return Status::OK(); + }); +} + +Status HloInputOutputAliasConfig::Verify(const HloModule& module) const { + std::vector> param_has_seen; + const HloComputation* entry = module.entry_computation(); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + HloInstruction* param = entry->parameter_instruction(i); + param_has_seen.emplace_back(param->shape()); + } + return ForEachAliasWithStatus([&](const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) -> Status { + const HloInstruction* root = entry->root_instruction(); + + const Shape& param_shape = + entry->parameter_instruction(param_number)->shape(); + const Shape& output_shape = root->shape(); + TF_RET_CHECK(entry->num_parameters() > param_number); + TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index)); + TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index)); + + // Check each param_number and param_index pair only show up once. No + // input can be aliased with output buffers. + TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false); + + *(param_has_seen[param_number].mutable_element(param_index)) = true; + + return Status::OK(); + }); +} + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config) { + out << config.ToString(); + return out; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h new file mode 100644 index 0000000000000000000000000000000000000000..02c46f65c823617cb7aeac6720145016275acb1e --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -0,0 +1,101 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ + +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/shape_util.h" + +namespace xla { + +class HloModule; + +// This class specifies the alias map from output index to parameter number and +// parameter index in the entry computation. +class HloInputOutputAliasConfig { + public: + HloInputOutputAliasConfig() = default; + + explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {} + + virtual ~HloInputOutputAliasConfig() = default; + + // Sets up alias config from `output_index` to `param_index` at + // `param_number`. + Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index); + + // Returns true if the given parameter is aliased with one of the output + // buffers. + bool ParameterHasAlias(int64 param_number) const; + + // (De)Serializes an HloInputOutoutAliasConfig to/from an + // HloInputOutoutAliasProto. + HloInputOutputAliasProto ToProto() const; + + static StatusOr CreateFromProto( + const HloModule* module, const HloInputOutputAliasProto& proto); + + // Returns the output index that the given parameter and parameter index is + // aliased with. A nullopt is returned if there is no output that is aliased + // with the parameter number and index. + absl::optional GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const; + + // Returns the number of parameter and index of the parameter buffer that the + // given output buffer index is aliased with. A nullopt is returned if there + // is no parameter is aliased with the specific output. + absl::optional> GetAliasedParameter( + const ShapeIndex& output_index) const; + + using AliasFn = + std::function; + + // Iterates through each aliased output and input. + void ForEachAlias(AliasFn fn) const; + + using AliasFnWithStatus = + std::function; + + // Verifies that the given config is valid for the given module. + // Specifically, the config's input and output should be in-bound and size of + // the aliased buffers should match. + Status Verify(const HloModule& module) const; + + Status ForEachAliasWithStatus(AliasFnWithStatus fn) const; + + string ToString() const; + + private: + // A ShapeTree which indicates the list of buffers that's expected to be + // aliased. The key on this shape tree represents the output index. The value + // is a pair of parameter number and index into the buffer. If the value is + // nullopt, it means there is no parameter aliasing for this output. + ShapeTree>> alias_; +}; + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3b61ff04e6d7eeaa5876775fa18a85af82164b3d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" + +#include +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_ordering.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { +class HloInputOutputAliasConfigTest : public HloTestBase { + protected: + void expect_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional aliased_output = + config.GetAliasedOutput(param_number, param_index); + + EXPECT_TRUE(aliased_output); + EXPECT_EQ(aliased_output.value(), output_index); + + absl::optional> aliased_param = + config.GetAliasedParameter(output_index); + + EXPECT_TRUE(aliased_param); + EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index)); + } + + void expect_not_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional aliased_output = + config.GetAliasedOutput(param_number, param_index); + + EXPECT_FALSE(aliased_output && aliased_output == output_index); + + absl::optional> aliased_param = + config.GetAliasedParameter(output_index); + + EXPECT_FALSE(aliased_param && aliased_param->first == param_number && + aliased_param->second == param_index); + } +}; + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + param = (f32[], f32[]) parameter(0) + gte1 = f32[] get-tuple-element(%param), index=0 + gte2 = f32[] get-tuple-element(%param), index=1 + ROOT root = (f32[], f32[]) tuple(%gte1, %gte2) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0}, config); + + expect_aliased(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.Verify(*module)); +} + +TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); +} +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index f7ec854d800fdbc0af3a53eb3bebe772f432e478..5c3908a9a4d6de3f9eda41a8c11d6c9ee4c92644 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -22,6 +22,8 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/ascii.h" @@ -37,14 +39,13 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/human_readable_json.h" #include "tensorflow/core/platform/logging.h" @@ -59,8 +60,8 @@ using absl::StrJoin; /* static */ StatusOr> HloInstruction::CreateFromProto( const HloInstructionProto& proto, - const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map) { + const absl::flat_hash_map& instruction_map, + const absl::flat_hash_map& computation_map) { TF_RET_CHECK(!proto.opcode().empty()); TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode())); TF_RET_CHECK(proto.has_shape()); @@ -80,6 +81,20 @@ StatusOr> HloInstruction::CreateFromProto( const auto computations = [&computation_map, &proto](int index) { return computation_map.at(proto.called_computation_ids(index)); }; + + TF_RET_CHECK(std::all_of( + proto.operand_ids().begin(), proto.operand_ids().end(), + [&instruction_map](int64 id) { return instruction_map.contains(id); })) + << proto.name() << " instruction contains invalid operand id(s)"; + + TF_RET_CHECK(std::all_of( + proto.called_computation_ids().begin(), + proto.called_computation_ids().end(), + [&computation_map](int64 id) { return computation_map.contains(id); })) + << proto.name() << " instruction references invalid computation id(s)"; + + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape())); + switch (opcode) { // Ops migrated to subclasses. case HloOpcode::kBatchNormTraining: @@ -266,7 +281,8 @@ StatusOr> HloInstruction::CreateFromProto( << "Expect 1 called computation for fusion instruction but sees " << proto.called_computation_ids_size(); const int64 fusion_id = proto.called_computation_ids(0); - auto* fused_computation = FindPtrOrNull(computation_map, fusion_id); + auto* fused_computation = + tensorflow::gtl::FindPtrOrNull(computation_map, fusion_id); TF_RET_CHECK(fused_computation != nullptr) << "No fusion computation with id " << fusion_id; instruction = CreateFusion(proto.shape(), fusion_kind, all_operands(), @@ -302,6 +318,8 @@ StatusOr> HloInstruction::CreateFromProto( } break; case HloOpcode::kOutfeed: TF_RET_CHECK(proto.operand_ids_size() == 2); + TF_RETURN_IF_ERROR( + ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape())); instruction = CreateOutfeed(proto.outfeed_shape(), operands(0), operands(1), proto.outfeed_config()); break; @@ -378,8 +396,22 @@ StatusOr> HloInstruction::CreateFromProto( operands(1), operands(2), computations(1)); break; case HloOpcode::kCustomCall: - instruction = CreateCustomCall(proto.shape(), all_operands(), - proto.custom_call_target()); + if (proto.constrain_layout()) { + // A proto RepeatedPtrField cannot be converted to a Span (it is a + // vector of pointers essentially) so create a vector of shapes to pass + // in. + std::vector operand_shapes; + for (const Shape& shape : proto.operand_shapes_with_layout()) { + operand_shapes.push_back(shape); + } + instruction = CreateCustomCall( + proto.shape(), all_operands(), proto.custom_call_target(), + operand_shapes, proto.custom_call_opaque()); + } else { + instruction = CreateCustomCall(proto.shape(), all_operands(), + proto.custom_call_target(), + proto.custom_call_opaque()); + } if (proto.has_window()) { static_cast(instruction.get()) ->set_window(proto.window()); @@ -446,8 +478,8 @@ StatusOr> HloInstruction::CreateFromProto( break; } case HloOpcode::kIota: - TF_RET_CHECK(proto.dimensions_size() <= 1) - << "Iota instruction should have at most 1 dimension but sees " + TF_RET_CHECK(proto.dimensions_size() == 1) + << "Iota instruction should have 1 dimension but sees " << proto.dimensions_size(); instruction = CreateIota(proto.shape(), proto.dimensions(0)); break; @@ -465,31 +497,34 @@ StatusOr> HloInstruction::CreateFromProto( proto.dot_dimension_numbers(), precision_config); break; } - case HloOpcode::kDomain: + case HloOpcode::kDomain: { TF_RET_CHECK(proto.operand_ids_size() == 1) << "Domain instruction should have 1 operands but sees " << proto.operand_ids_size(); + TF_RET_CHECK(proto.has_domain_entry_sharding()) + << "Domain instruction must domain_entry_sharding"; + TF_RET_CHECK(proto.has_domain_exit_sharding()) + << "Domain instruction must domain_exit_sharding"; + TF_ASSIGN_OR_RETURN( + HloSharding entry_hlo_sharding, + HloSharding::FromProto(proto.domain_entry_sharding())); + TF_ASSIGN_OR_RETURN(HloSharding exit_hlo_sharding, + HloSharding::FromProto(proto.domain_exit_sharding())); instruction = absl::make_unique( - proto.shape(), operands(0), /*operand_side_metadata=*/nullptr, - /*user_side_metadata=*/nullptr); + proto.shape(), operands(0), + absl::make_unique( + std::make_shared(entry_hlo_sharding)), + absl::make_unique( + std::make_shared(exit_hlo_sharding))); break; + } default: { instruction = absl::WrapUnique(new HloInstruction(opcode, proto.shape())); for (const int64 operand_id : proto.operand_ids()) { - TF_RET_CHECK(ContainsKey(instruction_map, operand_id)) - << "No instruction with id " << operand_id; instruction->AppendOperand(instruction_map.at(operand_id)); } - for (const int64 predecessor_id : proto.control_predecessor_ids()) { - TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id)) - << "No instruction with id " << predecessor_id; - TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id) - ->AddControlDependencyTo(instruction.get())); - } if (instruction->opcode() != HloOpcode::kFusion) { for (const int64 computation_id : proto.called_computation_ids()) { - TF_RET_CHECK(ContainsKey(computation_map, computation_id)) - << "No computation with id " << computation_id; instruction->called_computations_.push_back( computation_map.at(computation_id)); } @@ -501,6 +536,13 @@ StatusOr> HloInstruction::CreateFromProto( } } + for (const int64 predecessor_id : proto.control_predecessor_ids()) { + TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id)) + << "No instruction with id " << predecessor_id; + TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id) + ->AddControlDependencyTo(instruction.get())); + } + TF_RET_CHECK(!proto.name().empty()); instruction->SetAndSanitizeName(proto.name()); instruction->metadata_ = proto.metadata(); @@ -1108,9 +1150,18 @@ bool HloInstruction::HasSideEffect() const { /* static */ std::unique_ptr HloInstruction::CreateCustomCall( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target) { - return absl::make_unique(shape, operands, - custom_call_target); + absl::string_view custom_call_target, absl::string_view opaque) { + return absl::make_unique( + shape, operands, custom_call_target, opaque); +} + +/* static */ std::unique_ptr HloInstruction::CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque) { + return absl::make_unique( + shape, operands, custom_call_target, opaque, operand_shapes_with_layout); } /* static */ std::unique_ptr HloInstruction::CreateTuple( @@ -1431,7 +1482,7 @@ int64 HloInstruction::operand_index(const HloInstruction* target) const { HloInstruction::InstructionVector HloInstruction::unique_operands() const { InstructionVector unique; - tensorflow::gtl::FlatSet seen; + absl::flat_hash_set seen; for (HloInstruction* operand : operands()) { if (seen.insert(operand).second) { unique.push_back(operand); @@ -2005,7 +2056,7 @@ string HloInstruction::OperandsToStringWithCanonicalNameMap( options.is_in_nested_computation()) { str.push_back(PrintName( canonical_name_map->LookupOrInsert(operand->name()), options)); - } else if (!options.compact_operands()) { + } else if (options.print_operand_names()) { str.push_back(PrintName(operand->name(), options)); } StrAppend(out, StrJoin(str, " ")); @@ -2660,14 +2711,14 @@ class HloInstruction::FusionReusesParamElements { // the value of this parameter, which would save stack space but not allow us // to finish early if we find a reuse. static UseKind Compute(int64 i, const HloInstruction& hlo) { - tensorflow::gtl::FlatMap memoization_cache; + absl::flat_hash_map memoization_cache; return ComputeInternal(i, hlo, &memoization_cache); } private: static UseKind ComputeInternal( int64 i, const HloInstruction& hlo, - tensorflow::gtl::FlatMap* cache) { + absl::flat_hash_map* cache) { if (auto hlo_param = DynCast(&hlo)) { if (hlo_param->parameter_number() == i) { return UseKind::kUse; @@ -3047,10 +3098,6 @@ const std::vector& HloInstruction::slice_strides() const { return Cast(this)->slice_strides(); } -bool HloInstruction::IsInPlaceSlice() const { - return Cast(this)->IsInPlaceSlice(); -} - const Literal& HloInstruction::literal() const { return Cast(this)->literal(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d615df0831f6306b75e099ee10353a199878b42b..44f776ebac84bc95c103ba3942a6a849f118a6d1 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -32,6 +32,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -50,7 +51,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/iterator_range.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -80,6 +80,7 @@ class HloPrintOptions { print_backend_config_(true), compact_operands_(false), print_operand_shape_(true), + print_operand_names_(true), print_program_shape_(true), print_percent_(true), print_control_dependencies_(true), @@ -107,6 +108,7 @@ class HloPrintOptions { .set_print_metadata(false) .set_print_backend_config(false) .set_compact_operands(true) + .set_print_operand_names(false) .set_print_operand_shape(true) .set_print_program_shape(false) .set_print_percent(false) @@ -144,6 +146,12 @@ class HloPrintOptions { return *this; } + // If true, the operand names will be printed. + HloPrintOptions& set_print_operand_names(bool value) { + print_operand_names_ = value; + return *this; + } + // If true, program shape of hlo computations will be printed. HloPrintOptions& set_print_program_shape(bool value) { print_program_shape_ = value; @@ -162,8 +170,8 @@ class HloPrintOptions { return *this; } - // If true, only a part of operands will be printed out, and their names will - // be omitted (note that in this case the text will not be parsable). + // If true, only a part of operands will be printed out (note that in this + // case the text will not be parsable). HloPrintOptions& set_compact_operands(bool value) { compact_operands_ = value; return *this; @@ -197,6 +205,7 @@ class HloPrintOptions { bool print_backend_config() const { return print_backend_config_; } bool compact_operands() const { return compact_operands_; } bool print_operand_shape() const { return print_operand_shape_; } + bool print_operand_names() const { return print_operand_names_; } bool print_program_shape() const { return print_program_shape_; } bool print_percent() const { return print_percent_; } bool print_control_dependencies() const { @@ -215,6 +224,7 @@ class HloPrintOptions { bool print_backend_config_; bool compact_operands_; bool print_operand_shape_; + bool print_operand_names_; bool print_program_shape_; bool print_percent_; bool print_control_dependencies_; @@ -247,7 +257,7 @@ class CanonicalNameMap { private: int64 index; - tensorflow::gtl::FlatMap canonical_name_map; + absl::flat_hash_map canonical_name_map; }; // HLO instructions are the atomic unit of the high-level compiler's IR. @@ -350,8 +360,8 @@ class HloInstruction { // calls. static StatusOr> CreateFromProto( const HloInstructionProto& proto, - const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map); + const absl::flat_hash_map& instruction_map, + const absl::flat_hash_map& computation_map); // Creates a parameter-retrieving instruction. static std::unique_ptr CreateParameter(int64 parameter_number, @@ -718,10 +728,21 @@ class HloInstruction { HloComputation* computation); // Creates a custom call instruction that applies the given custom call target - // to the given operands. "shape" is the resultant shape. + // to the given operands. "opaque" can be an arbitrary string with a + // backend-specific interpretation. "shape" is the resultant shape. static std::unique_ptr CreateCustomCall( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target); + absl::string_view custom_call_target, absl::string_view opaque = ""); + + // Overload which constrains the layouts of the operand and result. 'shape' + // and 'operand_shapes_with_layout' must have layouts. + // 'operand_shapes_with_layout' must have a compatible element for each + // operand. + static std::unique_ptr CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque = ""); // Creates a tuple instruction with the given elements. This is a convenience // wrapper around CreateVariadic. @@ -1319,9 +1340,6 @@ class HloInstruction { int64 slice_strides(int64 dimension) const; const std::vector& slice_strides() const; - // Delegates to HloSliceInstruction::IsInPlaceSlice. - bool IsInPlaceSlice() const; - // Returns the literal associated with this instruction. const Literal& literal() const; diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index c1b7c3832b44b5d65b715dffa5211a5c92e17953..d93351fe0435b5f29035dc4ea0621a8c576bfd5a 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -135,7 +135,8 @@ TEST_F(HloInstructionTest, BasicProperties) { auto parameter = HloInstruction::CreateParameter(1, r0f32_, "foo"); EXPECT_EQ(HloOpcode::kParameter, parameter->opcode()); - EXPECT_TRUE(ShapeUtil::IsScalarF32(parameter->shape())); + EXPECT_TRUE(ShapeUtil::IsScalarWithElementType(parameter->shape(), F32)); + EXPECT_FALSE(ShapeUtil::IsScalarWithElementType(parameter->shape(), S32)); EXPECT_EQ(0, parameter->operand_count()); } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index e92882c22a6ef1dd43440d3c94c7d233c9a4fb5d..2ec233eaec42dd90d001379d5c6100009d580cbe 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" @@ -27,8 +28,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h" #include "tensorflow/compiler/xla/window_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { namespace { @@ -213,6 +214,7 @@ HloSendRecvInstruction::HloSendRecvInstruction(HloOpcode opcode, HloInstructionProto HloSendRecvInstruction::ToProto() const { HloInstructionProto proto = HloInstruction::ToProto(); proto.set_channel_id(channel_id_); + proto.set_is_host_transfer(is_host_transfer_); return proto; } @@ -641,14 +643,6 @@ HloTransposeInstruction::HloTransposeInstruction( absl::Span dimensions) : HloInstruction(HloOpcode::kTranspose, shape), dimensions_(dimensions.begin(), dimensions.end()) { - CHECK_EQ(shape.dimensions().size(), dimensions.size()); - CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size()); - CHECK(std::equal(operand->shape().dimensions().begin(), - operand->shape().dimensions().end(), - Permute(dimensions, shape.dimensions()).begin())) - << "shape: " << ShapeUtil::HumanString(shape) - << ", operand->shape(): " << ShapeUtil::HumanString(shape) - << ", dimensions: {" << StrJoin(dimensions, ", ") << "}"; AppendOperand(operand); } @@ -1042,7 +1036,8 @@ HloInstruction* HloFusionInstruction::AddFusionOperand( const int64 param_no = operand_count(); // Name the parameter after the instruction it represents in the outer // (non-fusion) computation. - string param_name = StrCat(new_operand->name(), ".param_", param_no); + // string param_name = StrCat(new_operand->name(), ".param_", param_no); + string param_name = StrCat("param_", param_no); HloInstruction* fused_parameter = fused_instructions_computation()->AddParameter( HloInstruction::CreateParameter(param_no, new_operand->shape(), @@ -1098,7 +1093,7 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput( // Note that we add the unfused instructions to this->parent_ computation. // This is necessary because the unique_id needs for an instruction and // it's only added when inserting to the computation. - tensorflow::gtl::FlatMap old_to_new; + absl::flat_hash_map old_to_new; std::vector unfused_instructions; auto computation_to_merge = instruction_to_merge->fused_instructions_computation(); @@ -1391,7 +1386,7 @@ std::unique_ptr HloFusionInstruction::CloneWithNewOperandsImpl( } Status HloFusionInstruction::DeduplicateFusionOperands() { - tensorflow::gtl::FlatMap operand_indices; + absl::flat_hash_map operand_indices; std::vector operands_to_remove; for (int i = 0; i < operand_count(); ++i) { auto emplace_result = operand_indices.emplace(operand(i), i); @@ -1488,7 +1483,6 @@ HloParameterInstruction::CloneWithNewOperandsImpl( HloGetTupleElementInstruction::HloGetTupleElementInstruction( const Shape& shape, HloInstruction* operand, int64 index) : HloInstruction(HloOpcode::kGetTupleElement, shape), tuple_index_(index) { - CHECK(ShapeUtil::IsTuple(operand->shape())); AppendOperand(operand); } @@ -1610,9 +1604,6 @@ HloOutfeedInstruction::HloOutfeedInstruction(const Shape& outfeed_shape, : HloInstruction(HloOpcode::kOutfeed, ShapeUtil::MakeTokenShape()), outfeed_shape_(outfeed_shape), outfeed_config_(outfeed_config) { - CHECK(ShapeUtil::Compatible(operand->shape(), outfeed_shape)) - << "Outfeed shape " << outfeed_shape - << " must be compatible with operand shape " << operand->shape(); AppendOperand(operand); AppendOperand(token_operand); } @@ -1830,10 +1821,28 @@ HloSelectAndScatterInstruction::CloneWithNewOperandsImpl( HloCustomCallInstruction::HloCustomCallInstruction( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target) + absl::string_view custom_call_target, absl::string_view opaque) : HloInstruction(HloOpcode::kCustomCall, shape), custom_call_target_(custom_call_target.begin(), custom_call_target.end()), - feature_group_count_(1) { + opaque_(opaque.begin(), opaque.end()), + feature_group_count_(1), + layout_constrained_(false) { + for (auto operand : operands) { + AppendOperand(operand); + } +} + +HloCustomCallInstruction::HloCustomCallInstruction( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, absl::string_view opaque, + absl::Span operand_shapes_with_layout) + : HloInstruction(HloOpcode::kCustomCall, shape), + custom_call_target_(custom_call_target.begin(), custom_call_target.end()), + opaque_(opaque.begin(), opaque.end()), + feature_group_count_(1), + layout_constrained_(true), + operand_shapes_with_layout_(operand_shapes_with_layout.begin(), + operand_shapes_with_layout.end()) { for (auto operand : operands) { AppendOperand(operand); } @@ -1849,7 +1858,14 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const { *convolution_dimension_numbers_; } proto.set_custom_call_target(custom_call_target_); + proto.set_custom_call_opaque(opaque_); proto.set_feature_group_count(feature_group_count_); + if (layout_constrained()) { + proto.set_constrain_layout(true); + for (const Shape& shape : operand_shapes_with_layout_) { + *proto.add_operand_shapes_with_layout() = shape; + } + } return proto; } @@ -1872,6 +1888,19 @@ std::vector HloCustomCallInstruction::ExtraAttributesToStringImpl( // an HloComputation. extra.push_back( StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\"")); + // If the opaque string becomes enormous we may want to reconsider printing + // this inline and consider other options. + if (!opaque_.empty()) { + extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\"")); + } + if (layout_constrained()) { + std::vector shape_strings; + for (const Shape& shape : operand_shapes_with_layout_) { + shape_strings.push_back(ShapeUtil::HumanStringWithLayout(shape)); + } + extra.push_back(StrCat("operand_layout_constraints={", + StrJoin(shape_strings, ", "), "}")); + } return extra; } @@ -1897,7 +1926,8 @@ bool HloCustomCallInstruction::IdenticalSlowPath( if (feature_group_count_ != casted_other.feature_group_count_) { return false; } - return custom_call_target_ == casted_other.custom_call_target_; + return custom_call_target_ == casted_other.custom_call_target_ && + opaque_ == casted_other.opaque_; } std::unique_ptr @@ -1905,7 +1935,7 @@ HloCustomCallInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { auto cloned = absl::make_unique( - shape, new_operands, custom_call_target()); + shape, new_operands, custom_call_target(), opaque()); if (window_ != nullptr) { cloned->set_window(*window_); } @@ -2301,4 +2331,23 @@ std::unique_ptr HloDomainInstruction::CloneWithNewOperandsImpl( shape, new_operands[0], operand_side_metadata_->Clone(), user_side_metadata_->Clone()); } + +HloInstructionProto HloDomainInstruction::ToProto() const { + HloInstructionProto proto = HloInstruction::ToProto(); + auto operand_side_sharding = + dynamic_cast(operand_side_metadata_.get()); + if (operand_side_sharding) { + *proto.mutable_domain_entry_sharding() = + operand_side_sharding->sharding()->ToProto(); + } + + auto user_side_sharding = + dynamic_cast(user_side_metadata_.get()); + if (user_side_sharding) { + *proto.mutable_domain_exit_sharding() = + user_side_sharding->sharding()->ToProto(); + } + + return proto; +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 2d7bc83855e761ed313d831a1252a54130910bbe..4c5fc759a3b5e6c022f52b84e9c2617bc90213e1 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -546,17 +546,6 @@ class HloSliceInstruction : public HloInstruction { } const std::vector& slice_strides() const { return slice_strides_; } - // Returns the flag that describes whether a slice must be lowered into an - // offset into the original operand. - bool IsInPlaceSlice() const { return is_in_place_slice_; } - - // Sets and returns the flag that describes whether a slice must be lowered - // into an offset into the original operand. - bool SetIsInPlaceSlice(bool value) { - is_in_place_slice_ = value; - return value; - } - private: std::vector ExtraAttributesToStringImpl( const HloPrintOptions& options) const override; @@ -573,9 +562,6 @@ class HloSliceInstruction : public HloInstruction { std::vector slice_starts_; std::vector slice_limits_; std::vector slice_strides_; - - // Describes whether the slice can be lowered to an offset into the operand. - bool is_in_place_slice_ = false; }; class HloConstantInstruction : public HloInstruction { @@ -910,7 +896,6 @@ class HloOutfeedInstruction : public HloInstruction { absl::string_view outfeed_config); // Returns the shape for the Outfeed instruction. const Shape& outfeed_shape() const { - TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(outfeed_shape_)); return outfeed_shape_; } // Returns the config for the Outfeed instruction. @@ -1068,9 +1053,19 @@ class HloSelectAndScatterInstruction : public HloInstruction { class HloCustomCallInstruction : public HloInstruction { public: - explicit HloCustomCallInstruction(const Shape& shape, - absl::Span operands, - absl::string_view custom_call_target); + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque); + + // Constructor for a custom call with constrained layout. 'shape' and + // 'operands_with_layout' must all have layouts. + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque, + absl::Span operand_shapes_with_layout); + const Window& window() const override { CHECK(window_ != nullptr); return *window_; @@ -1090,6 +1085,7 @@ class HloCustomCallInstruction : public HloInstruction { convolution_dimension_numbers_ = absl::make_unique(dnums); } + const string& opaque() const { return opaque_; } const string& custom_call_target() const { return custom_call_target_; } void set_feature_group_count(int64 feature_group_count) { feature_group_count_ = feature_group_count; @@ -1098,6 +1094,16 @@ class HloCustomCallInstruction : public HloInstruction { // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; + // Returns whether the result and operand layouts are constrained. + bool layout_constrained() const { return layout_constrained_; } + + // Returns the shapes (with layout) of the operands. CHECKs if this custom + // call does not have constrained layouts. + const std::vector& operand_shapes_with_layout() const { + CHECK(layout_constrained()); + return operand_shapes_with_layout_; + } + private: std::vector ExtraAttributesToStringImpl( const HloPrintOptions& options) const override; @@ -1109,14 +1115,21 @@ class HloCustomCallInstruction : public HloInstruction { std::unique_ptr CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const override; - // Name of a global symbol to call, only present for kCustomCall. + // Name of a global symbol to call. string custom_call_target_; + // Opaque string interpreted by the backend. + string opaque_; // Describes the window in a windowed operation such as convolution. std::unique_ptr window_; // Describes the dimension numbers used for a convolution. std::unique_ptr convolution_dimension_numbers_; // The number of feature groups. This is used for grouped convolutions. int64 feature_group_count_; + // Whether the result and operand layouts are constrained. + bool layout_constrained_; + // For layout-constrained custom calls, this vector holds the shape with + // layout for each operand. + std::vector operand_shapes_with_layout_; }; class HloPadInstruction : public HloInstruction { @@ -1337,6 +1350,9 @@ class HloDomainInstruction : public HloInstruction { std::unique_ptr operand_side_metadata, std::unique_ptr user_side_metadata); + // Returns a serialized representation of this instruction. + HloInstructionProto ToProto() const override; + // Retrieves the operand side metadata of a kDomain instruction. const DomainMetadata& operand_side_metadata() const { return *operand_side_metadata_; diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc index 6a4e766788f47cad9e168fcccd3a3de9097cacdc..5cee865b7ad34eded1743d9d5455bb40febf6182 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/heap_simulator.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" @@ -74,7 +76,7 @@ class ListScheduler { const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { ListScheduler scheduler(computation, points_to_analysis, size_function, memory_by_computation); @@ -99,7 +101,7 @@ class ListScheduler { ListScheduler(const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) : computation_(computation), points_to_analysis_(points_to_analysis), @@ -110,7 +112,7 @@ class ListScheduler { // LogicalBuffer is in an operand of the instruction as indicated by // points-to analysis. for (auto* instruction : computation.instructions()) { - tensorflow::gtl::FlatSet instr_uses; + absl::flat_hash_set instr_uses; for (auto* operand : instruction->operands()) { points_to_analysis.GetPointsToSet(operand).ForEachElement( [&](const ShapeIndex& /*index*/, @@ -193,13 +195,15 @@ class ListScheduler { return entry; } - // Returns the number of bytes freed if the HLO instruction is scheduled. - // If the instruction calls subcomputations, we count the memory used by the - // subcomputations as memory "defined" by the instruction. This is not - // entirely accurate, because subcomputation memory will be freed after the - // instruction finishes. But it is more accurate than not taking - // subcomputations into account at all. In the future, we may improve - // accounting for subcomputation memory (b/65409243). + // Returns the number of bytes freed *after* the HLO instruction finishes. + // The current List algorithm only considers two states for an instruction: + // right before it runs, and after it finishes. We don't represent memory + // usage during the execution of an instruction. But if the instruction calls + // subcomputations, they are only live during the instruction's execution. + // We end up counting the memory used by subcomputations as memory "defined" + // by the instruction. This is not entirely accurate, but it is more accurate + // than not taking subcomputations into account at all. In the future, we may + // improve accounting for subcomputation memory (b/65409243). int64 BytesFreedIfScheduled(const ReadyListEntry& entry) { int64 freed_bytes = 0; for (const auto& kv : entry.used_buffer_unscheduled_use_counts) { @@ -221,7 +225,18 @@ class ListScheduler { } } } - return freed_bytes - entry.bytes_defined - max_subcomputation_bytes; + int64 bytes_defined; + if (max_subcomputation_bytes > 0 && + (entry.instruction->opcode() == HloOpcode::kWhile || + entry.instruction->opcode() == HloOpcode::kCall || + entry.instruction->opcode() == HloOpcode::kConditional)) { + // The output buffer of while/call/conditional is always aliased with the + // output buffer of the root instruction in the body. Don't double count. + bytes_defined = max_subcomputation_bytes; + } else { + bytes_defined = entry.bytes_defined + max_subcomputation_bytes; + } + return freed_bytes - bytes_defined; } // Constructs the scheduling priority of the given instruction. @@ -234,8 +249,7 @@ class ListScheduler { // Populate the ready list with instructions which have no operands or // control predecessors. - tensorflow::gtl::FlatMap - unscheduled_pred_count; + absl::flat_hash_map unscheduled_pred_count; for (auto* instruction : computation_.instructions()) { // TODO(b/34466113): Replace this and above with successors() or // predecessors() when these methods are added to HloInstruction. @@ -251,8 +265,8 @@ class ListScheduler { std::multimap ready_queue; // Map of ready instructions to their iterators in ready_queue. - tensorflow::gtl::FlatMap::iterator> + absl::flat_hash_map::iterator> ready_instructions; auto add_to_ready_queue = [&](HloInstruction* inst) { @@ -262,9 +276,8 @@ class ListScheduler { }; for (auto* instruction : computation_.instructions()) { - // Instruction with no operands or control predecessors will - // not be in the map. - if (unscheduled_pred_count.count(instruction) == 0) { + if (instruction->operands().empty() && + instruction->control_predecessors().empty()) { add_to_ready_queue(instruction); } } @@ -347,21 +360,19 @@ class ListScheduler { // Computations are analyzed in post-order. When scheduling an instruction // that includes subcomputations, such as a while loop, we use this map to // look up the memory needed by subcomputations. - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation_; // A map containing the LogicalBuffers that each instruction uses. - tensorflow::gtl::FlatMap> + absl::flat_hash_map> buffer_uses_; // A map containing the count of unscheduled HLOs which using a particular - // LogicalBuffer. We rely on iterator stability in this map, and that the map - // entries are std::pair's. - std::unordered_map unscheduled_use_count_; + // LogicalBuffer. + absl::flat_hash_map unscheduled_use_count_; // Set of instructions which have been scheduled. - tensorflow::gtl::FlatSet scheduled_instructions_; + absl::flat_hash_set scheduled_instructions_; }; int64 SumLogicalBufferSizes( @@ -379,7 +390,7 @@ StatusOr ScheduleComputationHelper( const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, const MemorySchedulerAlgorithm& algorithm, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { VLOG(2) << "Computation: " << computation.name(); if (algorithm) { @@ -396,13 +407,13 @@ StatusOr DFSMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { // These variables are a hack to prevent overflows. int64 cumulative_total_size = 0; int64 total_hlos = computation.parent()->instruction_count(); - tensorflow::gtl::FlatMap extra_users; - tensorflow::gtl::FlatMap total_sizes; + absl::flat_hash_map extra_users; + absl::flat_hash_map total_sizes; for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) { if (ListScheduler::IgnoreInstruction(*hlo)) { extra_users[hlo] = 0; @@ -419,7 +430,7 @@ StatusOr DFSMemoryScheduler( points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function); total_sizes[hlo] = logical_buffer_size; cumulative_total_size += logical_buffer_size; - tensorflow::gtl::FlatSet unique_operands( + absl::flat_hash_set unique_operands( hlo->operands().begin(), hlo->operands().end()); for (const HloInstruction* operand : unique_operands) { extra_users[hlo] += extra_users[operand]; @@ -467,7 +478,7 @@ StatusOr ListMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { return ListScheduler::Run(computation, points_to_analysis, size_function, memory_by_computation); @@ -477,7 +488,7 @@ StatusOr PostOrderMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { return HloInstructionSequence(computation.MakeInstructionPostOrder()); } @@ -486,7 +497,7 @@ StatusOr DefaultMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation) { // We try a few schedulers and choose whichever returns a lower min-memory, // not accounting for fragmentation. @@ -549,7 +560,7 @@ StatusOr ScheduleModule( HloSchedule schedule(&module); TF_ASSIGN_OR_RETURN(std::unique_ptr points_to_analysis, TuplePointsToAnalysis::Run(&module)); - tensorflow::gtl::FlatMap memory_by_computation; + absl::flat_hash_map memory_by_computation; for (const auto* computation : module.MakeComputationPostOrder()) { if (!computation->IsFusionComputation()) { TF_ASSIGN_OR_RETURN(HloInstructionSequence computation_sequence, @@ -577,7 +588,7 @@ StatusOr ScheduleComputation( CHECK(!computation.IsFusionComputation()); TF_ASSIGN_OR_RETURN(std::unique_ptr points_to_analysis, TuplePointsToAnalysis::Run(computation.parent())); - tensorflow::gtl::FlatMap empty_map; + absl::flat_hash_map empty_map; return ScheduleComputationHelper(computation, *points_to_analysis, size_function, nullptr, empty_map); } diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h index 9964c6fdd7c60a807896ea7aaaa9d55767f20f51..a4c1d3db8170a1725043def576f913e09b352e5d 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" @@ -37,7 +38,7 @@ namespace xla { typedef std::function( const HloComputation&, const TuplePointsToAnalysis&, const LogicalBuffer::SizeFunction&, - const tensorflow::gtl::FlatMap&)> + const absl::flat_hash_map&)> MemorySchedulerAlgorithm; // List scheduler @@ -45,7 +46,7 @@ StatusOr ListMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation); // DFS-order scheduler @@ -53,7 +54,7 @@ StatusOr DFSMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation); // Naive Post Order scheduler @@ -61,7 +62,7 @@ StatusOr PostOrderMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation); // The default scheduling algorithm. Runs both the list scheduler @@ -71,7 +72,7 @@ StatusOr DefaultMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function, - const tensorflow::gtl::FlatMap& + const absl::flat_hash_map& memory_by_computation); // Returns an HloSchedule which seeks to minimize the memory required for diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc index 1b9e9bfc77c3ba91e5b878f4aa42d26d8267a49a..214119fba881c4411a262cd4227b5cc49cef0d14 100644 --- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc +++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/heap_simulator.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" @@ -146,126 +147,6 @@ ENTRY root { instructions_by_name.at("e"))); } -TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) { - // %WhileCond (cond_param: f32[4]) -> pred[] { - // %cond_param = f32[4]{0} parameter(0) - // %constant = f32[1,4]{1,0} constant(f32[1,4] { { 0, 0, 0, 0 } }) - // ROOT %not-equal-to = pred[] not-equal-to( - // f32[4]{0} %cond_param, f32[1,4]{1,0} %constant) - // } - // %WhileBody (body_param: f32[4]) -> f32[4] { - // %body_param = f32[4]{0} parameter(0) - // %constant.1 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } }) - // ROOT %subtract = f32[4]{0} subtract( - // f32[4]{0} %body_param, f32[1,4]{1,0} %constant.1) - // } - // %ListAccountsForSubcomputations () -> f32[2,4] { - // %constant.3 = f32[2,4]{1,0} constant( - // f32[2,4] { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }) - // %transpose = f32[2,4]{1,0} transpose( - // f32[2,4]{1,0} %constant.3), dimensions={0,1} - // %constant.2 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } }) - // %while = f32[4]{0} while(f32[1,4]{1,0} %constant.2), - // condition=%WhileCond, - // body=%WhileBody - // %broadcast = f32[2,4]{1,0} broadcast(f32[4]{0} %while), dimensions={0} - // ROOT %add = f32[2,4]{1,0} add( - // f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast) - // } - - auto module = CreateNewModule(); - const Shape r1f32 = ShapeUtil::MakeShape(F32, {4}); - const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4}); - - // param != 0 - // Needs 17 bytes - auto cond_builder = HloComputation::Builder("WhileCond"); - HloInstruction* cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, r1f32, "cond_param")); - HloInstruction* zero_vector = - cond_builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{0, 0, 0, 0}}))); - cond_builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, cond_param, zero_vector)); - auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build()); - - // param - 1 - // Needs 16 bytes - auto body_builder = HloComputation::Builder("WhileBody"); - HloInstruction* body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, r1f32, "body_param")); - HloInstruction* one_vector = - body_builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{1, 1, 1, 1}}))); - body_builder.AddInstruction(HloInstruction::CreateBinary( - r1f32, HloOpcode::kSubtract, body_param, one_vector)); - auto body_computation = module->AddEmbeddedComputation(body_builder.Build()); - - // transpose(matrix) + bcast(while) - auto builder = HloComputation::Builder(TestName()); - HloInstruction* while_init = - builder.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2({{1, 1, 1, 1}}))); - // Creates 16 bytes, ignoring subcomputations - HloInstruction* while_loop = - builder.AddInstruction(HloInstruction::CreateWhile( - r1f32, cond_computation, body_computation, while_init)); - - // Creates 32 bytes and frees 16 - HloInstruction* bcast = builder.AddInstruction( - HloInstruction::CreateBroadcast(r2f32, while_loop, {0})); - - HloInstruction* matrix = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR2( - {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}}))); - // Creates 32 bytes - HloInstruction* transpose = builder.AddInstruction( - HloInstruction::CreateTranspose(r2f32, matrix, {0, 1})); - - // Creates 32 bytes and frees 64 - HloInstruction* add = builder.AddInstruction( - HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast)); - - module->AddEntryComputation(builder.Build()); - - auto size_fn = [](const BufferValue& buffer) { - return ShapeUtil::ByteSizeOf(buffer.shape()); - }; - TF_ASSERT_OK_AND_ASSIGN( - HloSchedule schedule, - ScheduleModule(*module, size_fn, ListMemoryScheduler)); - // Verify that all instructions are in the sequence. - auto entry_computation = module->entry_computation(); - EXPECT_EQ(entry_computation->instruction_count(), - schedule.sequence(entry_computation).size()); - SequentialHloOrdering ordering(schedule); - // This schedule is an example of List's greedy heuristics being suboptimal. - // The while_loop is more expensive than transpose, so it would have been - // better to schedule it first, instead of during the busy time. - EXPECT_TRUE(ordering.ExecutesBefore(transpose, while_loop)); - EXPECT_TRUE(ordering.ExecutesBefore(transpose, bcast)); - EXPECT_TRUE(ordering.ExecutesBefore(bcast, add)); - EXPECT_TRUE(ordering.ExecutesBefore(transpose, add)); - - tensorflow::gtl::FlatMap memory_by_computation; - memory_by_computation[cond_computation] = 17; - memory_by_computation[body_computation] = 16; - std::unique_ptr points_to_analysis = - TuplePointsToAnalysis::Run(module.get()).ValueOrDie(); - - // HeapSimulator doesn't account for subcomputations - EXPECT_EQ(80, HeapSimulator::MinimumMemoryForComputation( - *entry_computation, schedule.sequence(entry_computation), - *points_to_analysis, size_fn) - .ValueOrDie()); - // HeapSimulator accounts for subcomputations. The output buffer is aliased, - // so we don't double count. - EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation( - *entry_computation, schedule.sequence(entry_computation), - *points_to_analysis, size_fn, &memory_by_computation) - .ValueOrDie()); -} - TEST_F(HloSchedulingTest, TuplesAreAccountedCorrectly) { auto builder = HloComputation::Builder(TestName()); const auto TUPLE_SIZE = 1; @@ -409,7 +290,7 @@ TEST_F(HloSchedulingTest, HeapSimulatorAccountsForSubcomputations) { EXPECT_EQ(module->entry_computation()->instruction_count(), schedule.sequence(module->entry_computation()).size()); - tensorflow::gtl::FlatMap memory_by_computation; + absl::flat_hash_map memory_by_computation; memory_by_computation[cond_computation] = 17; memory_by_computation[body_computation] = 16; std::unique_ptr points_to_analysis = diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index b3949f3a6d7176950c61cafb0830d1175f17758d..547f74a0edf3ff7e46ec7ffb2b704928b0eeac6c 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -23,6 +23,8 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/map_util.h" @@ -71,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal( config_.SetDefaultComputationLayout( entry_computation_->ComputeProgramShape()); } + input_output_alias_config_ = HloInputOutputAliasConfig( + entry_computation_->root_instruction()->shape()); } if (uniquify_identifiers) { @@ -144,7 +148,8 @@ void HloModule::ReplaceComputations( case HloOpcode::kCall: case HloOpcode::kMap: case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: { + case HloOpcode::kReduceWindow: + case HloOpcode::kScatter: { HloComputation* new_arg = tensorflow::gtl::FindWithDefault( replacements, instruction->to_apply(), nullptr); if (new_arg != nullptr) { @@ -249,6 +254,9 @@ HloModuleProto HloModule::ToProto() const { if (has_schedule()) { *proto.mutable_schedule() = schedule().ToProto().ValueOrDie(); } + + *proto.mutable_input_output_alias() = input_output_alias_config().ToProto(); + return proto; } @@ -285,8 +293,8 @@ StatusOr> HloModule::CreateFromProto( << ShapeUtil::HumanStringWithLayout(expected_program_shape.result()) << ", actual: " << ShapeUtil::HumanStringWithLayout(result_shape); - tensorflow::gtl::FlatMap computation_map; - tensorflow::gtl::FlatMap to_proto_id; + absl::flat_hash_map computation_map; + absl::flat_hash_map to_proto_id; std::vector> computations; HloComputation* entry = nullptr; for (const HloComputationProto& computation_proto : proto.computations()) { @@ -325,12 +333,16 @@ StatusOr> HloModule::CreateFromProto( } TF_RET_CHECK(module->entry_computation_ != nullptr); + TF_ASSIGN_OR_RETURN(module->input_output_alias_config_, + HloInputOutputAliasConfig::CreateFromProto( + module.get(), proto.input_output_alias())); + // Because we didn't uniquify the names or the ids, double-check that the // instruction and computation names and ids are unique from the proto. - tensorflow::gtl::FlatSet computation_names; - tensorflow::gtl::FlatSet instruction_names; - tensorflow::gtl::FlatSet computation_ids; - tensorflow::gtl::FlatSet instruction_ids; + absl::flat_hash_set computation_names; + absl::flat_hash_set instruction_names; + absl::flat_hash_set computation_ids; + absl::flat_hash_set instruction_ids; for (HloComputation* computation : module->computations()) { TF_RET_CHECK(!ContainsKey(computation_names, computation->name())) << "Computation name is not unique: " << computation->name(); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 735804e827afd77e2b7f2a4a7d490ee6f5ee7b4f..9b9dc3ba9fbb46c5a315d3df5d3b1cfc248c5cbe 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/hlo_schedule.h" @@ -212,6 +213,15 @@ class HloModule { return result; } + // input_output_alias_config indicates the list of aliased buffers that are + // expected from the module. + HloInputOutputAliasConfig& input_output_alias_config() { + return input_output_alias_config_; + } + const HloInputOutputAliasConfig& input_output_alias_config() const { + return input_output_alias_config_; + } + // Returns the number of unique intruction ids given out. All ids up to // this point are guaranteed to be in the range [0..NumUniqueInstructionIds()) int NumUniqueInstructionIds() const { return next_unique_id_; } @@ -284,6 +294,10 @@ class HloModule { // sequential order of instructions for each non-fusion computation in the // module. absl::optional schedule_; + + // alias_config indicates the alias information of input/output buffers that + // are expected from the module. + HloInputOutputAliasConfig input_output_alias_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc index 83352ef91b35b61ee2560b1488ee2ecdff6bea0a..b4aac4c8076cb69647d42c6243bc969d06d0709e 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc @@ -59,7 +59,7 @@ string HloModuleGroupMetadata::TrackedInstruction::ToString() const { } /* static */ StatusOr> -HloModuleGroupMetadata::Build(const std::vector& modules) { +HloModuleGroupMetadata::Build(absl::Span modules) { auto metadata = absl::make_unique(modules); TF_RETURN_IF_ERROR(metadata->Build()); return std::move(metadata); diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h index 278d94cdd337c835bc0ff98ea577ef7b8c3ddd03..928df0f5a7444ad877961a5de970c752e1d024da 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h +++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -30,7 +31,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -102,14 +102,14 @@ class HloModuleGroupMetadata { HloInstruction* recv_done = nullptr; }; - explicit HloModuleGroupMetadata(const std::vector& modules) - : modules_(modules) {} + explicit HloModuleGroupMetadata(absl::Span modules) + : modules_(modules.begin(), modules.end()) {} ~HloModuleGroupMetadata() = default; // Build and return the metadata for the given modules. static StatusOr> Build( - const std::vector& modules); + absl::Span modules); // Returns true if the instruction is one of the 4 channel instructions (Send, // Recv, SendDone, RecvDone). @@ -250,33 +250,33 @@ class HloModuleGroupMetadata { std::vector>> companion_sets_; // Map from each companion while instruction to the index into companion_set_. - tensorflow::gtl::FlatMap companion_set_index_; + absl::flat_hash_map companion_set_index_; // Map from computation to the instruction using it (a kWhile, kConditional). - tensorflow::gtl::FlatMap + absl::flat_hash_map tracked_instructions_; // Maps tracked instructions (kWhile, kConditional, kCall, ...) to the set of // communicating instructions within the proper called computation(s). - tensorflow::gtl::FlatMap> + absl::flat_hash_map> tracked_instructions_comms_; // All channels in the module. std::vector channels_; // Map from channel ids to the index in channels_. - tensorflow::gtl::FlatMap channel_id_map_; + absl::flat_hash_map channel_id_map_; // Map from all-reduce ids to the all reduce instructions. - tensorflow::gtl::FlatMap> all_reduce_map_; + absl::flat_hash_map> all_reduce_map_; // The maximum channel id used in the module group. int64 max_channel_id_ = -1; // The modules that this metadata was built from. - const std::vector& modules_; + const std::vector modules_; - tensorflow::gtl::FlatMap> + absl::flat_hash_map> points_to_analyses_; }; diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc index d83ee714905252e36f38438e81002a4d6ba7dafa..fddeb5f0a27a43ff9ca8b2b5d314bcfe91aaf0e6 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -32,7 +33,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -42,7 +42,7 @@ std::vector HloModuleGroupUtil::GlobalPredecessors( HloInstruction* instruction) { std::vector predecessors; // Use a vector to avoid non-determinism. - tensorflow::gtl::FlatSet unique; + absl::flat_hash_set unique; // Adds to the unique predecessors list; if the predecessors is a companion // instruction, also add companion instructions; if the predecessors is a @@ -119,7 +119,7 @@ std::vector HloModuleGroupUtil::GlobalSuccessors( HloInstruction* instruction) { std::vector successors; // Use a vector to avoid non-determinism. - tensorflow::gtl::FlatSet unique; + absl::flat_hash_set unique; // Adds to the unique successors list; if the successor is a companion // instruction, also add companion instructions; if the successor is a diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h index 309c23045d1e0dd91e2f245d00c51d9bf9961bf5..f21b44bcd98d77b831de5d8a6afa4f9ddd91d15d 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.h +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -87,7 +87,7 @@ class HloModuleGroupUtil { // * visit_state: map from each instruction to its visit state. // * visit_function: function called when each instruction group. // * root: the root instruction of the traversal. - using VisitStates = tensorflow::gtl::FlatMap; + using VisitStates = absl::flat_hash_map; Status VisitTopologicalOrder(VisitStates* visit_state, const VisitFunction& visit_function, HloInstruction* root); diff --git a/tensorflow/compiler/xla/service/hlo_opcode.cc b/tensorflow/compiler/xla/service/hlo_opcode.cc index 2d4e38589fe4693e73c46d6c82e51cb0a8388f85..4551a1c2e259b06818f913cb6a9e782436b7e594 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.cc +++ b/tensorflow/compiler/xla/service/hlo_opcode.cc @@ -14,9 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -31,7 +31,7 @@ string HloOpcodeString(HloOpcode opcode) { } StatusOr StringToHloOpcode(const string& opcode_name) { - static auto* opcode_map = new tensorflow::gtl::FlatMap({ + static auto* opcode_map = new absl::flat_hash_map({ #define STRING_TO_OPCODE_ENTRY(enum_name, opcode_name, ...) \ {opcode_name, HloOpcode::enum_name}, HLO_OPCODE_LIST(STRING_TO_OPCODE_ENTRY) diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index f1dc08bafa17a2dd68a7e922d4b84658bbf2589c..23d41d91d6969ddf9062507e926ae39c1e1315d4 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -92,14 +92,18 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a, } bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const { - // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b' - // is live into the module. + // Entry parameter should always be defined before other instructions. const HloModule* module = b.defining_instruction()->parent()->parent(); if (b.defining_instruction()->parent() == module->entry_computation() && b.defining_instruction()->opcode() == HloOpcode::kParameter) { return false; } + if (a.defining_instruction()->parent() == module->entry_computation() && + a.defining_instruction()->opcode() == HloOpcode::kParameter) { + return true; + } + // Phi values require special handling. Because XLA does not have a phi // instruction, the definition instruction of the phis values are // placeholders: either the subcomputation parameter (body or condition) or @@ -316,7 +320,7 @@ string PredecessorHloOrdering::ToStringHelper(const string& name) const { for (auto predecessor : all) { if (predecessors_.at(computation) ->IsReachable(predecessor, instruction)) { - pieces.push_back(absl::StrFormat(" %s", predecessor->name())); + pieces.push_back(absl::StrFormat(" %s", predecessor->name())); } } } diff --git a/tensorflow/compiler/xla/service/hlo_ordering.h b/tensorflow/compiler/xla/service/hlo_ordering.h index b0361c3f02922bcaa14d52ad3b240701080f9b58..66313492eb2dd10ac9a6000639ddb8991b367c0f 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.h +++ b/tensorflow/compiler/xla/service/hlo_ordering.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_schedule.h" #include "tensorflow/compiler/xla/service/hlo_value.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -120,8 +120,8 @@ class PredecessorHloOrdering : public HloOrdering { // predecessors. An instruction is an element of its own predecessor set. // // Subclasses should fill this in to define the desired ordering. - tensorflow::gtl::FlatMap> + absl::flat_hash_map> predecessors_; }; @@ -204,7 +204,7 @@ class SequentialHloOrdering : public HloOrdering { // this map so more than one instruction may have the same position // value. This is not a problem because ExecutesBefore also verifies // instructions are in the same computation. - tensorflow::gtl::FlatMap order_position_; + absl::flat_hash_map order_position_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index 00970bcda34209d33867099d0bcf3b2902d52ae8..b045adc9640ac0ca8cf4a127fea2fbfcbb1aaf3f 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -174,6 +174,26 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) { EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param)); } +TEST_F(HloOrderingTest, ParametersDefinedBeforeOthers) { + // Entry parameter should always be defined before other instruction. + auto module = CreateNewModule(); + const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {}); + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0))); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "param")); + module->AddEntryComputation(builder.Build()); + TF_ASSERT_OK_AND_ASSIGN(auto dataflow, + HloDataflowAnalysis::Run(*module, /*ssa_form=*/true)); + + DependencyHloOrdering ordering(module.get()); + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(param), + dataflow->GetValueDefinedAt(constant))); + EXPECT_TRUE(!ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(param))); +} + TEST_F(HloOrderingTest, ValuesInWhileComputations) { // Tests the ordering of values (defined by dataflow analysis) in the body and // condition of a while instruction. HLO code: diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 37197b273ba09200dbf4dd04c6b7c4cacc068120..96f9ff665462aed7d0e5bb70df21abc83f7779f9 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -68,7 +68,7 @@ class HloParser { // Runs the parser and constructs the resulting HLO in the given (empty) // HloModule. Returns false if an error occurred. - bool Run(HloModule* module); + Status Run(HloModule* module); // Returns the error information. string GetError() const { return StrJoin(error_, "\n"); } @@ -79,28 +79,37 @@ class HloParser { StatusOr ParseConvolutionDimensionNumbersOnly(); StatusOr ParsePaddingConfigOnly(); - // Stand-alone parsing utility for a single instruction worth of text. - Status ParseSingleInstruction(HloComputation::Builder* builder, - string* root_name); - private: - // Locates an instruction with the given name in the instruction_pool_ or + using InstrNameTable = + std::unordered_map>; + + // Returns the map from the instruction name to the instruction itself and its + // location in the current scope. + InstrNameTable& current_name_table() { return scoped_name_tables_.back(); } + + // Locates an instruction with the given name in the current_name_table() or // returns nullptr. // - // If the missing_instruction_hook_ is registered and a "shape" is provided, - // the hook will be called and may satisfy the request for the given - // instruction. This is useful when we reify parameters as they're resolved; - // i.e. for ParseSingleInstruction. + // When the name is not found or name is empty, if create_missing_instruction_ + // hook is registered and a "shape" is provided, the hook will be called to + // create an instruction. This is useful when we reify parameters as they're + // resolved; i.e. for ParseSingleInstruction. std::pair* FindInstruction( const string& name, const optional& shape = nullopt); + // Parse a single instruction worth of text. + bool ParseSingleInstruction(HloModule* module); + // ParseXXX returns false if an error occurred. bool ParseHloModule(HloModule* module); + bool ParseComputations(HloModule* module); bool ParseComputation(HloComputation** entry_computation); - bool ParseInstructionList(HloComputation::Builder* builder, - string* root_name); + bool ParseInstructionList(HloComputation** computation, + const string& computation_name); bool ParseInstruction(HloComputation::Builder* builder, string* root_name); + bool ParseInstruciontRhs(HloComputation::Builder* builder, const string& name, + LocTy name_loc); bool ParseControlPredecessors(HloInstruction* instruction); bool ParseLiteral(Literal* literal, const Shape& shape); bool ParseTupleLiteral(Literal* literal, const Shape& shape); @@ -165,6 +174,7 @@ class HloParser { kDistribution, kDomain, kPrecisionList, + kShapeList }; struct AttrConfig { @@ -231,6 +241,7 @@ class HloParser { bool ParseSliceRanges(SliceRanges* result); bool ParsePrecisionList(std::vector* result); + bool ParseShapeList(std::vector* result); bool ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result); @@ -281,23 +292,47 @@ class HloParser { bool AddComputation(const string& name, HloComputation* computation, LocTy name_loc); - // The map from the instruction/computation name to the - // instruction/computation itself and it's location. This does not own the - // pointers. - std::unordered_map> - instruction_pool_; + HloLexer lexer_; + + // A stack for the instruction names. The top of the stack stores the + // instruction name table for the current scope. + // + // A instruction's name is unique among its scope (i.e. its parent + // computation), but it's not necessarily unique among all computations in the + // module. When there are multiple levels of nested computations, the same + // name could appear in both an outer computation and an inner computation. So + // we need a stack to make sure a name is only visible within its scope, + std::vector scoped_name_tables_; + + // A helper class which pushes and pops to an InstrNameTable stack via RAII. + class Scope { + public: + explicit Scope(std::vector* scoped_name_tables) + : scoped_name_tables_(scoped_name_tables) { + scoped_name_tables_->emplace_back(); + } + ~Scope() { scoped_name_tables_->pop_back(); } + + private: + std::vector* scoped_name_tables_; + }; + + // Map from the computation name to the computation itself and its location. std::unordered_map> computation_pool_; - HloLexer lexer_; std::vector> computations_; std::vector error_; - // Function that gets invoked when we try to resolve an instruction - // instruction_pool_ but fail to do so. - std::function*(string, - const optional&)> - missing_instruction_hook_; + // When an operand name cannot be resolved, this function is called to create + // a parameter instruction with the given name and shape. It registers the + // name, instruction, and a placeholder location in the name table. It returns + // the newly-created instruction and the placeholder location. If `name` is + // empty, this should create the parameter with a generated name. This is + // supposed to be set and used only in ParseSingleInstruction. + std::function*(const string& name, + const Shape& shape)> + create_missing_instruction_; }; bool SplitToInt64s(absl::string_view s, char delim, std::vector* out) { @@ -344,18 +379,44 @@ bool HloParser::TokenError(absl::string_view msg) { return Error(lexer_.GetLoc(), msg); } -bool HloParser::Run(HloModule* module) { +Status HloParser::Run(HloModule* module) { lexer_.Lex(); - return ParseHloModule(module); + if (lexer_.GetKind() == TokKind::kw_HloModule) { + // This means that the text contains a full HLO module. + if (!ParseHloModule(module)) { + return InvalidArgument( + "Syntax error when trying to parse the text as a HloModule:\n%s", + GetError()); + } + return Status::OK(); + } + // This means that the text is a single HLO instruction. + if (!ParseSingleInstruction(module)) { + return InvalidArgument( + "Syntax error when trying to parse the text as a single " + "HloInstruction:\n%s", + GetError()); + } + return Status::OK(); } std::pair* HloParser::FindInstruction( const string& name, const optional& shape) { - std::pair* instr = - tensorflow::gtl::FindOrNull(instruction_pool_, name); + std::pair* instr = nullptr; + if (!name.empty()) { + instr = tensorflow::gtl::FindOrNull(current_name_table(), name); + } + // Potentially call the missing instruction hook. - if (instr == nullptr && missing_instruction_hook_ != nullptr) { - return missing_instruction_hook_(name, shape); + if (instr == nullptr && create_missing_instruction_ != nullptr && + scoped_name_tables_.size() == 1) { + if (!shape.has_value()) { + Error(lexer_.GetLoc(), + "Operand had no shape in HLO text; cannot create parameter for " + "single-instruction module."); + return nullptr; + } + return create_missing_instruction_(name, *shape); } return instr; } @@ -439,7 +500,6 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) { if (!ParseName(&name)) { return false; } - auto builder = absl::make_unique(name); LocTy shape_loc = nullptr; Shape shape; @@ -447,40 +507,21 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) { return false; } - string root_name; - if (!ParseInstructionList(builder.get(), &root_name)) { + HloComputation* computation = nullptr; + if (!ParseInstructionList(&computation, name)) { return false; } - std::pair* root_node = FindInstruction(root_name); - // This means some instruction was marked as ROOT but we didn't find it in the - // pool, which should not happen. - if (!root_name.empty() && root_node == nullptr) { - LOG(FATAL) << "instruction " << root_name - << " was marked as ROOT but the parser has not seen it before"; - } - - HloInstruction* root = root_node == nullptr ? nullptr : root_node->first; - // Now root can be either an existing instruction or a nullptr. If it's a - // nullptr, the implementation of Builder will set the last instruction as - // root instruction. - computations_.emplace_back(builder->Build(root)); - HloComputation* computation = computations_.back().get(); - - if (!root) { - root = computation->root_instruction(); - } else { - CHECK_EQ(root, computation->root_instruction()); - } - // If param_list_to_shape was present, check compatibility. - if (shape_loc != nullptr && !ShapeUtil::Compatible(root->shape(), shape)) { + if (shape_loc != nullptr && + !ShapeUtil::Compatible(computation->root_instruction()->shape(), shape)) { return Error( shape_loc, - StrCat("Shape of computation ", name, ", ", - ShapeUtil::HumanString(shape), - ", is not compatible with that of its root instruction ", - root_name, ", ", ShapeUtil::HumanString(root->shape()))); + StrCat( + "Shape of computation ", name, ", ", ShapeUtil::HumanString(shape), + ", is not compatible with that of its root instruction ", + computation->root_instruction()->name(), ", ", + ShapeUtil::HumanString(computation->root_instruction()->shape()))); } if (is_entry_computation) { @@ -489,43 +530,62 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) { } *entry_computation = computation; } - instruction_pool_.clear(); return AddComputation(name, computation, name_loc); } // instruction_list ::= '{' instruction_list1 '}' // instruction_list1 ::= (instruction)+ -bool HloParser::ParseInstructionList(HloComputation::Builder* builder, - string* root_name) { +bool HloParser::ParseInstructionList(HloComputation** computation, + const string& computation_name) { + Scope scope(&scoped_name_tables_); + HloComputation::Builder builder(computation_name); if (!ParseToken(TokKind::kLbrace, "expects '{' at the beginning of instruction list.")) { return false; } + string root_name; do { - if (!ParseInstruction(builder, root_name)) { + if (!ParseInstruction(&builder, &root_name)) { return false; } } while (lexer_.GetKind() != TokKind::kRbrace); - return ParseToken(TokKind::kRbrace, - "expects '}' at the end of instruction list."); + if (!ParseToken(TokKind::kRbrace, + "expects '}' at the end of instruction list.")) { + return false; + } + HloInstruction* root = nullptr; + if (!root_name.empty()) { + std::pair* root_node = + tensorflow::gtl::FindOrNull(current_name_table(), root_name); + + // This means some instruction was marked as ROOT but we didn't find it in + // the pool, which should not happen. + if (root_node == nullptr) { + LOG(FATAL) << "instruction " << root_name + << " was marked as ROOT but the parser has not seen it before"; + } + root = root_node->first; + } + + // Now root can be either an existing instruction or a nullptr. If it's a + // nullptr, the implementation of Builder will set the last instruction as + // the root instruction. + computations_.emplace_back(builder.Build(root)); + *computation = computations_.back().get(); + return true; } // instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)* bool HloParser::ParseInstruction(HloComputation::Builder* builder, string* root_name) { string name; - Shape shape; - HloOpcode opcode; - std::vector operands; - LocTy maybe_root_loc = lexer_.GetLoc(); bool is_root = EatIfPresent(TokKind::kw_ROOT); const LocTy name_loc = lexer_.GetLoc(); if (!ParseName(&name) || - !ParseToken(TokKind::kEqual, "expects '=' in instruction") || - !ParseShape(&shape) || !ParseOpcode(&opcode)) { + !ParseToken(TokKind::kEqual, "expects '=' in instruction")) { return false; } @@ -536,6 +596,19 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, *root_name = name; } + return ParseInstruciontRhs(builder, name, name_loc); +} + +bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, + const string& name, LocTy name_loc) { + Shape shape; + HloOpcode opcode; + std::vector operands; + + if (!ParseShape(&shape) || !ParseOpcode(&opcode)) { + return false; + } + // Add optional attributes. std::unordered_map attrs; optional sharding; @@ -1266,21 +1339,65 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } case HloOpcode::kCustomCall: { optional custom_call_target; + optional opaque; optional window; optional dnums; optional feature_group_count; + optional> operand_layout_constraints; attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString, &custom_call_target}; + attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque}; attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; attrs["dim_labels"] = {/*required=*/false, AttrTy::kConvolutionDimensionNumbers, &dnums}; attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64, &feature_group_count}; + attrs["operand_layout_constraints"] = { + /*required=*/false, AttrTy::kShapeList, &operand_layout_constraints}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } - instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( - shape, operands, *custom_call_target)); + if (operand_layout_constraints.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return Error(lexer_.GetLoc(), + "Layout must be set on layout-constrained custom call"); + } + if (operands.size() != operand_layout_constraints->size()) { + return Error(lexer_.GetLoc(), + StrCat("Expected ", operands.size(), + " operand layout constraints, ", + operand_layout_constraints->size(), " given")); + } + for (int64 i = 0; i < operands.size(); ++i) { + const Shape& operand_shape_with_layout = + (*operand_layout_constraints)[i]; + if (!LayoutUtil::HasLayout(operand_shape_with_layout)) { + return Error(lexer_.GetLoc(), + StrCat("Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout( + operand_shape_with_layout), + " for operand ", i, " does not have a layout")); + } + if (!ShapeUtil::Compatible(operand_shape_with_layout, + operands[i]->shape())) { + return Error( + lexer_.GetLoc(), + StrCat( + "Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout(operand_shape_with_layout), + " for operand ", i, + " is not compatible with operand shape ", + ShapeUtil::HumanStringWithLayout(operands[i]->shape()))); + } + } + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, *operand_layout_constraints, + opaque.has_value() ? *opaque : "")); + } else { + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, + opaque.has_value() ? *opaque : "")); + } if (window.has_value()) { instruction->set_window(*window); } @@ -2143,7 +2260,20 @@ bool HloParser::ParseOperands(std::vector* operands) { } } if (!ParseName(&name)) { - return false; + // When parsing a single instruction (as opposed to a whole module), an + // HLO may have one or more operands with a shape but no name: + // + // foo = add(f32[10], f32[10]) + // + // create_missing_instruction_ is always non-null when parsing a single + // instruction, and is responsible for creating kParameter instructions + // for these operands. + if (shape.has_value() && create_missing_instruction_ != nullptr && + scoped_name_tables_.size() == 1) { + name = ""; + } else { + return false; + } } std::pair* instruction = FindInstruction(name, shape); @@ -2296,9 +2426,17 @@ bool HloParser::ParseAttributeHelper( return true; } case AttrTy::kHloComputation: { - HloComputation* result; - if (!ParseComputationName(&result)) { - return false; + HloComputation* result = nullptr; + if (lexer_.GetKind() == TokKind::kLbrace) { + // This means it is a nested computation. + if (!ParseInstructionList(&result, /*computation_name=*/"_")) { + return false; + } + } else { + // This means it is a computation name. + if (!ParseComputationName(&result)) { + return false; + } } static_cast*>(attr_out_ptr)->emplace(result); return true; @@ -2438,6 +2576,15 @@ bool HloParser::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kShapeList: { + std::vector result; + if (!ParseShapeList(&result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } } }(); if (!success) { @@ -2730,6 +2877,23 @@ bool HloParser::ParsePrecisionList( parse_and_add_item); } +// shapelist ::= '{' shapes '}' +// precision_elements +// ::= /*empty*/ +// ::= shape (',' shape)* +bool HloParser::ParseShapeList(std::vector* result) { + auto parse_and_add_item = [&]() { + Shape shape; + if (!ParseShape(&shape)) { + return false; + } + result->push_back(std::move(shape)); + return true; + }; + return ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, + parse_and_add_item); +} + // int64list ::= start int64_elements end // int64_elements // ::= /*empty*/ @@ -2737,23 +2901,15 @@ bool HloParser::ParsePrecisionList( bool HloParser::ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result) { - if (!ParseToken(start, StrCat("expects an int64 list starting with ", - TokKindToString(start)))) { - return false; - } - if (lexer_.GetKind() == end) { - // empty - } else { - do { - tensorflow::int64 i; - if (!ParseInt64(&i)) { - return false; - } - result->push_back(i); - } while (EatIfPresent(delim)); - } - return ParseToken( - end, StrCat("expects an int64 list to end with ", TokKindToString(end))); + auto parse_and_add_item = [&]() { + tensorflow::int64 i; + if (!ParseInt64(&i)) { + return false; + } + result->push_back(i); + return true; + }; + return ParseList(start, end, delim, parse_and_add_item); } bool HloParser::ParseList(const TokKind start, const TokKind end, @@ -3131,7 +3287,7 @@ bool HloParser::EatIfPresent(TokKind kind) { bool HloParser::AddInstruction(const string& name, HloInstruction* instruction, LocTy name_loc) { - auto result = instruction_pool_.insert({name, {instruction, name_loc}}); + auto result = current_name_table().insert({name, {instruction, name_loc}}); if (!result.second) { Error(name_loc, StrCat("instruction already exists: ", name)); return Error(/*loc=*/result.first->second.second, @@ -3201,82 +3357,78 @@ StatusOr HloParser::ParsePaddingConfigOnly() { return padding_config; } -Status HloParser::ParseSingleInstruction(HloComputation::Builder* builder, - string* root_name) { - TF_RET_CHECK(missing_instruction_hook_ == nullptr); +bool HloParser::ParseSingleInstruction(HloModule* module) { + if (create_missing_instruction_ != nullptr || !scoped_name_tables_.empty()) { + LOG(FATAL) << "Parser state is not clean. Please do not call any other " + "methods before calling ParseSingleInstruction."; + } + HloComputation::Builder builder(module->name()); // The missing instruction hook we register creates the shaped instruction on // the fly as a parameter and returns it. int64 parameter_count = 0; - missing_instruction_hook_ = - [this, builder, ¶meter_count]( - string name, - const optional& shape) -> std::pair* { - if (!shape.has_value()) { - Error(lexer_.GetLoc(), - StrCat("Operand ", name, - " had no shape in HLO text; cannot create parameter for " - "single-instruction module.")); - return nullptr; - } - HloInstruction* parameter = builder->AddInstruction( - HloInstruction::CreateParameter(parameter_count++, *shape, name)); - instruction_pool_[name] = {parameter, lexer_.GetLoc()}; - return tensorflow::gtl::FindOrNull(instruction_pool_, name); + create_missing_instruction_ = + [this, &builder, ¶meter_count]( + const string& name, + const Shape& shape) -> std::pair* { + string new_name = name.empty() ? StrCat("_", parameter_count) : name; + HloInstruction* parameter = builder.AddInstruction( + HloInstruction::CreateParameter(parameter_count++, shape, new_name)); + current_name_table()[new_name] = {parameter, lexer_.GetLoc()}; + return tensorflow::gtl::FindOrNull(current_name_table(), new_name); }; - // Prime the lexer. - lexer_.Lex(); - // Parse the instruction with the registered hook. - if (!ParseInstruction(builder, root_name)) { - return InvalidArgument("Syntax error:\n%s", GetError()); + Scope scope(&scoped_name_tables_); + if (CanBeShape()) { + // This means that the instruction's left-hand side is probably omitted, + // e.g. + // + // f32[10] fusion(...), calls={...} + if (!ParseInstruciontRhs(&builder, module->name(), lexer_.GetLoc())) { + return false; + } + } else { + // This means that the instruction's left-hand side might exist, e.g. + // + // foo = f32[10] fusion(...), calls={...} + string root_name; + if (!ParseInstruction(&builder, &root_name)) { + return false; + } } - return Status::OK(); + + module->AddEntryComputation(builder.Build()); + for (auto& comp : computations_) { + module->AddEmbeddedComputation(std::move(comp)); + } + return true; } } // namespace StatusOr> ParseHloString( absl::string_view str, const HloModuleConfig& config) { - auto module = absl::make_unique(/*name=*/"", config); + auto module = absl::make_unique(/*name=*/"_", config); HloParser parser(str); - if (!parser.Run(module.get())) { - return InvalidArgument("Syntax error:\n%s", parser.GetError()); - } + TF_RETURN_IF_ERROR(parser.Run(module.get())); return std::move(module); } StatusOr> ParseHloString(absl::string_view str) { - auto module = absl::make_unique(/*name=*/"", HloModuleConfig()); + auto module = absl::make_unique(/*name=*/"_", HloModuleConfig()); HloParser parser(str); - if (!parser.Run(module.get())) { - return InvalidArgument("Syntax error:\n%s", parser.GetError()); - } + TF_RETURN_IF_ERROR(parser.Run(module.get())); return std::move(module); } Status ParseHloString(absl::string_view str, HloModule* module) { TF_RET_CHECK(module->computation_count() == 0); HloParser parser(str); - if (!parser.Run(module)) { - return InvalidArgument("Syntax error:\n%s", parser.GetError()); - } + TF_RETURN_IF_ERROR(parser.Run(module)); return Status::OK(); } -StatusOr> ParseHloOpToModule( - absl::string_view str, absl::string_view name) { - HloParser parser(str); - auto builder = absl::make_unique(string(name)); - string root_name; - TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(builder.get(), &root_name)); - std::unique_ptr computation = builder->Build(); - auto module = absl::make_unique(string(name), HloModuleConfig()); - module->AddEntryComputation(std::move(computation)); - return std::move(module); -} - StatusOr ParseSharding(absl::string_view str) { HloParser parser(str); return parser.ParseShardingOnly(); diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h index 369603551463fd4b4911b393f3c6c2b36f0e4bbb..81eeb9f13bf7f06123c0b35e9f3352c197866a7a 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.h +++ b/tensorflow/compiler/xla/service/hlo_parser.h @@ -40,11 +40,6 @@ StatusOr> ParseHloString( // point to an empty module (no computations). Status ParseHloString(absl::string_view str, HloModule* module); -// Parses the text for a single HLO operation into an HLO module with a function -// that runs that operation (with the same parameters) as its entry computation. -StatusOr> ParseHloOpToModule( - absl::string_view str, absl::string_view name = "single_op"); - // Given a string in the HloModule::ToString() format, parses the string and // creates a HloModule with default config. StatusOr> ParseHloString(absl::string_view str); diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index cca50fab5444d5e23c02952d56566b643a2192a4..17538c05bc7cb5041a4a8d62fe6c32e3f6de2c0d 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -802,6 +802,43 @@ ENTRY %ConstantUnsignedNoOverflow () -> u64[] { ROOT %constant = u64[] constant(9223372036854775807) } +)" +}, +// CustomCallWithLayoutConstraints +{ +"CustomCallWithLayoutConstraints", +R"(HloModule CustomCallWithLayoutConstraints + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[123,4]{1,0}} +} + +)" +}, +// CustomCallWithLayoutConstraintsNoOperands +{ +"CustomCallWithLayoutConstraintsNoOperands", +R"(HloModule CustomCallWithLayoutConstraintsNoOperands + +ENTRY %CustomCallWithLayoutConstraints () -> f32[1,2,3] { + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} + +)" +}, +// CustomCallWithLayoutConstraintsTupleShapes +{ +"CustomCallWithLayoutConstraintsTupleShapes", +R"(HloModule CustomCallWithLayoutConstraintsTupleShapes + +ENTRY %CustomCallWithLayoutConstraints (p0: (f32[2,2], f32[42,2,3]), p1: f32[123,4]) -> (f32[1,2,3], f32[1,2,3]) { + %p0 = (f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = (f32[1,2,3]{0,2,1}, f32[1,2,3]{1,2,0}) custom-call((f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={(f32[2,2]{1,0}, f32[42,2,3]{2,0,1}), f32[123,4]{1,0}} +} + )" }, }); @@ -1002,6 +1039,18 @@ ENTRY CustomCall { ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar" } +)" +}, +// CustomCall with opaque value. +{ +"CustomCallWithOpaque", +R"(HloModule custom_call + +ENTRY CustomCall { + constant = f32[1]{0} constant({12345}) + ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar", opaque="this string is opaque" +} + )" }, // Variables with non-default names @@ -1151,49 +1200,80 @@ ENTRY Sort { // clang-format on } -class HloParserTest : public ::testing::Test, - public ::testing::WithParamInterface { +// The test class for those tests defined above which round-trip through the +// parser and ToString is templatized on two bool parameters: +// +// short_form : used for the "short" test cases which use the ShortParsable +// output form. +// proto_round_trip : whether the module should also be round-tripped through +// HloProto form. This provides much better coverage for the proto +// serialization/deserialization. +// +// The proto_round_trip=true case also technically covers the Parser->ToString +// roundtrip as well, but separating out the Parser->ToString roundtrip as its +// own test provides better isolation and could conceivably catch weirdo bugs +// which are hidden by interaction between the textual and proto roundtripping. +template +class HloParameterizedParserTest + : public ::testing::Test, + public ::testing::WithParamInterface { protected: - static void ExpectHasSubstr(string_view s, string_view expected) { - EXPECT_TRUE(absl::StrContains(s, expected)) - << "'" << s << "' does not contain '" << expected << "'"; - } - // Expects "ToString(ParseHloString(string)) == string", that is, parses the // string, asserts that it succeeded, stringifies the parsed module, and // checks that the it equals the original string. void ExpectEqual() { const string& original = GetParam().module_string; - auto result = ParseHloString(original); - TF_ASSERT_OK(result.status()); - EXPECT_EQ(original, result.ValueOrDie()->ToString( - HloPrintOptions().set_print_large_constants(true))); - } -}; - -class HloParserShortTest : public HloParserTest { - protected: - void ExpectEqualShort() { - const string& original = GetParam().module_string; - auto result = ParseHloString(original); - TF_ASSERT_OK(result.status()); - EXPECT_EQ(original, - result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable())); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(original)); + if (proto_round_trip) { + TF_ASSERT_OK_AND_ASSIGN(module, HloModule::CreateFromProto( + module->ToProto(), module->config())); + } + if (short_form) { + EXPECT_EQ(original, module->ToString(HloPrintOptions::ShortParsable())); + } else { + EXPECT_EQ( + original, + module->ToString(HloPrintOptions().set_print_large_constants(true))); + } } }; -TEST_P(HloParserTest, Run) { ExpectEqual(); } +// These using shenanigans are required because the TEST_P macro doesn't like +// template instantiations which contain commas. +using HloParserTestLong = HloParameterizedParserTest; +using HloParserTestLongProto = HloParameterizedParserTest; +using HloParserTestShort = HloParameterizedParserTest; +using HloParserTestShortProto = HloParameterizedParserTest; -TEST_P(HloParserShortTest, Run) { ExpectEqualShort(); } +TEST_P(HloParserTestLong, Run) { ExpectEqual(); } +TEST_P(HloParserTestLongProto, Run) { ExpectEqual(); } +TEST_P(HloParserTestShort, Run) { ExpectEqual(); } +TEST_P(HloParserTestShortProto, Run) { ExpectEqual(); } -INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest, +INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestLong, ::testing::ValuesIn(CreateTestCases()), TestDataToString); - -INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest, +INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, + HloParserTestLongProto, + ::testing::ValuesIn(CreateTestCases()), + TestDataToString); +INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestShort, + ::testing::ValuesIn(CreateShortTestCases()), + TestDataToString); +INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, + HloParserTestShortProto, ::testing::ValuesIn(CreateShortTestCases()), TestDataToString); +class HloParserTest : public ::testing::Test { + protected: + static void ExpectHasSubstr(string_view s, string_view expected) { + EXPECT_TRUE(absl::StrContains(s, expected)) + << "'" << s << "' does not contain '" << expected << "'"; + } +}; + TEST_F(HloParserTest, Empty) { const string original = ""; auto result = ParseHloString(original); @@ -1261,7 +1341,7 @@ TEST_F(HloParserTest, MoreConstants) { ENTRY %SelectScalarS32True.v4 () -> s32[] { %constant.2 = pred[] constant(true) - %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,3]1,2,3,4} + %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,2]1,2,3,4} %constant = s32[] constant(42) %select = s32[] select(pred[] %constant.2, s32[] %constant.1, s32[] %constant) } @@ -1720,6 +1800,25 @@ ENTRY entry { "was parsing 8:39: error: instruction does not exist: aparam"); } +TEST_F(HloParserTest, SameNameDiffComputations) { + const string original = R"(HloModule same_names: +add { + p0 = f32[] parameter(0) + p1 = f32[] parameter(1) + ROOT result = f32[] add(p0, p1) +} + +ENTRY ReduceR3ToR2 { + p0 = f32[8,16,256]{2,1,0} parameter(0) + p1 = f32[] constant(0) + ROOT result = f32[8,16]{1,0} reduce(p0, p1), dimensions={2}, to_apply=add +} +)"; + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(original)); + ASSERT_NE(module->entry_computation(), nullptr); + EXPECT_THAT(module->entry_computation()->root_instruction(), op::Reduce()); +} + TEST_F(HloParserTest, ParseSharding) { const string original = "{maximal device=42}"; TF_ASSERT_OK_AND_ASSIGN(HloSharding sharding, ParseSharding(original)); @@ -1773,27 +1872,142 @@ TEST(HloParserSingleOpTest, SingleOp) { const string text = "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, " "f32[2,4]{1,0} %x)"; - TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text)); + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); const HloComputation* computation = module->entry_computation(); ASSERT_NE(computation, nullptr); EXPECT_THAT(computation->root_instruction(), op::Multiply(op::Parameter(0), op::Parameter(1))); } -TEST(HloParserSingleOpTest, SingleOpNoShapesProducesError) { +TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) { + const string text = "multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)"; + StatusOr> module = ParseHloString(text); + ASSERT_TRUE(!module.status().ok()); + LOG(INFO) << "Status: " << module.status(); + EXPECT_THAT(module.status().ToString(), + ::testing::HasSubstr("expects '=' in instruction")); +} + +TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) { const string text = "%multiply = f32[2,4]{1,0} multiply(%broadcast, %x)"; - StatusOr> module = ParseHloOpToModule(text); + StatusOr> module = ParseHloString(text); ASSERT_TRUE(!module.status().ok()); LOG(INFO) << "Status: " << module.status(); - EXPECT_THAT( - module.status().ToString(), - ::testing::HasSubstr("Operand broadcast had no shape in HLO text")); + EXPECT_THAT(module.status().ToString(), + ::testing::HasSubstr("Operand had no shape in HLO text")); +} + +TEST(HloParserSingleOpTest, SingleOpNoNames) { + const string text = + "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); + const HloComputation* computation = module->entry_computation(); + ASSERT_NE(computation, nullptr); + EXPECT_THAT(computation->root_instruction(), + op::Multiply(op::Parameter(0), op::Parameter(1))); +} + +TEST(HloParserSingleOpTest, CanonicalOp) { + const string text = "f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); + const HloComputation* computation = module->entry_computation(); + ASSERT_NE(computation, nullptr); + EXPECT_THAT(computation->root_instruction(), + op::Multiply(op::Parameter(0), op::Parameter(1))); + EXPECT_EQ( + computation->root_instruction()->ToString(HloPrintOptions::Canonical()), + text); +} + +TEST(HloParserSingleOpTest, CanonicalOpWithNested) { + const string text = + R"(f32[5,20]{1,0} while(f32[5,10]{1,0}), condition= +{ + tmp_0 = f32[5,10]{1,0} parameter(0) + tmp_1 = f32[20,10]{1,0} parameter(1) + ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls= + { + tmp_0 = f32[5,10]{1,0} parameter(0) + tmp_1 = f32[20,10]{1,0} parameter(1) + tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0} + ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0} + } +}, body= +{ + tmp_0 = f32[5,10]{1,0} parameter(0) + tmp_1 = f32[20,10]{1,0} parameter(1) + ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls= + { + tmp_0 = f32[5,10]{1,0} parameter(0) + tmp_1 = f32[20,10]{1,0} parameter(1) + tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0} + ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0} + } +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); + const HloComputation* computation = module->entry_computation(); + ASSERT_NE(computation, nullptr); + EXPECT_EQ( + computation->root_instruction()->ToString(HloPrintOptions::Canonical()), + text); +} + +TEST(HloParserSingleOpTest, SingleOpWithNested) { + const string text = + R"(%fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %p0, f32[2]{0} %p1), kind=kLoop, calls= +{ + %param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0) + %param_1 = f32[2]{0} parameter(1) + %broadcast = f32[3,2,1,1]{3,2,1,0} broadcast(f32[2]{0} %param_1), dimensions={1} + ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %param_0, f32[3,2,1,1]{3,2,1,0} %broadcast) +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); + const HloComputation* computation = module->entry_computation(); + ASSERT_NE(computation, nullptr); + EXPECT_THAT(computation->root_instruction(), + op::Fusion(op::Parameter(0), op::Parameter(1))); +} + +TEST(HloParserSingleOpTest, SingleOpWithNested_DoesNotExist) { + const string text = + R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply= +{ + result = f32[] add(f32[] x, f32[] y) +})"; + auto status = ParseHloString(text).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + ::testing::HasSubstr("does not exist: x")); +} + +TEST(HloParserSingleOpTest, SingleOpWithNested_NoLhs) { + const string text = + R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply= +{ + f32[] add(f32[] x, f32[] y) +})"; + auto status = ParseHloString(text).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name")); +} + +TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) { + const string text = + R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply= +{ + result = f32[] add(f32[], f32[]) +})"; + auto status = ParseHloString(text).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name")); } TEST(HloParserSingleOpTest, ConvolutionTrivialFeatureGroupCount) { const string text = R"(%convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f)"; - TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text)); + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text)); const HloComputation* computation = module->entry_computation(); ASSERT_NE(computation, nullptr); EXPECT_THAT(computation->root_instruction(), @@ -1892,5 +2106,35 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { op::Broadcast(), op::Multiply(), op::Add())); } +TEST_F(HloParserTest, CustomCallWrongNumberofOperandConstraints) { + const string original = R"(HloModule CustomCallWrongNumberofOperandConstraints + +ENTRY %CustomCallWrongNumberofOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + "Expected 2 operand layout constraints, 1 given"); +} + +TEST_F(HloParserTest, CustomCallIncompatibleOperandConstraints) { + const string original = R"(HloModule CustomCallIncompatibleOperandConstraints + +ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[555,5]{1,0}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + "operand 1 is not compatible with operand shape"); +} + +// custom call incompatible shape. + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc index 8c2f928ca101fae8e63663705554ae626c863bf6..5e004ce78ac1fd6da18ab2a54d23ef27e9586cf6 100644 --- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc +++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc @@ -17,6 +17,8 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" @@ -24,7 +26,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -74,8 +75,8 @@ StatusOr HloPassPipeline::RunPassesInternal( std::vector HloPassPipeline::GetEnabledPasses( const DebugOptions& debug_options) { auto repeated_field = debug_options.xla_disable_hlo_passes(); - tensorflow::gtl::FlatSet disabled_pass_names(repeated_field.begin(), - repeated_field.end()); + absl::flat_hash_set disabled_pass_names(repeated_field.begin(), + repeated_field.end()); if (!disabled_pass_names.empty()) { VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: " << absl::StrJoin(disabled_pass_names, ", "); @@ -98,7 +99,7 @@ void HloPassPipeline::MaybeDumpHlo(const HloModule& module, if (!proto_dump_path.empty()) { static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED); static auto* const module_id_to_pass_number = - new tensorflow::gtl::FlatMap(); + new absl::flat_hash_map(); tensorflow::mutex_lock lock(mu); const int64 pass_number = (*module_id_to_pass_number)[module.unique_id()]++; diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc index 2a07b6fcbc243d955e136ccdf097c8155a115845..2d5197be9e6f69f698729e06b7506a5bc6260bcd 100644 --- a/tensorflow/compiler/xla/service/hlo_query.cc +++ b/tensorflow/compiler/xla/service/hlo_query.cc @@ -24,7 +24,7 @@ namespace hlo_query { bool IsConstantR0F32(HloInstruction* instruction, float* out) { if (instruction->opcode() == HloOpcode::kConstant && - ShapeUtil::IsScalarF32(instruction->shape())) { + ShapeUtil::IsScalarWithElementType(instruction->shape(), F32)) { *out = instruction->literal().Get({}); return true; } diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h index b66a2aa4bd2b00a88cdbfa6b41c9123bb370aa87..5a5f01f8fd647c74217c80ce4a7633b8957e335f 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.h +++ b/tensorflow/compiler/xla/service/hlo_reachability.h @@ -19,11 +19,11 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -154,7 +154,7 @@ class HloReachabilityMap { // Dense assignment from HloInstruction* to number. These numbers index // into the bit_vectors_ vector and into the bits within a BitVector. - tensorflow::gtl::FlatMap indices_; + absl::flat_hash_map indices_; // Bitvectors holding the reachability to each instruction. The bit vector for // instruction X includes ones for each instruction which X is reachable from. diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index a43867193628d05ad7703a5d5ed8bdc9c72de581..5ac43808ee2945eaa5003baad24d5d331419db83 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -75,7 +77,7 @@ bool IsRematerializable(const HloInstruction* instruction) { // cache before, and eventually calling the IsRematerializable() API. bool CanBeRematerialized( const HloInstruction* instruction, - tensorflow::gtl::FlatMap* remat_able) { + absl::flat_hash_map* remat_able) { auto it = remat_able->find(instruction); if (it != remat_able->end()) { return it->second; @@ -268,7 +270,7 @@ class InstructionList { Item* first_; // Item for each instruction. - tensorflow::gtl::FlatMap item_map_; + absl::flat_hash_map item_map_; }; // Return the items which use the given LogicalBuffer. Sets @@ -503,7 +505,7 @@ MemoryUsageTracker::MemoryUsageTracker( PointsToSet::BufferSet live_out_set = points_to_analysis.GetPointsToSet(computation_->root_instruction()) .CreateFlattenedSet(); - tensorflow::gtl::FlatMap + absl::flat_hash_map logical_buffer_to_buffer_id; for (auto* item = instruction_list_.first(); item != nullptr; @@ -854,7 +856,7 @@ int64 RematerializationCost(const HloInstruction* instruction, Item* PickRematerializationCandidate( const MemoryUsageTracker& memory_tracker, const InstructionList& instruction_list, int64 memory_limit_bytes, - tensorflow::gtl::FlatMap* remat_able) { + absl::flat_hash_map* remat_able) { Item* best_item = nullptr; int64 best_cost = 0; @@ -980,10 +982,10 @@ StatusOr HloRematerialization::RematerializeComputation( // rematerialization is essentially a move). If the next rematerialization of // the instruction is also a move then the rematerialization is added to the // blacklist. - tensorflow::gtl::FlatSet remat_move_instructions; + absl::flat_hash_set remat_move_instructions; // The map from instructions to their rematerializable status. - tensorflow::gtl::FlatMap remat_able; + absl::flat_hash_map remat_able; // The peak memory of the computation at any point in the instruction // sequence. diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h index 7330d73c09eb5aa8265fa5753a2de5885f51bf15..70d83c04f07ca7fd0139f586869e8fe688f958f4 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.h +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h @@ -15,6 +15,8 @@ #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_ +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_graph.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -115,14 +117,13 @@ class HloRematerialization : public HloModulePass { // computations called from sequential context // (CallContext::kSequential). These values are updated as rematerialization // occurs. - tensorflow::gtl::FlatMap - computation_peak_memory_; + absl::flat_hash_map computation_peak_memory_; std::unique_ptr points_to_analysis_; // Set of computations which have had rematerialization // applied. Rematerialization is only applied once per computation. - tensorflow::gtl::FlatSet rematerialized_computations_; + absl::flat_hash_set rematerialized_computations_; // Count of the total instructions rematerialized. int64 instructions_rematerialized_ = 0; diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc index 3fc5dbeb02a26134a7f255fa0b6ebda1dc41ce4d..9972eb20774550817143cb27dd94667364cf68ec 100644 --- a/tensorflow/compiler/xla/service/hlo_schedule.cc +++ b/tensorflow/compiler/xla/service/hlo_schedule.cc @@ -18,6 +18,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/map_util.h" @@ -30,7 +32,7 @@ namespace xla { /* static */ StatusOr HloSchedule::CreateFromProto( const HloModule* module, const HloScheduleProto& proto) { - tensorflow::gtl::FlatMap id_to_computation; + absl::flat_hash_map id_to_computation; for (const HloComputation* computation : module->computations()) { id_to_computation[computation->unique_id()] = computation; } @@ -44,7 +46,7 @@ namespace xla { << "No computation exists in HLO module with id " << computation_id; const HloComputation* computation = comp_it->second; - tensorflow::gtl::FlatMap id_to_instruction; + absl::flat_hash_map id_to_instruction; for (const HloInstruction* instruction : computation->instructions()) { id_to_instruction[instruction->unique_id()] = instruction; } @@ -112,13 +114,13 @@ Status HloSchedule::UpdateComputationSchedule( const HloComputation* computation) { // Map from unique ID to HloInstruction pointer for instructions in the // computation. - tensorflow::gtl::FlatMap id_to_instruction; + absl::flat_hash_map id_to_instruction; for (const HloInstruction* instruction : computation->instructions()) { InsertOrDie(&id_to_instruction, instruction->unique_id(), instruction); } // Set of all HloInstructions in the schedule. - tensorflow::gtl::FlatSet ids_in_schedule; + absl::flat_hash_set ids_in_schedule; for (int id : sequences_.at(computation->unique_id()).ids()) { InsertOrDie(&ids_in_schedule, id); } @@ -126,15 +128,13 @@ Status HloSchedule::UpdateComputationSchedule( // Map from HloInstruction X to newly added instructions (instruction is in // computation, but not in schedule) which use X. If an instruction is not in // the map, then it has no users which are newly added instructions. - tensorflow::gtl::FlatMap> + absl::flat_hash_map> new_instruction_uses; // For each newly added instruction, this is the count of the instruction's // operands that have not yet been scheduled. When this value reaches zero, // then the instruction may be placed in the schedule. - tensorflow::gtl::FlatMap - unscheduled_operand_count; + absl::flat_hash_map unscheduled_operand_count; // Create a worklist of newly added instructions which are ready to be added // to the schedule. Initialize worklist with those that have zero operands. @@ -211,15 +211,15 @@ Status HloSchedule::Update() { if (sequences_.size() > nonfusion_computations.size()) { // Schedule contains some computations which have been removed from the // HloModule. Remove them from the schedule as well. - tensorflow::gtl::FlatSet nonfusion_computations_ids; + absl::flat_hash_set nonfusion_computations_ids; for (const HloComputation* computation : nonfusion_computations) { nonfusion_computations_ids.insert(computation->unique_id()); } for (auto it = sequences_.begin(); it != sequences_.end();) { if (nonfusion_computations_ids.count(it->first) == 0) { - it = sequences_.erase(it); + sequences_.erase(it++); } else { - it++; + ++it; } } } @@ -254,7 +254,7 @@ Status HloSchedule::Verify() const { // For each computation verify the set of instructions is the same and that // each dependency and control edge is honored. for (const HloComputation* computation : nonfusion_computations) { - tensorflow::gtl::FlatMap instruction_position; + absl::flat_hash_map instruction_position; int pos = 0; for (const HloInstruction* instruction : sequence(computation).instructions()) { diff --git a/tensorflow/compiler/xla/service/hlo_schedule.h b/tensorflow/compiler/xla/service/hlo_schedule.h index 270fe6039f0afd119c76086de9a0596e0560e93e..0a714101ee587aa847fa674bbde5586287c51f33 100644 --- a/tensorflow/compiler/xla/service/hlo_schedule.h +++ b/tensorflow/compiler/xla/service/hlo_schedule.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -103,8 +104,7 @@ class HloSchedule { // Returns a map from HloComputation unique ID to instruction sequence. The // map contains all sequences in the schedule. - const tensorflow::gtl::FlatMap& sequences() - const { + const absl::flat_hash_map& sequences() const { return sequences_; } @@ -148,7 +148,7 @@ class HloSchedule { // A map from computation unique ID to instruction sequence. Unique IDs are // used rather than HloComputation pointers because HLO pointers are not // unique across HLO transformations because pointers may be recycled. - tensorflow::gtl::FlatMap sequences_; + absl::flat_hash_map sequences_; }; std::ostream& operator<<(std::ostream& out, const HloSchedule& schedule); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index de7e6b53d4d2aa88e2213248370b4da82bdeadeb..188f4acc7945f3ec98065eae5a87a41c39730432 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -17,6 +17,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/overflow_util.h" #include "tensorflow/core/lib/core/errors.h" namespace xla { @@ -369,10 +370,28 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, return HloSharding(tuple_shardings); } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) { return Replicate(); - } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL || - proto.tile_assignment_devices().size() == 1) { + } else if (proto.tile_assignment_devices().size() == 1) { return HloSharding(proto.tile_assignment_devices(0)); } + + TF_RET_CHECK(proto.type() != OpSharding::Type::OpSharding_Type_MAXIMAL) + << "Maximal sharding is expected to have single device assignment, but " + << proto.tile_assignment_devices().size() << " has provided."; + + TF_RET_CHECK(proto.tile_assignment_devices().size() > 1); + TF_RET_CHECK(!proto.tile_assignment_dimensions().empty()); + + // RE: the product of tile assignment tensor dimensions must be + // equal to tile_assignment_devices.size(). + int64 product_of_dimensions = 1; + for (auto dimension : proto.tile_assignment_dimensions()) { + TF_RET_CHECK(dimension > 0); + product_of_dimensions = + MultiplyWithoutOverflow(product_of_dimensions, dimension); + TF_RET_CHECK(product_of_dimensions > 0); + } + TF_RET_CHECK(product_of_dimensions == proto.tile_assignment_devices().size()); + // Some versions of gcc cannot infer the TileAssignment constructor from a // braced initializer-list, so create one manually. std::vector devices(proto.tile_assignment_devices().begin(), diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index 85494877023fa3812973e993a349a7559706ab5d..59594ab2f0f70a206c73e998dbfa69c2c5c7ba43 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -31,7 +32,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -167,7 +167,7 @@ void HloValue::SetPositionsAndComputeUses( positions_.insert(positions_.end(), positions.begin(), positions.end()); // Gather the computation roots at which this value appears. - tensorflow::gtl::FlatSet root_positions; + absl::flat_hash_set root_positions; for (const HloPosition& position : positions_) { if (position.instruction == position.instruction->parent()->root_instruction()) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 6eb66589048c1a8d6ccfed73c0f7e32f5fe6e568..2902a11a42b499f392212f89fd3cbc8e5aab4f27 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -15,6 +15,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/str_join.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" @@ -23,7 +24,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -313,8 +313,9 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { operand_dimension < ShapeUtil::Rank(operand_shape); ++operand_dimension) { int64 output_dimension = broadcast->dimensions()[operand_dimension]; - TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) == - operand_shape.dimensions(operand_dimension)) + TF_RET_CHECK((output_dimension < ShapeUtil::Rank(broadcast->shape())) && + (broadcast->shape().dimensions(output_dimension) == + operand_shape.dimensions(operand_dimension))) << broadcast->ToString() << " operand shape " << operand_shape; } return Status::OK(); @@ -359,7 +360,27 @@ Status ShapeVerifier::HandleCall(HloInstruction* call) { return CheckShape(call, call->to_apply()->root_instruction()->shape()); } -Status ShapeVerifier::HandleCustomCall(HloInstruction*) { return Status::OK(); } +Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + TF_RET_CHECK(custom_call != nullptr); + if (custom_call->layout_constrained()) { + // If the layout is constrained, verify all the respective shapes have + // layouts and that the constrained operand shapes match the shapes of the + // operands. + TF_RET_CHECK(LayoutUtil::HasLayout(custom_call->shape())); + TF_RET_CHECK(custom_call->operand_count() == + custom_call->operand_shapes_with_layout().size()); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + const Shape& operand_shape_with_layout = + custom_call->operand_shapes_with_layout()[i]; + TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(), + operand_shape_with_layout)); + TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout)); + } + } + return Status::OK(); +} Status ShapeVerifier::HandleSlice(HloInstruction* slice) { return CheckShape(slice, @@ -548,6 +569,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) { case HloOpcode::kTupleSelect: case HloOpcode::kSend: case HloOpcode::kSendDone: + case HloOpcode::kSort: case HloOpcode::kTuple: case HloOpcode::kWhile: break; @@ -763,7 +785,136 @@ Status VerifyHloStructure(HloModule* module) { return Status::OK(); } -Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { +namespace { + +// Returns true if the given Shape has a TOKEN shape as any subshape. +bool ShapeContainsToken(const Shape& shape) { + bool contains_token = false; + ShapeUtil::ForEachSubshape( + shape, [&contains_token](const Shape& subshape, const ShapeIndex&) { + if (ShapeUtil::IsToken(subshape)) { + contains_token = true; + } + }); + return contains_token; +} + +// Verifies that all types entering and exiting the entry computation are +// legal. +Status VerifyEntryAndExitShapes(const HloModule& module) { + // Tokens cannot be passed as entry parameters. + // TODO(b/80000000): Remove this constraint. + for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) { + HloInstruction* param = + module.entry_computation()->parameter_instruction(i); + if (ShapeContainsToken(param->shape())) { + return InternalError( + "Entry parameter %d is or contains a token shape: %s", i, + ShapeUtil::HumanString(param->shape())); + } + } + return Status::OK(); +} + +// Checks if the given two instructions share the same channel id. +Status CheckSameChannel(const HloInstruction* instr1, + const HloInstruction* instr2) { + if (instr1->channel_id() != instr2->channel_id()) { + return InternalError( + "Expected to have the same channel id, actual channel ids are: %s " + "(%d), %s (%d)", + instr1->ToString(), instr1->channel_id(), instr2->ToString(), + instr2->channel_id()); + } + return Status::OK(); +} + +// Checks if the given two instructions have the same is_host_transfer +// attribute value. Intsructions must be send/recv instructions or their +// 'done' variant. +Status CheckSameIsHostTransfer(const HloInstruction* instr1, + const HloInstruction* instr2) { + const HloSendRecvInstruction* send_recv1 = + DynCast(instr1); + const HloSendRecvInstruction* send_recv2 = + DynCast(instr2); + TF_RET_CHECK(send_recv1 != nullptr); + TF_RET_CHECK(send_recv2 != nullptr); + if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) { + return InternalError( + "Expected instructions to have the same is-host-transfer property: " + "%s, " + "%s ", + instr1->ToString(), instr2->ToString()); + } + return Status::OK(); +} + +// Checks various invariants of send and recv instructions. +Status VerifySendsAndRecvs(const HloModule& module) { + absl::flat_hash_map host_channels; + // Host send/recv instructions must have their own unique channel. + auto check_unique_host_channel = [&](const HloInstruction* instruction) { + const HloSendRecvInstruction* sendrecv = + DynCast(instruction); + if (sendrecv->is_host_transfer()) { + auto it_inserted = + host_channels.insert({sendrecv->channel_id(), sendrecv}); + if (!it_inserted.second) { + return FailedPrecondition( + "Channel %d is used for multiple host send/recv instructions: " + "%s " + "and " + "%s", + sendrecv->channel_id(), sendrecv->ToString(), + it_inserted.first->second->ToString()); + } + } + + return Status::OK(); + }; + + // Send/Recv instruction must have a single user: the corresponding + // SendDone/RecvDone. with matching channel. + for (const HloComputation* computation : module.computations()) { + for (const HloInstruction* instruction : computation->instructions()) { + switch (instruction->opcode()) { + case HloOpcode::kSend: { + TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); + TF_RET_CHECK(instruction->users().size() == 1); + const HloInstruction* send_done = instruction->users().front(); + TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); + TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done)); + TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done)); + break; + } + case HloOpcode::kRecv: { + TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); + TF_RET_CHECK(instruction->users().size() == 1); + const HloInstruction* recv_done = instruction->users().front(); + TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); + TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done)); + TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done)); + break; + } + case HloOpcode::kSendDone: + TF_RET_CHECK(instruction->operands().size() == 1); + TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend); + break; + case HloOpcode::kRecvDone: + TF_RET_CHECK(instruction->operands().size() == 1); + TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv); + break; + default: + break; + } + } + } + return Status::OK(); +} + +// CHECKs various invariants of a fusion instruction. +Status CheckFusionInstruction(HloInstruction* fusion) { // The parent fusion instruction of the fusion computation must be 'fusion'. HloComputation* fused_computation = fusion->fused_instructions_computation(); if (fusion != fused_computation->FusionInstruction()) { @@ -866,50 +1017,32 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const { } } + TF_RET_CHECK(fusion->called_computations() == + absl::Span( + {fusion->fused_instructions_computation()})) + << "Fusion HLO calls computations other than the " + "fused_instructions_computation: " + << fusion->ToString() << " fusion->fused_instructions_computation(): " + << fusion->fused_instructions_computation()->ToString() + << " fusion->called_computations(): " + << ComputationsToString(fusion->called_computations()); + + for (const auto& fused : fusion->fused_instructions()) { + TF_RET_CHECK(fused->parent() == fusion->fused_instructions_computation()) + << "Fused HLO was missing a parent: " << fused->ToString() + << " parent: " << fused->parent() + << " computation: " << fusion->parent(); + } + // TODO(b/65423525): We'd like to check that all operands are distinct. // This is currently disabled due to the invariant being violated by // multi-output fusion. return Status::OK(); } -Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) { - auto* while_cond = instruction->while_condition(); - auto* while_body = instruction->while_body(); - if (while_cond->num_parameters() != 1) { - return FailedPrecondition( - "While condition must have exactly 1 parameter; had %d : %s", - while_cond->num_parameters(), while_cond->ToString()); - } - if (while_body->num_parameters() != 1) { - return FailedPrecondition( - "While body must have exactly 1 parameter; had %d : %s", - while_body->num_parameters(), while_body->ToString()); - } - if (instruction->operand_count() != 1) { - return FailedPrecondition( - "While loop must have exactly one operand; had %d : %s", - instruction->operand_count(), instruction->ToString()); - } - return Status::OK(); -} - -Status HloVerifier::CheckConditionalInstruction(HloInstruction* instruction) { - if (instruction->true_computation()->num_parameters() != 1) { - return FailedPrecondition( - "True computation %s of %s must have 1 parameter insted of %d", - instruction->true_computation()->name(), instruction->ToString(), - instruction->true_computation()->num_parameters()); - } - if (instruction->false_computation()->num_parameters() != 1) { - return FailedPrecondition( - "False computation %s of %s must have 1 parameter insted of %d", - instruction->false_computation()->name(), instruction->ToString(), - instruction->false_computation()->num_parameters()); - } - return Status::OK(); -} - -Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) { +// Checks that the non-scalar operand shapes are compatible to the output +// shape, i.e., that there are no implicit broadcasts of size-one dimensions. +Status CheckElementwiseInstruction(HloInstruction* instruction) { const Shape& out_shape = instruction->shape(); for (HloInstruction* operand : instruction->operands()) { const Shape& operand_shape = operand->shape(); @@ -926,133 +1059,143 @@ Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) { return Status::OK(); } -namespace { +// Visitor which verifies various fields on the HLO instruction. This class does +// not check result shape as that is checked in the ShapeVerifier. +class InstructionVerifier : public DfsHloVisitorWithDefault { + public: + explicit InstructionVerifier(std::function + instruction_can_change_layout_func) + : instruction_can_change_layout_func_( + instruction_can_change_layout_func) {} -// Returns true if the given Shape has a TOKEN shape as any subshape. -bool ShapeContainsToken(const Shape& shape) { - bool contains_token = false; - ShapeUtil::ForEachSubshape( - shape, [&contains_token](const Shape& subshape, const ShapeIndex&) { - if (ShapeUtil::IsToken(subshape)) { - contains_token = true; - } - }); - return contains_token; -} + Status DefaultAction(HloInstruction*) override { return Status::OK(); } -// Verifies that all types entering and exiting the entry computation are -// legal. -Status VerifyEntryAndExitShapes(const HloModule& module) { - // Tokens cannot be passed as entry parameters. - // TODO(b/80000000): Remove this constraint. - for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) { - HloInstruction* param = - module.entry_computation()->parameter_instruction(i); - if (ShapeContainsToken(param->shape())) { - return InternalError( - "Entry parameter %d is or contains a token shape: %s", i, - ShapeUtil::HumanString(param->shape())); - } + Status HandleFusion(HloInstruction* fusion) override { + return CheckFusionInstruction(fusion); } - return Status::OK(); -} -// Checks if the given two instructions share the same channel id. -Status CheckSameChannel(const HloInstruction* instr1, - const HloInstruction* instr2) { - if (instr1->channel_id() != instr2->channel_id()) { - return InternalError( - "Expected to have the same channel id, actual channel ids are: %s " - "(%d), %s (%d)", - instr1->ToString(), instr1->channel_id(), instr2->ToString(), - instr2->channel_id()); + Status HandleBroadcast(HloInstruction* broadcast) override { + // If you see this failure then someone has confused the difference + // between the HLO broadcast op, and the UserComputation broadcast + // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I + // or ComputationLowerer::Visit() + TF_RET_CHECK(broadcast->dimensions().size() == + ShapeUtil::Rank(broadcast->operand(0)->shape())) + << "Broadcast HLO (" << broadcast->ToShortString() + << ") has invalid number of dimensions: " + << broadcast->dimensions().size() + << " != " << ShapeUtil::Rank(broadcast->operand(0)->shape()); + return Status::OK(); } - return Status::OK(); -} -// Checks if the given two instructions have the same is_host_transfer -// attribute value. Intsructions must be send/recv instructions or their -// 'done' variant. -Status CheckSameIsHostTransfer(const HloInstruction* instr1, - const HloInstruction* instr2) { - const HloSendRecvInstruction* send_recv1 = - DynCast(instr1); - const HloSendRecvInstruction* send_recv2 = - DynCast(instr2); - TF_RET_CHECK(send_recv1 != nullptr); - TF_RET_CHECK(send_recv2 != nullptr); - if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) { - return InternalError( - "Expected instructions to have the same is-host-transfer property: " - "%s, " - "%s ", - instr1->ToString(), instr2->ToString()); + Status HandleWhile(HloInstruction* xla_while) override { + auto* while_cond = xla_while->while_condition(); + auto* while_body = xla_while->while_body(); + if (while_cond->num_parameters() != 1) { + return FailedPrecondition( + "While condition must have exactly 1 parameter; had %d : %s", + while_cond->num_parameters(), while_cond->ToString()); + } + if (while_body->num_parameters() != 1) { + return FailedPrecondition( + "While body must have exactly 1 parameter; had %d : %s", + while_body->num_parameters(), while_body->ToString()); + } + if (xla_while->operand_count() != 1) { + return FailedPrecondition( + "While loop must have exactly one operand; had %d : %s", + xla_while->operand_count(), xla_while->ToString()); + } + return Status::OK(); } - return Status::OK(); -} -// Checks various invariants of send and recv instructions. -Status VerifySendsAndRecvs(const HloModule& module) { - tensorflow::gtl::FlatMap host_channels; - // Host send/recv instructions must have their own unique channel. - auto check_unique_host_channel = [&](const HloInstruction* instruction) { - const HloSendRecvInstruction* sendrecv = - DynCast(instruction); - if (sendrecv->is_host_transfer()) { - auto it_inserted = - host_channels.insert({sendrecv->channel_id(), sendrecv}); - if (!it_inserted.second) { - return FailedPrecondition( - "Channel %d is used for multiple host send/recv instructions: " - "%s " - "and " - "%s", - sendrecv->channel_id(), sendrecv->ToString(), - it_inserted.first->second->ToString()); - } + Status HandleConditional(HloInstruction* conditional) override { + if (conditional->true_computation()->num_parameters() != 1) { + return FailedPrecondition( + "True computation %s of %s must have 1 parameter insted of %d", + conditional->true_computation()->name(), conditional->ToString(), + conditional->true_computation()->num_parameters()); } + if (conditional->false_computation()->num_parameters() != 1) { + return FailedPrecondition( + "False computation %s of %s must have 1 parameter insted of %d", + conditional->false_computation()->name(), conditional->ToString(), + conditional->false_computation()->num_parameters()); + } + return Status::OK(); + } + + Status HandleElementwiseUnary(HloInstruction* instruction) override { + return CheckElementwiseInstruction(instruction); + } + + Status HandleElementwiseBinary(HloInstruction* instruction) override { + return CheckElementwiseInstruction(instruction); + } + Status HandleGetTupleElement(HloInstruction* gte) override { + TF_RET_CHECK(ShapeUtil::IsTuple(gte->operand(0)->shape())); return Status::OK(); - }; + } - // Send/Recv instruction must have a single user: the corresponding - // SendDone/RecvDone. with matching channel. - for (const HloComputation* computation : module.computations()) { - for (const HloInstruction* instruction : computation->instructions()) { - switch (instruction->opcode()) { - case HloOpcode::kSend: { - TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); - TF_RET_CHECK(instruction->users().size() == 1); - const HloInstruction* send_done = instruction->users().front(); - TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone); - TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done)); - TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done)); - break; - } - case HloOpcode::kRecv: { - TF_RETURN_IF_ERROR(check_unique_host_channel(instruction)); - TF_RET_CHECK(instruction->users().size() == 1); - const HloInstruction* recv_done = instruction->users().front(); - TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone); - TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done)); - TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done)); - break; + Status HandleTranspose(HloInstruction* transpose) override { + const Shape& shape = transpose->shape(); + const HloInstruction* operand = transpose->operand(0); + TF_RET_CHECK(shape.dimensions().size() == transpose->dimensions().size()); + TF_RET_CHECK(shape.dimensions().size() == + transpose->operand(0)->shape().dimensions().size()); + TF_RET_CHECK(std::equal( + operand->shape().dimensions().begin(), + operand->shape().dimensions().end(), + Permute(transpose->dimensions(), shape.dimensions()).begin())) + << "shape: " << shape << ", operand->shape(): " << shape + << ", dimensions: {" << absl::StrJoin(transpose->dimensions(), ", ") + << "}"; + return Status::OK(); + } + + Status Preprocess(HloInstruction* instruction) override { + auto previous = instructions_by_name_.find(instruction->name()); + TF_RET_CHECK(previous == instructions_by_name_.end()) + << "HLO has name that is not unique within module:\n" + << instruction->ToString() + << " in computation: " << instruction->parent()->name() + << "\nPrevious HLO with same name:\n" + << previous->second->ToString() + << " in computation: " << previous->second->parent()->name(); + instructions_by_name_[instruction->name()] = instruction; + return Status::OK(); + } + + Status Postprocess(HloInstruction* instruction) override { + if (instruction_can_change_layout_func_ && + LayoutUtil::IsDenseArray(instruction->shape()) && + !instruction_can_change_layout_func_(instruction)) { + const Shape& result_shape = instruction->shape(); + const Layout& result_layout = result_shape.layout(); + for (HloInstruction* operand : instruction->operands()) { + const Shape& operand_shape = operand->shape(); + if (LayoutUtil::IsDenseArray(operand_shape) && + ShapeUtil::Rank(operand_shape) == ShapeUtil::Rank(result_shape)) { + const Layout& operand_layout = operand_shape.layout(); + TF_RET_CHECK(LayoutUtil::Equal(result_layout, operand_layout)) + << "Instruction shouldn't change layouts " + << instruction->ToString() << " From " + << ShapeUtil::HumanString(result_shape) << " To " + << ShapeUtil::HumanString(operand_shape); } - case HloOpcode::kSendDone: - TF_RET_CHECK(instruction->operands().size() == 1); - TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend); - break; - case HloOpcode::kRecvDone: - TF_RET_CHECK(instruction->operands().size() == 1); - TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv); - break; - default: - break; } } + + return Status::OK(); } - return Status::OK(); -} + + private: + absl::flat_hash_map instructions_by_name_; + // Determines whether an instruction can change layouts. + std::function + instruction_can_change_layout_func_; +}; } // namespace @@ -1061,65 +1204,13 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(VerifyHloStructure(module)); TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module)); - tensorflow::gtl::FlatMap instructions; - for (auto* computation : module->computations()) { - for (const auto& instruction : computation->instructions()) { - TF_RET_CHECK(instruction->parent() == computation); - if (instruction->opcode() == HloOpcode::kFusion) { - TF_RETURN_IF_ERROR(CheckFusionInstruction(instruction)); - TF_RET_CHECK(instruction->called_computations() == - absl::Span( - {instruction->fused_instructions_computation()})) - << "Fusion HLO calls computations other than the " - "fused_instructions_computation: " - << instruction->ToString() - << " instruction->fused_instructions_computation(): " - << instruction->fused_instructions_computation()->ToString() - << " instruction->called_computations(): " - << ComputationsToString(instruction->called_computations()); - - for (const auto& fused : instruction->fused_instructions()) { - TF_RET_CHECK(fused->parent() == - instruction->fused_instructions_computation()) - << "Fused HLO was missing a parent: " << fused->ToString() - << " parent: " << fused->parent() - << " computation: " << computation; - } - } else if (instruction->opcode() == HloOpcode::kBroadcast) { - // If you see this failure then someone has confused the difference - // between the HLO broadcast op, and the UserComputation broadcast - // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I - // or ComputationLowerer::Visit() - TF_RET_CHECK(instruction->dimensions().size() == - ShapeUtil::Rank(instruction->operand(0)->shape())) - << "Broadcast HLO (" << instruction->ToShortString() - << ") has invalid number of dimensions: " - << instruction->dimensions().size() - << " != " << ShapeUtil::Rank(instruction->operand(0)->shape()); - } else if (instruction->opcode() == HloOpcode::kWhile) { - TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction)); - } else if (instruction->opcode() == HloOpcode::kConditional) { - TF_RETURN_IF_ERROR(CheckConditionalInstruction(instruction)); - } else if (instruction->opcode() != - HloOpcode::kRng /* Rng operands are always scalar. */ - && instruction->IsElementwise()) { - TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction)); - } - - auto previous = instructions.find(instruction->name()); - TF_RET_CHECK(previous == instructions.end()) - << "HLO has name that is not unique within module:\n" - << instruction->ToString() - << " in computation: " << computation->name() - << "\nPrevious HLO with same name:\n" - << previous->second->ToString() - << " in computation: " << previous->second->parent()->name(); - instructions[instruction->name()] = instruction; - } - std::unique_ptr shape_verifier = shape_verifier_factory_(); TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get())); + + InstructionVerifier instruction_verifier( + instruction_can_change_layout_func_); + TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier)); } TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module)); @@ -1129,6 +1220,8 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(module->schedule().Verify()); } + TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module)); + return false; } diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 0cde4a31af72e81829723c564f59edc362f73335..cb49cb95ba8949b84f57d985bdb07a3177edbc5a 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -155,11 +155,17 @@ class HloVerifier : public HloModulePass { public: using ShapeVerifierFactory = std::function()>; - explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision) + explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision, + std::function + instruction_can_change_layout_func = {}) : shape_verifier_factory_([layout_sensitive, allow_mixed_precision] { return absl::make_unique(layout_sensitive, allow_mixed_precision); - }) {} + }), + instruction_can_change_layout_func_( + std::move(instruction_can_change_layout_func)) { + CHECK(instruction_can_change_layout_func_ == nullptr || layout_sensitive); + } // Uses custom shape verification. explicit HloVerifier(ShapeVerifierFactory shape_verifier_factory) @@ -172,22 +178,15 @@ class HloVerifier : public HloModulePass { StatusOr Run(HloModule* module) override; private: - // CHECKs various invariants of a fusion instruction. - Status CheckFusionInstruction(HloInstruction* fusion) const; - - Status CheckWhileInstruction(HloInstruction* instruction); - - Status CheckConditionalInstruction(HloInstruction* instruction); - - // Checks that the non-scalar operand shapes are compatible to the output - // shape, i.e., that there are no implicit broadcasts of size-one dimensions. - Status CheckElementwiseInstruction(HloInstruction* instruction); - // Creates a ShapeVerifier that checks that shapes match inferred // expectations. This is a factory function because ShapeVerifier, // being a DfsHloVisitor, is stateful. We want a clean object // for each run of the verifier. ShapeVerifierFactory shape_verifier_factory_; + + // Determines whether an instruction can change layouts. + std::function + instruction_can_change_layout_func_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index 8f0423bb1c72ceb209437116a898d027f4d2c657..afe01e5487c3225815e01343d86e9fe894c2cde8 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/service/layout_assignment.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" @@ -50,6 +51,14 @@ class HloVerifierTestAllowMixedPrecision : public HloTestBase { /*allow_mixed_precision_in_hlo_verifier=*/true) {} }; +class HloVerifierTestLayoutSensitive : public HloTestBase { + public: + HloVerifierTestLayoutSensitive() + : HloTestBase(/*verifier_layout_sensitive=*/true, + /*allow_mixed_precision_in_hlo_verifier=*/false, + LayoutAssignment::InstructionCanChangeLayout) {} +}; + TEST_F(HloVerifierTest, NullInstructionParent) { HloComputation::Builder builder(TestName()); const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); @@ -358,5 +367,63 @@ TEST_F(HloVerifierTest, ConvNegativeBaseDilationNotAllowed) { HasSubstr("non-positive base area dilation factor")); } +static const char* const kAddWithLayoutChangeHlo = R"( + HloModule AddWithLayoutChange + ENTRY AddWithLayoutChange { + par0 = f32[3,4]{1,0} parameter(0) + par1 = f32[3,4]{0,1} parameter(1) + ROOT add0 = f32[3,4]{1,0} add(par0,par1) + } + )"; + +TEST_F(HloVerifierTest, AddWithLayoutChange) { + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo)); + auto status = verifier().Run(module.get()).status(); + ASSERT_TRUE(status.ok()); +} + +TEST_F(HloVerifierTestLayoutSensitive, AddWithLayoutChangeNotAllowed) { + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo)); + auto status = verifier().Run(module.get()).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + HasSubstr("Instruction shouldn't change layouts")); +} + +TEST_F(HloVerifierTestLayoutSensitive, SliceWithLayoutChangeNotAllowed) { + const char* const kSliceWithLayoutChangeHlo = R"( + HloModule SliceWithLayoutChange + ENTRY SliceWithLayoutChange { + par0 = f32[4,5]{0,1} parameter(0) + par1 = s32[2] parameter(1) + ROOT dslice0 = f32[3,4]{1,0} dynamic-slice(par0, par1), + dynamic_slice_sizes={3,4} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseHloString(kSliceWithLayoutChangeHlo)); + auto status = verifier().Run(module.get()).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + HasSubstr("Instruction shouldn't change layouts")); +} + +TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) { + const char* const kConcatWithLayoutChangeHlo = R"( + HloModule ConcatWithLayoutChange + ENTRY ConcatWithLayoutChange { + par0 = f32[3,5]{0,1} parameter(0) + par1 = f32[3,3]{1,0} parameter(1) + ROOT concat0 = f32[3,8]{1,0} concatenate(f32[3,5] par0, f32[3,3] par1), + dimensions={1} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseHloString(kConcatWithLayoutChangeHlo)); + auto status = verifier().Run(module.get()).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + HasSubstr("Instruction shouldn't change layouts")); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 06f0e1ed25e71659a61e6de8a84e52cf70064eae..1ebb3319779c00fd4afe90606bf336e16349429d 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -16,6 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/indexed_array_analysis.h" #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -23,7 +25,6 @@ limitations under the License. #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_evaluator.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { namespace gtl = ::tensorflow::gtl; @@ -95,7 +96,7 @@ Status IndexedArrayAnalysis::TraverseAndPopulateCache( absl::InlinedVector stack; enum DfsState { kDiscovered, kVisited }; - gtl::FlatMap dfs_state_map; + absl::flat_hash_map dfs_state_map; stack.push_back(root); InsertOrDie(&dfs_state_map, root, kDiscovered); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index 3e238f97a03fb71cddf59da69b0389731314ff49..e5aa67fd850de647652d66017223e19fb92cc068 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/util/ptr_util.h" namespace xla { @@ -360,7 +360,7 @@ class IndexedArrayAnalysis { std::vector> owned_tensors_; std::vector owned_literals_; - tensorflow::gtl::FlatMap cache_; + absl::flat_hash_map cache_; }; // A pass that prints all non-trivial results returned by IndexedArrayAnalysis. diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 3fdc2cee9aad0fe70f66920f757ee5c52bba711f..69a4c160ee5c4539272c3085338dc6de1b9347ff 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -22,11 +22,12 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/map_util.h" +#include "tensorflow/compiler/xla/service/fusion_queue.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -188,13 +189,20 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) { bool InstructionFusion::CanFuseOnAllPaths( HloInstruction* producer, HloInstruction* consumer, - const HloInstructionSet& do_not_duplicate) { + const HloInstructionSet& do_not_fuse, + absl::flat_hash_map, bool>* + result_cache) { if (consumer == producer) { return true; } if (!consumer->IsFusible()) { return false; } + auto cache_it = result_cache->find(std::make_pair(producer, consumer)); + if (cache_it != result_cache->end()) { + return cache_it->second; + } + bool result = true; for (int64 i = 0, e = consumer->operand_count(); i < e; ++i) { auto* consumer_operand = consumer->mutable_operand(i); // If the operand is not on a path to the producer, it doesn't matter @@ -202,20 +210,23 @@ bool InstructionFusion::CanFuseOnAllPaths( if (!reachability_->IsReachable(producer, consumer_operand)) { continue; } - if (do_not_duplicate.count(consumer_operand) > 0 || - !ShouldFuse(consumer, i)) { - return false; + if (do_not_fuse.count(consumer_operand) > 0 || !ShouldFuse(consumer, i)) { + result = false; + break; } // The producer is reachable from consumer_operand which means we need // to be able to fuse consumer_operand into consumer in order for // producer to be fusible into consumer on all paths. // Perform the recursive step: make sure producer can be fused into // consumer_operand on all paths. - if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_duplicate)) { - return false; + if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_fuse, + result_cache)) { + result = false; + break; } } - return true; + result_cache->emplace(std::make_pair(producer, consumer), result); + return result; } InstructionFusion::HloInstructionSet @@ -231,6 +242,8 @@ InstructionFusion::ComputeGloballyUnfusible( // fusing operations that require duplication later depending on // is_expensive_(). HloInstructionSet do_not_duplicate; + absl::flat_hash_map, bool> + can_fuse_on_all_paths_result_cache; for (HloInstruction* consumer : post_order) { for (HloInstruction* producer : consumer->operands()) { if (do_not_duplicate.count(producer) > 0) { @@ -286,7 +299,8 @@ InstructionFusion::ComputeGloballyUnfusible( // A will be not allowed to be fused into B, as it cannot be fused via // all paths. if (producer->IsFusible() && - CanFuseOnAllPaths(producer, consumer, do_not_duplicate)) { + CanFuseOnAllPaths(producer, consumer, do_not_duplicate, + &can_fuse_on_all_paths_result_cache)) { continue; } do_not_duplicate.insert(producer); @@ -417,7 +431,7 @@ class ReversePostOrderFusionQueue : public FusionQueue { private: std::vector post_order_; - tensorflow::gtl::FlatMap post_order_index_; + absl::flat_hash_map post_order_index_; }; } // namespace diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h index 7e1196fb7fbeb4072929773b2161fe28233d73d9..f14c6675208c72112aea0179c238b58709d625b5 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/instruction_fusion.h @@ -1,3 +1,4 @@ +#include "absl/container/flat_hash_map.h" /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +17,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_ +#include "tensorflow/compiler/xla/service/fusion_queue.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -24,33 +26,6 @@ limitations under the License. namespace xla { -// A queue interface that allows implementations to choose fusion candidates in -// custom order. -class FusionQueue { - public: - FusionQueue() = default; - virtual ~FusionQueue() = default; - - // Dequeues the next fusion candidates: a consumer and the list of producers - // as operand indices. - virtual std::pair> - DequeueNextInstructionAndOperandsToFuseInOrder() = 0; - - // A callback passed to the queue implementation right before the producer is - // fused into the consumer. - virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {} - - // A callback passed to the queue implementation right after the fusion is - // created. Note that original_producer could have been destroyed. - virtual void OnFusingInstruction(HloInstruction* fusion, - HloInstruction* original_producer, - HloInstruction* original_consumer) {} - - // A callback passed to the queue implementation to notify the removal of an - // instruction. - virtual void RemoveInstruction(HloInstruction* instruction) = 0; -}; - // HLO pass which performs instruction fusion. Instructions are fused // "vertically", meaning producing instructions are fused into their consumers // with the intent that the loops which compute their values will be fused in @@ -151,8 +126,15 @@ class InstructionFusion : public HloModulePass { // Whether or not we can fuse producer into consumer on all paths // from the producer to the consumer where nodes are HLOs and edges are uses. - bool CanFuseOnAllPaths(HloInstruction* producer, HloInstruction* consumer, - const HloInstructionSet& do_not_fuse); + // + // A map from to a bool is required as the result cache + // to store and query the results of calls to this function, in order to avoid + // repeated computations. + bool CanFuseOnAllPaths( + HloInstruction* producer, HloInstruction* consumer, + const HloInstructionSet& do_not_fuse, + absl::flat_hash_map, bool>* + result_cache); // Computes the set of nodes that we do not want to fuse into any of their // consumers based on a global analysis of the HLO graph. diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD index 146c9052f10cca8b199a480491d9a672d8bebdff..1484e14df10d94841c5a2e849761779f5800392d 100644 --- a/tensorflow/compiler/xla/service/interpreter/BUILD +++ b/tensorflow/compiler/xla/service/interpreter/BUILD @@ -45,8 +45,8 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/compiler/xla/service:hlo_pass_pipeline", "//tensorflow/compiler/xla/service:hlo_subcomputation_unification", - "//tensorflow/compiler/xla/service:inliner", "//tensorflow/compiler/xla/service:layout_assignment", + "//tensorflow/compiler/xla/service:map_inliner", "//tensorflow/compiler/xla/service:reshape_mover", "//tensorflow/compiler/xla/service:while_loop_simplifier", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index bb69cb9c47ff2c7de8d13832c4b8e6216c62da73..7c79eb7d791bc9a0743605d3171ff69c6ef41d58 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -28,9 +28,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_pass_fix.h" #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h" -#include "tensorflow/compiler/xla/service/inliner.h" #include "tensorflow/compiler/xla/service/interpreter/executable.h" #include "tensorflow/compiler/xla/service/layout_assignment.h" +#include "tensorflow/compiler/xla/service/map_inliner.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" #include "tensorflow/compiler/xla/service/while_loop_simplifier.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -44,7 +44,8 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { HloPassPipeline pipeline("Interpreter"); pipeline.AddPass( - hlo_module->mutable_entry_computation_layout()); + hlo_module->mutable_entry_computation_layout(), + LayoutAssignment::InstructionCanChangeLayout); return pipeline.Run(hlo_module).status(); } diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 082bf8bffed484244139e79f4d3fe30ca091d8ac..ad65b147c154ce0d63230f6a777a215942afa577 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -419,6 +419,16 @@ Status LayoutAssignment::BuildHostChannelConstraints( return Status::OK(); } +namespace { + +bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + return custom_call != nullptr && custom_call->layout_constrained(); +} + +} // namespace + Status LayoutAssignment::AddMandatoryConstraints( const ComputationLayout* computation_layout, ChannelLayoutConstraints* channel_constraints, HloComputation* computation, @@ -434,7 +444,6 @@ Status LayoutAssignment::AddMandatoryConstraints( // Constrain layouts of instructions which define values with pre-existing // layouts. for (auto* instruction : computation->instructions()) { - Shape const* shape_with_layout = nullptr; if (instruction->opcode() == HloOpcode::kInfeed) { // Infeed layouts must match the layout of the original inserted // instruction. @@ -456,17 +465,21 @@ Status LayoutAssignment::AddMandatoryConstraints( if (parameter_layout.LayoutIsSet()) { // Parameter layouts must match the respective layout in // ComputationLayout, if there is one. - shape_with_layout = ¶meter_layout.shape(); + TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( + parameter_layout.shape(), instruction)); } } - } - if (shape_with_layout != nullptr) { + } else if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(*shape_with_layout, instruction)); - } - - if (instruction->opcode() == HloOpcode::kSend || - instruction->opcode() == HloOpcode::kRecv) { + constraints->SetInstructionLayout(custom_call->shape(), custom_call)); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RETURN_IF_ERROR(constraints->SetOperandLayout( + custom_call->operand_shapes_with_layout()[i], custom_call, i)); + } + } else if (instruction->opcode() == HloOpcode::kSend || + instruction->opcode() == HloOpcode::kRecv) { CHECK(get_channel_constraints(instruction)) << "Multi-module layout assignment requires ChannelLayoutConstraints"; int64 channel_id = instruction->channel_id(); @@ -498,6 +511,22 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR( constraints->SetBufferLayout(new_shape.layout(), *buffer)); } + } else if (instruction->IsCrossModuleAllReduce()) { + CHECK(get_channel_constraints(instruction)) + << "Multi-module layout assignment requires ChannelLayoutConstraints"; + int64 all_reduce_id = instruction->all_reduce_id().value(); + if (!get_channel_constraints(instruction) + ->IsChannelConstrained(all_reduce_id)) { + continue; + } + // TODO(b/68493863): Change to use SetOperandLayout(). + const Shape& buffer_shape = instruction->operand(0)->shape(); + TF_RET_CHECK(ShapeUtil::IsArray(buffer_shape)); + Shape new_buffer_shape = + get_channel_constraints(instruction) + ->LayoutShapeForChannel(buffer_shape, all_reduce_id); + TF_RETURN_IF_ERROR( + constraints->SetInstructionLayout(new_buffer_shape, instruction)); } } @@ -605,31 +634,6 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR(constraints->SetOperandLayout( false_computation_layout.parameter_shape(0), instruction, 2, /*mandatory=*/true)); - } else if (instruction->opcode() == HloOpcode::kCustomCall) { - if (!CustomCallRequiresMajorFirstLayout(instruction)) { - continue; - } - // Add constraints for kCustomCall instruction operands and instructions. - // For now we only support major-first layouts for all inputs and outputs. - Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout( - instruction->shape().element_type(), - AsInt64Slice(instruction->shape().dimensions())); - TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(result_shape, instruction)); - for (int64 i = 0; i < instruction->operand_count(); ++i) { - const Shape& operand_shape = instruction->operand(i)->shape(); - // Opaque operands don't get a layout constraint. - if (ShapeUtil::IsOpaque(operand_shape)) { - continue; - } - - Shape row_major_operand_shape = - ShapeUtil::MakeShapeWithDescendingLayout( - operand_shape.element_type(), - AsInt64Slice(operand_shape.dimensions())); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout( - row_major_operand_shape, instruction, i)); - } } } // Finally set the result layout to match ComputationLayout, if there is one. @@ -660,16 +664,18 @@ Status CheckCallLayout(HloInstruction* call, return Status::OK(); } -// Custom calls have fixed input and output layouts. -Status CheckCustomCallLayout(HloInstruction* custom_call) { - for (const HloInstruction* operand : custom_call->operands()) { - TF_RET_CHECK( - ShapeUtil::IsOpaque(operand->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout())); +// Operands of layout-constrained custom calls must match the expected +// constrained layouts. +Status CheckCustomCallLayout(HloInstruction* instruction) { + if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + custom_call->operand(i)->shape(), + custom_call->operand_shapes_with_layout()[i])); + } } - TF_RET_CHECK( - ShapeUtil::IsOpaque(custom_call->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout())); return Status::OK(); } @@ -776,21 +782,27 @@ StatusOr LayoutAssignment::CreateCopyWithNewLayout( << " instruction: " << instruction->ToString(); if (ShapeUtil::IsTuple(instruction->shape())) { - // Deep-copy tuples. + // Copy tuple elements which have differing layouts. std::vector element_copies; for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); ++i) { + const Shape& target_shape = + ShapeUtil::GetSubshape(shape_with_layout, {i}); + const Shape& instr_shape = + ShapeUtil::GetSubshape(instruction->shape(), {i}); HloInstruction* gte = instruction->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, - i)); - SetupCopiedInstruction(*instruction, gte, {i}); - // Recurse to copy each elements. - TF_ASSIGN_OR_RETURN( - HloInstruction * element_copy, - CreateCopyWithNewLayout( - ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); - element_copies.push_back(element_copy); + HloInstruction::CreateGetTupleElement(instr_shape, instruction, i)); + + if (ShapeUtil::Equal(target_shape, instr_shape)) { + // Shapes and layouts are equal, no need to copy. + element_copies.push_back(gte); + } else { + SetupCopiedInstruction(*instruction, gte, {i}); + // Recurse to copy each element. + TF_ASSIGN_OR_RETURN(HloInstruction * element_copy, + CreateCopyWithNewLayout(target_shape, gte)); + element_copies.push_back(element_copy); + } } // Gather element copies into a tuple with a new Tuple instruction. HloInstruction* tuple_copy = instruction->parent()->AddInstruction( @@ -910,9 +922,7 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, instruction->to_apply()))); break; case HloOpcode::kCustomCall: - if (CustomCallRequiresMajorFirstLayout(instruction)) { - TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); - } + TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); break; case HloOpcode::kFusion: TF_RETURN_IF_ERROR(CheckFusionLayout(instruction)); @@ -958,10 +968,15 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { LayoutAssignment::LayoutAssignment( ComputationLayout* entry_computation_layout, + std::function + instruction_can_change_layout_func, ChannelLayoutConstraints* channel_constraints) : entry_computation_layout_(entry_computation_layout), + saved_entry_computation_layout_(*entry_computation_layout), - channel_layout_constraints_(channel_constraints) { + channel_layout_constraints_(channel_constraints), + instruction_can_change_layout_func_( + std::move(instruction_can_change_layout_func)) { if (channel_layout_constraints_ != nullptr) { // Save a copy of the input ChannelLayoutConstraints so that we can reset it // if we have to undo previous operations (ClearPreviousPassSideEffects()). @@ -982,7 +997,7 @@ std::unique_ptr LayoutAssignment::ChooseOperandLayoutFromOutputLayout( if (!ShapeUtil::IsScalar(operand->shape()) && ShapeUtil::Rank(operand->shape()) == ShapeUtil::Rank(instruction->shape()) && - InstructionRequiresInputLayoutEqualToOutputLayout(instruction)) { + !instruction_can_change_layout_func_(instruction)) { // Propagate the result layout to the operand layout if the instruction // requires the same layout out for the result and the operand. // @@ -1060,7 +1075,7 @@ std::unique_ptr LayoutAssignment::ChooseOutputLayoutFromOperandLayout( if (!ShapeUtil::IsScalar(operand->shape()) && ShapeUtil::Rank(operand->shape()) == ShapeUtil::Rank(user->shape()) && - InstructionRequiresInputLayoutEqualToOutputLayout(user)) { + !instruction_can_change_layout_func_(user)) { // Assign users the same layout as the operand. return absl::make_unique(operand_layout); } @@ -1512,19 +1527,6 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints, // Verify all layouts in the shape have been set. TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape())); } - - // Copy the root instruction's result if its layout does not match the result - // layout constraint. - if (constraints.ResultLayout() != nullptr && - !constraints.ResultLayout()->MatchesLayoutInShape( - computation->root_instruction()->shape())) { - TF_ASSIGN_OR_RETURN( - HloInstruction * new_root, - CreateCopyWithNewLayout(constraints.ResultLayout()->shape(), - computation->root_instruction())); - computation->set_root_instruction(new_root); - } - return Status::OK(); } @@ -1540,11 +1542,11 @@ Status LayoutAssignment::CalculateComputationLayout( Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { // Clear existing layouts of the instructions. All layouts must be assigned - // by the LayoutAssignment pass, except for those on infeeds, parameters, - // and the computation result. The latter two are specified in - // computation_layout, so we only need to keep the existing layouts for - // infeeds. Clearing the layouts here avoids hiding potential bugs in the - // layout assignment pass that may accidentally use the existing layout. + // by the LayoutAssignment pass, except for those on parameters, the + // computation result, and a couple special cases. The former two are + // specified in computation_layout. Clearing the layouts here avoids hiding + // potential bugs in the layout assignment pass that may accidentally use the + // existing layout. for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kBitcast) { // bitcasts are inherently layout sensitive and so a bitcast instruction @@ -1553,7 +1555,9 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { "Unexpected bitcast operation seen during layout assignment: %s.", instruction->ToString()); } - if (instruction->opcode() != HloOpcode::kInfeed) { + // Some instructions carry mandatory layouts in their shape. + if (instruction->opcode() != HloOpcode::kInfeed && + !IsLayoutConstrainedCustomCall(instruction)) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } } @@ -1654,6 +1658,18 @@ Status LayoutAssignment::RunOnComputation( TF_RETURN_IF_ERROR( ConstrainChannelLayouts(computation, channel_constraints)); } + + // Copy the root instruction's result if its layout does not match the result + // layout constraint. + if (constraints.ResultLayout() != nullptr && + !constraints.ResultLayout()->MatchesLayoutInShape( + computation->root_instruction()->shape())) { + TF_ASSIGN_OR_RETURN( + HloInstruction * new_root, + CreateCopyWithNewLayout(constraints.ResultLayout()->shape(), + computation->root_instruction())); + computation->set_root_instruction(new_root); + } return Status::OK(); } @@ -1709,6 +1725,30 @@ Status LayoutAssignment::ConstrainChannelLayouts( ShapeUtil::GetMutableSubshape(instruction->mutable_shape(), {0}); *send_shape = shape; } + } else if (instruction->IsCrossModuleAllReduce()) { + const Layout* layout = + get_channel_constraints(instruction) + ->ConstrainChannel(instruction->all_reduce_id().value(), + instruction->shape().layout()); + if (layout != nullptr) { + // We found an already constrained layout which does not match the one + // the channel wants to impose. Either add a new kCopy, or use the + // existing one to marshal the correct shape. + HloInstruction* operand = instruction->mutable_operand(0); + Shape shape = operand->shape(); + *shape.mutable_layout() = *layout; + if (operand->opcode() != HloOpcode::kCopy) { + HloInstruction* copy = operand->parent()->AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kCopy, operand)); + RegisterAddedCopy(copy); + SetupCopiedInstruction(*operand, copy, {}); + TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(0, copy)); + operand = copy; + } else { + *operand->mutable_shape() = shape; + } + *instruction->mutable_shape() = shape; + } } } return Status::OK(); @@ -1752,6 +1792,18 @@ StatusOr LayoutAssignment::Run(HloModule* module) { } TF_RETURN_IF_ERROR(Init()); + // Verify computation layout is sane. + const HloComputation* entry = module->entry_computation(); + TF_RET_CHECK(entry_computation_layout_->parameter_count() == + entry->num_parameters()); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + TF_RET_CHECK( + ShapeUtil::Compatible(entry_computation_layout_->parameter_shape(i), + entry->parameter_instruction(i)->shape())); + } + TF_RET_CHECK(ShapeUtil::Compatible(entry_computation_layout_->result_shape(), + entry->root_instruction()->shape())); + // We do two passes. The first one we pass a nullptr ComputationLayout to // the RunOnComputation() calls (for non entry computations), and we register // the ComputationLayout which are naturally flowing in DFS fashion to the @@ -1803,7 +1855,8 @@ StatusOr LayoutAssignment::Run(HloModule* module) { return true; } -bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout( +/* static */ +bool LayoutAssignment::InstructionCanChangeLayout( const HloInstruction* instruction) { switch (instruction->opcode()) { case HloOpcode::kAbs: @@ -1822,7 +1875,6 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout( case HloOpcode::kCrossReplicaSum: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: - case HloOpcode::kCustomCall: case HloOpcode::kDivide: case HloOpcode::kDynamicSlice: case HloOpcode::kDynamicUpdateSlice: @@ -1869,7 +1921,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout( case HloOpcode::kTanh: case HloOpcode::kTupleSelect: case HloOpcode::kWhile: - return true; + return false; case HloOpcode::kBatchNormGrad: case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormTraining: @@ -1879,6 +1931,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout( case HloOpcode::kConstant: case HloOpcode::kConvolution: case HloOpcode::kCopy: + case HloOpcode::kCustomCall: case HloOpcode::kDomain: case HloOpcode::kDot: case HloOpcode::kFusion: @@ -1900,7 +1953,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout( case HloOpcode::kTrace: case HloOpcode::kTranspose: case HloOpcode::kTuple: - return false; + return true; } } diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index e29c199c42a4878daaf2eeb86b6909d6d3ff920e..cb56f4cd19ded036ef521a579eb7d6ea7f3b6268 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -25,6 +25,8 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -38,8 +40,6 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -228,8 +228,8 @@ class LayoutConstraints { // Array-shaped buffers which have not yet been constrained. std::set unconstrained_buffer_ids_; - mutable tensorflow::gtl::FlatMap> + mutable absl::flat_hash_map> buffer_sets_cache_; HloComputation* computation_; @@ -286,6 +286,11 @@ class LayoutAssignment : public HloModulePass { // entry_computation_layout is modified to populate a layout for the result in // the case that no particular layout is requested. // + // instruction_can_change_layout_func is a function object that determines + // whether an instruction can change layouts. An instruction not being able to + // change layout means that it requires operands with the same rank as the + // output to have the same layout as the output. + // // channel_constraints is both an input and output. Any sends or recvs that // are present in channel_constraints will be laid out as constrained. Any // unconstrained sends or recvs will be laid out as locally optimal and their @@ -295,6 +300,8 @@ class LayoutAssignment : public HloModulePass { // within any module passed to `Run`. explicit LayoutAssignment( ComputationLayout* entry_computation_layout, + std::function + instruction_can_change_layout_func = InstructionCanChangeLayout, ChannelLayoutConstraints* channel_constraints = nullptr); ~LayoutAssignment() override {} absl::string_view name() const override { return "layout-assignment"; } @@ -303,10 +310,10 @@ class LayoutAssignment : public HloModulePass { // (any layouts were changed). StatusOr Run(HloModule* module) override; - // Returns true if the instruction requires that operands with the same rank - // as the output have to have the same layout as the output. - virtual bool InstructionRequiresInputLayoutEqualToOutputLayout( - const HloInstruction* instruction); + // Determines whether an instruction can change layouts. An instruction not + // being able to change layout means that it requires operands with the same + // rank as the output to have the same layout as the output. + static bool InstructionCanChangeLayout(const HloInstruction* instruction); protected: // These methods, invoked by PropagateConstraints, propagate a layout @@ -326,19 +333,6 @@ class LayoutAssignment : public HloModulePass { const ResultLayoutConstraint& layout_constraint, LayoutConstraints* constraints); - // By default LayoutAssignment ensures that inputs and outputs of CustomCalls - // have the "major-first" layout (i.e. {n, n-1, ..., 0}). - // - // If this function returns true, LayoutAssignment does not set a layout for - // the given CustomCall. It's up to the backend to set one in - // AddBackendConstraints, if necessary. - // - // Precondition: instruction->opcode() == HloOpcode::kCustomCall. - virtual bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* /*instruction*/) { - return true; - } - // Called after layouts of an instruction have been finalized to allow // subclasses to check for platform specific assumptions. virtual Status Verify(const HloInstruction* instruction) { @@ -504,7 +498,7 @@ class LayoutAssignment : public HloModulePass { // Every copy added to the module by the layout assignment pass is registered // here. - tensorflow::gtl::FlatSet added_copies_; + absl::flat_hash_set added_copies_; // The pointer to the channel layout constraints passed in with the // constructor. If not nullptr, this is an input/output argument. @@ -521,8 +515,10 @@ class LayoutAssignment : public HloModulePass { // The set of HLO instructions which lacked any layout constraint, thus // receiving propagated default layouts. - tensorflow::gtl::FlatSet - unconstrained_layout_instructions_; + absl::flat_hash_set unconstrained_layout_instructions_; + + std::function + instruction_can_change_layout_func_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 752a61476dd7892a2b7f531c4057015f48fc4758..ff6fdb5e4aab68ad30088112e3716da5ca6c516a 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -55,7 +55,8 @@ class LayoutAssignmentTest : public HloVerifiedTestBase { ComputationLayout* entry_computation_layout, ChannelLayoutConstraints* channel_constraints = nullptr) { LayoutAssignment layout_assignment( - entry_computation_layout, /*channel_constraints=*/channel_constraints); + entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout, + /*channel_constraints=*/channel_constraints); EXPECT_IS_OK(layout_assignment.Run(module).status()); } @@ -64,6 +65,27 @@ class LayoutAssignmentTest : public HloVerifiedTestBase { FindInstruction(module, name)->shape().layout().minor_to_major(); return std::vector(minor_to_major.begin(), minor_to_major.end()); } + + void ExpectLayoutIs(const Shape& shape, + absl::Span minor_to_major) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + EXPECT_TRUE(LayoutUtil::Equal(shape.layout(), expected)) + << "Expected layout " << expected << ", actual " << shape.layout(); + } + + void ExpectTupleLayoutIs( + const Shape& shape, + std::initializer_list> minor_to_majors) { + int i = 0; + for (const absl::Span minor_to_major : minor_to_majors) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + const Layout& actual = ShapeUtil::GetTupleElementShape(shape, i).layout(); + EXPECT_TRUE(LayoutUtil::Equal(actual, expected)) + << "Expected tuple element " << i << " layout " << expected + << ", actual " << actual; + ++i; + } + } }; TEST_F(LayoutAssignmentTest, ComputationLayout) { @@ -860,6 +882,50 @@ TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) { ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}))); } +TEST_F(LayoutAssignmentTest, AllReduceLayoutMissmatch) { + // Pin non matching layouts to parameter and root. + const char* module_str = R"( + HloModule test_module + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY entry_computation { + param = (f32[2,2]) parameter(0) + gte = f32[2,2] get-tuple-element(param), index=0 + ar.0 = f32[2,2] cross-replica-sum(gte), + all_reduce_id=0, replica_groups={{0}}, to_apply=add, + sharding={maximal device=0} + const = f32[2,2] constant(f32[2,2]{{0,1},{2,3}}) + ROOT ar.1 = f32[2,2] cross-replica-sum(const), + all_reduce_id=0, replica_groups={{0}}, to_apply=add, + sharding={maximal device=1} + })"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str)); + ComputationLayout computation_layout( + module->entry_computation()->ComputeProgramShape()); + Shape param_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {0, 1})}); + TF_ASSERT_OK( + computation_layout.mutable_parameter_layout(0)->CopyLayoutFromShape( + param_shape)); + computation_layout.mutable_result_layout()->ResetLayout( + LayoutUtil::MakeLayout({1, 0})); + + ChannelLayoutConstraints channel_constraints; + AssignLayouts(module.get(), &computation_layout, &channel_constraints); + + EXPECT_THAT(LayoutOf(module.get(), "gte"), ElementsAre(0, 1)); + EXPECT_THAT(LayoutOf(module.get(), "ar.0"), ElementsAre(0, 1)); + EXPECT_THAT(LayoutOf(module.get(), "ar.1"), ElementsAre(0, 1)); + const HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root->shape().layout().minor_to_major(), ElementsAre(1, 0)); +} + TEST_F(LayoutAssignmentTest, CopySliceOperandToAvoidImplicitLayoutChange) { const char* module_str = R"( HloModule CopySliceOperandToAvoidImplicitLayoutChange @@ -998,5 +1064,233 @@ TEST_F(LayoutAssignmentTest, PropagatingLayoutFromResultToOperand) { op::ShapeWithLayout(shape_copy)))); } +TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) { + // The first infeed uses layout {0,1}, while the second uses layout {1,0}. + // The mismatch forces a copy of the tuple. The tuple contains a token, so + // layout assignment will fail if it tries to copy the whole tuple. + const char* module_str = R"( + HloModule TupleCopyOnLayoutMismatch + + condition.1 (tup: (s32[], token[], f32[512,1024]{0,1})) -> pred[] { + tup.1 = (s32[], token[], f32[512,1024]{0,1}) parameter(0) + counter.1 = s32[] get-tuple-element(tup.1), index=0 + five = s32[] constant(5) + ROOT lt = pred[] less-than(counter.1, five) + } + + body.2 (tup: (s32[], token[], f32[512,1024]{0,1})) -> (s32[], token[], f32[512,1024]{0,1}) { + tup.2 = (s32[], token[], f32[512,1024]{0,1}) parameter(0) + counter.2 = s32[] get-tuple-element(tup.2), index=0 + tok.2 = token[] get-tuple-element(tup.2), index=1 + + ifeed.2 = (f32[512,1024]{1,0}, token[]) infeed(tok.2) + next_tok = token[] get-tuple-element(ifeed.2), index=1 + next_buf = f32[512,1024]{1,0} get-tuple-element(ifeed.2), index=0 + + one = s32[] constant(1) + next_counter = s32[] add(counter.2, one) + ROOT tup = (s32[], token[], f32[512,1024]{0,1}) tuple(next_counter, next_tok, next_buf) + } + + ENTRY main () -> f32[512,1024]{0,1} { + start_tok = token[] after-all() + + ifeed.3 = (f32[512,1024]{0,1}, token[]) infeed(start_tok) + itok = token[] get-tuple-element(ifeed.3), index=1 + ibuf = f32[512,1024]{0,1} get-tuple-element(ifeed.3), index=0 + + zero = s32[] constant(0) + itup = (s32[], token[], f32[512,1024]{0,1}) tuple(zero, itok, ibuf) + + loop = (s32[], token[], f32[512,1024]{0,1}) while(itup), condition=condition.1, body=body.2 + ROOT result = f32[512,1024]{0,1} get-tuple-element(loop), index=2 + } + )"; + + ParseAndVerifyModule(module_str); + ComputationLayout computation_layout( + module().entry_computation()->ComputeProgramShape()); + + // Sanity check to verify that there's a layout mismatch. + EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1)); + EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0)); + + AssignLayouts(&module(), &computation_layout); + + // Make sure that layout assignment did not magically eliminate the mismatch, + // in which case the test didn't prove anything. + EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1)); + EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0)); +} + +TEST_F(LayoutAssignmentTest, CustomCallNotLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallNotLayoutConstrained + +ENTRY %CustomCallWithNotLayoutConstrained (p: f32[42,2,3]) -> f32[1,2,3,4] { + %p = f32[42,2,3] parameter(0) + ROOT %custom-call = f32[1,2,3,4] custom-call(f32[42,2,3] %p), custom_call_target="baz" +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. + { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 2, 1})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {3, 2, 0, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + ExpectLayoutIs(root->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 2, 1}); + } + { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 1, 2})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {0, 2, 3, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + ExpectLayoutIs(root->shape(), {0, 2, 3, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 1, 2}); + } +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrained + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(f32[4,4] %p0, f32[2,3] %p1), custom_call_target="baz", operand_layout_constraints={f32[4,4]{0,1}, f32[2,3]{1,0}} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + // The custom call should be partially encapsulated in kCopy instructions + // because of the layout mismatches. + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Copy(), op::Parameter()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(custom_call->operand(0)->shape(), {0, 1}); + ExpectLayoutIs(custom_call->operand(1)->shape(), {1, 0}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedZeroOperands) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedZeroOperands + +ENTRY %CustomCallLayoutConstrainedZeroOperands () -> f32[1,2,3,4] { + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall())); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleOperand) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleOperand + +ENTRY %CustomCallLayoutConstrainedTupleOperand (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + %tuple = (f32[4,4], f32[2,3]) tuple(%p0, %p1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(%tuple), custom_call_target="baz", operand_layout_constraints={(f32[4,4]{1,0}, f32[2,3]{0,1})} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ExpectLayoutIs(root->shape(), {2, 1, 0, 3}); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Tuple()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectTupleLayoutIs(custom_call->operand(0)->shape(), {{1, 0}, {0, 1}}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleResult) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleResult + +ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, f32[2,3]{0,1}) { + %p0 = f32[4,4] parameter(0) + ROOT %custom-call = (f32[4,4]{1,0}, f32[2,3]{0,1}) custom-call(%p0), custom_call_target="baz", operand_layout_constraints={f32[4,4]{1,0}} +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_result_layout() = + ShapeLayout(ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}), + ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})})); + AssignLayouts(module.get(), &computation_layout); + + ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(), + {{1, 0}, {1, 0}}); + + const HloInstruction* custom_call = + FindInstruction(module.get(), "custom-call"); + ExpectTupleLayoutIs(custom_call->shape(), {{1, 0}, {0, 1}}); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 540bbb7c7a74f65ab70f4c6704d6600db2adbb60..6223a34b1258961944a3ac64cd10876d1272c94e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -38,6 +38,8 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:logical_buffer", "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@llvm//:core", ], diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc index e5370eca56f2e3a891523ba2b72961d66ec809aa..643ecd0fbaa546c551097b29e74ccd49418e1466 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h" -#include +#include #include "llvm/IR/MDBuilder.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" @@ -164,9 +164,7 @@ llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer( add_buffers_to_worklist(operand); } - tensorflow::gtl::FlatSet - buffers; + std::set buffers; for (const LogicalBuffer* buffer : worklist) { // Skip buffers which cannot be added to the noalias set. if (!assignment.HasAllocation(*buffer) || diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h index 8d9fa99d82b4e49b653d9f05cc9baa5e3fdcefa6..2b46b3c3964b15548dbacc8b0ada0047a0fa85b6 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h +++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h @@ -16,14 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_ +#include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "llvm/IR/Module.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { namespace llvm_ir { @@ -77,14 +76,14 @@ class AliasAnalysis { // A map from a buffer slice to metadata corresponding to its alias.scope // metadata. The index kParameterAliasSet is used to hold aliasing // information for parameters. - tensorflow::gtl::FlatMap + absl::flat_hash_map alias_scope_metadata_; // A map from a buffer slice to metadata corresponding to its noalias // metadata. - tensorflow::gtl::FlatMap + absl::flat_hash_map noalias_metadata_; }; diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/map_inliner.cc similarity index 76% rename from tensorflow/compiler/xla/service/inliner.cc rename to tensorflow/compiler/xla/service/map_inliner.cc index 5fd779ebf9b59e34a0844cc3a898bb72ce6044ee..2200ef054a6993fb884751643ab1fb5ab83efe05 100644 --- a/tensorflow/compiler/xla/service/inliner.cc +++ b/tensorflow/compiler/xla/service/map_inliner.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/inliner.h" +#include "tensorflow/compiler/xla/service/map_inliner.h" #include #include @@ -32,10 +32,10 @@ limitations under the License. namespace xla { -// InlinerVisitor traverses the HLO computation and inlines maps. -class InlinerVisitor : public DfsHloVisitorWithDefault { +// MapInlinerVisitor traverses the HLO computation and inlines maps. +class MapInlinerVisitor : public DfsHloVisitorWithDefault { public: - explicit InlinerVisitor(HloComputation* computation) + explicit MapInlinerVisitor(HloComputation* computation) : computation_(computation) {} // Default visitor action is to do nothing and return OK. @@ -49,48 +49,44 @@ class InlinerVisitor : public DfsHloVisitorWithDefault { StatusOr Run(HloComputation* computation); private: - // Current HloComputation instance the InlinerVisitor is traversing. + // Current HloComputation instance the MapInlinerVisitor is traversing. HloComputation* computation_; // Whether algebraic simplification has occurred. bool changed_ = false; }; -StatusOr InlinerVisitor::Run(HloComputation* computation) { +StatusOr MapInlinerVisitor::Run(HloComputation* computation) { changed_ = false; computation_ = computation; TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(this)); return changed_; } -Status InlinerVisitor::HandleMap(HloInstruction* map) { +Status MapInlinerVisitor::HandleMap(HloInstruction* map) { HloComputation* function = map->to_apply(); HloInstruction& root = *function->root_instruction(); - // TODO(b/29249531): Add DCE pass to remove unused HloComputations. // Only inlining functions that are simply a single operation until a better // profitability model for inlining is defined. if (hlo_query::AllOperandsAreParameters(root)) { if (root.opcode() == HloOpcode::kFusion || - root.opcode() == HloOpcode::kParameter || root.opcode() == HloOpcode::kTrace) { // Cloning not supported for these instructions. return Status::OK(); } VLOG(10) << "inlining map({X ... Y}, op) => : op(X ... Y) with function " << root.ToShortString(); - // If the input is a constant then the shape of the constant could be - // different than the map shape. Hence, a broadcast is needed, else the - // cloned operand with new shape and operands work. - if (root.opcode() != HloOpcode::kConstant) { - std::vector params; - for (int64 o = 0; o < root.operands().size(); o++) { - params.push_back(map->operands()[root.operand(o)->parameter_number()]); - } - HloInstruction* placed_instruction = computation_->AddInstruction( - root.CloneWithNewOperands(map->shape(), params)); + if (root.opcode() == HloOpcode::kParameter) { + // If the root is a parameter, then use the corresponding operand as the + // result of the computation. TF_RETURN_IF_ERROR( - computation_->ReplaceInstruction(map, placed_instruction)); - } else { + map->ReplaceAllUsesWith(map->operands()[root.parameter_number()])); + TF_RETURN_IF_ERROR(computation_->RemoveInstruction(map)); + } else if (root.opcode() == HloOpcode::kConstant) { + // If the input is a constant then the shape of the constant could be + // different than the map shape. Hence, a broadcast is needed, else the + // cloned operand with new shape and operands work. + // // The constant is in an embedded computation and needs to be recreated // as part of the computation that the broadcast is inserted into. HloInstruction* constant = computation_->AddInstruction(root.Clone()); @@ -98,6 +94,15 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) { HloInstruction::CreateBroadcast(map->shape(), constant, {})); TF_RETURN_IF_ERROR( computation_->ReplaceInstruction(map, placed_instruction)); + } else { + std::vector params; + for (int64 o = 0; o < root.operands().size(); o++) { + params.push_back(map->operands()[root.operand(o)->parameter_number()]); + } + HloInstruction* placed_instruction = computation_->AddInstruction( + root.CloneWithNewOperands(map->shape(), params)); + TF_RETURN_IF_ERROR( + computation_->ReplaceInstruction(map, placed_instruction)); } changed_ = true; return Status::OK(); @@ -106,8 +111,8 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) { return Status::OK(); } -StatusOr Inliner::Run(HloModule* module) { - InlinerVisitor visitor(/*computation=*/nullptr); +StatusOr MapInliner::Run(HloModule* module) { + MapInlinerVisitor visitor(/*computation=*/nullptr); bool changed = false; for (HloComputation* computation : module->computations()) { TF_ASSIGN_OR_RETURN(bool computation_changed, visitor.Run(computation)); diff --git a/tensorflow/compiler/xla/service/inliner.h b/tensorflow/compiler/xla/service/map_inliner.h similarity index 59% rename from tensorflow/compiler/xla/service/inliner.h rename to tensorflow/compiler/xla/service/map_inliner.h index e20af08fb7329c3646903761ee081e421daa5712..b67911811846e2250068921ef252b7df596d4016 100644 --- a/tensorflow/compiler/xla/service/inliner.h +++ b/tensorflow/compiler/xla/service/map_inliner.h @@ -13,27 +13,27 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_ #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" namespace xla { -// A pass which performs inlining. Which can result, for example, in functions -// that were previously being mapped by Map instead directly applied to the -// forwarded operands (i.e., map({X, Y}, max) -> max(X, Y)). -class Inliner : public HloModulePass { +// A pass which performs map inlining. This replaces kMap instructions with +// their equivalent sequence of array operations. For example: +// map({X, Y}, add) -> add(X, Y)). +class MapInliner : public HloModulePass { public: - ~Inliner() override = default; - absl::string_view name() const override { return "inline"; } + ~MapInliner() override = default; + absl::string_view name() const override { return "map-inline"; } - // Run inlining on the given computation. Returns whether the computation was - // changed. + // Run map inlining on the given computation. Returns whether the computation + // was changed. StatusOr Run(HloModule* module) override; }; } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_ diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/map_inliner_test.cc similarity index 78% rename from tensorflow/compiler/xla/service/inliner_test.cc rename to tensorflow/compiler/xla/service/map_inliner_test.cc index 7e967f035c1054e22d10790188a5a232ca8e751a..84059dd0f71ee8fc0a25703cbab2268d7dc149a8 100644 --- a/tensorflow/compiler/xla/service/inliner_test.cc +++ b/tensorflow/compiler/xla/service/map_inliner_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/inliner.h" +#include "tensorflow/compiler/xla/service/map_inliner.h" #include #include @@ -35,10 +35,10 @@ namespace op = xla::testing::opcode_matchers; namespace xla { namespace { -using InlinerTest = HloVerifiedTestBase; +using MapInlinerTest = HloVerifiedTestBase; // Test that `map` with `max` is transformed to `max` -TEST_F(InlinerTest, MapMax) { +TEST_F(MapInlinerTest, MapMax) { Shape r0f32 = ShapeUtil::MakeShape(F32, {}); auto max_builder = HloComputation::Builder(TestName()); @@ -63,7 +63,7 @@ TEST_F(InlinerTest, MapMax) { hlo_module->AddEmbeddedComputation(std::move(max_f32)); hlo_module->AddEntryComputation(std::move(computation)); - Inliner inliner; + MapInliner inliner; EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie()); EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), op::Maximum(lhs, rhs)); @@ -75,7 +75,7 @@ TEST_F(InlinerTest, MapMax) { } // Test that `constant` function is changed to `broadcast`. -TEST_F(InlinerTest, MapConstant) { +TEST_F(MapInlinerTest, MapConstant) { Shape r0f32 = ShapeUtil::MakeShape(F32, {}); auto const2_builder = HloComputation::Builder(TestName()); @@ -97,7 +97,7 @@ TEST_F(InlinerTest, MapConstant) { hlo_module->AddEmbeddedComputation(std::move(const2_f32)); hlo_module->AddEntryComputation(std::move(computation)); HloInstruction* root = hlo_module->entry_computation()->root_instruction(); - Inliner inliner; + MapInliner inliner; EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie()); root = hlo_module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::Broadcast(op::Constant())); @@ -108,7 +108,7 @@ TEST_F(InlinerTest, MapConstant) { EXPECT_TRUE(LiteralTestUtil::Equal(result, expected)); } -TEST_F(InlinerTest, MapSubtractOppositeOrder) { +TEST_F(MapInlinerTest, MapSubtractOppositeOrder) { Shape r0f32 = ShapeUtil::MakeShape(F32, {}); // Note that the parameter ordinals are in the opposite order to their @@ -135,7 +135,7 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) { hlo_module->AddEmbeddedComputation(std::move(max_f32)); hlo_module->AddEntryComputation(std::move(computation)); - Inliner inliner; + MapInliner inliner; EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie()); EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), op::Subtract(rhs, lhs)); @@ -146,6 +146,36 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) { EXPECT_TRUE(LiteralTestUtil::Equal(result, expected)); } +TEST_F(MapInlinerTest, MapParameter) { + Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + + auto param_builder = HloComputation::Builder(TestName()); + param_builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32, "p0")); + param_builder.AddInstruction(HloInstruction::CreateParameter(1, r0f32, "p1")); + auto param_f32 = param_builder.Build(); + + auto builder = HloComputation::Builder("MapParamFunction"); + auto lhs = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1))); + auto rhs = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(4))); + builder.AddInstruction( + HloInstruction::CreateMap(lhs->shape(), {lhs, rhs}, param_f32.get())); + + auto computation = builder.Build(); + auto hlo_module = CreateNewVerifiedModule(); + hlo_module->AddEmbeddedComputation(std::move(param_f32)); + hlo_module->AddEntryComputation(std::move(computation)); + + MapInliner inliner; + EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie()); + EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs); + + // Verify execution on CPU. + auto result = ExecuteAndTransfer(hlo_module->Clone(), {}); + auto expected = LiteralUtil::CreateR0(4); + EXPECT_TRUE(LiteralTestUtil::Equal(result, expected)); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc index b9ec31c4977be0c31dfff01a0c495902191d7d5b..2ca527bc4cb8f66a085c1e6a7cbb8ddaedbfc07e 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc @@ -15,10 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/multi_output_fusion.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -50,7 +50,7 @@ StatusOr MultiOutputFusion::Run(HloModule* module) { all_fusion_candidates_.push_back(instruction); std::vector candidates; - tensorflow::gtl::FlatSet candidates_set; + absl::flat_hash_set candidates_set; VLOG(10) << "Looking at instruction: " << instruction->name(); for (auto operand : instruction->operands()) { // Filter out the non-interesting instructions -- they @@ -172,7 +172,7 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) { // Update the fusible list for fusion. Variable new_fusibles keeps // track of the new or changed entries. std::vector> new_fusibles; - tensorflow::gtl::FlatSet in_list; + absl::flat_hash_set in_list; auto it = fusion_node.fusibles.begin(); while (it != fusion_node.fusibles.end()) { HloInstruction* instr = it->first; diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h index 0344626b26b2cd1d659657c51636266706d17afb..9508ab2ed1d38ec40983d8892ec8875b848fb21b 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.h +++ b/tensorflow/compiler/xla/service/multi_output_fusion.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -126,7 +127,7 @@ class MultiOutputFusion : public HloModulePass { std::vector candidates_; // A map that maps an instruction to the index_. - tensorflow::gtl::FlatMap candidates_index_; + absl::flat_hash_map candidates_index_; // The reachability map of current computation. std::unique_ptr reachability_; diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h index 6dd89c240f81c9f0ccac66e50c7f244bfd5429f1..8909d0f4fea801e43ab06a75e8933d24a74146bc 100644 --- a/tensorflow/compiler/xla/service/name_uniquer.h +++ b/tensorflow/compiler/xla/service/name_uniquer.h @@ -18,10 +18,10 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/types.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/macros.h" namespace xla { @@ -69,7 +69,7 @@ class NameUniquer { int64 next_ = 0; // Set of all the identifiers which has been used. - tensorflow::gtl::FlatSet used_; + absl::flat_hash_set used_; }; // The string to use to separate the prefix of the name from the uniquing @@ -78,7 +78,7 @@ class NameUniquer { // Map from name prefix to the generator data structure which tracks used // identifiers and generates new ones. - tensorflow::gtl::FlatMap generated_names_; + absl::flat_hash_map generated_names_; TF_DISALLOW_COPY_AND_ASSIGN(NameUniquer); }; diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h index 4bb22428f3d66f27d268ac4490c6e2613966cbed..0b4e82e8d606cf2cacfab42d07c2201939d5e10b 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 7194b2cafd348c144a2ee83027cf78642bfaf75f..e379911462f1d2caa53f708a6ebf8b7363dc2fc3 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" @@ -33,7 +34,6 @@ limitations under the License. #include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -577,7 +577,7 @@ Status ValidateDotDimensionNumbers( // Check that dimension numbers are unique. auto dims_unique = [](absl::Span contracting_dims, absl::Span batch_dims) -> bool { - tensorflow::gtl::FlatSet dim_set; + absl::flat_hash_set dim_set; auto is_unique = [&dim_set](int64 i) -> bool { return dim_set.insert(i).second; }; @@ -2380,7 +2380,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, !std::is_permutation(dimensions.begin(), dimensions.end(), indices.begin())) { return InvalidArgument( - "Transpose dimensions not a permutation of the operand dimensions."); + "Transpose dimensions [%s] are not a permutation of the operand " + "dimensions (operand shape is %s).", + StrJoin(dimensions, ","), ShapeUtil::HumanString(operand)); } // Permute(dimensions,input) computes output[dimensions[i]]=input[i]. However, diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index 921a984589bb4fb64058a2a56adfe84fe14af69b..56952e3adae59656605a12fd499162504a2a3379 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -26,7 +27,6 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -147,7 +147,7 @@ void ScopedShapedBuffer::Deallocate() { // Deallocate all non-null buffers. A buffer may appear in more than one spot // in the shape (eg, a tuple with a repeated element) so keep track of what // has been deallocated. - tensorflow::gtl::FlatSet deallocated_ptrs; + absl::flat_hash_set deallocated_ptrs; for (auto& pair : buffers_) { se::DeviceMemoryBase& memory_base = pair.second; if (!memory_base.is_null() && diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index 6fed7c76d04ad5d8236fecd07aa27f1eda221ea7..811ac55e2dc2939293e62f1ebcd2bce266a12133 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -280,16 +280,6 @@ Status TuplePointsToAnalysis::HandleDomain(HloInstruction* domain) { return Status::OK(); } -Status TuplePointsToAnalysis::HandleSlice(HloInstruction* slice) { - // A kSlice instruction aliases its operand if the backend lowers it to an - // in-place implementation. - if (slice->IsInPlaceSlice()) { - CreateCopiedPointsToSet(slice, slice->operand(0)); - return Status::OK(); - } - return DefaultAction(slice); -} - Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) { // RecvDone aliases its input (Recv) tuple element {0} to element {0} of its // output. The other indices ({} and {1}) define their own buffers. @@ -455,15 +445,10 @@ bool TuplePointsToAnalysis::InstructionDefinesBufferAtIndex( Status TuplePointsToAnalysis::VerifyBuffer(const LogicalBuffer& buffer) const { if (!InstructionDefinesBufferAtIndex(buffer.instruction(), buffer.index())) { - // kSlice ops that are lowered to an in-place version are expected to not - // define their output buffer. - if (buffer.instruction()->opcode() != HloOpcode::kSlice || - !buffer.instruction()->IsInPlaceSlice()) { - return FailedPrecondition( - "LogicalBuffer %s is ill-defined: instruction %s does not define a " - "buffer at that index", - buffer.ToString(), buffer.instruction()->name()); - } + return FailedPrecondition( + "LogicalBuffer %s is ill-defined: instruction %s does not define a " + "buffer at that index", + buffer.ToString(), buffer.instruction()->name()); } if (buffer.id() < 0 || diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index a9e8a51e0923362162c6b8a2e97fc334e56d4329..30c365053c5dac5af3c559f7c92b11d389d7fff8 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -36,8 +36,6 @@ limitations under the License. #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/compactptrset.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -249,7 +247,6 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleDomain(HloInstruction* domain) override; - Status HandleSlice(HloInstruction* slice) override; Status HandleCopy(HloInstruction* copy) override; Status HandleRecvDone(HloInstruction* recv_done) override; Status HandleSend(HloInstruction* send) override; diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc index 56145822be70f391ac3eaab5fc17db4a80e1b9cc..067cfcc17d65860a249de4d9e31703df12091d3a 100644 --- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc +++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc @@ -18,7 +18,6 @@ limitations under the License. #include "absl/container/inlined_vector.h" #include "tensorflow/compiler/xla/service/while_util.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index e8fe33e62659ae0fffff1ad46e8ba77f715b76b2..9795b2830b6d9add82b89ac76b5438ddc3d2bfe8 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -15,18 +15,18 @@ limitations under the License. #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h" #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "tensorflow/compiler/xla/service/tuple_util.h" #include "tensorflow/compiler/xla/service/while_util.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" -#include "tensorflow/core/lib/gtl/flatset.h" namespace xla { +using absl::flat_hash_map; +using absl::flat_hash_set; using absl::InlinedVector; -using tensorflow::gtl::FlatMap; -using tensorflow::gtl::FlatSet; // Copies `to_hoist` to the computation containing `while_instr`, hoisting its // operands as needed. All of its transitive operands are expected to be either @@ -34,8 +34,8 @@ using tensorflow::gtl::FlatSet; // function hoists the operands in `unhoisted_invariant_instructions` and moves // them into `hoisted_instructions`. static void CreateLoopInvariantCopy( - FlatMap* hoisted_instructions, - FlatSet* unhoisted_invariant_instructions, + flat_hash_map* hoisted_instructions, + flat_hash_set* unhoisted_invariant_instructions, HloInstruction* while_instr, HloInstruction* to_hoist) { HloComputation* parent_of_while = while_instr->parent(); HloComputation* while_body = while_instr->while_body(); @@ -147,13 +147,13 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody( // Maps instructions in the while body to instructions hoisted outside the // while that compute the same value. - FlatMap hoisted_instructions; + flat_hash_map hoisted_instructions; // Contains instructions that can be legally hoisted, but were deemed to be // unprofitable to be hoisted alone by NotWorthHoistingIndividually. When we // hoist an instruction in this set, we move it from // unhoisted_invariant_instructions to hoisted_instructions. - FlatSet unhoisted_invariant_instructions; + flat_hash_set unhoisted_invariant_instructions; // Invariant GTE's axiomatically satisfy the constraints for // unhoisted_invariant_instructions -- they can be legally hoisted, but there diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 9a74f22395099fe4f14cbc9af49814d35203df01..630d71e5ca25e9d282ce6283284a32d6f725a193 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -14,12 +14,13 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/while_loop_simplifier.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/while_loop_analysis.h" -#include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { @@ -114,7 +115,7 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { return false; } - tensorflow::gtl::FlatSet used_tuple_indices; + absl::flat_hash_set used_tuple_indices; for (HloComputation* comp : {while_body, while_cond}) { // The HLO verifier ensures that while_input's shape matches while_init's // shape, which we verified above is a tuple. @@ -181,7 +182,7 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { used_tuple_indices.end()); std::sort(new_to_old_tuple_idx.begin(), new_to_old_tuple_idx.end()); - tensorflow::gtl::FlatMap old_to_new_tuple_idx; + absl::flat_hash_map old_to_new_tuple_idx; for (int64 new_idx = 0; new_idx < new_to_old_tuple_idx.size(); ++new_idx) { int64 old_idx = new_to_old_tuple_idx[new_idx]; old_to_new_tuple_idx[old_idx] = new_idx; @@ -405,7 +406,7 @@ static StatusOr TryPropagateConstant(HloInstruction* while_op) { // build a map from the tuple element index to the constant value. Limit this // to scalar constant values because propagating array constants can regress // performance by forcing us to copy constants. - tensorflow::gtl::FlatMap index_to_constant; + absl::flat_hash_map index_to_constant; for (int i = 0; i < root_operands.size(); i++) { HloInstruction* instr = root_operands[i]; if (instr->opcode() == HloOpcode::kGetTupleElement && diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 020c167ee953bbb3508bae94107de60f386602c0..9267de3cfc455ce351ac8cf57f8d45786ea4b5ba 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -461,8 +461,9 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return ShapeUtil::IsArray(shape) && ElementsIn(shape) == 0; } -/* static */ bool ShapeUtil::IsScalarF32(const Shape& shape) { - return shape.element_type() == F32 && Rank(shape) == 0; +/* static */ bool ShapeUtil::IsScalarWithElementType( + const Shape& shape, PrimitiveType element_type) { + return IsScalar(shape) && shape.element_type() == element_type; } namespace { @@ -831,7 +832,8 @@ StatusOr ParseShapeStringInternal(absl::string_view* s) { /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal( const Shape& shape) { - if (shape.element_type() == PRIMITIVE_TYPE_INVALID) { + if (shape.element_type() == PRIMITIVE_TYPE_INVALID || + !PrimitiveType_IsValid(shape.element_type())) { return InvalidArgument("shape has invalid element type: %s", shape.ShortDebugString()); } @@ -868,11 +870,8 @@ StatusOr ParseShapeStringInternal(absl::string_view* s) { return Status::OK(); } - if (Rank(shape) != shape.dimensions_size()) { - return InvalidArgument( - "shape's rank is mismatched with dimension count; rank=%d " - "dimensions_size=%d", - Rank(shape), shape.dimensions_size()); + if (LayoutUtil::IsSparseArray(shape) && Rank(shape) == 0) { + return InvalidArgument("sparse arrays must have rank > 0"); } for (int64 i = 0; i < Rank(shape); ++i) { int64 dimension = shape.dimensions(i); @@ -1647,7 +1646,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, } std::ostream& operator<<(std::ostream& out, const Shape& shape) { - out << ShapeUtil::HumanString(shape); + out << ShapeUtil::HumanStringWithLayout(shape); return out; } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index d8bb27beae64bb665c79c2cd7134f613495529cc..51cedce7f0e13e65dfd0e250689e0ecd30f971dc 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -72,7 +72,7 @@ class ShapeIndex { void push_back(int64 value) { indices_.push_back(value); } void pop_back() { indices_.pop_back(); } - // push_front is O(n^2), but shapes don't usually have a ton of dimensions. + // push_front is O(n), but shapes don't usually have a ton of dimensions. void push_front(int64 value) { indices_.insert(indices_.begin(), value); } using container_type = absl::InlinedVector; @@ -312,7 +312,10 @@ class ShapeUtil { static bool IsEffectiveScalar(const Shape& shape) { return IsArray(shape) && TrueRank(shape) == 0; } - static bool IsScalarF32(const Shape& shape); + + // Returns whether "shape" is a scalar (array) with the given element_type. + static bool IsScalarWithElementType(const Shape& shape, + PrimitiveType element_type); // Extracts the size of the shape's dimension at dimension number // GetDimensionNumber(dimension_number). diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f474ecb18c75327edec449433c36a91d8ac7de83..8a0ae330420531b833ed670118e6b6b1056bd358 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -422,6 +422,7 @@ xla_test( "//tensorflow/core:regexp_internal", "//tensorflow/core:test", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], ) @@ -2145,11 +2146,11 @@ xla_test( ":test_utils", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla/client:xla_builder", - "//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", + "@com_google_absl//absl/container:flat_hash_set", ], ) diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 070b092d18930027e215cb43ff917e36cac99f12..b851db14ec048a20947fb8136a31e457d3922f86 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -91,7 +91,14 @@ class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest { XlaBuilder builder(TestName()); auto lhs = ConstantR4FromArray4D(&builder, *alhs); auto rhs = ConstantR4FromArray4D(&builder, *arhs); - Conv(lhs, rhs, {1, 1}, Padding::kValid); + PrecisionConfig precision; + // The left hand side of the convolution is numbers between 0 and 2304 which + // requires at least 11 mantissa bits and the DEFAULT precision config is + // allowed to round to bfloat16 which only has 7 mantissa bits. + precision.add_operand_precision(PrecisionConfig::HIGHEST); + precision.add_operand_precision(PrecisionConfig::DEFAULT); + Conv(lhs, rhs, {1, 1}, Padding::kValid, /*feature_group_count=*/1, + &precision); ComputeAndCompare(&builder, {}, error_spec_); } diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index a693fa35954bcb2d95074c94d0aa3eabc1d5fd62..001490c6a8c568656437465054ee4db40d0d8dee 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -105,8 +105,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) { LiteralTestUtil::ExpectR0Near(10.0f, result, error_spec_); } -XLA_TEST_F(CustomCallTest, - DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) { +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) { auto module = CreateNewModule(); auto b = HloComputation::Builder(TestName()); @@ -130,6 +129,53 @@ XLA_TEST_F(CustomCallTest, Array3D{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result); } +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) { + auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + b.AddInstruction( + HloInstruction::CreateCustomCall(r2f32_, {input}, "Add1ToValues")); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + // Note, the expected result is transposed! This is because the input and + // output layouts of the custom call differ and the called function just + // blindly adds one to each element. + Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 4.f}, {3.f, 5.f}}, result); +} + +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) { + // The argument and result of the computation are set to different layouts, + // but the custom call is layout constrained to a fixed operand and result + // layout, so the correct result should be produced. + auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + + const Shape& r2f32_dim0_major = + ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}); + b.AddInstruction(HloInstruction::CreateCustomCall( + r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major})); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 3.f}, {4.f, 5.f}}, result); +} + class CustomCallClientAPITest : public ClientLibraryTestBase {}; // When using the client API, CustomCall targets can't begin with '$' -- these diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 0171f515839d556827f0723772214d175939d386..6c0847a875798870b4362a99ac2ab65d99f9f3e6 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -394,6 +394,10 @@ class ParametricDotTestWithoutLayoutAssignment : public ParametricDotTest { ParametricDotTestWithoutLayoutAssignment() { execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( "layout-assignment"); + // Disable algebraic simplification because the pass may replace a dot + // instruction with a layout-changing multiplication instruction. + execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( + "algsimp"); } }; @@ -404,31 +408,18 @@ std::vector CreateNoLayoutAssignmentDotTestParameters() { for (bool lhs_row_major : {true, false}) { for (bool rhs_row_major : {true, false}) { for (bool has_addend : {true, false}) { + // The addend needs to be row major to match the result of the dot. params.push_back({/*m=*/1, /*k=*/k, /*n=*/n, /*dot_lhs_row_major=*/lhs_row_major, /*dot_rhs_row_major=*/rhs_row_major, /*has_addend=*/has_addend, /*addend_row_major=*/true}); - if (has_addend) { - params.push_back({/*m=*/1, /*k=*/k, /*n=*/n, - /*dot_lhs_row_major=*/lhs_row_major, - /*dot_rhs_row_major=*/rhs_row_major, - /*has_addend=*/has_addend, - /*addend_row_major=*/false}); - } if (n != 1) { params.push_back({/*m=*/n, /*k=*/k, /*n=*/1, /*dot_lhs_row_major=*/lhs_row_major, /*dot_rhs_row_major=*/rhs_row_major, /*has_addend=*/has_addend, /*addend_row_major=*/true}); - if (has_addend) { - params.push_back({/*m=*/n, /*k=*/k, /*n=*/1, - /*dot_lhs_row_major=*/lhs_row_major, - /*dot_rhs_row_major=*/rhs_row_major, - /*has_addend=*/has_addend, - /*addend_row_major=*/false}); - } } } } diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index 9c94acb437e9fc948a4255f7112e2e7a40cfa5fb..4d4b676a538947c8dd92a7e34db72e45766cae2c 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -764,8 +764,10 @@ XLA_TEST_F(FusionTest, Clamp2D) { TestElementwise2D(HloOpcode::kClamp); } -// TODO(b/73903144): Enable on interpreter once interpreter supports bitcast. -XLA_TEST_F(FusionTest, DISABLED_ON_INTERPRETER(FusionWithLayout)) { +// TODO(b/117156505): Remove this test when the bug is fixed and the CPU backend +// should not generate layout changing elementwise operations. +#ifdef XLA_TEST_BACKEND_CPU +XLA_TEST_F(FusionTest, LayoutChangingElementWiseOp) { const string hlo_text = R"( HloModule Cluster @@ -794,6 +796,7 @@ ENTRY main { LiteralUtil::CreateR3({{{0.}, {0.76159415595}}, {{0.}, {0.}}}), result)); } +#endif class FusionClientLibraryTest : public ClientLibraryTestBase {}; diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index bdd4fd7e3d0f585d81e94a3326e6d24bb5c42f39..7ab2ecda58666acd7e9b8587d200a902b75822f3 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -86,19 +86,25 @@ ProgramShape GetProgramShapeWithLayout(const HloModule& module) { } // namespace HloTestBase::HloTestBase(bool verifier_layout_sensitive, - bool allow_mixed_precision_in_hlo_verifier) + bool allow_mixed_precision_in_hlo_verifier, + std::function + instruction_can_change_layout_func) : HloTestBase(GetTestPlatform(), GetReferencePlatform(), verifier_layout_sensitive, - allow_mixed_precision_in_hlo_verifier) {} + allow_mixed_precision_in_hlo_verifier, + instruction_can_change_layout_func) {} HloTestBase::HloTestBase(se::Platform* test_platform, se::Platform* reference_platform, bool verifier_layout_sensitive, - bool allow_mixed_precision_in_hlo_verifier) + bool allow_mixed_precision_in_hlo_verifier, + std::function + instruction_can_change_layout_func) : test_runner_(test_platform), reference_runner_(reference_platform) { hlo_verifier_ = absl::make_unique( /*layout_sensitive=*/verifier_layout_sensitive, - /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier); + /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier, + instruction_can_change_layout_func); } std::unique_ptr HloTestBase::CreateNewModule(const string& name) { diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 0ae4bdc104d656946d45008adec9ea3960984545..217428befa474448cf2dcbae2eb6cb5b0e61d44c 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -88,14 +88,18 @@ class HloTestBase : public ::testing::Test { // interpreter is the only supported backend, it will be both the test backend // and the reference backend. HloTestBase(bool verifier_layout_sensitive = false, - bool allow_mixed_precision_in_hlo_verifier = true); + bool allow_mixed_precision_in_hlo_verifier = true, + std::function + instruction_can_change_layout_func = {}); // If your test doesn't use interpreter as the reference backend, you can use // this constructor. Note that your test target is responsible for linking in // both needed backends. HloTestBase(se::Platform* test_platform, se::Platform* reference_platform, bool verifier_layout_sensitive = false, - bool allow_mixed_precision_in_hlo_verifier = true); + bool allow_mixed_precision_in_hlo_verifier = true, + std::function + instruction_can_change_layout_func = {}); ~HloTestBase() override {} diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index c25ccafaf83cf1b29095a77eefa357d9af08dc60..22fe4a2670e2e0e1fedc45036a1ceec19f44e42e 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -638,6 +638,8 @@ class R4ReduceWindowTest : public ReduceWindowTestBase, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, /*padding=*/padding); CHECK(reducer == kAdd || reducer == kMax); @@ -1158,7 +1160,10 @@ class R2ReduceWindowTest : public ReduceWindowTestBase, /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } @@ -1369,7 +1374,10 @@ TEST_P(R1ReduceWindowTest, DoIt) { /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc index 181e5cbe290b0df0cf605cc4ef4b8a4945b3d367..bc433eac8fcb02087d8e4eb10f638c85dc141b22 100644 --- a/tensorflow/compiler/xla/tests/test_utils_test.cc +++ b/tensorflow/compiler/xla/tests/test_utils_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_utils.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -145,7 +146,7 @@ ENTRY %sort.148.1589 (parameter.0: f32[1048576], parameter.1: s32[1048576]) -> ( ASSERT_EQ(args.size(), 2); const Literal& key_arg = args[0]; - tensorflow::gtl::FlatSet key_set; + absl::flat_hash_set key_set; for (const float& value : key_arg.data()) { EXPECT_TRUE(key_set.insert(tensorflow::bit_cast(value)).second); } @@ -168,7 +169,7 @@ ENTRY %sort.148.1589 (parameter.0: s32[1048576], parameter.1: s32[1048576]) -> ( ASSERT_EQ(args.size(), 2); const Literal& key_arg = args[0]; - tensorflow::gtl::FlatSet key_set; + absl::flat_hash_set key_set; for (const int32& value : key_arg.data()) { EXPECT_TRUE(key_set.insert(tensorflow::bit_cast(value)).second); } diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc index db5a824de08edeb81b5deb047507dc6158833008..a6e70eb6ca25ffac24a8ebaf0420238e109e4fad 100644 --- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc +++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" @@ -32,7 +33,6 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -83,7 +83,7 @@ struct ParsedProfileOutputLine { Status ParseOneProfileOutputLine( const string& line, bool expect_hlo, - gtl::FlatMap* parsed_results, + absl::flat_hash_map* parsed_results, absl::Span opcodes_to_ignore = {}) { string separator = "[^:]*:: +"; string match_percentage = R"(\d+\.\d*% +\d+Σ)"; @@ -208,7 +208,7 @@ XLA_TEST_F(HloProfileTest, ProfileSingleComputation) { std::vector profile_output_lines = absl::StrSplit(profile_output, '\n'); - gtl::FlatMap parsed_profile_lines; + absl::flat_hash_map parsed_profile_lines; TF_ASSERT_OK(ParseOneProfileOutputLine( profile_output_lines[1], /*expect_hlo=*/false, &parsed_profile_lines)); @@ -314,7 +314,7 @@ XLA_TEST_F(HloProfileTest, ProfileWhileComputation) { ASSERT_NE(while_body_profile_end, profile_output_lines.end()); - gtl::FlatMap parsed_profile_lines; + absl::flat_hash_map parsed_profile_lines; for (auto while_body_profile_i = while_body_profile_start + 1; while_body_profile_i != while_body_profile_end; while_body_profile_i++) { diff --git a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc index fda4c31298ebc8c906418afdb8127492b1c5d3f0..40ec1b0ba9b336f5b6407c79c8d63e31219f9b84 100644 --- a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { REGISTER_OP("XRTExecute") - .Attr("Ninputs: int") + .Attr("Ninputs: int >= 0") .Input("computation_handle: int64") .Input("execution_config: string") .Input("input_handles: Ninputs * int64") diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 2952feb16a8a60aecf16be87c9b800d314c4af58..f590fbf0d9d85e6e8b041f6719ab6a14ec9e2191 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -108,6 +108,14 @@ bool CompareLiteralToLiteralProto(const xla::Literal& a, return equal; } +xla::XlaComputation OnePlusTwo() { + xla::XlaBuilder builder("OnePlusTwo"); + auto c0 = xla::ConstantR0(&builder, 1.0f); + auto c1 = xla::ConstantR0(&builder, 2.0f); + xla::Add(c0, c1); + return builder.Build().ValueOrDie(); +} + xla::XlaComputation AddAndScale() { xla::XlaBuilder builder("AddAndScale"); auto p0 = xla::Parameter(&builder, 0, @@ -346,6 +354,39 @@ TEST(RawApiTest, CompileAndExecute) { EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); } +TEST(RawApiTest, CompileAndExecuteZeroArg) { + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->mutable_result() = xla::ShapeUtil::MakeShape(xla::F32, {}); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + StoreComputationSnapshot(OnePlusTwo(), c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto e_config = + ops::Const(root.WithDevice("/device:CPU:0"), e.SerializeAsString()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto result = ops::XRTExecute(root, c_handle, e_config, + std::initializer_list({})); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + + auto expected = xla::LiteralUtil::CreateR0(3.0f); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); +} + TEST(RawApiTest, CompileAndExecuteReturnTuple) { xrt::XLAAllocation p0; p0.set_device_ordinal(0); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index ae5ca32bcf906a6333efe486e88c3d4cbceb0bc4..fa06d351d4e64bfc2fc5e64c81c810185600000a 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -29,6 +29,7 @@ py_library( "//tensorflow/contrib/cluster_resolver:cluster_resolver_py", "//tensorflow/contrib/coder:coder_py", "//tensorflow/contrib/compiler:compiler_py", + "//tensorflow/contrib/compiler:xla", "//tensorflow/contrib/autograph", "//tensorflow/contrib/constrained_optimization", "//tensorflow/contrib/copy_graph:copy_graph_py", @@ -112,29 +113,22 @@ py_library( "//tensorflow/python:util", "//tensorflow/python/estimator:estimator_py", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ - "//tensorflow/contrib/kafka", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ - "//tensorflow/contrib/kinesis", - ], - "//conditions:default": [], - }) + if_not_windows_cuda([ - "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols - ]) + if_not_windows([ - ]) + select({ "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], "//conditions:default": [ "//tensorflow/contrib/bigtable", "//tensorflow/contrib/cloud:cloud_py", + "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols + "//tensorflow/contrib/kafka", + "//tensorflow/contrib/kinesis", "//tensorflow/contrib/tensorrt:init_py", "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", ], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite", + ], + "//conditions:default": [], }), ) @@ -144,7 +138,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_kernels", "//tensorflow/contrib/coder:all_kernels", - "//tensorflow/contrib/data/kernels:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/hadoop:dataset_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", @@ -159,20 +152,14 @@ cc_library( ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([ "//tensorflow/contrib/nccl:nccl_kernels", ]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_kernels", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ "//tensorflow/contrib/kinesis:dataset_kernels", + "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", ], - "//conditions:default": [], - }) + if_not_windows([ - "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", - ]), + }), ) cc_library( @@ -181,8 +168,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_ops_op_lib", "//tensorflow/contrib/coder:all_ops", - "//tensorflow/contrib/data:dataset_ops_op_lib", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/hadoop:dataset_ops_op_lib", @@ -198,18 +183,17 @@ cc_library( "//tensorflow/contrib/text:all_ops", "//tensorflow/contrib/tpu:all_ops", ] + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_ops_op_lib", + "//tensorflow/contrib/kinesis:dataset_ops_op_lib", + "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", ], - "//conditions:default": [], }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ - "//tensorflow/contrib/kinesis:dataset_ops_op_lib", + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite:dataset_ops_op_lib", ], "//conditions:default": [], - }) + if_not_windows([ - "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", - ]), + }), ) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index e71b0e0ae33f9c2dd48643e557447372bc67b3e3..f52a1a7babceeae93cdd2e5a93dad413a1d30191 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -21,14 +21,6 @@ from __future__ import print_function import os -from tensorflow.python.tools import component_api_helper -component_api_helper.package_hook( - parent_package_str=( - "tensorflow.contrib"), - child_package_str=( - "tensorflow_estimator.contrib.estimator")) -del component_api_helper - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import autograph from tensorflow.contrib import batching diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index b27a19b16c08cb588b45949105a6399623e766e1..648f3ebb05646a66144bcb118347cbc391909409 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -7,64 +7,6 @@ package( licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") - -cc_library( - name = "batch_scheduler_hdrs", - hdrs = ["batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs", - ], -) - -cc_library( - name = "batch_scheduler", - hdrs = ["batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:batch_scheduler", - ], -) - -cc_library( - name = "shared_batch_scheduler_hdrs", - hdrs = ["shared_batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs", - ], -) - -cc_library( - name = "shared_batch_scheduler", - hdrs = ["shared_batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:shared_batch_scheduler", - ], - alwayslink = 1, -) - -cc_library( - name = "adaptive_shared_batch_scheduler", - hdrs = ["adaptive_shared_batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:adaptive_shared_batch_scheduler", - ], -) - -cc_library( - name = "serial_device_batch_scheduler", - hdrs = ["serial_device_batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:serial_device_batch_scheduler", - ], -) - -cc_library( - name = "basic_batch_scheduler", - hdrs = ["basic_batch_scheduler.h"], - deps = [ - "//tensorflow/core/kernels/batching_util:basic_batch_scheduler", - ], -) - load( "//tensorflow:tensorflow.bzl", "py_test", diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h deleted file mode 100644 index d9b37da6933aa0847c229607f43d1d5d121a928c..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/basic_batch_scheduler.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_ -#define TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_ - -#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h" - -#endif // TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_ diff --git a/tensorflow/contrib/batching/serial_device_batch_scheduler.h b/tensorflow/contrib/batching/serial_device_batch_scheduler.h deleted file mode 100644 index bf6b7083612018eecf0d1784e60cbbf0c5796fef..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/serial_device_batch_scheduler.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ -#define TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ - -#include "tensorflow/core/kernels/batching_util/serial_device_batch_scheduler.h" - -#endif // TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_ diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h deleted file mode 100644 index 83a59695d7db7e0a24fb437a3ea71a4d9e23c93f..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/shared_batch_scheduler.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_ -#define TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_ - -#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h" - -#endif // TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_ diff --git a/tensorflow/contrib/batching/test_util/BUILD b/tensorflow/contrib/batching/test_util/BUILD deleted file mode 100644 index 7cb2d8079bd18660f72eab92654629434ce4d6a5..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/test_util/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -# Description: Utilities to aid testing. - -package( - default_visibility = ["//tensorflow:internal"], -) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -cc_library( - name = "fake_clock_env", - testonly = 1, - hdrs = ["fake_clock_env.h"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core/kernels/batching_util:fake_clock_env", - ], -) diff --git a/tensorflow/contrib/batching/test_util/fake_clock_env.h b/tensorflow/contrib/batching/test_util/fake_clock_env.h deleted file mode 100644 index 40a39a5569854350c72a47102f3dac07b362ce8e..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/test_util/fake_clock_env.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_ -#define TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_ - -#include "tensorflow/core/kernels/batching_util/fake_clock_env.h" - -#endif // TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_ diff --git a/tensorflow/contrib/batching/util/BUILD b/tensorflow/contrib/batching/util/BUILD deleted file mode 100644 index 8f81b6702f2807d7da7e72190ce2d86b28e52113..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/util/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -# Description: Utilities. - -package( - default_visibility = ["//tensorflow:internal"], -) - -licenses(["notice"]) # Apache 2.0 - -load("//tensorflow:tensorflow.bzl", "tf_cc_test") - -cc_library( - name = "periodic_function_dynamic", - hdrs = ["periodic_function.h"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core/kernels/batching_util:periodic_function_dynamic", - "//third_party/eigen3", - ], -) - -cc_library( - name = "periodic_function", - visibility = ["//visibility:public"], - deps = [ - ":periodic_function_dynamic", - "//tensorflow/core/kernels/batching_util:periodic_function", - ], -) diff --git a/tensorflow/contrib/batching/util/periodic_function.h b/tensorflow/contrib/batching/util/periodic_function.h deleted file mode 100644 index aa2ed0a385125fa090a7a56b6339a87eb2d57b1f..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/batching/util/periodic_function.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_ -#define TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_ - -#include "tensorflow/core/kernels/batching_util/periodic_function.h" - -#endif // TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_ diff --git a/tensorflow/contrib/bigtable/README.md b/tensorflow/contrib/bigtable/README.md index f33eaf7e3df356e10939f591ef75cb4f17978144..2c44abed5e1955cc666273e97e6b2378766f13d2 100644 --- a/tensorflow/contrib/bigtable/README.md +++ b/tensorflow/contrib/bigtable/README.md @@ -203,7 +203,7 @@ def interleave_fn(index): start = tf.string_join(['training_data_', start_idx_str]) end = tf.string_join(['training_data_', end_idx_str]) return table.scan_range(start_idx, end_idx, columns=columns) -ds = ds.apply(tf.contrib.data.parallel_interleave( +ds = ds.apply(tf.data.experimental.parallel_interleave( interleave_fn, cycle_length=NUM_PARALLEL_READS, prefetch_input_elements=1)) ``` @@ -249,7 +249,7 @@ def make_row_key_dataset(): - ... - fake-data-23498103 """ - counter_dataset = tf.contrib.data.Counter() + counter_dataset = tf.data.experimental.Counter() width = 8 row_key_prefix = 'fake-data-' ds = counter_dataset.map(lambda index: tf.as_string(index, diff --git a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py index cf56822ff401ac81c4bb7bd65418c81fe7b398d8..7c87b0daeb09950cc44c51f49c16534d413f0376 100644 --- a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py +++ b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py @@ -31,8 +31,8 @@ from six import iteritems from six import string_types from tensorflow.contrib.bigtable.ops import gen_bigtable_ops -from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.util import loader +from tensorflow.python.data.experimental.ops import interleave_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes @@ -228,7 +228,7 @@ class BigtableTable(object): """Retrieves a sampling of row keys from the Bigtable table. This dataset is most often used in conjunction with - `tf.contrib.data.parallel_interleave` to construct a set of ranges for + `tf.data.experimental.parallel_interleave` to construct a set of ranges for scanning in parallel. Returns: diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index c7eb2493a8ba56943740326cf68ad6b3a91f67c4..8531e97f90236b8e5eb64bc0f4c9bb3b674f35cd 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -402,13 +402,13 @@ class GradientBoostedDecisionTreeModel(object): self._feature_columns = feature_columns self._learner_config_serialized = learner_config.SerializeToString() self._num_quantiles = num_quantiles - self._max_tree_depth = variables.Variable( + self._max_tree_depth = variables.VariableV1( initial_value=self._learner_config.constraints.max_tree_depth) - self._attempted_trees = variables.Variable( + self._attempted_trees = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), trainable=False, name="attempted_trees") - self._finalized_trees = variables.Variable( + self._finalized_trees = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), trainable=False, name="finalized_trees") @@ -770,28 +770,28 @@ class GradientBoostedDecisionTreeModel(object): fc_name_idx += 1 # Create ensemble stats variables. - num_layer_examples = variables.Variable( + num_layer_examples = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), name="num_layer_examples", trainable=False) - num_layer_steps = variables.Variable( + num_layer_steps = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), name="num_layer_steps", trainable=False) - num_layers = variables.Variable( + num_layers = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), name="num_layers", trainable=False) - active_tree = variables.Variable( + active_tree = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), name="active_tree", trainable=False) - active_layer = variables.Variable( + active_layer = variables.VariableV1( initial_value=array_ops.zeros([], dtypes.int64), name="active_layer", trainable=False) # Variable that becomes false once bias centering is done. - continue_centering = variables.Variable( + continue_centering = variables.VariableV1( initial_value=self._center_bias, name="continue_centering", trainable=False) diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 9d9941f696af25a75532383f8d0b3f461f2107bc..6d20a2e7f482953481fb1effe4c6e2e5a300786f 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -239,7 +239,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -503,7 +503,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -607,7 +607,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -711,7 +711,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -783,7 +783,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -847,7 +847,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1090,7 +1090,7 @@ class GbdtTest(test_util.TensorFlowTestCase): weights = array_ops.ones([batch_size, 1], dtypes.float32) partition_ids = array_ops.zeros([batch_size], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1194,7 +1194,7 @@ class GbdtTest(test_util.TensorFlowTestCase): weights = array_ops.ones([batch_size, 1], dtypes.float32) partition_ids = array_ops.zeros([batch_size], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1299,7 +1299,7 @@ class GbdtTest(test_util.TensorFlowTestCase): weights = array_ops.ones([batch_size, 1], dtypes.float32) partition_ids = array_ops.zeros([batch_size], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1405,7 +1405,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1524,7 +1524,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, @@ -1656,7 +1656,7 @@ class GbdtTest(test_util.TensorFlowTestCase): predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) partition_ids = array_ops.zeros([4], dtypes.int32) - ensemble_stamp = variables.Variable( + ensemble_stamp = variables.VariableV1( initial_value=0, name="ensemble_stamp", trainable=False, diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 1056894f18f1ec19a598dfbd1161d7f9bea7e94f..f4a8e16c99f464b813a98e981579bd0ff53bd464 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -60,6 +60,7 @@ class TPUClusterResolver(ClusterResolver): if (self._tpu == compat.as_bytes('') or self._tpu == compat.as_bytes('local') or self._tpu.startswith(compat.as_bytes('/bns')) or + self._tpu.startswith(compat.as_bytes('localhost:')) or self._tpu.startswith(compat.as_bytes('grpc://'))): return False return True diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index c6d6f04168b0c95662123f5feceb5ebb0474ffe9..244683765a75626acd932ef8a10d8e5b6639ebb0 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,6 +1,16 @@ # Minimum CMake required cmake_minimum_required(VERSION 3.5) +if(WIN32) + if(${CMAKE_VERSION} VERSION_LESS "3.8") + message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. This may cause \"compiler out of heap space\" errors when building. Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.") + else() + if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64") + message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. Consider using the flag -Thost=x64 when running cmake.") + endif() + endif() +endif() + # Project project(tensorflow C CXX) @@ -30,7 +40,6 @@ endif() option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON) option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF) -option(tensorflow_ENABLE_JEMALLOC_SUPPORT "Enable jemalloc support" OFF) option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON) option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON) option(tensorflow_BUILD_ALL_KERNELS "Build all OpKernels" ON) @@ -218,10 +227,6 @@ if (tensorflow_WIN_CPU_SIMD_OPTIONS) endif() endif() -if (tensorflow_ENABLE_JEMALLOC_SUPPORT) - add_definitions(-DTENSORFLOW_USE_JEMALLOC -DJEMALLOC_EXPORT=) -endif() - # External dependencies include(zlib) include(gif) @@ -329,12 +334,6 @@ if(tensorflow_ENABLE_GRPC_SUPPORT) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl) endif() endif() -if(tensorflow_ENABLE_JEMALLOC_SUPPORT) - include(jemalloc) - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${jemalloc_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES jemalloc) - include_directories(${jemalloc_INCLUDE_DIRS}) -endif() if(tensorflow_ENABLE_SNAPPY_SUPPORT) include(snappy) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES}) @@ -363,9 +362,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination) include_directories(${mkldnn_INCLUDE_DIRS}) - else (tensorflow_ENABLE_MKLDNN_SUPPORT) - add_definitions(-DINTEL_MKL_ML_ONLY) - endif() + endif(tensorflow_ENABLE_MKLDNN_SUPPORT) endif (tensorflow_ENABLE_MKL_SUPPORT) if (tensorflow_ENABLE_GPU) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 77242b34fd8302cb9104c50a83d4141607911e7f..84c679162c3ed8ffc9babcd3af583b26fb62c2d6 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -108,180 +108,177 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the prerequisites detailed above, and set up your environment. - - * The following commands assume that you are using the Windows Command - Prompt (`cmd.exe`). You will need to set up your environment to use the - appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets - we will build are too large for the 32-bit tools, and they will fail with - out-of-memory errors.) The typical command to do set up your - environment is: - - ``` - D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" - ``` - - * When building with GPU support after installing the CUDNN zip file from NVidia, append its - bin directory to your PATH environment variable. - In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable. - It should contain the directory of the CUDA dlls and the directory of the CUDNN dll. - For example: - - ``` - D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin - D:\local\cuda\bin - ``` - - * When building with MKL support after installing [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin directories to your PATH environment variable. - - In case TensorFlow fails to find the MKL dll's during initialization, check your PATH environment variable. - It should contain the directory of the MKL dlls. For example: - - ``` - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt - ``` - - - * We assume that `cmake` and `git` are installed and in your `%PATH%`. If - for example `cmake` is not in your path and it is installed in - `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory - to your `%PATH%` as follows: - - ``` - D:\temp> set PATH="%PATH%;C:\Program Files (x86)\CMake\bin\cmake.exe" - ``` - -2. Clone the TensorFlow repository and create a working directory for your - build: - - ``` - D:\temp> git clone https://github.com/tensorflow/tensorflow.git - D:\temp> cd tensorflow\tensorflow\contrib\cmake - D:\temp\tensorflow\tensorflow\contrib\cmake> mkdir build - D:\temp\tensorflow\tensorflow\contrib\cmake> cd build - D:\temp\tensorflow\tensorflow\contrib\cmake\build> - ``` - -3. Invoke CMake to create Visual Studio solution and project files. - - **N.B.** This assumes that `cmake.exe` is in your `%PATH%` environment - variable. The other paths are for illustrative purposes only, and may - be different on your platform. The `^` character is a line continuation - and must be the last character on each line. - - ``` - D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^ - More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ - More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ - More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib - ``` - To build with GPU support add "^" at the end of the last line above following with: - ``` - More? -Dtensorflow_ENABLE_GPU=ON ^ - More? -DCUDNN_HOME="D:\...\cudnn" - ``` - To build with MKL support add "^" at the end of the last line above following with: - - ``` - More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ - More? -DMKL_HOME="D:\...\compilers_and_libraries" - ``` - - To enable SIMD instructions with MSVC, as AVX and SSE, define it as follows: - - ``` - More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX - ``` - - Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build - configuration that you choose when invoking `msbuild`. The known-good - values are `Release` and `RelWithDebInfo`. The `Debug` build type is - not currently supported, because it relies on a `Debug` library for - Python (`python35d.lib`) that is not distributed by default. - - There are various options that can be specified when generating the - solution and project files: - - * `-DCMAKE_BUILD_TYPE=(Release|RelWithDebInfo)`: Note that the - `CMAKE_BUILD_TYPE` option must match the build configuration that you - choose when invoking MSBuild in step 4. The known-good values are - `Release` and `RelWithDebInfo`. The `Debug` build type is not currently - supported, because it relies on a `Debug` library for Python - (`python35d.lib`) that is not distributed by default. - - * `-Dtensorflow_BUILD_ALL_KERNELS=(ON|OFF)`. Defaults to `ON`. You can - build a small subset of the kernels for a faster build by setting this - option to `OFF`. - - * `-Dtensorflow_BUILD_CC_EXAMPLE=(ON|OFF)`. Defaults to `ON`. Generate - project files for a simple C++ - [example training program](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/tutorials/example_trainer.cc). - - * `-Dtensorflow_BUILD_PYTHON_BINDINGS=(ON|OFF)`. Defaults to `ON`. Generate - project files for building a PIP package containing the TensorFlow runtime - and its Python bindings. - - * `-Dtensorflow_ENABLE_GRPC_SUPPORT=(ON|OFF)`. Defaults to `ON`. Include - gRPC support and the distributed client and server code in the TensorFlow - runtime. - - * `-Dtensorflow_ENABLE_SSL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include - SSL support (for making secure HTTP requests) in the TensorFlow runtime. - This support is incomplete, and will be used for Google Cloud Storage - support. - - * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include - GPU support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and CUDNN 5.1. - CMake will expect the location of CUDNN in -DCUDNN_HOME=path_you_unzipped_cudnn. - - * `-Dtensorflow_BUILD_CC_TESTS=(ON|OFF)`. Defaults to `OFF`. This builds cc unit tests. - There are many of them and building will take a few hours. - After cmake, build and execute the tests with - ``` - MSBuild /p:Configuration=RelWithDebInfo ALL_BUILD.vcxproj - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_BUILD_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python kernel tests. - After building the python wheel, you need to install the new wheel before running the tests. - To execute the tests, use - ``` - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python tests on - serveral major packages. This option is only valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. - After building the python wheel, you need to install the new wheel before running the tests. - To execute the tests, use - ``` - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL support. If MKL is enabled you need to install the [Intel Math Kernal Library](https://software.intel.com/en-us/mkl). - CMake will expect the location of MKL in -MKL_HOME=path_you_install_mkl. - - * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](https://github.com/intel/mkl-dnn). You have to add `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. - - -4. Invoke MSBuild to build TensorFlow. - - To build the C++ example program, which will be created as a `.exe` - executable in the subdirectory `.\Release`: - - ``` - D:\...\build> MSBuild /p:Configuration=Release tf_tutorials_example_trainer.vcxproj - D:\...\build> Release\tf_tutorials_example_trainer.exe - ``` - - To build the PIP package, which will be created as a `.whl` file in the - subdirectory `.\tf_python\dist`: - - ``` - D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj - ``` - +1. Install the prerequisites detailed above, and set up your environment. + + * When building with GPU support after installing the CUDNN zip file from + NVidia, append its bin directory to your PATH environment variable. In + case TensorFlow fails to find the CUDA dll's during initialization, + check your PATH environment variable. It should contain the directory of + the CUDA dlls and the directory of the CUDNN dll. For example: + + ``` + D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin + D:\local\cuda\bin + ``` + + * When building with MKL support after installing + [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin + directories to your PATH environment variable. + + In case TensorFlow fails to find the MKL dll's during initialization, + check your PATH environment variable. It should contain the directory of + the MKL dlls. For example: + + ``` + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt + ``` + + * We assume that `cmake` and `git` are installed and in your `%PATH%`. If + for example `cmake` is not in your path and it is installed in + `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory + to your `%PATH%` as follows: + + ``` + D:\temp> set PATH="%PATH%;C:\Program Files (x86)\CMake\bin\cmake.exe" + ``` + +2. Clone the TensorFlow repository and create a working directory for your + build: + + ``` + D:\temp> git clone https://github.com/tensorflow/tensorflow.git + D:\temp> cd tensorflow\tensorflow\contrib\cmake + D:\temp\tensorflow\tensorflow\contrib\cmake> mkdir build + D:\temp\tensorflow\tensorflow\contrib\cmake> cd build + D:\temp\tensorflow\tensorflow\contrib\cmake\build> + ``` + +3. Invoke CMake to create Visual Studio solution and project files. + + **N.B.** This assumes that `cmake.exe` is in your `%PATH%` environment + variable. The other paths are for illustrative purposes only, and may be + different on your platform. The `^` character is a line continuation and + must be the last character on each line. + + ``` + D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^ + More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ + More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ + More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib + ``` + + To build with GPU support add "^" at the end of the last line above + following with: `More? -Dtensorflow_ENABLE_GPU=ON ^ More? + -DCUDNN_HOME="D:\...\cudnn"` To build with MKL support add "^" at the end of + the last line above following with: + + ``` + More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ + More? -DMKL_HOME="D:\...\compilers_and_libraries" + ``` + + To enable SIMD instructions with MSVC, as AVX and SSE, define it as follows: + + ``` + More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX + ``` + + Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build + configuration that you choose when invoking `msbuild`. The known-good values + are `Release` and `RelWithDebInfo`. The `Debug` build type is not currently + supported, because it relies on a `Debug` library for Python + (`python35d.lib`) that is not distributed by default. + + The `-Thost=x64` flag will ensure that the 64 bit compiler and linker is + used when building. Without this flag, MSBuild will use the 32 bit toolchain + which is prone to compile errors such as "compiler out of heap space". + + There are various options that can be specified when generating the solution + and project files: + + * `-DCMAKE_BUILD_TYPE=(Release|RelWithDebInfo)`: Note that the + `CMAKE_BUILD_TYPE` option must match the build configuration that you + choose when invoking MSBuild in step 4. The known-good values are + `Release` and `RelWithDebInfo`. The `Debug` build type is not currently + supported, because it relies on a `Debug` library for Python + (`python35d.lib`) that is not distributed by default. + + * `-Dtensorflow_BUILD_ALL_KERNELS=(ON|OFF)`. Defaults to `ON`. You can + build a small subset of the kernels for a faster build by setting this + option to `OFF`. + + * `-Dtensorflow_BUILD_CC_EXAMPLE=(ON|OFF)`. Defaults to `ON`. Generate + project files for a simple C++ + [example training program](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/tutorials/example_trainer.cc). + + * `-Dtensorflow_BUILD_PYTHON_BINDINGS=(ON|OFF)`. Defaults to `ON`. + Generate project files for building a PIP package containing the + TensorFlow runtime and its Python bindings. + + * `-Dtensorflow_ENABLE_GRPC_SUPPORT=(ON|OFF)`. Defaults to `ON`. Include + gRPC support and the distributed client and server code in the + TensorFlow runtime. + + * `-Dtensorflow_ENABLE_SSL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include + SSL support (for making secure HTTP requests) in the TensorFlow runtime. + This support is incomplete, and will be used for Google Cloud Storage + support. + + * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include GPU + support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and + CUDNN 5.1. CMake will expect the location of CUDNN in + -DCUDNN_HOME=path_you_unzipped_cudnn. + + * `-Dtensorflow_BUILD_CC_TESTS=(ON|OFF)`. Defaults to `OFF`. This builds + cc unit tests. There are many of them and building will take a few + hours. After cmake, build and execute the tests with `MSBuild + /p:Configuration=RelWithDebInfo ALL_BUILD.vcxproj ctest -C + RelWithDebInfo` + + * `-Dtensorflow_BUILD_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This + enables python kernel tests. After building the python wheel, you need + to install the new wheel before running the tests. To execute the tests, + use `ctest -C RelWithDebInfo` + + * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This + enables python tests on serveral major packages. This option is only + valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. + After building the python wheel, you need to install the new wheel + before running the tests. To execute the tests, use `ctest -C + RelWithDebInfo` + + * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include + MKL support. If MKL is enabled you need to install the + [Intel Math Kernal Library](https://software.intel.com/en-us/mkl). CMake + will expect the location of MKL in -MKL_HOME=path_you_install_mkl. + + * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. + Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for + Deep Neural Networks (Intel(R) + MKL-DNN)](https://github.com/intel/mkl-dnn). You have to add + `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. + +4. Invoke MSBuild to build TensorFlow. + + Set up the path to find MSbuild: `D:\temp> "C:\Program Files (x86)\Microsoft + Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"` + + To build the C++ example program, which will be created as a `.exe` + executable in the subdirectory `.\Release`: + + ``` + D:\...\build> MSBuild /p:Configuration=Release tf_tutorials_example_trainer.vcxproj + D:\...\build> Release\tf_tutorials_example_trainer.exe + ``` + + To build the PIP package, which will be created as a `.whl` file in the + subdirectory `.\tf_python\dist`: + + ``` + D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj + ``` Linux Continuous Integration build ================================== diff --git a/tensorflow/contrib/cmake/external/jemalloc.cmake b/tensorflow/contrib/cmake/external/jemalloc.cmake deleted file mode 100644 index afadcc007d66414be3306e91e7186a00b6e587ce..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/cmake/external/jemalloc.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -include (ExternalProject) - -set(jemalloc_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include) -set(jemalloc_URL https://mirror.bazel.build/github.com/jemalloc/jemalloc-cmake/archive/jemalloc-cmake.4.3.1.tar.gz) -set(jemalloc_HASH SHA256=f9be9a05fe906deb5c1c8ca818071a7d2e27d66fd87f5ba9a7bf3750bcedeaf0) -set(jemalloc_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc) - -if (WIN32) - set(jemalloc_INCLUDE_DIRS - ${jemalloc_INCLUDE_DIRS} - ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include/msvc_compat - ) - if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") - set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.lib) - else() - set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/jemalloc.lib) - endif() -else() - set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.a) -endif() - -ExternalProject_Add(jemalloc - PREFIX jemalloc - URL ${jemalloc_URL} - URL_HASH ${jemalloc_HASH} - DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" - BUILD_IN_SOURCE 1 - BUILD_BYPRODUCTS ${jemalloc_STATIC_LIBRARIES} - BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target jemalloc - INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Skipping install step." - CMAKE_CACHE_ARGS - -DCMAKE_BUILD_TYPE:STRING=Release - -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -Dwith-jemalloc-prefix:STRING=jemalloc_ - -Dwithout-export:BOOL=ON -) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index f56fb35a0f71250f00b84e5cf94a24682bda6c82..56a57a2340ddc7f923c611c222a0399e279ad58a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG v3.6.0) +set(PROTOBUF_TAG v3.6.1) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/make.bat b/tensorflow/contrib/cmake/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..d52b24e01d6590180106ba6cee2782c99d734ee3 --- /dev/null +++ b/tensorflow/contrib/cmake/make.bat @@ -0,0 +1,38 @@ +%echo off + +cd /d %~dp0 + +if exist _build rd /s /q _build + +mkdir _build +chdir _build + + +rem cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +CALL :NORMALIZEPATH "..\..\..\.." +SET SOURCE_DIR=%RETVAL% + +echo %SOURCE_DIR% + +SET SOURCE_DIR=F:\frameworks\tensorflow\ + +CALL :NORMALIZEPATH "../../../tools/git/gen_git_source.py" +SET SOURCE_PYTHON_SCRIPT=%RETVAL% + +CALL :NORMALIZEPATH "../../../core/util/version_info.cc" +SET SOURCE_VERSION_CC=%RETVAL% + +python %SOURCE_PYTHON_SCRIPT% --raw_generate %SOURCE_VERSION_CC% --source_dir %SOURCE_DIR% --git_tag_override= + +cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +EXIT /B + +:NORMALIZEPATH + SET RETVAL=%~dpfn1 + EXIT /B + + + + \ No newline at end of file diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index c0763f4c0ed71687360d981dfa3c170065ecc39f..6e72670142d560a364350bb4769f1153f884b0f6 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -132,10 +132,8 @@ tensorflow/contrib/cudnn_rnn/python tensorflow/contrib/cudnn_rnn/python/layers tensorflow/contrib/cudnn_rnn/python/ops tensorflow/contrib/data -tensorflow/contrib/data/kernels tensorflow/contrib/data/python tensorflow/contrib/data/python/kernel_tests -tensorflow/contrib/data/python/kernel_tests/serialization tensorflow/contrib/data/python/ops tensorflow/contrib/decision_trees tensorflow/contrib/decision_trees/proto @@ -207,6 +205,8 @@ tensorflow/contrib/integrate/python tensorflow/contrib/integrate/python/ops tensorflow/contrib/kafka/python tensorflow/contrib/kafka/python/ops +tensorflow/contrib/ignite/python +tensorflow/contrib/ignite/python/ops tensorflow/contrib/keras tensorflow/contrib/keras/api tensorflow/contrib/keras/api/keras diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 067c299a71cd4ac96878bcf27b4453466785e4ba..7e806685b8448cbd629985cdc00ed1193857abe6 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -258,14 +258,21 @@ add_dependencies(tf_core_lib ${tensorflow_EXTERNAL_DEPENDENCIES} tf_protos_cc) # force_rebuild always runs forcing ${VERSION_INFO_CC} target to run # ${VERSION_INFO_CC} would cache, but it depends on a phony never produced # target. -set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) -add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) -add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) -add_custom_command(OUTPUT - ${VERSION_INFO_CC} - COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} - DEPENDS __force_rebuild) +# This code forces rebuild every time, not needed as version from git is fetched only once +# move to make.bat which mimicks make.sh + +if (NOT WIN32) + + set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) + add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) + add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) + add_custom_command(OUTPUT + ${VERSION_INFO_CC} + COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} + DEPENDS __force_rebuild) +endif() + set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) ######################################################## diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py index 1e305251594c17698aad9c5ffe39e68beb8d2a97..873b03580d6f1d9cb25c79cb31989d43cdb8c9a7 100644 --- a/tensorflow/contrib/compiler/xla.py +++ b/tensorflow/contrib/compiler/xla.py @@ -293,7 +293,8 @@ def _compile_internal(computation, inputs=None): saved_use_resource = vscope.use_resource vscope.set_use_resource(True) - outputs = computation(*computation_inputs) + with _disable_summary_context(): + outputs = computation(*computation_inputs) # Restore variable scope after computation. vscope.set_use_resource(saved_use_resource) @@ -371,13 +372,13 @@ def _disable_summary_context(): Yields: None. """ - origional_skip_summary_func = summary_op_util.skip_summary + original_skip_summary_func = summary_op_util.skip_summary summary_op_util.skip_summary = lambda: True try: yield finally: - summary_op_util.skip_summary = origional_skip_summary_func + summary_op_util.skip_summary = original_skip_summary_func class _CapturedObject(object): @@ -436,8 +437,7 @@ class _ModelFnWrapper(object): if mode == model_fn_lib.ModeKeys.TRAIN: train_step, captured_scaffold_fn = self._make_train_step( features, labels, params) - with _disable_summary_context(): - (loss,) = compile(train_step) + (loss,) = compile(train_step) return model_fn_lib.EstimatorSpec( mode=mode, loss=loss, @@ -446,8 +446,7 @@ class _ModelFnWrapper(object): elif mode == model_fn_lib.ModeKeys.EVAL: eval_step, captured_eval_metric_fn, captured_scaffold_fn = ( self._make_eval_step(features, labels, params)) - with _disable_summary_context(): - outputs = compile(eval_step) + outputs = compile(eval_step) loss = outputs[0] # Calculate eval_metric_ops if eval_metric_fn is set and captured. diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py index d1af15f7e423c5135071ea73f6b7a0709d140600..67f8ac2b9322f39b02c521f8b9cde3831c7889b8 100644 --- a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py +++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py @@ -102,9 +102,9 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius): 0.0, (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum( 1.0, standard_ops.reduce_sum(inactive))) - multipliers += scale * inactive + multipliers = multipliers + (scale * inactive) new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype) - multipliers *= new_inactive + multipliers = multipliers * new_inactive return (iteration, multipliers, new_inactive, inactive) iteration = standard_ops.constant(0) diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py index 2c673d9347141b3a12eb9ec76065d22f1769ac12..a6cb1f62f059770c90bd1aeea391d841aed9aacf 100644 --- a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py +++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py @@ -175,9 +175,9 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix): scale = (1.0 - standard_ops.reduce_sum( matrix, axis=0, keepdims=True)) / standard_ops.maximum( 1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True)) - matrix += scale * inactive + matrix = matrix + (scale * inactive) new_inactive = standard_ops.cast(matrix > 0, matrix.dtype) - matrix *= new_inactive + matrix = matrix * new_inactive return (iteration, matrix, new_inactive, inactive) iteration = standard_ops.constant(0) @@ -210,8 +210,9 @@ def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix): # For numerical reasons, make sure that the largest matrix element is zero # before exponentiating. - log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True) - log_matrix -= standard_ops.log( + log_matrix = log_matrix - standard_ops.reduce_max( + log_matrix, axis=0, keepdims=True) + log_matrix = log_matrix - standard_ops.log( standard_ops.reduce_sum( standard_ops.exp(log_matrix), axis=0, keepdims=True)) return log_matrix diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py index 6c9ab6aeb87fd39b22ab4f28d69b432b15899a13..9c5871da343ab1a5f11aeb674a20ab83f2eb1fbf 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py @@ -31,7 +31,7 @@ from __future__ import division from __future__ import print_function from copy import deepcopy -from tensorflow.python.ops.variables import Variable +from tensorflow.python.ops.variables import VariableV1 from tensorflow.python.client.session import Session from tensorflow.python.framework import ops @@ -55,7 +55,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''): TypeError: If `org_instance` is not a `Variable`. """ - if not isinstance(org_instance, Variable): + if not isinstance(org_instance, VariableV1): raise TypeError(str(org_instance) + ' is not a Variable') #The name of the new variable @@ -88,7 +88,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''): #Initialize the new variable with to_graph.as_default(): - new_var = Variable( + new_var = VariableV1( init_value, trainable, name=new_name, diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py index 05744bec4e05405c04b5ec442e72e4495737ab5b..ba97c7845635596c3f4f849044b6707ec43f5bbf 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_test.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py @@ -36,7 +36,7 @@ class CopyVariablesTest(test.TestCase): with graph1.as_default(): #Define a Variable in graph1 - some_var = variables.Variable(2) + some_var = variables.VariableV1(2) #Initialize session sess1 = session_lib.Session() #Initialize the Variable @@ -72,7 +72,7 @@ class CopyOpsTest(test.TestCase): with graph1.as_default(): #Initialize a basic expression y = ax + b x = array_ops.placeholder("float") - a = variables.Variable(3.0) + a = variables.VariableV1(3.0) b = constant_op.constant(4.0) ax = math_ops.multiply(x, a) y = math_ops.add(ax, b) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 2a91dcb63a80016e62d10d1310ca57e3e54434c5..43bb43129bfe1cb1c66f4965476f9b7f849658ad 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -56,7 +56,6 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn @@ -214,10 +213,11 @@ def crf_log_norm(inputs, sequence_lengths, transition_params): log_norm) return log_norm - max_seq_len = array_ops.shape(inputs)[1] - return control_flow_ops.cond(pred=math_ops.equal(max_seq_len, 1), - true_fn=_single_seq_fn, - false_fn=_multi_seq_fn) + return utils.smart_cond( + pred=math_ops.equal(inputs.shape[1].value or + array_ops.shape(inputs)[1], 1), + true_fn=_single_seq_fn, + false_fn=_multi_seq_fn) def crf_log_likelihood(inputs, diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 9f710613dd0d549d4f93bae8780427f7878234a6..38f1c65a4d5c33ab2558fa9277b512ab86e98959 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -4,17 +4,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load( - "//tensorflow:tensorflow.bzl", - "tf_custom_op_library", - "tf_gen_op_libs", - "if_not_windows", -) -load( - "//tensorflow/core:platform/default/build_config_root.bzl", - "if_static", -) - py_library( name = "data", srcs = ["__init__.py"], @@ -25,30 +14,3 @@ py_library( "//tensorflow/python:util", ], ) - -cc_library( - name = "lib_proto_parsing_for_dataset_ops", - deps = if_not_windows(["//tensorflow/core:lib_proto_parsing"]), -) - -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = [ - "ops/dataset_ops.cc", - "ops/indexed_dataset_ops.cc", - ], - deps = [ - "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/data/kernels:indexed_dataset", - ] + if_static( - extra_deps = [":lib_proto_parsing_for_dataset_ops"], - otherwise = [], - ), -) - -tf_gen_op_libs( - op_lib_names = [ - "dataset_ops", - "indexed_dataset_ops", - ], -) diff --git a/tensorflow/contrib/data/README.md b/tensorflow/contrib/data/README.md index 848782e8d89b8670caf3b45de4912a7e0855c102..90be7a66cac6746e29a121fe6a772a94e04e8f02 100644 --- a/tensorflow/contrib/data/README.md +++ b/tensorflow/contrib/data/README.md @@ -1,10 +1,12 @@ `tf.contrib.data` API ===================== -NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead. -We are continuing to support existing code using the `tf.contrib.data` APIs in -the current version of TensorFlow, but will eventually remove support. The -`tf.data` APIs are subject to backwards compatibility guarantees. +NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead, +or `tf.data.experimental` for the experimental transformations previously hosted +in this module. We are continuing to support existing code using the +`tf.contrib.data` APIs in the current version of TensorFlow, but will eventually +remove support. The non-experimental `tf.data` APIs are subject to backwards +compatibility guarantees. Porting your code to `tf.data` ------------------------------ @@ -25,13 +27,13 @@ instead apply them using `Dataset.apply()` transformation. The full list of changes is as follows: * `dataset.dense_to_sparse_batch(...)` is now - `dataset.apply(tf.contrib.data.dense_to_sparse_batch(...)`. + `dataset.apply(tf.data.experimental.dense_to_sparse_batch(...)`. * `dataset.enumerate(...)` is now - `dataset.apply(tf.contrib.data.enumerate_dataset(...))`. + `dataset.apply(tf.data.experimental.enumerate_dataset(...))`. * `dataset.group_by_window(...)` is now - `dataset.apply(tf.contrib.data.group_by_window(...))`. + `dataset.apply(tf.data.experimental.group_by_window(...))`. * `dataset.ignore_errors()` is now - `dataset.apply(tf.contrib.data.ignore_errors())`. + `dataset.apply(tf.data.experimental.ignore_errors())`. * `dataset.unbatch()` is now `dataset.apply(tf.contrib.data.unbatch())`. The `Dataset.make_dataset_resource()` and `Iterator.dispose_op()` methods have diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 3cb51279c356f5fe79da98acb5cf481b4d76f6b8..c3d3e981fa10144ed94217cf948b485a7c2bced8 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -96,10 +96,6 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datase from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave from tensorflow.contrib.data.python.ops.iterator_ops import CheckpointInputPipelineHook from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator - -# Optimization constant that can be used to enable auto-tuning. -from tensorflow.contrib.data.python.ops.optimization import AUTOTUNE - from tensorflow.contrib.data.python.ops.parsing_ops import parse_example_dataset from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device @@ -114,11 +110,12 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch -from tensorflow.contrib.data.python.ops.stats_ops import latency_stats -from tensorflow.contrib.data.python.ops.stats_ops import set_stats_aggregator -from tensorflow.contrib.data.python.ops.stats_ops import StatsAggregator from tensorflow.contrib.data.python.ops.unique import unique from tensorflow.contrib.data.python.ops.writers import TFRecordWriter + +# Optimization constant that can be used to enable auto-tuning. +from tensorflow.python.data.experimental.ops.optimization import AUTOTUNE + from tensorflow.python.data.ops.iterator_ops import get_next_as_optional from tensorflow.python.data.ops.optional_ops import Optional # pylint: enable=unused-import diff --git a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc b/tensorflow/contrib/data/ops/indexed_dataset_ops.cc deleted file mode 100644 index cd9b7c68a04a33ca6dec1e9088c3606deebdb7f4..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" - -namespace tensorflow { - -REGISTER_OP("IdentityIndexedDataset") - .Input("size: uint64") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn( - shape_inference::ScalarShape); // TODO(saeta): check input shapes. - -/////////////////////////////////////////////////////////////////////////////// -// IndexedDataset Internals -/////////////////////////////////////////////////////////////////////////////// - -// Creates the handle. -REGISTER_OP("MaterializedIndexDatasetHandle") - .Output("handle: resource") - .Attr("container: string") - .Attr("shared_name: string") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); - -// Actually materialize the materialize handle. -REGISTER_OP("IndexedDatasetMaterialize") - .Input("dataset: variant") - .Input("materialized: resource") - .SetShapeFn(shape_inference::NoOutputs); - -namespace { - -Status GetShapeFn(shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - std::vector output_shapes; - TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { - return errors::InvalidArgument( - "`output_shapes` must be the same length as `output_types` (", - output_shapes.size(), " vs. ", c->num_outputs()); - } - for (size_t i = 0; i < output_shapes.size(); ++i) { - shape_inference::ShapeHandle output_shape_handle; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - output_shapes[i], &output_shape_handle)); - c->set_output(static_cast(i), output_shape_handle); - } - return Status::OK(); -} - -} // namespace - -REGISTER_OP("IndexedDatasetGet") - .Input("materialized: resource") - .Input("index: uint64") - .Output("components: output_types") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(GetShapeFn) - .Doc(R"doc( -Gets the element at `index` from `materialized` IndexedDataset. -)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index ce52c990ce04f9f4f9db9663c06ab478922c93ff..42f538b4ba1cb5b6a9a717e94f4e688cae56b056 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -8,192 +8,25 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "py_test") py_test( - name = "batch_dataset_op_test", - size = "medium", - srcs = ["batch_dataset_op_test.py"], + name = "assert_element_shape_test", + srcs = ["assert_element_shape_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_oss", # (b/79552534) - "no_pip", - ], deps = [ "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", "//tensorflow/python:script_ops", - "//tensorflow/python:session", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:string_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - -py_test( - name = "bucketing_test", - size = "medium", - srcs = ["bucketing_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/data/python/ops:grouping", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:tensor_shape", + "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], ) -py_test( - name = "csv_dataset_op_test", - size = "medium", - srcs = ["csv_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:error_ops", - "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:platform", - "//tensorflow/python:platform_test", - "//tensorflow/python:session", - "//tensorflow/python/data/ops:readers", - "//tensorflow/python/eager:context", - "//third_party/py/numpy", - ], -) - -py_test( - name = "dataset_constructor_op_test", - size = "medium", - srcs = ["dataset_constructor_op_test.py"], - srcs_version = "PY2AND3", - tags = [ - "manual", - "nomac", # b/62040583 - ], - deps = [ - "//tensorflow/contrib/data/python/ops:batching", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - ], -) - -py_test( - name = "directed_interleave_dataset_test", - size = "medium", - srcs = ["directed_interleave_dataset_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/data/python/ops:interleave_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:random_seed", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - ], -) - -py_test( - name = "get_single_element_test", - size = "small", - srcs = ["get_single_element_test.py"], - deps = [ - "//tensorflow/contrib/data/python/ops:get_single_element", - "//tensorflow/contrib/data/python/ops:grouping", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python/data/ops:dataset_ops", - "@absl_py//absl/testing:parameterized", - ], -) - -py_test( - name = "indexed_dataset_ops_test", - srcs = ["indexed_dataset_ops_test.py"], - deps = [ - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/data/python/ops:gen_dataset_ops", - "//tensorflow/contrib/data/python/ops:indexed_dataset_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - ], -) - -py_test( - name = "interleave_dataset_op_test", - size = "medium", - srcs = ["interleave_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_oss", - "no_pip", - "notap", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:interleave_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:math_ops", - "//tensorflow/python:script_ops", - "//tensorflow/python:sparse_ops", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python/data/ops:dataset_ops", - "@six_archive//:six", - ], -) - -py_test( - name = "iterator_ops_test", - size = "small", - srcs = ["iterator_ops_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:iterator_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:training", - "//tensorflow/python:variables", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/estimator:estimator_py", - ], -) - py_test( name = "lmdb_dataset_op_test", size = "medium", @@ -215,247 +48,24 @@ py_test( "//tensorflow/python:platform", "//tensorflow/python:platform_test", "//tensorflow/python:session", + "//tensorflow/python/data/kernel_tests:test_base", "//third_party/py/numpy", ], ) py_test( - name = "map_dataset_op_test", - size = "medium", - srcs = ["map_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "noasan", # times out - "optonly", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:batching", - "//tensorflow/contrib/data/python/ops:error_ops", - "//tensorflow/contrib/data/python/ops:optimization", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:io_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - ], -) - -py_test( - name = "filter_dataset_op_test", - size = "medium", - srcs = ["filter_dataset_op_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/data/python/ops:optimization", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:io_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - ], -) - -py_test( - name = "map_defun_op_test", - size = "small", - srcs = ["map_defun_op_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:map_defun", - "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:data_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:function", - "//tensorflow/python:functional_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:session", - ], -) - -py_test( - name = "parsing_ops_test", - size = "small", - srcs = ["parsing_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/data/python/ops:parsing_ops", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:platform", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//third_party/py/numpy", - ], -) - -cuda_py_test( - name = "prefetching_ops_test", - size = "small", - srcs = ["prefetching_ops_test.py"], - additional_deps = [ - "//tensorflow/contrib/data/python/ops:prefetching_ops", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:function", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/compat:compat", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:iterator_ops", - ], - tags = ["no_windows_gpu"], -) - -py_test( - name = "range_dataset_op_test", + name = "reduce_dataset_test", size = "small", - srcs = ["range_dataset_op_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/data/python/ops:counter", - "//tensorflow/contrib/data/python/ops:enumerate_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/data/ops:dataset_ops", - ], -) - -py_library( - name = "reader_dataset_ops_test_base", - testonly = 1, - srcs = [ - "reader_dataset_ops_test_base.py", - ], - srcs_version = "PY2AND3", - visibility = [ - "//tensorflow/contrib/data/python/kernel_tests:__pkg__", - "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__", - ], + srcs = ["reduce_dataset_test.py"], deps = [ - "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/core:protos_all_py", + "//tensorflow/contrib/data/python/ops:get_single_element", + "//tensorflow/contrib/data/python/ops:grouping", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:lib", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:iterator_ops", - "//tensorflow/python/data/ops:readers", - ], -) - -py_test( - name = "reader_dataset_ops_test", - size = "medium", - srcs = ["reader_dataset_ops_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - ":reader_dataset_ops_test_base", - "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:string_ops", - "//tensorflow/python/data/ops:readers", - "//tensorflow/python/data/util:nest", - "//third_party/py/numpy", - ], -) - -py_test( - name = "resample_test", - size = "medium", - srcs = ["resample_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = [ - "noasan", - "optonly", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:resampling", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:string_ops", - "//tensorflow/python:util", + "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", - "@six_archive//:six", - ], -) - -py_test( - name = "scan_dataset_op_test", - size = "small", - srcs = ["scan_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:scan_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/eager:context", - "//third_party/py/numpy", - ], -) - -py_test( - name = "shuffle_dataset_op_test", - size = "medium", - srcs = ["shuffle_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "optonly", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:shuffle_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", ], ) @@ -471,151 +81,9 @@ py_test( "//tensorflow/python:errors", "//tensorflow/python:math_ops", "//tensorflow/python:sparse_tensor", + "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", ], ) - -py_library( - name = "sql_dataset_op_test_base", - srcs = ["sql_dataset_op_test_base.py"], - srcs_version = "PY2AND3", - visibility = [ - "//tensorflow/contrib/data/python/kernel_tests:__pkg__", - "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:readers", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "@org_sqlite//:python", - ], -) - -py_test( - name = "sql_dataset_op_test", - size = "small", - srcs = ["sql_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - ":sql_dataset_op_test_base", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - ], -) - -py_test( - name = "stats_dataset_ops_test", - size = "medium", - srcs = ["stats_dataset_ops_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - ":reader_dataset_ops_test_base", - ":stats_dataset_test_base", - "//tensorflow/contrib/data/python/ops:stats_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - ], -) - -py_library( - name = "stats_dataset_test_base", - srcs = ["stats_dataset_test_base.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/core:protos_all_py", - "//tensorflow/python:client_testlib", - ], -) - -py_test( - name = "threadpool_dataset_ops_test", - size = "small", - srcs = ["threadpool_dataset_ops_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:threadpool", - "//tensorflow/contrib/data/python/ops:unique", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:script_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - -py_test( - name = "unique_dataset_op_test", - size = "small", - srcs = ["unique_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow/contrib/data/python/ops:unique", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - ], -) - -py_test( - name = "window_dataset_op_test", - size = "medium", - srcs = ["window_dataset_op_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - ], - deps = [ - "//tensorflow/contrib/data/python/ops:batching", - "//tensorflow/contrib/data/python/ops:grouping", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:math_ops", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - -py_test( - name = "writer_ops_test", - size = "small", - srcs = ["writer_ops_test.py"], - deps = [ - "//tensorflow/contrib/data/python/ops:writers", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:lib", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:readers", - ], -) - -py_library( - name = "test_utils", - srcs = ["test_utils.py"], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python/data/util:nest", - ], -) diff --git a/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0456463a1928cf226010670b90a5d574579e0411 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py @@ -0,0 +1,226 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import script_ops +from tensorflow.python.platform import test + + +class AssertElementShapeTest(test_base.DatasetTestBase): + + def test_assert_element_shape(self): + + def create_dataset(_): + return (array_ops.ones(2, dtype=dtypes.float32), + array_ops.zeros((3, 4), dtype=dtypes.int32)) + + dataset = dataset_ops.Dataset.range(5).map(create_dataset) + expected_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 4))) + self.assertEqual(expected_shapes, dataset.output_shapes) + + result = dataset.apply(batching.assert_element_shape(expected_shapes)) + self.assertEqual(expected_shapes, result.output_shapes) + + iterator = result.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run(get_next) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def test_assert_wrong_element_shape(self): + + def create_dataset(_): + return (array_ops.ones(2, dtype=dtypes.float32), + array_ops.zeros((3, 4), dtype=dtypes.int32)) + + dataset = dataset_ops.Dataset.range(3).map(create_dataset) + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 10))) + with self.assertRaises(ValueError): + dataset.apply(batching.assert_element_shape(wrong_shapes)) + + def test_assert_element_shape_on_unknown_shape_dataset(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + expected_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 4))) + result = dataset.apply(batching.assert_element_shape(expected_shapes)) + self.assertEqual(expected_shapes, result.output_shapes) + + iterator = result.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run(get_next) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def test_assert_wrong_element_shape_on_unknown_shape_dataset(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 10))) + iterator = ( + dataset.apply(batching.assert_element_shape(wrong_shapes)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(get_next) + + def test_assert_partial_element_shape(self): + + def create_dataset(_): + return (array_ops.ones(2, dtype=dtypes.float32), + array_ops.zeros((3, 4), dtype=dtypes.int32)) + + dataset = dataset_ops.Dataset.range(5).map(create_dataset) + partial_expected_shape = ( + tensor_shape.TensorShape(None), # Unknown shape + tensor_shape.TensorShape((None, 4))) # Partial shape + result = dataset.apply( + batching.assert_element_shape(partial_expected_shape)) + # Partial shapes are merged with actual shapes: + actual_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((3, 4))) + self.assertEqual(actual_shapes, result.output_shapes) + + iterator = result.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run(get_next) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def test_assert_wrong_partial_element_shape(self): + + def create_dataset(_): + return (array_ops.ones(2, dtype=dtypes.float32), + array_ops.zeros((3, 4), dtype=dtypes.int32)) + + dataset = dataset_ops.Dataset.range(3).map(create_dataset) + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((None, 10))) + with self.assertRaises(ValueError): + dataset.apply(batching.assert_element_shape(wrong_shapes)) + + def test_assert_partial_element_shape_on_unknown_shape_dataset(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + expected_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((None, 4))) + result = dataset.apply(batching.assert_element_shape(expected_shapes)) + self.assertEqual(expected_shapes, result.output_shapes) + + iterator = result.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run(get_next) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self): + + def create_unknown_shape_dataset(x): + return script_ops.py_func( + lambda _: ( # pylint: disable=g-long-lambda + np.ones(2, dtype=np.float32), + np.zeros((3, 4), dtype=np.int32)), + [x], + [dtypes.float32, dtypes.int32]) + + dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset) + unknown_shapes = (tensor_shape.TensorShape(None), + tensor_shape.TensorShape(None)) + self.assertEqual(unknown_shapes, dataset.output_shapes) + + wrong_shapes = (tensor_shape.TensorShape(2), + tensor_shape.TensorShape((None, 10))) + iterator = ( + dataset.apply(batching.assert_element_shape(wrong_shapes)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + with self.cached_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py deleted file mode 100644 index e2508de9e92568eef3f8b2eecd855b0acbb243f5..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ /dev/null @@ -1,990 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import time - -from absl.testing import parameterized -import numpy as np - -from tensorflow.contrib.data.python.ops import batching -from tensorflow.python.client import session -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import script_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.platform import test -from tensorflow.python.util import compat - - -class BatchDatasetTest(test.TestCase, parameterized.TestCase): - - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - - def testDenseToSparseBatchDataset(self): - components = np.random.randint(12, size=(100,)).astype(np.int32) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x], x)).apply( - batching.dense_to_sparse_batch(4, [12])) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - - for start in range(0, len(components), 4): - results = sess.run(get_next) - self.assertAllEqual([[i, j] - for i, c in enumerate(components[start:start + 4]) - for j in range(c)], results.indices) - self.assertAllEqual( - [c for c in components[start:start + 4] for _ in range(c)], - results.values) - self.assertAllEqual([min(4, - len(components) - start), 12], - results.dense_shape) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testDenseToSparseBatchDatasetWithUnknownShape(self): - components = np.random.randint(5, size=(40,)).astype(np.int32) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([x, x], x)).apply( - batching.dense_to_sparse_batch( - 4, [5, None])).make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - - for start in range(0, len(components), 4): - results = sess.run(get_next) - self.assertAllEqual([[i, j, z] - for i, c in enumerate(components[start:start + 4]) - for j in range(c) - for z in range(c)], results.indices) - self.assertAllEqual([ - c - for c in components[start:start + 4] for _ in range(c) - for _ in range(c) - ], results.values) - self.assertAllEqual([ - min(4, - len(components) - start), 5, - np.max(components[start:start + 4]) - ], results.dense_shape) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testDenseToSparseBatchDatasetWithInvalidShape(self): - input_tensor = array_ops.constant([[1]]) - with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"): - dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator() - - def testDenseToSparseBatchDatasetShapeErrors(self): - input_tensor = array_ops.placeholder(dtypes.int32) - iterator = ( - dataset_ops.Dataset.from_tensors(input_tensor).apply( - batching.dense_to_sparse_batch(4, [12])) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - # Initialize with an input tensor of incompatible rank. - sess.run(init_op, feed_dict={input_tensor: [[1]]}) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "incompatible with the row shape"): - sess.run(get_next) - - # Initialize with an input tensor that is larger than `row_shape`. - sess.run(init_op, feed_dict={input_tensor: range(13)}) - with self.assertRaisesRegexp(errors.DataLossError, - "larger than the row shape"): - sess.run(get_next) - - def testUnbatchScalarDataset(self): - data = tuple([math_ops.range(10) for _ in range(3)]) - data = dataset_ops.Dataset.from_tensor_slices(data) - expected_types = (dtypes.int32,) * 3 - data = data.batch(2) - self.assertEqual(expected_types, data.output_types) - data = data.apply(batching.unbatch()) - self.assertEqual(expected_types, data.output_types) - - iterator = data.make_one_shot_iterator() - op = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - self.assertEqual((i,) * 3, sess.run(op)) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(op) - - def testUnbatchDatasetWithStrings(self): - data = tuple([math_ops.range(10) for _ in range(3)]) - data = dataset_ops.Dataset.from_tensor_slices(data) - data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z)) - expected_types = (dtypes.int32, dtypes.string, dtypes.int32) - data = data.batch(2) - self.assertEqual(expected_types, data.output_types) - data = data.apply(batching.unbatch()) - self.assertEqual(expected_types, data.output_types) - - iterator = data.make_one_shot_iterator() - op = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op)) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(op) - - def testUnbatchDatasetWithSparseTensor(self): - st = sparse_tensor.SparseTensorValue( - indices=[[i, i] for i in range(10)], - values=list(range(10)), - dense_shape=[10, 10]) - data = dataset_ops.Dataset.from_tensors(st) - data = data.apply(batching.unbatch()) - data = data.batch(5) - data = data.apply(batching.unbatch()) - iterator = data.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - st_row = sess.run(next_element) - self.assertEqual([i], st_row.indices) - self.assertEqual([i], st_row.values) - self.assertEqual([10], st_row.dense_shape) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testUnbatchDatasetWithDenseAndSparseTensor(self): - st = sparse_tensor.SparseTensorValue( - indices=[[i, i] for i in range(10)], - values=list(range(10)), - dense_shape=[10, 10]) - data = dataset_ops.Dataset.from_tensors((list(range(10)), st)) - data = data.apply(batching.unbatch()) - data = data.batch(5) - data = data.apply(batching.unbatch()) - iterator = data.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - dense_elem, st_row = sess.run(next_element) - self.assertEqual(i, dense_elem) - self.assertEqual([i], st_row.indices) - self.assertEqual([i], st_row.values) - self.assertEqual([10], st_row.dense_shape) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testUnbatchSingleElementTupleDataset(self): - data = tuple([(math_ops.range(10),) for _ in range(3)]) - data = dataset_ops.Dataset.from_tensor_slices(data) - expected_types = ((dtypes.int32,),) * 3 - data = data.batch(2) - self.assertEqual(expected_types, data.output_types) - data = data.apply(batching.unbatch()) - self.assertEqual(expected_types, data.output_types) - - iterator = data.make_one_shot_iterator() - op = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - self.assertEqual(((i,),) * 3, sess.run(op)) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(op) - - def testUnbatchMultiElementTupleDataset(self): - data = tuple([(math_ops.range(10 * i, 10 * i + 10), - array_ops.fill([10], "hi")) for i in range(3)]) - data = dataset_ops.Dataset.from_tensor_slices(data) - expected_types = ((dtypes.int32, dtypes.string),) * 3 - data = data.batch(2) - self.assertAllEqual(expected_types, data.output_types) - data = data.apply(batching.unbatch()) - self.assertAllEqual(expected_types, data.output_types) - - iterator = data.make_one_shot_iterator() - op = iterator.get_next() - - with self.cached_session() as sess: - for i in range(10): - self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")), - sess.run(op)) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(op) - - def testUnbatchEmpty(self): - data = dataset_ops.Dataset.from_tensors( - (constant_op.constant([]), constant_op.constant([], shape=[0, 4]), - constant_op.constant([], shape=[0, 4, 0]))) - data = data.apply(batching.unbatch()) - iterator = data.make_one_shot_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testUnbatchStaticShapeMismatch(self): - data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8), - np.arange(9))) - with self.assertRaises(ValueError): - data.apply(batching.unbatch()) - - def testUnbatchDynamicShapeMismatch(self): - ph1 = array_ops.placeholder(dtypes.int32, shape=[None]) - ph2 = array_ops.placeholder(dtypes.int32, shape=None) - data = dataset_ops.Dataset.from_tensors((ph1, ph2)) - data = data.apply(batching.unbatch()) - iterator = data.make_initializable_iterator() - next_element = iterator.get_next() - - with self.cached_session() as sess: - # Mismatch in the 0th dimension. - sess.run( - iterator.initializer, - feed_dict={ - ph1: np.arange(7).astype(np.int32), - ph2: np.arange(8).astype(np.int32) - }) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(next_element) - - # No 0th dimension (i.e. scalar value) for one component. - sess.run( - iterator.initializer, - feed_dict={ - ph1: np.arange(7).astype(np.int32), - ph2: 7 - }) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(next_element) - - def testBatchAndDropRemainder(self): - components = (np.arange(7), - np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], - np.array(37.0) * np.arange(7)) - - batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size)) - .make_initializable_iterator()) - - next_element = iterator.get_next() - - with self.cached_session() as sess: - for test_batch_size in [1, 3, 7, 10]: - sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size}) - num_batches = 7 // test_batch_size - for i in range(num_batches): - result = sess.run(next_element) - for component, result_component in zip(components, result): - for j in range(test_batch_size): - self.assertAllEqual(component[(i * test_batch_size + j)], - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testBatchAndDropRemainderSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = dataset_ops.Dataset.range(12).map(_sparse).apply( - batching.batch_and_drop_remainder(5)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - for i in range(2): - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], - values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], - dense_shape=[5, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testPaddedBatchAndDropRemainder(self): - els = [] - for length in [3, 6, 9, 4, 12, 10, 2]: - els.append((np.array(length), np.arange(length) + 1, - np.array(length * 2))) - - dataset = dataset_ops.Dataset.from_tensors(els[0]) - for el in els[1:]: - dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el)) - - batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset.apply( - batching.padded_batch_and_drop_remainder( - batch_size, ([], [None], []))).make_initializable_iterator()) - - next_element = iterator.get_next() - - with self.cached_session() as sess: - for test_batch_size in [1, 3, 7, 10]: - sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size}) - num_batches = 7 // test_batch_size - for i in range(num_batches): - result = sess.run(next_element) - for component_idx, result_component in enumerate(result): - for j in range(test_batch_size): - data_idx = i * test_batch_size + j - comp = result_component[j] - unpadded = comp[comp > 0] - if np.isscalar(comp): - # The boolean mask indexing above adds a dim back. Rm it. - unpadded = unpadded[0] - self.assertAllEqual(els[data_idx][component_idx], unpadded) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testPaddedBatchAndDropRemainderSparseError(self): - - def _map_fn(i): - return sparse_tensor.SparseTensorValue( - indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i - - with self.assertRaises(TypeError): - _ = dataset_ops.Dataset.range(10).map(_map_fn).apply( - batching.padded_batch_and_drop_remainder(5)) - - def testBatchAndDropRemainderShapeInference(self): - components = (array_ops.placeholder(dtypes.int32), - (array_ops.placeholder(dtypes.int32, shape=[None]), - array_ops.placeholder(dtypes.int32, shape=[20, 30]))) - - # Test with a statically known batch size. - dataset = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(128))) - - self.assertIs(None, dataset.output_shapes[0].ndims) - self.assertEqual([128], dataset.output_shapes[1][0].as_list()) - self.assertEqual([128, 30], dataset.output_shapes[1][1].as_list()) - - # Test with a dynamic batch size: the static shape will be unknown, because - # `batch_size` is a placeholder. - batch_size = array_ops.placeholder(dtypes.int64) - dataset = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - batching.batch_and_drop_remainder(batch_size))) - - self.assertIs(None, dataset.output_shapes[0].ndims) - self.assertEqual([None], dataset.output_shapes[1][0].as_list()) - self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list()) - - @parameterized.named_parameters( - ("Default", None, None), - ("SequentialCalls", 1, None), - ("ParallelCalls", 2, None), - ("ParallelBatches", None, 10), - ) - def testMapAndBatch(self, num_parallel_calls, num_parallel_batches): - """Test a dataset that maps a TF function across its input elements.""" - # The pipeline is TensorSliceDataset -> - # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size). - components = (np.arange(7), - np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], - np.array(37.0) * np.arange(7)) - - count = array_ops.placeholder(dtypes.int64, shape=[]) - batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - - def _map_fn(x, y, z): - return math_ops.square(x), math_ops.square(y), math_ops.square(z) - - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply( - batching.map_and_batch( - map_func=_map_fn, - batch_size=batch_size, - num_parallel_calls=num_parallel_calls, - num_parallel_batches=num_parallel_batches)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - self.assertEqual([[None] + list(c.shape[1:]) for c in components], - [t.shape.as_list() for t in get_next]) - - with self.cached_session() as sess: - # Batch of a finite input, where the batch_size divides the - # total number of elements. - sess.run(init_op, feed_dict={count: 28, batch_size: 14}) - num_batches = (28 * 7) // 14 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(14): - self.assertAllEqual(component[(i * 14 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Batch of a finite input, where the batch_size does not - # divide the total number of elements. - sess.run(init_op, feed_dict={count: 14, batch_size: 8}) - - # We expect (num_batches - 1) full-sized batches. - num_batches = int(math.ceil((14 * 7) / 8)) - for i in range(num_batches - 1): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(8): - self.assertAllEqual(component[(i * 8 + j) % 7]**2, - result_component[j]) - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range((14 * 7) % 8): - self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Batch of an empty input should fail straight away. - sess.run(init_op, feed_dict={count: 0, batch_size: 8}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Empty batch should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, batch_size: 0}) - - @parameterized.named_parameters( - ("Even", False), - ("Uneven", True), - ) - def testMapAndBatchPartialBatch(self, drop_remainder): - iterator = ( - dataset_ops.Dataset.range(10).apply( - batching.map_and_batch( - lambda x: array_ops.reshape(x * x, [1]), - batch_size=4, - drop_remainder=drop_remainder)).make_one_shot_iterator()) - if drop_remainder: - self.assertEqual([4, 1], iterator.output_shapes.as_list()) - else: - self.assertEqual([None, 1], iterator.output_shapes.as_list()) - next_element = iterator.get_next() - with self.cached_session() as sess: - self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) - self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) - if not drop_remainder: - self.assertAllEqual([[64], [81]], sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testMapAndBatchYieldsPartialBatch(self): - iterator = (dataset_ops.Dataset.range(10) - .apply(batching.map_and_batch( - lambda x: array_ops.reshape(x * x, [1]), 4)) - .make_one_shot_iterator()) - self.assertEqual([None, 1], iterator.output_shapes.as_list()) - next_element = iterator.get_next() - with self.cached_session() as sess: - self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) - self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) - self.assertAllEqual([[64], [81]], sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - - def testMapAndBatchParallelGetNext(self): - iterator = (dataset_ops.Dataset.range(50000) - .apply(batching.map_and_batch(lambda x: x, batch_size=100)) - .make_one_shot_iterator()) - elements = [] - for _ in range(100): - elements.append(iterator.get_next()) - with self.cached_session() as sess: - for i in range(5): - got = sess.run(elements) - got.sort(key=lambda x: x[0]) - expected = [] - for j in range(100): - expected.append(range(i*10000+j*100, i*10000+(j+1)*100)) - self.assertAllEqual(got, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(elements) - - def testMapAndBatchParallelGetNextDropRemainder(self): - iterator = ( - dataset_ops.Dataset.range(49999).apply( - batching.map_and_batch( - lambda x: x, batch_size=100, drop_remainder=True)) - .make_one_shot_iterator()) - elements = [] - for _ in range(100): - elements.append(iterator.get_next()) - with self.cached_session() as sess: - for i in range(4): - got = sess.run(elements) - got.sort(key=lambda x: x[0]) - expected = [] - for j in range(100): - expected.append(range(i*10000+j*100, i*10000+(j+1)*100)) - self.assertAllEqual(got, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(elements) - - def testMapAndBatchSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = dataset_ops.Dataset.range(10).apply( - batching.map_and_batch(_sparse, 5)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - for i in range(2): - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], - values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4], - dense_shape=[5, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testMapAndBatchFails(self): - """Test a dataset that maps a TF function across its input elements.""" - dataset = dataset_ops.Dataset.from_tensors( - array_ops.check_numerics( - constant_op.constant(1.0) / constant_op.constant(0.0), "oops")) - batch_size = array_ops.placeholder(dtypes.int64, shape=[]) - iterator = ( - dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) - init_op = iterator.initializer - with self.cached_session() as sess: - with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): - sess.run(init_op, feed_dict={batch_size: 14}) - - def testMapAndBatchShapeMismatch(self): - """Test a dataset that maps a TF function across its input elements.""" - - def generator(): - yield [1] - yield [2] - yield [3] - yield [[4, 5, 6]] - - dataset = dataset_ops.Dataset.from_generator( - generator, output_types=dtypes.int32) - batch_size = 4 - iterator = ( - dataset.apply(batching.map_and_batch(lambda x: x, batch_size)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "number of elements does not match"): - sess.run(get_next) - - def testMapAndBatchImplicitDispose(self): - # Tests whether a map and batch dataset will be cleaned up correctly when - # the pipeline does not run it until exhaustion. - # The pipeline is TensorSliceDataset -> RepeatDataset(1000) -> - # MapAndBatchDataset(f=square_3, batch_size=100). - components = (np.arange(1000), - np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis], - np.array(37.0) * np.arange(1000)) - - def _map_fn(x, y, z): - return math_ops.square(x), math_ops.square(y), math_ops.square(z) - - dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat( - 1000).apply(batching.map_and_batch(_map_fn, batch_size=100)) - dataset = dataset.prefetch(5) - iterator = dataset.make_one_shot_iterator() - get_next = iterator.get_next() - - with self.cached_session() as sess: - for _ in range(3): - sess.run(get_next) - - @parameterized.named_parameters( - ("1", 0), - ("2", 5), - ("3", 10), - ("4", 90), - ("5", 95), - ("6", 99), - ) - def testMapAndBatchOutOfRangeError(self, threshold): - - def raising_py_fn(i): - if i >= threshold: - raise StopIteration() - else: - return i - - iterator = ( - dataset_ops.Dataset.range(100).apply( - batching.map_and_batch( - lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64), - batch_size=10)).make_one_shot_iterator()) - get_next = iterator.get_next() - - with self.cached_session() as sess: - for i in range(threshold // 10): - self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next)) - if threshold % 10 != 0: - self.assertAllEqual( - [threshold // 10 * 10 + j for j in range(threshold % 10)], - sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - @parameterized.named_parameters( - ("1", False, dtypes.bool), - ("2", -42, dtypes.int8), - ("3", -42, dtypes.int16), - ("4", -42, dtypes.int32), - ("5", -42, dtypes.int64), - ("6", 42, dtypes.uint8), - ("7", 42, dtypes.uint16), - ("8", 42.0, dtypes.float16), - ("9", 42.0, dtypes.float32), - ("10", 42.0, dtypes.float64), - ("11", b"hello", dtypes.string), - ) - def testMapAndBatchTypes(self, element, dtype): - def gen(): - yield element - - dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply( - batching.map_and_batch(lambda x: x, batch_size=10)) - - get_next = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - for _ in range(10): - self.assertAllEqual([element for _ in range(10)], sess.run(get_next)) - - -class RestructuredDatasetTest(test.TestCase): - - def test_assert_element_shape(self): - - def create_dataset(_): - return (array_ops.ones(2, dtype=dtypes.float32), - array_ops.zeros((3, 4), dtype=dtypes.int32)) - - dataset = dataset_ops.Dataset.range(5).map(create_dataset) - expected_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((3, 4))) - self.assertEqual(expected_shapes, dataset.output_shapes) - - result = dataset.apply(batching.assert_element_shape(expected_shapes)) - self.assertEqual(expected_shapes, result.output_shapes) - - iterator = result.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - for _ in range(5): - sess.run(get_next) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def test_assert_wrong_element_shape(self): - - def create_dataset(_): - return (array_ops.ones(2, dtype=dtypes.float32), - array_ops.zeros((3, 4), dtype=dtypes.int32)) - - dataset = dataset_ops.Dataset.range(3).map(create_dataset) - wrong_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((3, 10))) - with self.assertRaises(ValueError): - dataset.apply(batching.assert_element_shape(wrong_shapes)) - - def test_assert_element_shape_on_unknown_shape_dataset(self): - - def create_unknown_shape_dataset(x): - return script_ops.py_func( - lambda _: ( # pylint: disable=g-long-lambda - np.ones(2, dtype=np.float32), - np.zeros((3, 4), dtype=np.int32)), - [x], - [dtypes.float32, dtypes.int32]) - - dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset) - unknown_shapes = (tensor_shape.TensorShape(None), - tensor_shape.TensorShape(None)) - self.assertEqual(unknown_shapes, dataset.output_shapes) - - expected_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((3, 4))) - result = dataset.apply(batching.assert_element_shape(expected_shapes)) - self.assertEqual(expected_shapes, result.output_shapes) - - iterator = result.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - for _ in range(5): - sess.run(get_next) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def test_assert_wrong_element_shape_on_unknown_shape_dataset(self): - - def create_unknown_shape_dataset(x): - return script_ops.py_func( - lambda _: ( # pylint: disable=g-long-lambda - np.ones(2, dtype=np.float32), - np.zeros((3, 4), dtype=np.int32)), - [x], - [dtypes.float32, dtypes.int32]) - - dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset) - unknown_shapes = (tensor_shape.TensorShape(None), - tensor_shape.TensorShape(None)) - self.assertEqual(unknown_shapes, dataset.output_shapes) - - wrong_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((3, 10))) - iterator = ( - dataset.apply(batching.assert_element_shape(wrong_shapes)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(get_next) - - def test_assert_partial_element_shape(self): - - def create_dataset(_): - return (array_ops.ones(2, dtype=dtypes.float32), - array_ops.zeros((3, 4), dtype=dtypes.int32)) - - dataset = dataset_ops.Dataset.range(5).map(create_dataset) - partial_expected_shape = (tensor_shape.TensorShape(None), # Unknown shape - tensor_shape.TensorShape((None, 4))) # Partial shape - result = dataset.apply( - batching.assert_element_shape(partial_expected_shape)) - # Partial shapes are merged with actual shapes: - actual_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((3, 4))) - self.assertEqual(actual_shapes, result.output_shapes) - - iterator = result.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - for _ in range(5): - sess.run(get_next) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def test_assert_wrong_partial_element_shape(self): - - def create_dataset(_): - return (array_ops.ones(2, dtype=dtypes.float32), - array_ops.zeros((3, 4), dtype=dtypes.int32)) - - dataset = dataset_ops.Dataset.range(3).map(create_dataset) - wrong_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((None, 10))) - with self.assertRaises(ValueError): - dataset.apply(batching.assert_element_shape(wrong_shapes)) - - def test_assert_partial_element_shape_on_unknown_shape_dataset(self): - - def create_unknown_shape_dataset(x): - return script_ops.py_func( - lambda _: ( # pylint: disable=g-long-lambda - np.ones(2, dtype=np.float32), - np.zeros((3, 4), dtype=np.int32)), - [x], - [dtypes.float32, dtypes.int32]) - - dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset) - unknown_shapes = (tensor_shape.TensorShape(None), - tensor_shape.TensorShape(None)) - self.assertEqual(unknown_shapes, dataset.output_shapes) - - expected_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((None, 4))) - result = dataset.apply(batching.assert_element_shape(expected_shapes)) - self.assertEqual(expected_shapes, result.output_shapes) - - iterator = result.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - for _ in range(5): - sess.run(get_next) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self): - - def create_unknown_shape_dataset(x): - return script_ops.py_func( - lambda _: ( # pylint: disable=g-long-lambda - np.ones(2, dtype=np.float32), - np.zeros((3, 4), dtype=np.int32)), - [x], - [dtypes.float32, dtypes.int32]) - - dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset) - unknown_shapes = (tensor_shape.TensorShape(None), - tensor_shape.TensorShape(None)) - self.assertEqual(unknown_shapes, dataset.output_shapes) - - wrong_shapes = (tensor_shape.TensorShape(2), - tensor_shape.TensorShape((None, 10))) - iterator = ( - dataset.apply(batching.assert_element_shape(wrong_shapes)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(get_next) - - -class UnbatchDatasetBenchmark(test.Benchmark): - - def benchmarkNativeUnbatch(self): - batch_sizes = [1, 2, 5, 10, 20, 50] - elems_per_trial = 10000 - with ops.Graph().as_default(): - dataset = dataset_ops.Dataset.from_tensors("element").repeat(None) - batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) - dataset = dataset.batch(batch_size_placeholder) - dataset = dataset.apply(batching.unbatch()) - dataset = dataset.skip(elems_per_trial) - iterator = dataset.make_initializable_iterator() - next_element = iterator.get_next() - - with session.Session() as sess: - for batch_size in batch_sizes: - deltas = [] - for _ in range(5): - sess.run( - iterator.initializer, - feed_dict={batch_size_placeholder: batch_size}) - start = time.time() - sess.run(next_element.op) - end = time.time() - deltas.append((end - start) / elems_per_trial) - - median_wall_time = np.median(deltas) - print("Unbatch (native) batch size: %d Median wall time per element:" - " %f microseconds" % (batch_size, median_wall_time * 1e6)) - self.report_benchmark( - iters=10000, - wall_time=median_wall_time, - name="benchmark_unbatch_dataset_native_batch_size_%d" % - batch_size) - - # Include a benchmark of the previous `unbatch()` implementation that uses - # a composition of more primitive ops. Eventually we'd hope to generate code - # that is as good in both cases. - def benchmarkOldUnbatchImplementation(self): - batch_sizes = [1, 2, 5, 10, 20, 50] - elems_per_trial = 10000 - with ops.Graph().as_default(): - dataset = dataset_ops.Dataset.from_tensors("element").repeat(None) - batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) - dataset = dataset.batch(batch_size_placeholder) - dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices) - dataset = dataset.skip(elems_per_trial) - iterator = dataset.make_initializable_iterator() - next_element = iterator.get_next() - - with session.Session() as sess: - for batch_size in batch_sizes: - deltas = [] - for _ in range(5): - sess.run( - iterator.initializer, - feed_dict={batch_size_placeholder: batch_size}) - start = time.time() - sess.run(next_element.op) - end = time.time() - deltas.append((end - start) / elems_per_trial) - - median_wall_time = np.median(deltas) - print("Unbatch (unfused) batch size: %d Median wall time per element:" - " %f microseconds" % (batch_size, median_wall_time * 1e6)) - self.report_benchmark( - iters=10000, - wall_time=median_wall_time, - name="benchmark_unbatch_dataset_unfused_batch_size_%d" % - batch_size) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py deleted file mode 100644 index 48971f2ccc4317d2bf591ae1e07cd6d5baf7b965..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ /dev/null @@ -1,823 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random - -import numpy as np - -from tensorflow.contrib.data.python.ops import grouping -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.platform import test - - -class GroupByReducerTest(test.TestCase): - - def checkResults(self, dataset, shapes, values): - self.assertEqual(shapes, dataset.output_shapes) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - for expected in values: - got = sess.run(get_next) - self.assertEqual(got, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSum(self): - reducer = grouping.Reducer( - init_func=lambda _: np.int64(0), - reduce_func=lambda x, y: x + y, - finalize_func=lambda x: x) - for i in range(1, 11): - dataset = dataset_ops.Dataset.range(2 * i).apply( - grouping.group_by_reducer(lambda x: x % 2, reducer)) - self.checkResults( - dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i]) - - def testAverage(self): - - def reduce_fn(x, y): - return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / ( - x[1] + 1), x[1] + 1 - - reducer = grouping.Reducer( - init_func=lambda _: (0.0, 0.0), - reduce_func=reduce_fn, - finalize_func=lambda x, _: x) - for i in range(1, 11): - dataset = dataset_ops.Dataset.range(2 * i).apply( - grouping.group_by_reducer( - lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer)) - self.checkResults( - dataset, shapes=tensor_shape.scalar(), values=[i - 1, i]) - - def testConcat(self): - components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray) - reducer = grouping.Reducer( - init_func=lambda x: "", - reduce_func=lambda x, y: x + y[0], - finalize_func=lambda x: x) - for i in range(1, 11): - dataset = dataset_ops.Dataset.zip( - (dataset_ops.Dataset.from_tensor_slices(components), - dataset_ops.Dataset.range(2 * i))).apply( - grouping.group_by_reducer(lambda x, y: y % 2, reducer)) - self.checkResults( - dataset, - shapes=tensor_shape.scalar(), - values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]]) - - def testSparseSum(self): - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=np.array([[0, 0]]), - values=(i * np.array([1], dtype=np.int64)), - dense_shape=np.array([1, 1])) - - reducer = grouping.Reducer( - init_func=lambda _: _sparse(np.int64(0)), - reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]), - finalize_func=lambda x: x.values[0]) - for i in range(1, 11): - dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply( - grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer)) - self.checkResults( - dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i]) - - def testChangingStateShape(self): - - def reduce_fn(x, _): - # Statically known rank, but dynamic length. - larger_dim = array_ops.concat([x[0], x[0]], 0) - # Statically unknown rank. - larger_rank = array_ops.expand_dims(x[1], 0) - return larger_dim, larger_rank - - reducer = grouping.Reducer( - init_func=lambda x: ([0], 1), - reduce_func=reduce_fn, - finalize_func=lambda x, y: (x, y)) - - for i in range(1, 11): - dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply( - grouping.group_by_reducer(lambda x: x, reducer)) - self.assertEqual([None], dataset.output_shapes[0].as_list()) - self.assertIs(None, dataset.output_shapes[1].ndims) - iterator = dataset.make_one_shot_iterator() - get_next = iterator.get_next() - with self.cached_session() as sess: - x, y = sess.run(get_next) - self.assertAllEqual([0] * (2**i), x) - self.assertAllEqual(np.array(1, ndmin=i), y) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testTypeMismatch(self): - reducer = grouping.Reducer( - init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32), - reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64), - finalize_func=lambda x: x) - - dataset = dataset_ops.Dataset.range(10) - with self.assertRaisesRegexp( - TypeError, - "The element types for the new state must match the initial state."): - dataset.apply( - grouping.group_by_reducer(lambda _: np.int64(0), reducer)) - - # TODO(b/78665031): Remove once non-scalar keys are supported. - def testInvalidKeyShape(self): - reducer = grouping.Reducer( - init_func=lambda x: np.int64(0), - reduce_func=lambda x, y: x + y, - finalize_func=lambda x: x) - - dataset = dataset_ops.Dataset.range(10) - with self.assertRaisesRegexp( - ValueError, "`key_func` must return a single tf.int64 tensor."): - dataset.apply( - grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer)) - - # TODO(b/78665031): Remove once non-int64 keys are supported. - def testInvalidKeyType(self): - reducer = grouping.Reducer( - init_func=lambda x: np.int64(0), - reduce_func=lambda x, y: x + y, - finalize_func=lambda x: x) - - dataset = dataset_ops.Dataset.range(10) - with self.assertRaisesRegexp( - ValueError, "`key_func` must return a single tf.int64 tensor."): - dataset.apply( - grouping.group_by_reducer(lambda _: "wrong", reducer)) - - def testTuple(self): - def init_fn(_): - return np.array([], dtype=np.int64), np.int64(0) - - def reduce_fn(state, value): - s1, s2 = state - v1, v2 = value - return array_ops.concat([s1, [v1]], 0), s2 + v2 - - def finalize_fn(s1, s2): - return s1, s2 - - reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn) - dataset = dataset_ops.Dataset.zip( - (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply( - grouping.group_by_reducer(lambda x, y: np.int64(0), reducer)) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - x, y = sess.run(get_next) - self.assertAllEqual(x, np.asarray([x for x in range(10)])) - self.assertEqual(y, 45) - - -class GroupByWindowTest(test.TestCase): - - def testSimple(self): - components = np.random.randint(100, size=(200,)).astype(np.int64) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x) - .apply( - grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), - 4)).make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - counts = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - result = sess.run(get_next) - self.assertTrue( - all(x % 2 == 0 - for x in result) or all(x % 2 == 1) - for x in result) - counts.append(result.shape[0]) - - self.assertEqual(len(components), sum(counts)) - num_full_batches = len([c for c in counts if c == 4]) - self.assertGreaterEqual(num_full_batches, 24) - self.assertTrue(all(c == 4 for c in counts[:num_full_batches])) - - def testImmediateOutput(self): - components = np.array( - [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply( - grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4), - 4)).make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - # The input is infinite, so this test demonstrates that: - # 1. We produce output without having to consume the entire input, - # 2. Different buckets can produce output at different rates, and - # 3. For deterministic input, the output is deterministic. - for _ in range(3): - self.assertAllEqual([0, 0, 0, 0], sess.run(get_next)) - self.assertAllEqual([1, 1, 1, 1], sess.run(get_next)) - self.assertAllEqual([2, 2, 2, 2], sess.run(get_next)) - self.assertAllEqual([0, 0, 0, 0], sess.run(get_next)) - - def testSmallGroups(self): - components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64) - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components).apply( - grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), - 4)).make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - self.assertAllEqual([0, 0, 0, 0], sess.run(get_next)) - self.assertAllEqual([1, 1, 1, 1], sess.run(get_next)) - # The small outputs at the end are deterministically produced in key - # order. - self.assertAllEqual([0, 0, 0], sess.run(get_next)) - self.assertAllEqual([1], sess.run(get_next)) - - def testEmpty(self): - iterator = ( - dataset_ops.Dataset.range(4).apply( - grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - "Window size must be greater than zero, but got 0."): - print(sess.run(get_next)) - - def testReduceFuncError(self): - components = np.random.randint(100, size=(200,)).astype(np.int64) - - def reduce_func(_, xs): - # Introduce an incorrect padded shape that cannot (currently) be - # detected at graph construction time. - return xs.padded_batch( - 4, - padded_shapes=(tensor_shape.TensorShape([]), - constant_op.constant([5], dtype=dtypes.int64) * -1)) - - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply( - grouping.group_by_window(lambda x, _: x % 2, reduce_func, - 32)).make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(get_next) - - def testConsumeWindowDatasetMoreThanOnce(self): - components = np.random.randint(50, size=(200,)).astype(np.int64) - - def reduce_func(key, window): - # Apply two different kinds of padding to the input: tight - # padding, and quantized (to a multiple of 10) padding. - return dataset_ops.Dataset.zip(( - window.padded_batch( - 4, padded_shapes=tensor_shape.TensorShape([None])), - window.padded_batch( - 4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])), - )) - - iterator = ( - dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x)) - .apply(grouping.group_by_window( - lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64), - reduce_func, 4)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - counts = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - tight_result, multiple_of_10_result = sess.run(get_next) - self.assertEqual(0, multiple_of_10_result.shape[1] % 10) - self.assertAllEqual(tight_result, - multiple_of_10_result[:, :tight_result.shape[1]]) - counts.append(tight_result.shape[0]) - self.assertEqual(len(components), sum(counts)) - - -# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py. -# Currently, they use a constant batch size, though should be made to use a -# different batch size per key. -class BucketTest(test.TestCase): - - def _dynamicPad(self, bucket, window, window_size): - # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a - # generic form of padded_batch that pads every component - # dynamically and does not rely on static shape information about - # the arguments. - return dataset_ops.Dataset.zip( - (dataset_ops.Dataset.from_tensors(bucket), - window.padded_batch( - 32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape( - [None]), tensor_shape.TensorShape([3]))))) - - def testSingleBucket(self): - - def _map_fn(v): - return (v, array_ops.fill([v], v), - array_ops.fill([3], string_ops.as_string(v))) - - input_dataset = ( - dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn)) - - bucketed_dataset = input_dataset.apply( - grouping.group_by_window( - lambda x, y, z: 0, - lambda k, bucket: self._dynamicPad(k, bucket, 32), 32)) - - iterator = bucketed_dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - - which_bucket, bucketed_values = sess.run(get_next) - - self.assertEqual(0, which_bucket) - - expected_scalar_int = np.arange(32, dtype=np.int64) - expected_unk_int64 = np.zeros((32, 31)).astype(np.int64) - for i in range(32): - expected_unk_int64[i, :i] = i - expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T - - self.assertAllEqual(expected_scalar_int, bucketed_values[0]) - self.assertAllEqual(expected_unk_int64, bucketed_values[1]) - self.assertAllEqual(expected_vec3_str, bucketed_values[2]) - - def testEvenOddBuckets(self): - - def _map_fn(v): - return (v, array_ops.fill([v], v), - array_ops.fill([3], string_ops.as_string(v))) - - input_dataset = ( - dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn)) - - bucketed_dataset = input_dataset.apply( - grouping.group_by_window( - lambda x, y, z: math_ops.cast(x % 2, dtypes.int64), - lambda k, bucket: self._dynamicPad(k, bucket, 32), 32)) - - iterator = bucketed_dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - - # Get two minibatches (one containing even values, one containing odds) - which_bucket_even, bucketed_values_even = sess.run(get_next) - which_bucket_odd, bucketed_values_odd = sess.run(get_next) - - # Count number of bucket_tensors. - self.assertEqual(3, len(bucketed_values_even)) - self.assertEqual(3, len(bucketed_values_odd)) - - # Ensure bucket 0 was used for all minibatch entries. - self.assertAllEqual(0, which_bucket_even) - self.assertAllEqual(1, which_bucket_odd) - - # Test the first bucket outputted, the events starting at 0 - expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64) - expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64) - for i in range(0, 32): - expected_unk_int64[i, :2 * i] = 2 * i - expected_vec3_str = np.vstack( - 3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T - - self.assertAllEqual(expected_scalar_int, bucketed_values_even[0]) - self.assertAllEqual(expected_unk_int64, bucketed_values_even[1]) - self.assertAllEqual(expected_vec3_str, bucketed_values_even[2]) - - # Test the second bucket outputted, the odds starting at 1 - expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64) - expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64) - for i in range(0, 32): - expected_unk_int64[i, :2 * i + 1] = 2 * i + 1 - expected_vec3_str = np.vstack( - 3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T - - self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0]) - self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1]) - self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2]) - - def testEvenOddBucketsFilterOutAllOdd(self): - - def _map_fn(v): - return { - "x": v, - "y": array_ops.fill([v], v), - "z": array_ops.fill([3], string_ops.as_string(v)) - } - - def _dynamic_pad_fn(bucket, window, _): - return dataset_ops.Dataset.zip( - (dataset_ops.Dataset.from_tensors(bucket), - window.padded_batch( - 32, { - "x": tensor_shape.TensorShape([]), - "y": tensor_shape.TensorShape([None]), - "z": tensor_shape.TensorShape([3]) - }))) - - input_dataset = ( - dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn) - .filter(lambda d: math_ops.equal(d["x"] % 2, 0))) - - bucketed_dataset = input_dataset.apply( - grouping.group_by_window( - lambda d: math_ops.cast(d["x"] % 2, dtypes.int64), - lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32)) - - iterator = bucketed_dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - - # Get two minibatches ([0, 2, ...] and [64, 66, ...]) - which_bucket0, bucketed_values_even0 = sess.run(get_next) - which_bucket1, bucketed_values_even1 = sess.run(get_next) - - # Ensure that bucket 1 was completely filtered out - self.assertAllEqual(0, which_bucket0) - self.assertAllEqual(0, which_bucket1) - self.assertAllEqual( - np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"]) - self.assertAllEqual( - np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"]) - - def testDynamicWindowSize(self): - components = np.arange(100).astype(np.int64) - - # Key fn: even/odd - # Reduce fn: batches of 5 - # Window size fn: even=5, odd=10 - - def window_size_func(key): - window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64) - return window_sizes[key] - - dataset = dataset_ops.Dataset.from_tensor_slices(components).apply( - grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20), - None, window_size_func)) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.cached_session() as sess: - sess.run(init_op) - with self.assertRaises(errors.OutOfRangeError): - batches = 0 - while True: - result = sess.run(get_next) - is_even = all(x % 2 == 0 for x in result) - is_odd = all(x % 2 == 1 for x in result) - self.assertTrue(is_even or is_odd) - expected_batch_size = 5 if is_even else 10 - self.assertEqual(expected_batch_size, result.shape[0]) - batches += 1 - - self.assertEqual(batches, 15) - - -def _element_length_fn(x, y=None): - del y - return array_ops.shape(x)[0] - - -def _to_sparse_tensor(record): - return sparse_tensor.SparseTensor(**record) - - -def _format_record(array, sparse): - if sparse: - return { - "values": array, - "indices": [[i] for i in range(len(array))], - "dense_shape": (len(array),) - } - return array - - -def _get_record_type(sparse): - if sparse: - return { - "values": dtypes.int64, - "indices": dtypes.int64, - "dense_shape": dtypes.int64 - } - return dtypes.int32 - - -def _get_record_shape(sparse): - if sparse: - return { - "values": tensor_shape.TensorShape([None,]), - "indices": tensor_shape.TensorShape([None, 1]), - "dense_shape": tensor_shape.TensorShape([1,]) - } - return tensor_shape.TensorShape([None]) - - -class BucketBySequenceLength(test.TestCase): - - def testBucket(self): - - boundaries = [10, 20, 30] - batch_sizes = [10, 8, 4, 2] - lengths = [8, 13, 25, 35] - - def build_dataset(sparse): - def _generator(): - # Produce 1 batch for each bucket - elements = [] - for batch_size, length in zip(batch_sizes, lengths): - record_len = length - 1 - for _ in range(batch_size): - elements.append([1] * record_len) - record_len = length - random.shuffle(elements) - for el in elements: - yield (_format_record(el, sparse),) - dataset = dataset_ops.Dataset.from_generator( - _generator, - (_get_record_type(sparse),), - (_get_record_shape(sparse),)) - if sparse: - dataset = dataset.map(lambda x: (_to_sparse_tensor(x),)) - return dataset - - def _test_bucket_by_padding(no_padding): - dataset = build_dataset(sparse=no_padding) - dataset = dataset.apply( - grouping.bucket_by_sequence_length( - _element_length_fn, - boundaries, - batch_sizes, - no_padding=no_padding)) - batch, = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - batches = [] - for _ in range(4): - batches.append(sess.run(batch)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(batch) - batch_sizes_val = [] - lengths_val = [] - for batch in batches: - shape = batch.dense_shape if no_padding else batch.shape - batch_size = shape[0] - length = shape[1] - batch_sizes_val.append(batch_size) - lengths_val.append(length) - sum_check = batch.values.sum() if no_padding else batch.sum() - self.assertEqual(sum_check, batch_size * length - 1) - self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) - self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) - self.assertEqual(sorted(lengths), sorted(lengths_val)) - - for no_padding in (True, False): - _test_bucket_by_padding(no_padding) - - def testPadToBoundary(self): - - boundaries = [10, 20, 30] - batch_sizes = [10, 8, 4, 2] - lengths = [8, 13, 25] - - def element_gen(): - # Produce 1 batch for each bucket - elements = [] - for batch_size, length in zip(batch_sizes[:-1], lengths): - for _ in range(batch_size): - elements.append([1] * length) - random.shuffle(elements) - for el in elements: - yield (el,) - for _ in range(batch_sizes[-1]): - el = [1] * (boundaries[-1] + 5) - yield (el,) - - element_len = lambda el: array_ops.shape(el)[0] - dataset = dataset_ops.Dataset.from_generator( - element_gen, (dtypes.int64,), ([None],)).apply( - grouping.bucket_by_sequence_length( - element_len, boundaries, batch_sizes, - pad_to_bucket_boundary=True)) - batch, = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - batches = [] - for _ in range(3): - batches.append(sess.run(batch)) - with self.assertRaisesOpError("bucket_boundaries"): - sess.run(batch) - batch_sizes_val = [] - lengths_val = [] - for batch in batches: - batch_size = batch.shape[0] - length = batch.shape[1] - batch_sizes_val.append(batch_size) - lengths_val.append(length) - batch_sizes = batch_sizes[:-1] - self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) - self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) - self.assertEqual([boundary - 1 for boundary in sorted(boundaries)], - sorted(lengths_val)) - - def testPadToBoundaryNoExtraneousPadding(self): - - boundaries = [3, 7, 11] - batch_sizes = [2, 2, 2, 2] - lengths = range(1, 11) - - def element_gen(): - for length in lengths: - yield ([1] * length,) - - element_len = lambda element: array_ops.shape(element)[0] - dataset = dataset_ops.Dataset.from_generator( - element_gen, (dtypes.int64,), ([None],)).apply( - grouping.bucket_by_sequence_length( - element_len, boundaries, batch_sizes, - pad_to_bucket_boundary=True)) - batch, = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - batches = [] - for _ in range(5): - batches.append(sess.run(batch)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(batch) - - self.assertAllEqual(batches[0], [[1, 0], - [1, 1]]) - self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 0, 0]]) - self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1]]) - self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]) - self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) - - def testTupleElements(self): - - def build_dataset(sparse): - def _generator(): - text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]] - label = [1, 2, 1, 2] - for x, y in zip(text, label): - yield (_format_record(x, sparse), y) - dataset = dataset_ops.Dataset.from_generator( - generator=_generator, - output_types=(_get_record_type(sparse), dtypes.int32), - output_shapes=(_get_record_shape(sparse), - tensor_shape.TensorShape([]))) - if sparse: - dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y)) - return dataset - - def _test_tuple_elements_by_padding(no_padding): - dataset = build_dataset(sparse=no_padding) - dataset = dataset.apply(grouping.bucket_by_sequence_length( - element_length_func=_element_length_fn, - bucket_batch_sizes=[2, 2, 2], - bucket_boundaries=[0, 8], - no_padding=no_padding)) - shapes = dataset.output_shapes - self.assertEqual([None, None], shapes[0].as_list()) - self.assertEqual([None], shapes[1].as_list()) - - for no_padding in (True, False): - _test_tuple_elements_by_padding(no_padding) - - def testBucketSparse(self): - """Tests bucketing of sparse tensors (case where `no_padding` == True). - - Test runs on following dataset: - [ - [0], - [0, 1], - [0, 1, 2] - ... - [0, ..., max_len - 1] - ] - Sequences are bucketed by length and batched with - `batch_size` < `bucket_size`. - """ - - min_len = 0 - max_len = 100 - batch_size = 7 - bucket_size = 10 - - def _build_dataset(): - input_data = [range(i+1) for i in range(min_len, max_len)] - def generator_fn(): - for record in input_data: - yield _format_record(record, sparse=True) - dataset = dataset_ops.Dataset.from_generator( - generator=generator_fn, - output_types=_get_record_type(sparse=True)) - dataset = dataset.map(_to_sparse_tensor) - return dataset - - def _compute_expected_batches(): - """Computes expected batch outputs and stores in a set.""" - all_expected_sparse_tensors = set() - for bucket_start_len in range(min_len, max_len, bucket_size): - for batch_offset in range(0, bucket_size, batch_size): - batch_start_len = bucket_start_len + batch_offset - batch_end_len = min(batch_start_len + batch_size, - bucket_start_len + bucket_size) - expected_indices = [] - expected_values = [] - for length in range(batch_start_len, batch_end_len): - for val in range(length + 1): - expected_indices.append((length - batch_start_len, val)) - expected_values.append(val) - expected_sprs_tensor = (tuple(expected_indices), - tuple(expected_values)) - all_expected_sparse_tensors.add(expected_sprs_tensor) - return all_expected_sparse_tensors - - def _compute_batches(dataset): - """Computes actual batch outputs of dataset and stores in a set.""" - batch = dataset.make_one_shot_iterator().get_next() - all_sparse_tensors = set() - with self.cached_session() as sess: - with self.assertRaises(errors.OutOfRangeError): - while True: - output = sess.run(batch) - sprs_tensor = (tuple([tuple(idx) for idx in output.indices]), - tuple(output.values)) - all_sparse_tensors.add(sprs_tensor) - return all_sparse_tensors - - dataset = _build_dataset() - boundaries = range(min_len + bucket_size + 1, max_len, bucket_size) - dataset = dataset.apply(grouping.bucket_by_sequence_length( - _element_length_fn, - boundaries, - [batch_size] * (len(boundaries) + 1), - no_padding=True)) - batches = _compute_batches(dataset) - expected_batches = _compute_expected_batches() - self.assertEqual(batches, expected_batches) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py index 1cc5ddc9a2e1eff4473c19bc397d656e7e0b90ed..d2a72272db159755ac2d741bcdbce9ec646d928e 100644 --- a/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py @@ -22,6 +22,7 @@ import os import shutil from tensorflow.contrib.data.python.ops import readers +from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -31,7 +32,7 @@ from tensorflow.python.util import compat prefix_path = "tensorflow/core/lib" -class LMDBDatasetTest(test.TestCase): +class LMDBDatasetTest(test_base.DatasetTestBase): def setUp(self): super(LMDBDatasetTest, self).setUp() diff --git a/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e7281d531870c75c638b5c48fa3fc6dc606a3623 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py @@ -0,0 +1,62 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np + +from tensorflow.contrib.data.python.ops import get_single_element +from tensorflow.contrib.data.python.ops import grouping +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class ReduceDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): + + @parameterized.named_parameters( + ("SumZero", 0), + ("SumOne", 1), + ("SumFive", 5), + ("SumTen", 10), + ) + def testReduceDataset(self, stop): + def init_fn(_): + return np.int64(0) + + def reduce_fn(state, value): + return state + value + + def finalize_fn(state): + return state + + sum_reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn) + + stop_t = array_ops.placeholder(dtypes.int64, shape=[]) + dataset = dataset_ops.Dataset.range(stop_t) + element = get_single_element.reduce_dataset(dataset, sum_reducer) + + with self.cached_session() as sess: + value = sess.run(element, feed_dict={stop_t: stop}) + self.assertEqual(stop * (stop - 1) / 2, value) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py index 90d18dca2aa727ea51d636cb971f48b50bc0c663..c5a786232252432481566e3cde23e9310df172cc 100644 --- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py @@ -21,6 +21,7 @@ from absl.testing import parameterized import numpy as np from tensorflow.contrib.data.python.ops import sliding +from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -30,7 +31,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import test -class SlideDatasetTest(test.TestCase, parameterized.TestCase): +class SlideDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): @parameterized.named_parameters( ("1", 20, 14, 7, 1), @@ -197,11 +198,6 @@ class SlideDatasetTest(test.TestCase, parameterized.TestCase): sliding.sliding_window_batch( window_size=1, stride=1, window_shift=1, window_stride=1)) - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - def testSlideSparse(self): def _sparse(i): diff --git a/tensorflow/contrib/data/python/kernel_tests/test_utils.py b/tensorflow/contrib/data/python/kernel_tests/test_utils.py deleted file mode 100644 index 4c3353fe4046d6b2bfabac580b46f88c8d7f2941..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/data/python/kernel_tests/test_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Test utilities for tf.data functionality.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re - -from tensorflow.python.data.util import nest -from tensorflow.python.framework import errors -from tensorflow.python.platform import test - - -class DatasetTestBase(test.TestCase): - """Base class for dataset tests.""" - - def _assert_datasets_equal(self, dataset1, dataset2): - # TODO(rachelim): support sparse tensor outputs - next1 = dataset1.make_one_shot_iterator().get_next() - next2 = dataset2.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - while True: - try: - op1 = sess.run(next1) - except errors.OutOfRangeError: - with self.assertRaises(errors.OutOfRangeError): - sess.run(next2) - break - op2 = sess.run(next2) - - op1 = nest.flatten(op1) - op2 = nest.flatten(op2) - assert len(op1) == len(op2) - for i in range(len(op1)): - self.assertAllEqual(op1[i], op2[i]) - - def _assert_datasets_raise_same_error(self, - dataset1, - dataset2, - exception_class, - replacements=None): - # We are defining next1 and next2 in the same line so that we get identical - # file:line_number in the error messages - # pylint: disable=line-too-long - next1, next2 = dataset1.make_one_shot_iterator().get_next(), dataset2.make_one_shot_iterator().get_next() - # pylint: enable=line-too-long - with self.cached_session() as sess: - try: - sess.run(next1) - raise ValueError( - "Expected dataset to raise an error of type %s, but it did not." % - repr(exception_class)) - except exception_class as e: - expected_message = e.message - for old, new, count in replacements: - expected_message = expected_message.replace(old, new, count) - # Check that the first segment of the error messages are the same. - with self.assertRaisesRegexp(exception_class, - re.escape(expected_message)): - sess.run(next2) diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py deleted file mode 100644 index 8b7b3ac0f7bbe0b2212b451961f66c4493774dad..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py +++ /dev/null @@ -1,526 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import numpy as np - -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import grouping -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import sparse_ops -from tensorflow.python.platform import test - - -class WindowDatasetTest(test.TestCase, parameterized.TestCase): - - def _structuredDataset(self, structure, shape, dtype): - if structure is None: - return dataset_ops.Dataset.from_tensors( - array_ops.zeros(shape, dtype=dtype)) - else: - return dataset_ops.Dataset.zip( - tuple([ - self._structuredDataset(substructure, shape, dtype) - for substructure in structure - ])) - - def _structuredElement(self, structure, shape, dtype): - if structure is None: - return array_ops.zeros(shape, dtype=dtype) - else: - return tuple([ - self._structuredElement(substructure, shape, dtype) - for substructure in structure - ]) - - def _assertEqual(self, xs, ys): - self.assertEqual(type(xs), type(ys)) - if isinstance(xs, tuple) and isinstance(ys, tuple): - self.assertEqual(len(xs), len(ys)) - for x, y in zip(xs, ys): - self._assertEqual(x, y) - elif isinstance(xs, np.ndarray) and isinstance(ys, np.ndarray): - self.assertAllEqual(xs, ys) - else: - self.assertEqual(xs, ys) - - @parameterized.named_parameters( - ("1", None, np.int32([]), dtypes.bool), - ("2", None, np.int32([]), dtypes.int32), - ("3", None, np.int32([]), dtypes.float32), - ("4", None, np.int32([]), dtypes.string), - ("5", None, np.int32([2]), dtypes.int32), - ("6", None, np.int32([2, 2]), dtypes.int32), - ("7", (None, None, None), np.int32([]), dtypes.int32), - ("8", (None, (None, None)), np.int32([]), dtypes.int32), - ) - def testWindowDatasetFlatMap(self, structure, shape, dtype): - """Tests windowing by chaining it with flat map. - - Args: - structure: the input structure - shape: the input shape - dtype: the input data type - """ - - def fn(*args): - if len(args) == 1 and not isinstance(args[0], tuple): - return args[0] - return dataset_ops.Dataset.zip( - tuple([fn(*arg) if isinstance(arg, tuple) else arg for arg in args])) - - dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply( - grouping.window_dataset(5)).flat_map(fn) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - expected = sess.run(self._structuredElement(structure, shape, dtype)) - for _ in range(5): - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", None, np.int32([]), dtypes.bool), - ("2", None, np.int32([]), dtypes.int32), - ("3", None, np.int32([]), dtypes.float32), - ("4", None, np.int32([]), dtypes.string), - ("5", None, np.int32([2]), dtypes.int32), - ("6", None, np.int32([2, 2]), dtypes.int32), - ("7", (None, None, None), np.int32([]), dtypes.int32), - ("8", (None, (None, None)), np.int32([]), dtypes.int32), - ) - def testWindowDatasetBatchDense(self, structure, shape, dtype): - """Tests batching of dense tensor windows. - - Args: - structure: the input structure - shape: the input shape - dtype: the input data type - """ - - def fn(*args): - if len(args) == 1 and not isinstance(args[0], tuple): - return batching.batch_window(args[0]) - - return tuple([ - fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg) - for arg in args - ]) - - dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply( - grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn)) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - expected = sess.run( - self._structuredElement(structure, np.concatenate( - ([5], shape), axis=0), dtype)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int32([])), - ("2", np.int32([1])), - ("3", np.int32([1, 2, 3])), - ) - def testWindowDatasetBatchDenseDynamicShape(self, shape): - """Tests batching of dynamically shaped dense tensor windows. - - Args: - shape: the input shape - """ - - shape_t = array_ops.placeholder(dtypes.int32) - dataset = dataset_ops.Dataset.from_tensors( - array_ops.zeros(shape_t)).repeat(5).apply( - grouping.window_dataset(5)).apply( - grouping._map_x_dataset(batching.batch_window)) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op, {shape_t: shape}) - expected = sess.run( - self._structuredElement(None, np.concatenate(([5], shape), axis=0), - dtypes.int32)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - def _make_dense_to_sparse_fn(self, is_scalar): - - def dense_to_sparse_scalar(tensor): - indices = [[]] - values = array_ops.expand_dims(tensor, 0) - shape = [] - return sparse_tensor.SparseTensorValue(indices, values, shape) - - def dense_to_sparse_non_scalar(tensor): - indices = array_ops.where(array_ops.ones_like(tensor, dtype=dtypes.bool)) - values = array_ops.gather_nd(tensor, indices) - shape = array_ops.shape(tensor, out_type=dtypes.int64) - return sparse_tensor.SparseTensorValue(indices, values, shape) - - if is_scalar: - return dense_to_sparse_scalar - return dense_to_sparse_non_scalar - - def _structuredSparseDataset(self, structure, shape, dtype): - dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0) # pylint: disable=g-explicit-length-test - if structure is None: - return dataset_ops.Dataset.from_tensors( - dense_to_sparse(array_ops.zeros(shape, dtype=dtype))) - else: - return dataset_ops.Dataset.zip( - tuple([ - self._structuredSparseDataset(substructure, shape, dtype) - for substructure in structure - ])) - - def _structuredSparseElement(self, structure, shape, dtype): - dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0) # pylint: disable=g-explicit-length-test - if structure is None: - return dense_to_sparse(array_ops.zeros(shape, dtype=dtype)) - else: - return tuple([ - self._structuredSparseElement(substructure, shape, dtype) - for substructure in structure - ]) - - @parameterized.named_parameters( - ("1", None, np.int32([]), dtypes.bool), - ("2", None, np.int32([]), dtypes.int32), - ("3", None, np.int32([]), dtypes.float32), - ("4", None, np.int32([]), dtypes.string), - ("5", None, np.int32([2]), dtypes.int32), - ("6", None, np.int32([2, 2]), dtypes.int32), - ("7", (None, None, None), np.int32([]), dtypes.int32), - ("8", (None, (None, None)), np.int32([]), dtypes.int32), - ) - def testWindowDatasetBatchSparse(self, structure, shape, dtype): - """Tests batching of sparse tensor windows. - - Args: - structure: the input structure - shape: the input shape - dtype: the input data type - """ - - def fn(*args): - if len(args) == 1 and not isinstance(args[0], tuple): - return batching.batch_window(args[0]) - - return tuple([ - fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg) - for arg in args - ]) - - dataset = self._structuredSparseDataset( - structure, shape, dtype).repeat(5).apply( - grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn)) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - expected = sess.run( - self._structuredSparseElement(structure, - np.concatenate(([5], shape), axis=0), - dtype)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int32([])), - ("2", np.int32([1])), - ("3", np.int32([1, 2, 3])), - ) - def testWindowDatasetBatchSparseDynamicShape(self, shape): - """Tests batching of dynamically shaped sparse tensor windows. - - Args: - shape: the input shape - """ - - shape_t = array_ops.placeholder(dtypes.int32) - dataset = dataset_ops.Dataset.from_tensors(array_ops.zeros(shape_t)).map( - self._make_dense_to_sparse_fn(len(shape) == 0)).repeat(5).apply( # pylint: disable=g-explicit-length-test - grouping.window_dataset(5)).apply( - grouping._map_x_dataset(batching.batch_window)) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op, {shape_t: shape}) - expected = sess.run( - self._structuredSparseElement(None, - np.concatenate(([5], shape), axis=0), - dtypes.int32)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - def _structuredRaggedDataset(self, structure, shapes, dtype): - - if structure is None: - return dataset_ops.Dataset.from_tensor_slices(shapes).map( - lambda shape: array_ops.zeros(shape, dtype=dtype)) - else: - return dataset_ops.Dataset.zip( - tuple([ - self._structuredRaggedDataset(substructure, shapes, dtype) - for substructure in structure - ])) - - @parameterized.named_parameters( - ("1", None, np.int32([[1], [2], [3]]), dtypes.bool, [-1]), - ("2", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]), - ("3", None, np.int32([[1], [2], [3]]), dtypes.float32, [-1]), - ("4", None, np.int32([[1], [2], [3]]), dtypes.string, [-1]), - ("5", None, np.int32([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]), - ("6", None, np.int32([[3, 1, 3], [1, 3, 1]]), dtypes.int32, [-1, -1, -1]), - ("7", (None, None, None), np.int32([[1], [2], [3]]), dtypes.int32, [-1]), - ("8", (None, - (None, None)), np.int32([[1], [2], [3]]), dtypes.int32, [-1]), - ("9", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]), - ("10", None, np.int32([[1], [2], [3]]), dtypes.int32, np.int32([10])), - ) - def testWindowDatasetPaddedBatchDense(self, structure, shapes, dtype, - padded_shape): - """Tests padded batching of dense tensor windows. - - Args: - structure: the input structure - shapes: the input shapes - dtype: the input data type - padded_shape: the shape to pad the output to - """ - - def fn(*args): - if len(args) == 1 and not isinstance(args[0], tuple): - return batching.padded_batch_window(args[0], padded_shape) - - return tuple([ - fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window( - arg, padded_shape) for arg in args - ]) - - dataset = self._structuredRaggedDataset(structure, shapes, dtype).apply( - grouping.window_dataset(len(shapes))).apply( - grouping._map_x_dataset(fn)) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape) - expected = sess.run( - self._structuredElement( - structure, - np.concatenate((np.int32([len(shapes)]), expected_shape)), dtype)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int32([[1], [2], [3]]), [-1]), - ("2", np.int32([[1, 3], [2, 2], [3, 1]]), [-1, -1]), - ("3", np.int32([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]), - ) - def testWindowDatasetPaddedBatchDenseDynamicShape(self, shapes, padded_shape): - """Tests padded batching of dynamically shaped dense tensor windows. - - Args: - shapes: the input shapes - padded_shape: the shape to pad the output to - """ - - shapes_t = array_ops.placeholder(dtypes.int32) - dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map( - lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply( - grouping.window_dataset(len(shapes))).apply( - grouping._map_x_dataset( - lambda x: batching.padded_batch_window(x, padded_shape))) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op, {shapes_t: shapes}) - expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape) - expected = sess.run( - self._structuredElement( - None, np.concatenate((np.int32([len(shapes)]), expected_shape)), - dtypes.int32)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int32([[1]]), np.int32([0])), - ("2", np.int32([[10], [20]]), np.int32([15])), - ) - def testWindowDatasetPaddedBatchDenseInvalid(self, shapes, padded_shape): - """Tests invalid padded batching of dense tensor windows. - - Args: - shapes: the input shapes - padded_shape: the shape to pad the output to - """ - - dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map( - lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply( - grouping.window_dataset(len(shapes))).apply( - grouping._map_x_dataset( - lambda x: batching.padded_batch_window(x, padded_shape))) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - with self.assertRaises(errors.InvalidArgumentError): - sess.run(get_next) - - def _structuredRaggedSparseDataset(self, structure, shapes, dtype): - - def map_fn(shape): - dense_to_sparse = self._make_dense_to_sparse_fn(False) - return dense_to_sparse(array_ops.zeros(shape, dtype=dtype)) - - if structure is None: - return dataset_ops.Dataset.from_tensor_slices(shapes).map(map_fn) - else: - return dataset_ops.Dataset.zip( - tuple([ - self._structuredRaggedSparseDataset(substructure, shapes, dtype) - for substructure in structure - ])) - - def _structuredRaggedSparseElement(self, structure, shapes, dtype, - padded_shape): - if structure is None: - dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape) - values = [] - for shape in shapes: - dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0) # pylint: disable=g-explicit-length-test - sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype)) - padded_sparse = sparse_tensor.SparseTensor(sparse.indices, - sparse.values, dense_shape) - reshaped_sparse = sparse_ops.sparse_reshape( - padded_sparse, - array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0)) - values.append(reshaped_sparse) - return sparse_ops.sparse_concat(0, values) - else: - return tuple([ - self._structuredRaggedSparseElement(substructure, shapes, dtype, - padded_shape) - for substructure in structure - ]) - - @parameterized.named_parameters( - ("1", None, np.int64([[1], [2], [3]]), dtypes.bool, [-1]), - ("2", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]), - ("3", None, np.int64([[1], [2], [3]]), dtypes.float32, [-1]), - ("4", None, np.int64([[1], [2], [3]]), dtypes.string, [-1]), - ("5", None, np.int64([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]), - ("6", None, np.int64([[1, 3, 1], [3, 1, 3]]), dtypes.int32, [-1, -1, -1]), - ("7", (None, None, None), np.int64([[1], [2], [3]]), dtypes.int32, [-1]), - ("8", (None, - (None, None)), np.int64([[1], [2], [3]]), dtypes.int32, [-1]), - ("9", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]), - ("10", None, np.int64([[1], [2], [3]]), dtypes.int32, np.int64([10])), - ) - def testWindowDatasetPaddedBatchSparse(self, structure, shapes, dtype, - padded_shape): - """Tests padded batching of sparse tensor windows. - - Args: - structure: the input structure - shapes: the input shapes - dtype: the input data type - padded_shape: the shape to pad the output to - """ - - def fn(*args): - if len(args) == 1 and not isinstance(args[0], tuple): - return batching.padded_batch_window(args[0], padded_shape) - - return tuple([ - fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window( - arg, padded_shape) for arg in args - ]) - - dataset = self._structuredRaggedSparseDataset( - structure, shapes, dtype).apply(grouping.window_dataset( - len(shapes))).apply(grouping._map_x_dataset(fn)) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - expected = sess.run( - self._structuredRaggedSparseElement(structure, shapes, dtype, - padded_shape)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int64([[1], [2], [3]]), [-1]), - ("2", np.int64([[1, 3], [2, 2], [3, 1]]), [-1, -1]), - ("3", np.int64([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]), - ) - def testWindowDatasetPaddedBatchSparseDynamicShape(self, shapes, - padded_shape): - """Tests padded batching of dynamically shaped sparse tensor windows. - - Args: - shapes: the input shapes - padded_shape: the shape to pad the output to - """ - - shapes_t = array_ops.placeholder(dtypes.int32) - dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map( - lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map( - self._make_dense_to_sparse_fn(False) - ).apply(grouping.window_dataset(len(shapes))).apply( - grouping._map_x_dataset( - lambda x: batching.padded_batch_window(x, padded_shape))) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with self.cached_session() as sess: - sess.run(init_op, {shapes_t: shapes}) - expected = sess.run( - self._structuredRaggedSparseElement(None, shapes, dtypes.int32, - padded_shape)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) - - @parameterized.named_parameters( - ("1", np.int64([[1]]), [0]), - ("2", np.int64([[10], [20]]), [15]), - ) - def testWindowDatasetPaddedBatchSparseInvalid(self, shapes, padded_shape): - """Tests invalid padded batching of sparse tensor windows. - - Args: - shapes: the input shapes - padded_shape: the shape to pad the output to - """ - - dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map( - lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map( - self._make_dense_to_sparse_fn(False) - ).apply(grouping.window_dataset(len(shapes))).apply( - grouping._map_x_dataset( - lambda x: batching.padded_batch_window(x, padded_shape))) - get_next = dataset.make_one_shot_iterator().get_next() - with self.cached_session() as sess: - with self.assertRaises(errors.InvalidArgumentError): - sess.run(get_next) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index a14781cd933e12ff1f04ad7ea26a923e2e1ef9e4..34dc2379d0cb38f8f6962fa42efe21b793bc8d65 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -16,10 +16,7 @@ py_library( srcs = ["counter.py"], srcs_version = "PY2AND3", deps = [ - ":scan_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/experimental/ops:counter", ], ) @@ -28,12 +25,7 @@ py_library( srcs = ["get_single_element.py"], srcs_version = "PY2AND3", deps = [ - ":grouping", - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", - "//third_party/py/numpy", + "//tensorflow/python/data/experimental/ops:get_single_element", ], ) @@ -44,10 +36,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:framework_ops", - "//tensorflow/python:training", - "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/experimental/ops:iterator_ops", ], ) @@ -58,15 +47,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:constant_op", - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:random_seed", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:random_ops", ], ) @@ -78,19 +59,19 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", - ":gen_dataset_ops", ":interleave_ops", - ":optimization", ":parsing_ops", ":shuffle_ops", "//tensorflow/python:constant_op", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python:lib", "//tensorflow/python:platform", "//tensorflow/python:tensor_shape", "//tensorflow/python:util", + "//tensorflow/python/data/experimental/ops:readers", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:readers", "//tensorflow/python/data/util:convert", @@ -106,7 +87,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/experimental/ops:shuffle_ops", ], ) @@ -125,6 +106,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", + "//tensorflow/python/data/experimental/ops:batching", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", @@ -138,8 +120,7 @@ py_library( srcs = ["enumerate_ops.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:dtypes", - "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/experimental/ops:enumerate_ops", ], ) @@ -148,11 +129,7 @@ py_library( srcs = ["error_ops.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:error_ops", ], ) @@ -161,16 +138,7 @@ py_library( srcs = ["grouping.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:function", - "//tensorflow/python:math_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:grouping", ], ) @@ -179,32 +147,7 @@ py_library( srcs = ["interleave_ops.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - ":random_ops", - "//tensorflow/contrib/stateless", - "//tensorflow/python:array_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:readers", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", - ], -) - -py_library( - name = "optimization", - srcs = ["optimization.py"], - srcs_version = "PY2AND3", - deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:interleave_ops", ], ) @@ -213,25 +156,7 @@ py_library( srcs = ["parsing_ops.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:parsing_ops", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - ], -) - -py_library( - name = "map_defun", - srcs = ["map_defun.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:framework_ops", - "//tensorflow/python:tensor_shape", + "//tensorflow/python/data/experimental/ops:parsing_ops", ], ) @@ -240,18 +165,7 @@ py_library( srcs = ["resampling.py"], srcs_version = "PY2AND3", deps = [ - ":batching", - ":interleave_ops", - ":scan_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:logging_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//third_party/py/numpy", + "//tensorflow/python/data/experimental/ops:resampling", ], ) @@ -260,12 +174,7 @@ py_library( srcs = ["scan_ops.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:framework_ops", - "//tensorflow/python:function", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:scan_ops", ], ) @@ -284,33 +193,12 @@ py_library( ], ) -py_library( - name = "stats_ops", - srcs = ["stats_ops.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:iterator_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", - ], -) - py_library( name = "threadpool", srcs = ["threadpool.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", - "//tensorflow/python/eager:context", + "//tensorflow/python/data/experimental/ops:threadpool", ], ) @@ -321,12 +209,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:unique", ], ) @@ -337,56 +220,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/python:dtypes", - "//tensorflow/python/data/ops:dataset_ops", - ], -) - -tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "gen_dataset_ops.py", - deps = [ - "//tensorflow/contrib/data:dataset_ops_op_lib", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", - ], -) - -tf_kernel_library( - name = "dataset_ops_kernels", - deps = [ - "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/core:framework", - ], - alwayslink = 1, -) - -tf_custom_op_py_library( - name = "contrib_op_loader", - srcs = ["contrib_op_loader.py"], - dso = ["//tensorflow/contrib/data:_dataset_ops.so"], - kernels = [ - ":dataset_ops_kernels", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", - "//tensorflow/contrib/data:dataset_ops_op_lib", - ], - srcs_version = "PY2AND3", - deps = [ - ":gen_dataset_ops", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - ], -) - -py_library( - name = "indexed_dataset_ops", - srcs = ["indexed_dataset_ops.py"], - deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:writers", ], ) @@ -394,11 +228,7 @@ py_library( name = "prefetching_ops", srcs = ["prefetching_ops.py"], deps = [ - ":contrib_op_loader", - "//tensorflow/python:framework_ops", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", - "//tensorflow/python/data/util:sparse", + "//tensorflow/python/data/experimental/ops:prefetching_ops", ], ) @@ -411,17 +241,14 @@ py_library( ":error_ops", ":get_single_element", ":grouping", - ":indexed_dataset_ops", ":interleave_ops", - ":map_defun", - ":optimization", ":prefetching_ops", + ":random_ops", ":readers", ":resampling", ":scan_ops", ":shuffle_ops", ":sliding", - ":stats_ops", ":threadpool", ":unique", ":writers", diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 7a0f221284385ed1de2b7389c9419f4b3da3c5b8..8c60459ca81cd7a7e08d90339011c54275ea9c0b 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -17,134 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - -from tensorflow.contrib.data.python.ops import get_single_element -from tensorflow.contrib.data.python.ops import grouping from tensorflow.contrib.framework import with_shape -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import convert +from tensorflow.python.data.experimental.ops import batching from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sparse_ops from tensorflow.python.util import deprecation -def batch_window(dataset): - """Batches a window of tensors. - - Args: - dataset: the input dataset. - - Returns: - A `Tensor` representing the batch of the entire input dataset. - """ - if isinstance(dataset.output_classes, tuple): - raise TypeError("Input dataset expected to have a single component") - if dataset.output_classes is ops.Tensor: - return _batch_dense_window(dataset) - elif dataset.output_classes is sparse_tensor.SparseTensor: - return _batch_sparse_window(dataset) - else: - raise TypeError("Unsupported dataset type: %s" % dataset.output_classes) - - -def _batch_dense_window(dataset): - """Batches a window of dense tensors.""" - - def key_fn(_): - return np.int64(0) - - def shape_init_fn(_): - return array_ops.shape(first_element) - - def shape_reduce_fn(state, value): - check_ops.assert_equal(state, array_ops.shape(value)) - return state - - def finalize_fn(state): - return state - - if dataset.output_shapes.is_fully_defined(): - shape = dataset.output_shapes - else: - first_element = get_single_element.get_single_element(dataset.take(1)) - shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn, - finalize_fn) - shape = get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer))) - - def batch_init_fn(_): - batch_shape = array_ops.concat([[0], shape], 0) - return gen_array_ops.empty(batch_shape, dtype=dataset.output_types) - - def batch_reduce_fn(state, value): - return array_ops.concat([state, [value]], 0) - - batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn) - return get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer))) - - -def _batch_sparse_window(dataset): - """Batches a window of sparse tensors.""" - - def key_fn(_): - return np.int64(0) - - def shape_init_fn(_): - return first_element.dense_shape - - def shape_reduce_fn(state, value): - check_ops.assert_equal(state, value.dense_shape) - return state - - def finalize_fn(state): - return state - - if dataset.output_shapes.is_fully_defined(): - shape = dataset.output_shapes - else: - first_element = get_single_element.get_single_element(dataset.take(1)) - shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn, - finalize_fn) - shape = get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer))) - - def batch_init_fn(_): - indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0) - return sparse_tensor.SparseTensor( - indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64), - values=constant_op.constant([], shape=[0], dtype=dataset.output_types), - dense_shape=array_ops.concat( - [np.array([0], dtype=np.int64), - math_ops.cast(shape, dtypes.int64)], 0)) - - def batch_reduce_fn(state, value): - return sparse_ops.sparse_concat(0, [state, value]) - - def reshape_fn(value): - return sparse_ops.sparse_reshape( - value, - array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0)) - - batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn) - return get_single_element.get_single_element( - dataset.map(reshape_fn).apply( - grouping.group_by_reducer(key_fn, batch_reducer))) - - +@deprecation.deprecated( + None, "Use `tf.data.experimental.dense_to_sparse_batch(...)`.") def dense_to_sparse_batch(batch_size, row_shape): """A transformation that batches ragged elements into `tf.SparseTensor`s. @@ -187,201 +67,10 @@ def dense_to_sparse_batch(batch_size, row_shape): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - return _DenseToSparseBatchDataset(dataset, batch_size, row_shape) - - return _apply_fn - - -def padded_batch_window(dataset, padded_shape, padding_value=None): - """Batches a window of tensors with padding. - - Args: - dataset: the input dataset. - padded_shape: (Optional.) `tf.TensorShape` or `tf.int64` vector tensor-like - object representing the shape to which the input elements should be padded - prior to batching. Any unknown dimensions (e.g. `tf.Dimension(None)` in a - `tf.TensorShape` or `-1` in a tensor-like object) will be padded to the - maximum size of that dimension in each batch. - padding_value: (Optional.) A scalar-shaped `tf.Tensor`, representing the - padding value to use. Defaults are `0` for numeric types and the empty - string for string types. If `dataset` contains `tf.SparseTensor`, this - value is ignored. - - Returns: - A `Tensor` representing the batch of the entire input dataset. - - Raises: - ValueError: if invalid arguments are provided. - """ - if not issubclass(dataset.output_classes, - (ops.Tensor, sparse_tensor.SparseTensor)): - raise TypeError("Input dataset expected to have a single tensor component") - if issubclass(dataset.output_classes, (ops.Tensor)): - return _padded_batch_dense_window(dataset, padded_shape, padding_value) - elif issubclass(dataset.output_classes, (sparse_tensor.SparseTensor)): - if padding_value is not None: - raise ValueError("Padding value not allowed for sparse tensors") - return _padded_batch_sparse_window(dataset, padded_shape) - else: - raise TypeError("Unsupported dataset type: %s" % dataset.output_classes) - - -def _padded_batch_dense_window(dataset, padded_shape, padding_value=None): - """Batches a window of dense tensors with padding.""" - - padded_shape = math_ops.cast( - convert.partial_shape_to_tensor(padded_shape), dtypes.int32) - - def key_fn(_): - return np.int64(0) - - def max_init_fn(_): - return padded_shape - - def max_reduce_fn(state, value): - """Computes the maximum shape to pad to.""" - condition = math_ops.reduce_all( - math_ops.logical_or( - math_ops.less_equal(array_ops.shape(value), padded_shape), - math_ops.equal(padded_shape, -1))) - assert_op = control_flow_ops.Assert(condition, [ - "Actual shape greater than padded shape: ", - array_ops.shape(value), padded_shape - ]) - with ops.control_dependencies([assert_op]): - return math_ops.maximum(state, array_ops.shape(value)) - - def finalize_fn(state): - return state - - # Compute the padded shape. - max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn) - padded_shape = get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, max_reducer))) - - if padding_value is None: - if dataset.output_types == dtypes.string: - padding_value = "" - elif dataset.output_types == dtypes.bool: - padding_value = False - elif dataset.output_types == dtypes.variant: - raise TypeError("Unable to create padding for field of type 'variant'") - else: - padding_value = 0 - - def batch_init_fn(_): - batch_shape = array_ops.concat( - [np.array([0], dtype=np.int32), padded_shape], 0) - return gen_array_ops.empty(batch_shape, dtype=dataset.output_types) - - def batch_reduce_fn(state, value): - return array_ops.concat([state, [value]], 0) - - def pad_fn(value): - shape = array_ops.shape(value) - left = array_ops.zeros_like(shape) - right = padded_shape - shape - return array_ops.pad( - value, array_ops.stack([left, right], 1), constant_values=padding_value) - - batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn) - return get_single_element.get_single_element( - dataset.map(pad_fn).apply( - grouping.group_by_reducer(key_fn, batch_reducer))) - - -def _padded_batch_sparse_window(dataset, padded_shape): - """Batches a window of sparse tensors with padding.""" - - def key_fn(_): - return np.int64(0) - - def max_init_fn(_): - return convert.partial_shape_to_tensor(padded_shape) - - def max_reduce_fn(state, value): - """Computes the maximum shape to pad to.""" - condition = math_ops.reduce_all( - math_ops.logical_or( - math_ops.less_equal(value.dense_shape, padded_shape), - math_ops.equal(padded_shape, -1))) - assert_op = control_flow_ops.Assert(condition, [ - "Actual shape greater than padded shape: ", value.dense_shape, - padded_shape - ]) - with ops.control_dependencies([assert_op]): - return math_ops.maximum(state, value.dense_shape) - - def finalize_fn(state): - return state - - # Compute the padded shape. - max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn) - padded_shape = get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, max_reducer))) - - def batch_init_fn(_): - indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]], - 0) - return sparse_tensor.SparseTensor( - indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64), - values=constant_op.constant([], shape=[0], dtype=dataset.output_types), - dense_shape=array_ops.concat( - [np.array([0], dtype=np.int64), padded_shape], 0)) - - def batch_reduce_fn(state, value): - padded_value = sparse_tensor.SparseTensor( - indices=value.indices, values=value.values, dense_shape=padded_shape) - reshaped_value = sparse_ops.sparse_reshape( - padded_value, - array_ops.concat( - [np.array([1], dtype=np.int64), padded_value.dense_shape], 0)) - return sparse_ops.sparse_concat(0, [state, reshaped_value]) - - reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn) - return get_single_element.get_single_element( - dataset.apply(grouping.group_by_reducer(key_fn, reducer))) - - -class _UnbatchDataset(dataset_ops.UnaryDataset): - """A dataset that splits the elements of its input into multiple elements.""" - - def __init__(self, input_dataset): - """See `unbatch()` for more details.""" - super(_UnbatchDataset, self).__init__(input_dataset) - flat_shapes = nest.flatten(input_dataset.output_shapes) - if any(s.ndims == 0 for s in flat_shapes): - raise ValueError("Cannot unbatch an input with scalar components.") - known_batch_dim = tensor_shape.Dimension(None) - for s in flat_shapes: - try: - known_batch_dim = known_batch_dim.merge_with(s[0]) - except ValueError: - raise ValueError("Cannot unbatch an input whose components have " - "different batch sizes.") - self._input_dataset = input_dataset - - def _as_variant_tensor(self): - return gen_dataset_ops.unbatch_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - return nest.map_structure(lambda s: s[1:], - self._input_dataset.output_shapes) - - @property - def output_types(self): - return self._input_dataset.output_types + return batching.dense_to_sparse_batch(batch_size, row_shape) +@deprecation.deprecated(None, "Use `tf.data.experimental.unbatch()`.") def unbatch(): """Splits elements of a dataset into multiple elements on the batch dimension. @@ -403,39 +92,7 @@ def unbatch(): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - if not sparse.any_sparse(dataset.output_classes): - return _UnbatchDataset(dataset) - - # NOTE(mrry): We must ensure that any SparseTensors in `dataset` - # are normalized to the rank-1 dense representation, so that the - # sparse-oblivious unbatching logic will slice them - # appropriately. This leads to a somewhat inefficient re-encoding step - # for all SparseTensor components. - # TODO(mrry): Consider optimizing this in future - # if it turns out to be a bottleneck. - def normalize(arg, *rest): - if rest: - return sparse.serialize_many_sparse_tensors((arg,) + rest) - else: - return sparse.serialize_many_sparse_tensors(arg) - - normalized_dataset = dataset.map(normalize) - - # NOTE(mrry): Our `map()` has lost information about the sparseness - # of any SparseTensor components, so re-apply the structure of the - # original dataset. - restructured_dataset = _RestructuredDataset( - normalized_dataset, - dataset.output_types, - dataset.output_shapes, - dataset.output_classes, - allow_unsafe_cast=True) - return _UnbatchDataset(restructured_dataset) - - return _apply_fn + return batching.unbatch() @deprecation.deprecated( @@ -514,135 +171,8 @@ def padded_batch_and_drop_remainder(batch_size, return _apply_fn -class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset): - """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s.""" - - def __init__(self, input_dataset, batch_size, row_shape): - """See `Dataset.dense_to_sparse_batch()` for more details.""" - super(_DenseToSparseBatchDataset, self).__init__(input_dataset) - if not isinstance(input_dataset.output_types, dtypes.DType): - raise TypeError("DenseToSparseDataset requires an input whose elements " - "have a single component, whereas the input has %r." % - input_dataset.output_types) - self._input_dataset = input_dataset - self._batch_size = batch_size - self._row_shape = row_shape - - def _as_variant_tensor(self): - return gen_dataset_ops.dense_to_sparse_batch_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._batch_size, - row_shape=convert.partial_shape_to_tensor(self._row_shape), - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return sparse_tensor.SparseTensor - - @property - def output_shapes(self): - return tensor_shape.vector(None).concatenate(self._row_shape) - - @property - def output_types(self): - return self._input_dataset.output_types - - -class _RestructuredDataset(dataset_ops.UnaryDataset): - """An internal helper for changing the structure and shape of a dataset.""" - - def __init__(self, - dataset, - output_types, - output_shapes=None, - output_classes=None, - allow_unsafe_cast=False): - """Creates a new dataset with the given output types and shapes. - - The given `dataset` must have a structure that is convertible: - * `dataset.output_types` must be the same as `output_types` module nesting. - * Each shape in `dataset.output_shapes` must be compatible with each shape - in `output_shapes` (if given). - - Note: This helper permits "unsafe casts" for shapes, equivalent to using - `tf.Tensor.set_shape()` where domain-specific knowledge is available. - - Args: - dataset: A `Dataset` object. - output_types: A nested structure of `tf.DType` objects. - output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects. - If omitted, the shapes will be inherited from `dataset`. - output_classes: (Optional.) A nested structure of class types. - If omitted, the class types will be inherited from `dataset`. - allow_unsafe_cast: (Optional.) If `True`, the caller may switch the - reported output types and shapes of the restructured dataset, e.g. to - switch a sparse tensor represented as `tf.variant` to its user-visible - type and shape. - - Raises: - ValueError: If either `output_types` or `output_shapes` is not compatible - with the structure of `dataset`. - """ - super(_RestructuredDataset, self).__init__(dataset) - self._input_dataset = dataset - - if not allow_unsafe_cast: - # Validate that the types are compatible. - output_types = nest.map_structure(dtypes.as_dtype, output_types) - flat_original_types = nest.flatten(dataset.output_types) - flat_new_types = nest.flatten(output_types) - if flat_original_types != flat_new_types: - raise ValueError( - "Dataset with output types %r cannot be restructured to have " - "output types %r" % (dataset.output_types, output_types)) - - self._output_types = output_types - - if output_shapes is None: - # Inherit shapes from the original `dataset`. - self._output_shapes = nest.pack_sequence_as(output_types, - nest.flatten( - dataset.output_shapes)) - else: - if not allow_unsafe_cast: - # Validate that the shapes are compatible. - nest.assert_same_structure(output_types, output_shapes) - flat_original_shapes = nest.flatten(dataset.output_shapes) - flat_new_shapes = nest.flatten_up_to(output_types, output_shapes) - - for original_shape, new_shape in zip(flat_original_shapes, - flat_new_shapes): - if not original_shape.is_compatible_with(new_shape): - raise ValueError( - "Dataset with output shapes %r cannot be restructured to have " - "incompatible output shapes %r" % (dataset.output_shapes, - output_shapes)) - self._output_shapes = nest.map_structure_up_to( - output_types, tensor_shape.as_shape, output_shapes) - if output_classes is None: - # Inherit class types from the original `dataset`. - self._output_classes = nest.pack_sequence_as(output_types, - nest.flatten( - dataset.output_classes)) - else: - self._output_classes = output_classes - - def _as_variant_tensor(self): - return self._input_dataset._as_variant_tensor() # pylint: disable=protected-access - - @property - def output_classes(self): - return self._output_classes - - @property - def output_types(self): - return self._output_types - - @property - def output_shapes(self): - return self._output_shapes - - +# TODO(b/116817045): Move this to `tf.data.experimental` when the `with_shape()` +# function is available in the core. def assert_element_shape(expected_shapes): """Assert the shape of this `Dataset`. @@ -687,7 +217,8 @@ def assert_element_shape(expected_shapes): def _apply_fn(dataset): output_shapes = _merge_output_shapes(dataset.output_shapes, expected_shapes) - return _RestructuredDataset( + # pylint: disable=protected-access + return batching._RestructuredDataset( dataset.map(_check_shape), dataset.output_types, output_shapes=output_shapes, @@ -696,49 +227,7 @@ def assert_element_shape(expected_shapes): return _apply_fn -class _MapAndBatchDataset(dataset_ops.MapDataset): - """A `Dataset` that maps a function over a batch of elements.""" - - def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls, - drop_remainder): - """See `Dataset.map()` for details.""" - super(_MapAndBatchDataset, self).__init__(input_dataset, map_func) - self._batch_size_t = ops.convert_to_tensor( - batch_size, dtype=dtypes.int64, name="batch_size") - self._num_parallel_calls_t = ops.convert_to_tensor( - num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls") - self._drop_remainder_t = ops.convert_to_tensor( - drop_remainder, dtype=dtypes.bool, name="drop_remainder") - - self._batch_size = batch_size - self._drop_remainder = drop_remainder - - def _as_variant_tensor(self): - # pylint: disable=protected-access - input_resource = self._input_dataset._as_variant_tensor() - return gen_dataset_ops.map_and_batch_dataset_v2( - input_resource, - self._map_func.captured_inputs, - f=self._map_func, - batch_size=self._batch_size_t, - num_parallel_calls=self._num_parallel_calls_t, - drop_remainder=self._drop_remainder_t, - **dataset_ops.flat_structure(self)) - # pylint: enable=protected-access - - @property - def output_shapes(self): - dim = self._batch_size if self._drop_remainder else None - return nest.pack_sequence_as(self._output_shapes, [ - tensor_shape.vector(dim).concatenate(s) - for s in nest.flatten(self._output_shapes) - ]) - - @property - def output_types(self): - return self._output_types - - +@deprecation.deprecated(None, "Use `tf.data.experimental.map_and_batch(...)`.") def map_and_batch(map_func, batch_size, num_parallel_batches=None, @@ -779,17 +268,5 @@ def map_and_batch(map_func, ValueError: If both `num_parallel_batches` and `num_parallel_calls` are specified. """ - - if num_parallel_batches is None and num_parallel_calls is None: - num_parallel_calls = batch_size - elif num_parallel_batches is not None and num_parallel_calls is None: - num_parallel_calls = batch_size * num_parallel_batches - elif num_parallel_batches is not None and num_parallel_calls is not None: - raise ValueError("The `num_parallel_batches` and `num_parallel_calls` " - "arguments are mutually exclusive.") - - def _apply_fn(dataset): - return _MapAndBatchDataset(dataset, map_func, batch_size, - num_parallel_calls, drop_remainder) - - return _apply_fn + return batching.map_and_batch(map_func, batch_size, num_parallel_batches, + drop_remainder, num_parallel_calls) diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py index 6ef65f9624601286691505a795a86dd6226eead1..4ff5bf3e39dc2c9313b7d47d1ef965ebb22afc06 100644 --- a/tensorflow/contrib/data/python/ops/counter.py +++ b/tensorflow/contrib/data/python/ops/counter.py @@ -17,13 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import scan_ops - -from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.experimental.ops import counter from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, "Use `tf.data.experimental.Counter(...)`.") def Counter(start=0, step=1, dtype=dtypes.int64): """Creates a `Dataset` that counts from `start` in steps of size `step`. @@ -46,8 +45,4 @@ def Counter(start=0, step=1, dtype=dtypes.int64): Returns: A `Dataset` of scalar `dtype` elements. """ - with ops.name_scope("counter"): - start = ops.convert_to_tensor(start, dtype=dtype, name="start") - step = ops.convert_to_tensor(step, dtype=dtype, name="step") - return dataset_ops.Dataset.from_tensors(0).repeat(None).apply( - scan_ops.scan(start, lambda state, _: (state + step, state))) + return counter.Counter(start, step, dtype) diff --git a/tensorflow/contrib/data/python/ops/enumerate_ops.py b/tensorflow/contrib/data/python/ops/enumerate_ops.py index 490281e0d2da7a454a2f63f95753c7c436b87a76..a21da4d3eca508f2af9bac49d57fb0c4b08f3be0 100644 --- a/tensorflow/contrib/data/python/ops/enumerate_ops.py +++ b/tensorflow/contrib/data/python/ops/enumerate_ops.py @@ -17,12 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes +from tensorflow.python.data.experimental.ops import enumerate_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.enumerate_dataset(...)`.") def enumerate_dataset(start=0): """A transformation that enumerate the elements of a dataset. @@ -49,10 +50,4 @@ def enumerate_dataset(start=0): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - max_value = np.iinfo(dtypes.int64.as_numpy_dtype).max - return dataset_ops.Dataset.zip((dataset_ops.Dataset.range(start, max_value), - dataset)) - - return _apply_fn + return enumerate_ops.enumerate_dataset(start) diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 615dbcabd4d4e8d63e3a74c71afb063806b597ff..0559a2e09cce43cf16e88dbe20dba2c46db4c979 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -17,11 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops -from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.experimental.ops import error_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, "Use `tf.data.experimental.ignore_errors()`.") def ignore_errors(): """Creates a `Dataset` from another `Dataset` and silently ignores any errors. @@ -44,34 +44,4 @@ def ignore_errors(): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - return _IgnoreErrorsDataset(dataset) - - return _apply_fn - - -class _IgnoreErrorsDataset(dataset_ops.UnaryDataset): - """A `Dataset` that silently ignores errors when computing its input.""" - - def __init__(self, input_dataset): - """See `Dataset.ignore_errors()` for details.""" - super(_IgnoreErrorsDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - - def _as_variant_tensor(self): - return gen_dataset_ops.ignore_errors_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_types(self): - return self._input_dataset.output_types + return error_ops.ignore_errors() diff --git a/tensorflow/contrib/data/python/ops/get_single_element.py b/tensorflow/contrib/data/python/ops/get_single_element.py index a6713b017afa315edec9389d0a6c1c7135e6aeb9..58ad9eea903c42981b8fd083ed1c39421c58189f 100644 --- a/tensorflow/contrib/data/python/ops/get_single_element.py +++ b/tensorflow/contrib/data/python/ops/get_single_element.py @@ -19,13 +19,13 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.data.python.ops import grouping +from tensorflow.python.data.experimental.ops import get_single_element as experimental_get_single_element from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.get_single_element(...)`.") def get_single_element(dataset): """Returns the single element in `dataset` as a nested structure of tensors. @@ -61,18 +61,10 @@ def get_single_element(dataset): InvalidArgumentError (at runtime): if `dataset` does not contain exactly one element. """ - if not isinstance(dataset, dataset_ops.Dataset): - raise TypeError("`dataset` must be a `tf.data.Dataset` object.") - - nested_ret = nest.pack_sequence_as( - dataset.output_types, gen_dataset_ops.dataset_to_single_element( - dataset._as_variant_tensor(), # pylint: disable=protected-access - **dataset_ops.flat_structure(dataset))) - return sparse.deserialize_sparse_tensors( - nested_ret, dataset.output_types, dataset.output_shapes, - dataset.output_classes) + return experimental_get_single_element.get_single_element(dataset) +@deprecation.deprecated(None, "Use `tf.data.Dataset.reduce(...)`.") def reduce_dataset(dataset, reducer): """Returns the result of reducing the `dataset` using `reducer`. @@ -90,11 +82,4 @@ def reduce_dataset(dataset, reducer): if not isinstance(dataset, dataset_ops.Dataset): raise TypeError("`dataset` must be a `tf.data.Dataset` object.") - # The sentinel dataset is used in case the reduced dataset is empty. - sentinel_dataset = dataset_ops.Dataset.from_tensors( - reducer.finalize_func(reducer.init_func(np.int64(0)))) - reduced_dataset = dataset.apply( - grouping.group_by_reducer(lambda x: np.int64(0), reducer)) - - return get_single_element( - reduced_dataset.concatenate(sentinel_dataset).take(1)) + return dataset.reduce(reducer.init_func(np.int64(0)), reducer.reduce_func) diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 7cae33beb35e0280c0e3cc0e3f74eaab7f76dac2..a99dc2f29ae4c9d47c21afd83f49bf4eb89eca18 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,20 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import math_ops +from tensorflow.python.data.experimental.ops import grouping +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.group_by_reducer(...)`.") def group_by_reducer(key_func, reducer): """A transformation that groups elements and performs a reduction. @@ -52,14 +45,11 @@ def group_by_reducer(key_func, reducer): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - return _GroupByReducerDataset(dataset, key_func, reducer) - - return _apply_fn + return grouping.group_by_reducer(key_func, reducer) +@deprecation.deprecated(None, + "Use `tf.data.experimental.group_by_window(...)`.") def group_by_window(key_func, reduce_func, window_size=None, @@ -98,27 +88,12 @@ def group_by_window(key_func, ValueError: if neither or both of {`window_size`, `window_size_func`} are passed. """ - if (window_size is not None and window_size_func or - not (window_size is not None or window_size_func)): - raise ValueError("Must pass either window_size or window_size_func.") - - if window_size is not None: - - def constant_window_func(unused_key): - return ops.convert_to_tensor(window_size, dtype=dtypes.int64) - - window_size_func = constant_window_func - - assert window_size_func is not None - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - return _GroupByWindowDataset(dataset, key_func, reduce_func, - window_size_func) - - return _apply_fn + return grouping.group_by_window(key_func, reduce_func, window_size, + window_size_func) +@deprecation.deprecated( + None, "Use `tf.data.experimental.bucket_by_sequence_length(...)`.") def bucket_by_sequence_length(element_length_func, bucket_boundaries, bucket_batch_sizes, @@ -163,342 +138,12 @@ def bucket_by_sequence_length(element_length_func, Raises: ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`. """ - with ops.name_scope("bucket_by_seq_length"): - if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1): - raise ValueError( - "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1") - - batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64) - - def element_to_bucket_id(*args): - """Return int64 id of the length bucket for this element.""" - seq_length = element_length_func(*args) - - boundaries = list(bucket_boundaries) - buckets_min = [np.iinfo(np.int32).min] + boundaries - buckets_max = boundaries + [np.iinfo(np.int32).max] - conditions_c = math_ops.logical_and( - math_ops.less_equal(buckets_min, seq_length), - math_ops.less(seq_length, buckets_max)) - bucket_id = math_ops.reduce_min(array_ops.where(conditions_c)) - - return bucket_id - - def window_size_fn(bucket_id): - # The window size is set to the batch size for this bucket - window_size = batch_sizes[bucket_id] - return window_size - - def make_padded_shapes(shapes, none_filler=None): - padded = [] - for shape in nest.flatten(shapes): - shape = tensor_shape.TensorShape(shape) - shape = [ - none_filler if d.value is None else d - for d in shape - ] - padded.append(shape) - return nest.pack_sequence_as(shapes, padded) - - def batching_fn(bucket_id, grouped_dataset): - """Batch elements in dataset.""" - batch_size = window_size_fn(bucket_id) - if no_padding: - return grouped_dataset.batch(batch_size) - none_filler = None - if pad_to_bucket_boundary: - err_msg = ("When pad_to_bucket_boundary=True, elements must have " - "length < max(bucket_boundaries).") - check = check_ops.assert_less( - bucket_id, - constant_op.constant(len(bucket_batch_sizes) - 1, - dtype=dtypes.int64), - message=err_msg) - with ops.control_dependencies([check]): - boundaries = constant_op.constant(bucket_boundaries, - dtype=dtypes.int64) - bucket_boundary = boundaries[bucket_id] - none_filler = bucket_boundary - 1 - shapes = make_padded_shapes( - padded_shapes or grouped_dataset.output_shapes, - none_filler=none_filler) - return grouped_dataset.padded_batch(batch_size, shapes, padding_values) - - def _apply_fn(dataset): - return dataset.apply( - group_by_window(element_to_bucket_id, batching_fn, - window_size_func=window_size_fn)) - - return _apply_fn - - -def _map_x_dataset(map_func): - """A transformation that maps `map_func` across its input. - - This transformation is similar to `tf.data.Dataset.map`, but in addition to - supporting dense and sparse tensor inputs, it also supports dataset inputs. - - Args: - map_func: A function mapping a nested structure of tensors and/or datasets - (having shapes and types defined by `self.output_shapes` and - `self.output_types`) to another nested structure of tensors and/or - datasets. - - Returns: - Dataset: A `Dataset`. - """ - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - return _MapXDataset(dataset, map_func) - - return _apply_fn - - -# TODO(b/115382007) Remove this once canned reducers move to core. -def window_dataset(window_size): - """A transformation that creates window datasets from the input dataset. - - The resulting datasets will contain `window_size` elements (or - `N % window_size` for the last dataset if `window_size` does not divide the - number of input elements `N` evenly). - - Args: - window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - consecutive elements of the input dataset to combine into a window. - - Returns: - Dataset: A `Dataset`. - """ - - def _apply_fn(dataset): - return dataset_ops.WindowDataset( - dataset, - size=window_size, - shift=window_size, - stride=1, - drop_remainder=False) - - return _apply_fn - - -class _GroupByReducerDataset(dataset_ops.UnaryDataset): - """A `Dataset` that groups its input and performs a reduction.""" - - def __init__(self, input_dataset, key_func, reducer): - """See `group_by_reducer()` for details.""" - super(_GroupByReducerDataset, self).__init__(input_dataset) + return grouping.bucket_by_sequence_length( + element_length_func, bucket_boundaries, bucket_batch_sizes, padded_shapes, + padding_values, pad_to_bucket_boundary, no_padding) - self._input_dataset = input_dataset - self._make_key_func(key_func, input_dataset) - self._make_init_func(reducer.init_func) - self._make_reduce_func(reducer.reduce_func, input_dataset) - self._make_finalize_func(reducer.finalize_func) - - def _make_key_func(self, key_func, input_dataset): - """Make wrapping Defun for key_func.""" - wrapped_func = dataset_ops.StructuredFunctionWrapper( - key_func, "tf.contrib.data.group_by_reducer()", input_dataset) - if not ( - wrapped_func.output_types == dtypes.int64 and - wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())): - raise ValueError( - "`key_func` must return a single tf.int64 tensor. " - "Got type=%s and shape=%s" - % (wrapped_func.output_types, wrapped_func.output_shapes)) - self._key_func = wrapped_func.function - - def _make_init_func(self, init_func): - """Make wrapping Defun for init_func.""" - wrapped_func = dataset_ops.StructuredFunctionWrapper( - init_func, "tf.contrib.data.group_by_reducer()", - input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(), - input_types=dtypes.int64) - self._init_func = wrapped_func.function - self._state_classes = wrapped_func.output_classes - self._state_shapes = wrapped_func.output_shapes - self._state_types = wrapped_func.output_types - - def _make_reduce_func(self, reduce_func, input_dataset): - """Make wrapping Defun for reduce_func.""" - - # Iteratively rerun the reduce function until reaching a fixed point on - # `self._state_shapes`. - need_to_rerun = True - while need_to_rerun: - - wrapped_func = dataset_ops.StructuredFunctionWrapper( - reduce_func, "tf.contrib.data.group_by_reducer()", - input_classes=(self._state_classes, input_dataset.output_classes), - input_shapes=(self._state_shapes, input_dataset.output_shapes), - input_types=(self._state_types, input_dataset.output_types), - add_to_graph=False) - - # Extract and validate class information from the returned values. - for new_state_class, state_class in zip( - nest.flatten(wrapped_func.output_classes), - nest.flatten(self._state_classes)): - if not issubclass(new_state_class, state_class): - raise TypeError( - "The element classes for the new state must match the initial " - "state. Expected %s; got %s." % - (self._state_classes, wrapped_func.output_classes)) - - # Extract and validate type information from the returned values. - for new_state_type, state_type in zip( - nest.flatten(wrapped_func.output_types), - nest.flatten(self._state_types)): - if new_state_type != state_type: - raise TypeError( - "The element types for the new state must match the initial " - "state. Expected %s; got %s." % - (self._state_types, wrapped_func.output_types)) - - # Extract shape information from the returned values. - flat_state_shapes = nest.flatten(self._state_shapes) - flat_new_state_shapes = nest.flatten(wrapped_func.output_shapes) - weakened_state_shapes = [ - original.most_specific_compatible_shape(new) - for original, new in zip(flat_state_shapes, flat_new_state_shapes) - ] - - need_to_rerun = False - for original_shape, weakened_shape in zip(flat_state_shapes, - weakened_state_shapes): - if original_shape.ndims is not None and ( - weakened_shape.ndims is None or - original_shape.as_list() != weakened_shape.as_list()): - need_to_rerun = True - break - - if need_to_rerun: - self._state_shapes = nest.pack_sequence_as(self._state_shapes, - weakened_state_shapes) - - self._reduce_func = wrapped_func.function - self._reduce_func.add_to_graph(ops.get_default_graph()) - - def _make_finalize_func(self, finalize_func): - """Make wrapping Defun for finalize_func.""" - wrapped_func = dataset_ops.StructuredFunctionWrapper( - finalize_func, "tf.contrib.data.group_by_reducer()", - input_classes=self._state_classes, input_shapes=self._state_shapes, - input_types=self._state_types) - self._finalize_func = wrapped_func.function - self._output_classes = wrapped_func.output_classes - self._output_shapes = wrapped_func.output_shapes - self._output_types = wrapped_func.output_types - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - def _as_variant_tensor(self): - return gen_dataset_ops.group_by_reducer_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._key_func.captured_inputs, - self._init_func.captured_inputs, - self._reduce_func.captured_inputs, - self._finalize_func.captured_inputs, - key_func=self._key_func, - init_func=self._init_func, - reduce_func=self._reduce_func, - finalize_func=self._finalize_func, - **dataset_ops.flat_structure(self)) - - -class _GroupByWindowDataset(dataset_ops.UnaryDataset): - """A `Dataset` that groups its input and performs a windowed reduction.""" - - def __init__(self, input_dataset, key_func, reduce_func, window_size_func): - """See `group_by_window()` for details.""" - super(_GroupByWindowDataset, self).__init__(input_dataset) - - self._input_dataset = input_dataset - - self._make_key_func(key_func, input_dataset) - self._make_reduce_func(reduce_func, input_dataset) - self._make_window_size_func(window_size_func) - - def _make_window_size_func(self, window_size_func): - """Make wrapping Defun for window_size_func.""" - def window_size_func_wrapper(key): - return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64) - wrapped_func = dataset_ops.StructuredFunctionWrapper( - window_size_func_wrapper, "tf.contrib.data.group_by_window()", - input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(), - input_types=dtypes.int64) - if not ( - wrapped_func.output_types == dtypes.int64 and - wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())): - raise ValueError( - "`window_size_func` must return a single tf.int64 scalar tensor.") - self._window_size_func = wrapped_func.function - - def _make_key_func(self, key_func, input_dataset): - """Make wrapping Defun for key_func.""" - def key_func_wrapper(*args): - return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64) - wrapped_func = dataset_ops.StructuredFunctionWrapper( - key_func_wrapper, "tf.contrib.data.group_by_window()", input_dataset) - if not ( - wrapped_func.output_types == dtypes.int64 and - wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())): - raise ValueError( - "`key_func` must return a single tf.int64 scalar tensor.") - self._key_func = wrapped_func.function - - def _make_reduce_func(self, reduce_func, input_dataset): - """Make wrapping Defun for reduce_func.""" - nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset) # pylint: disable=protected-access - wrapped_func = dataset_ops.StructuredFunctionWrapper( - reduce_func, "tf.contrib.data.reduce_by_window()", - input_classes=(ops.Tensor, nested_dataset), - input_shapes=(tensor_shape.scalar(), nested_dataset), - input_types=(dtypes.int64, nested_dataset), - experimental_nested_dataset_support=True) - if not isinstance( - wrapped_func.output_classes, dataset_ops._NestedDatasetComponent): # pylint: disable=protected-access - raise TypeError("`reduce_func` must return a `Dataset` object.") - self._output_classes = wrapped_func.output_classes.output_classes - self._output_types = wrapped_func.output_types.output_types - self._output_shapes = wrapped_func.output_shapes.output_shapes - self._reduce_func = wrapped_func.function - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - def _as_variant_tensor(self): - return gen_dataset_ops.group_by_window_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._key_func.captured_inputs, - self._reduce_func.captured_inputs, - self._window_size_func.captured_inputs, - key_func=self._key_func, - reduce_func=self._reduce_func, - window_size_func=self._window_size_func, - **dataset_ops.flat_structure(self)) - - -class Reducer(object): +class Reducer(grouping.Reducer): """A reducer is used for reducing a set of elements. A reducer is represented as a tuple of the three functions: @@ -507,58 +152,6 @@ class Reducer(object): 3) finalization function: state => result """ + @deprecation.deprecated(None, "Use `tf.data.experimental.Reducer(...)`.") def __init__(self, init_func, reduce_func, finalize_func): - self._init_func = init_func - self._reduce_func = reduce_func - self._finalize_func = finalize_func - - @property - def init_func(self): - return self._init_func - - @property - def reduce_func(self): - return self._reduce_func - - @property - def finalize_func(self): - return self._finalize_func - - -class _MapXDataset(dataset_ops.UnaryDataset): - """A `Dataset` that maps a function over elements in its input.""" - - def __init__(self, input_dataset, map_func): - """See `map_x_dataset()` for details.""" - super(_MapXDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - - wrapped_func = dataset_ops.StructuredFunctionWrapper( - map_func, - "tf.contrib.data.map_x_dataset()", - input_dataset, - experimental_nested_dataset_support=True) - self._output_classes = wrapped_func.output_classes - self._output_shapes = wrapped_func.output_shapes - self._output_types = wrapped_func.output_types - self._map_func = wrapped_func.function - - def _as_variant_tensor(self): - input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access - return gen_dataset_ops.map_dataset( - input_t, - self._map_func.captured_inputs, - f=self._map_func, - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types + super(Reducer, self).__init__(init_func, reduce_func, finalize_func) diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index bfa3fdf54337c58716fbdd70dcb85a378c65066f..f50da4d429f715418a95cf177a3f4b5d273c8844 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,21 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import stateless -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops -from tensorflow.contrib.data.python.ops import random_ops -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import readers -from tensorflow.python.data.util import nest -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops +from tensorflow.python.data.experimental.ops import interleave_ops from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.parallel_interleave(...)`.") def parallel_interleave(map_func, cycle_length, block_length=1, @@ -81,12 +72,9 @@ def parallel_interleave(map_func, A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - def _apply_fn(dataset): - return readers.ParallelInterleaveDataset( - dataset, map_func, cycle_length, block_length, sloppy, - buffer_output_elements, prefetch_input_elements) - - return _apply_fn + return interleave_ops.parallel_interleave( + map_func, cycle_length, block_length, sloppy, buffer_output_elements, + prefetch_input_elements) @deprecation.deprecated( @@ -140,61 +128,12 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - def _apply_fn(dataset): - return readers.ParallelInterleaveDataset( - dataset, - map_func, - cycle_length, - block_length, - sloppy=True, - buffer_output_elements=None, - prefetch_input_elements=None) - - return _apply_fn - - -class _DirectedInterleaveDataset(dataset_ops.Dataset): - """A substitute for `Dataset.interleave()` on a fixed list of datasets.""" - - def __init__(self, selector_input, data_inputs): - self._selector_input = selector_input - self._data_inputs = list(data_inputs) - - for data_input in data_inputs[1:]: - if (data_input.output_types != data_inputs[0].output_types or - data_input.output_classes != data_inputs[0].output_classes): - raise TypeError("All datasets must have the same type and class.") - - def _as_variant_tensor(self): - # pylint: disable=protected-access - return gen_dataset_ops.directed_interleave_dataset( - self._selector_input._as_variant_tensor(), - [data_input._as_variant_tensor() for data_input in self._data_inputs], - **dataset_ops.flat_structure(self)) - # pylint: enable=protected-access - - def _inputs(self): - return [self._selector_input] + self._data_inputs - - @property - def output_classes(self): - return self._data_inputs[0].output_classes - - @property - def output_shapes(self): - ret = self._data_inputs[0].output_shapes - for data_input in self._data_inputs[1:]: - ret = nest.pack_sequence_as(ret, [ - ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip( - nest.flatten(ret), nest.flatten(data_input.output_shapes)) - ]) - return ret - - @property - def output_types(self): - return self._data_inputs[0].output_types + return interleave_ops.parallel_interleave( + map_func, cycle_length, block_length, sloppy=True) +@deprecation.deprecated(None, + "Use `tf.data.experimental.sample_from_datasets(...)`.") def sample_from_datasets(datasets, weights=None, seed=None): """Samples elements at random from the datasets in `datasets`. @@ -218,64 +157,11 @@ def sample_from_datasets(datasets, weights=None, seed=None): ValueError: If the `weights` argument is specified and does not match the length of the `datasets` element. """ - num_datasets = len(datasets) - if not isinstance(weights, dataset_ops.Dataset): - if weights is None: - # Select inputs with uniform probability. - logits = [[1.0] * num_datasets] - - else: - # Use the given `weights` as the probability of choosing the respective - # input. - weights = ops.convert_to_tensor(weights, name="weights") - if weights.dtype not in (dtypes.float32, dtypes.float64): - raise TypeError("`weights` must be convertible to a tensor of " - "`tf.float32` or `tf.float64` elements.") - if not weights.shape.is_compatible_with([num_datasets]): - raise ValueError( - "`weights` must be a vector of length `len(datasets)`.") - - # The `stateless_multinomial()` op expects log-probabilities, as opposed - # to weights. - logits = array_ops.expand_dims(math_ops.log(weights, name="logits"), 0) - - # NOTE(mrry): We only specialize when `weights` is not a `Dataset`. When it - # is a `Dataset`, it is possible that evaluating it has a side effect the - # user depends on. - if len(datasets) == 1: - return datasets[0] - - def select_dataset_constant_logits(seed): - return array_ops.squeeze( - stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) - - selector_input = dataset_ops.MapDataset( - random_ops.RandomDataset(seed).batch(2), - select_dataset_constant_logits, - use_inter_op_parallelism=False) - - else: - # Use each element of the given `weights` dataset as the probability of - # choosing the respective input. - - # The `stateless_multinomial()` op expects log-probabilities, as opposed to - # weights. - logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits")) - - def select_dataset_varying_logits(logits, seed): - return array_ops.squeeze( - stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) - - logits_and_seeds = dataset_ops.Dataset.zip( - (logits_ds, random_ops.RandomDataset(seed).batch(2))) - selector_input = dataset_ops.MapDataset( - logits_and_seeds, - select_dataset_varying_logits, - use_inter_op_parallelism=False) - - return _DirectedInterleaveDataset(selector_input, datasets) + return interleave_ops.sample_from_datasets(datasets, weights, seed) +@deprecation.deprecated(None, + "Use `tf.data.experimental.choose_from_datasets(...)`.") def choose_from_datasets(datasets, choice_dataset): """Creates a dataset that deterministically chooses elements from `datasets`. @@ -311,10 +197,4 @@ def choose_from_datasets(datasets, choice_dataset): TypeError: If the `datasets` or `choice_dataset` arguments have the wrong type. """ - if not (choice_dataset.output_types == dtypes.int64 - and choice_dataset.output_shapes.is_compatible_with( - tensor_shape.scalar()) - and choice_dataset.output_classes == ops.Tensor): - raise TypeError("`choice_dataset` must be a dataset of scalar " - "`tf.int64` tensors.") - return _DirectedInterleaveDataset(choice_dataset, datasets) + return interleave_ops.choose_from_datasets(datasets, choice_dataset) diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py index 18515e21edfe0449514ab4f21683a600eaf48910..48c325c86f74b4c922e70a33212b49196b34e357 100644 --- a/tensorflow/contrib/data/python/ops/iterator_ops.py +++ b/tensorflow/contrib/data/python/ops/iterator_ops.py @@ -16,15 +16,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import iterator_ops -from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.training import basic_session_run_hooks -from tensorflow.python.training import checkpoint_management -from tensorflow.python.training import saver as saver_lib -from tensorflow.python.training import session_run_hook +from tensorflow.python.data.experimental.ops import iterator_ops +from tensorflow.python.util import deprecation + +@deprecation.deprecated( + None, "Use `tf.data.experimental.make_saveable_from_iterator(...)`.") def make_saveable_from_iterator(iterator): """Returns a SaveableObject for saving/restore iterator state using Saver. @@ -60,27 +58,10 @@ def make_saveable_from_iterator(iterator): Note: Not all iterators support checkpointing yet. Attempting to save the state of an unsupported iterator will throw an error. """ - return _Saveable(iterator._iterator_resource) # pylint: disable=protected-access - - -class _Saveable(saver_lib.BaseSaverBuilder.SaveableObject): - """SaveableObject for saving/restoring iterator state.""" + return iterator_ops.make_saveable_from_iterator(iterator) - def __init__(self, iterator_resource): - serialized_iterator = gen_dataset_ops.serialize_iterator(iterator_resource) - specs = [ - saver_lib.BaseSaverBuilder.SaveSpec(serialized_iterator, "", - iterator_resource.name + "-state") - ] - super(_Saveable, self).__init__(iterator_resource, specs, - iterator_resource.name) - def restore(self, restored_tensors, unused_restored_shapes): - with ops.colocate_with(self.op): - return gen_dataset_ops.deserialize_iterator(self.op, restored_tensors[0]) - - -class CheckpointInputPipelineHook(session_run_hook.SessionRunHook): +class CheckpointInputPipelineHook(iterator_ops.CheckpointInputPipelineHook): """Checkpoints input pipeline state every N steps or seconds. This hook saves the state of the iterators in the `Graph` so that when @@ -125,135 +106,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook): collector when building the eval graph. """ + @deprecation.deprecated( + None, "Use `tf.data.experimental.CheckpointInputPipelineHook(...)`.") def __init__(self, estimator): - """Initializes a `CheckpointInputPipelineHook`. - - Args: - estimator: Estimator. - - Raises: - ValueError: One of `save_steps` or `save_secs` should be set. - ValueError: At most one of saver or scaffold should be set. - """ - # `checkpoint_basename` is "input.ckpt" for non-distributed pipelines or - # of the form "input__.ckpt" for distributed pipelines. - # Note: The default `checkpoint_basename` used by `CheckpointSaverHook` is - # "model.ckpt". We intentionally choose the input pipeline checkpoint prefix - # to be different to avoid conflicts with the model checkpoint. - - # pylint: disable=protected-access - checkpoint_prefix = "input" - if estimator._config.num_worker_replicas > 1: - # Distributed setting. - suffix = "_{}_{}".format(estimator._config.task_type, - estimator._config.task_id) - checkpoint_prefix += suffix - # pylint: enable=protected-access - - # We use a composition paradigm instead of inheriting from - # `CheckpointSaverHook` because `Estimator` does an `isinstance` check - # to check whether a `CheckpointSaverHook` is already present in the list - # of hooks and if not, adds one. Inheriting from `CheckpointSaverHook` - # would thwart this behavior. This hook checkpoints *only the iterators* - # and not the graph variables. - self._checkpoint_saver_hook = basic_session_run_hooks.CheckpointSaverHook( - estimator.model_dir, - save_secs=estimator._config.save_checkpoints_secs, # pylint: disable=protected-access - save_steps=estimator._config.save_checkpoints_steps, # pylint: disable=protected-access - checkpoint_basename=checkpoint_prefix + ".ckpt") - - # Name for the protocol buffer file that will contain the list of most - # recent checkpoints stored as a `CheckpointState` protocol buffer. - # This file, kept in the same directory as the checkpoint files, is - # automatically managed by the `Saver` to keep track of recent checkpoints. - # The default name used by the `Saver` for this file is "checkpoint". Here - # we use the name "checkpoint_" so that in case the - # `checkpoint_dir` is the same as the model checkpoint directory, there are - # no conflicts during restore. - self._latest_filename = "checkpoint_" + checkpoint_prefix - self._first_run = True - - def begin(self): - # Build a Saver that saves all iterators in the `GLOBAL_ITERATORS` - # collection if no `Saver` or `Scaffold` is provided. - # pylint: disable=protected-access - if (self._checkpoint_saver_hook._saver is None and - self._checkpoint_saver_hook._scaffold is None): - iterators = ops.get_collection(iterator_ops.GLOBAL_ITERATORS) - saveables = [_Saveable(i) for i in iterators] - self._checkpoint_saver_hook._saver = _CustomSaver(saveables, - self._latest_filename) - # pylint: enable=protected-access - self._checkpoint_saver_hook.begin() - - def _restore_or_save_initial_ckpt(self, session): - # Ideally this should be run in after_create_session but is not for the - # following reason: - # Currently there is no way of enforcing an order of running the - # `SessionRunHooks`. Hence it is possible that the `_DatasetInitializerHook` - # is run *after* this hook. That is troublesome because - # 1. If a checkpoint exists and this hook restores it, the initializer hook - # will override it. - # 2. If no checkpoint exists, this hook will try to save an initialized - # iterator which will result in an exception. - # - # As a temporary fix we enter the following implicit contract between this - # hook and the _DatasetInitializerHook. - # 1. The _DatasetInitializerHook initializes the iterator in the call to - # after_create_session. - # 2. This hook saves the iterator on the first call to `before_run()`, which - # is guaranteed to happen after `after_create_session()` of all hooks - # have been run. - - # Check if there is an existing checkpoint. If so, restore from it. - # pylint: disable=protected-access - latest_checkpoint_path = checkpoint_management.latest_checkpoint( - self._checkpoint_saver_hook._checkpoint_dir, - latest_filename=self._latest_filename) - if latest_checkpoint_path: - self._checkpoint_saver_hook._get_saver().restore(session, - latest_checkpoint_path) - else: - # The checkpoint saved here is the state at step "global_step". - # Note: We do not save the GraphDef or MetaGraphDef here. - global_step = session.run(self._checkpoint_saver_hook._global_step_tensor) - self._checkpoint_saver_hook._save(session, global_step) - self._checkpoint_saver_hook._timer.update_last_triggered_step(global_step) - # pylint: enable=protected-access - - def before_run(self, run_context): - if self._first_run: - self._restore_or_save_initial_ckpt(run_context.session) - self._first_run = False - return self._checkpoint_saver_hook.before_run(run_context) - - def after_run(self, run_context, run_values): - self._checkpoint_saver_hook.after_run(run_context, run_values) - - def end(self, session): - self._checkpoint_saver_hook.end(session) - - -class _CustomSaver(saver_lib.Saver): - """`Saver` with a different default `latest_filename`. - - This is used in the `CheckpointInputPipelineHook` to avoid conflicts with - the model ckpt saved by the `CheckpointSaverHook`. - """ - - def __init__(self, var_list, latest_filename): - super(_CustomSaver, self).__init__(var_list) - self._latest_filename = latest_filename - - def save(self, - sess, - save_path, - global_step=None, - latest_filename=None, - meta_graph_suffix="meta", - write_meta_graph=True, - write_state=True, - strip_default_attrs=False): - return super(_CustomSaver, self).save( - sess, save_path, global_step, latest_filename or self._latest_filename, - meta_graph_suffix, write_meta_graph, write_state, strip_default_attrs) + super(CheckpointInputPipelineHook, self).__init__(estimator) diff --git a/tensorflow/contrib/data/python/ops/parsing_ops.py b/tensorflow/contrib/data/python/ops/parsing_ops.py index cfbba701b06d9a55e72e038702e0fb45c904f6ba..3aeee9d8e42dce5af133afeeab4a8c97e50d5571 100644 --- a/tensorflow/contrib/data/python/ops/parsing_ops.py +++ b/tensorflow/contrib/data/python/ops/parsing_ops.py @@ -17,92 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import parsing_ops +from tensorflow.python.data.experimental.ops import parsing_ops +from tensorflow.python.util import deprecation -class _ParseExampleDataset(dataset_ops.UnaryDataset): - """A `Dataset` that parses `example` dataset into a `dict` dataset.""" - - def __init__(self, input_dataset, features, num_parallel_calls): - super(_ParseExampleDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - if not all(types == dtypes.string - for types in nest.flatten(input_dataset.output_types)): - raise TypeError("Input dataset should be a dataset of vectors of strings") - self._num_parallel_calls = num_parallel_calls - # pylint: disable=protected-access - self._features = parsing_ops._prepend_none_dimension(features) - # sparse_keys and dense_keys come back sorted here. - (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults, - dense_shapes) = parsing_ops._features_to_raw_params( - self._features, [ - parsing_ops.VarLenFeature, parsing_ops.SparseFeature, - parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature - ]) - # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature. - (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes, - dense_shape_as_shape) = parsing_ops._process_raw_parameters( - None, dense_defaults, sparse_keys, sparse_types, dense_keys, - dense_types, dense_shapes) - # pylint: enable=protected-access - self._sparse_keys = sparse_keys - self._sparse_types = sparse_types - self._dense_keys = dense_keys - self._dense_defaults = dense_defaults_vec - self._dense_shapes = dense_shapes - self._dense_types = dense_types - dense_output_shapes = [ - self._input_dataset.output_shapes.concatenate(shape) - for shape in dense_shape_as_shape - ] - sparse_output_shapes = [ - self._input_dataset.output_shapes.concatenate([None]) - for _ in range(len(sparse_keys)) - ] - - self._output_shapes = dict( - zip(self._dense_keys + self._sparse_keys, - dense_output_shapes + sparse_output_shapes)) - self._output_types = dict( - zip(self._dense_keys + self._sparse_keys, - self._dense_types + self._sparse_types)) - self._output_classes = dict( - zip(self._dense_keys + self._sparse_keys, - [ops.Tensor for _ in range(len(self._dense_defaults))] + - [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys)) - ])) - - def _as_variant_tensor(self): - return gen_dataset_ops.parse_example_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._num_parallel_calls, - self._dense_defaults, - self._sparse_keys, - self._dense_keys, - self._sparse_types, - self._dense_shapes, - **dataset_ops.flat_structure(self)) - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - @property - def output_classes(self): - return self._output_classes - - -# TODO(b/111553342): add arguments names and example names as well. +@deprecation.deprecated( + None, "Use `tf.data.experimental.parse_example_dataset(...)`.") def parse_example_dataset(features, num_parallel_calls=1): """A transformation that parses `Example` protos into a `dict` of tensors. @@ -130,21 +50,4 @@ def parse_example_dataset(features, num_parallel_calls=1): Raises: ValueError: if features argument is None. """ - if features is None: - raise ValueError("Missing: features was %s." % features) - - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls) - if any([ - isinstance(feature, parsing_ops.SparseFeature) - for _, feature in features.items() - ]): - # pylint: disable=protected-access - # pylint: disable=g-long-lambda - out_dataset = out_dataset.map( - lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features( - features, x), num_parallel_calls=num_parallel_calls) - return out_dataset - - return _apply_fn + return parsing_ops.parse_example_dataset(features, num_parallel_calls) diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 58395879e69e6cf75023d120d60c8ca136592165..adfb390cd9a6b159fe3887666993c6e9d6c758d8 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -17,320 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import warnings - -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import iterator_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.eager import context -from tensorflow.python.framework import device as framework_device -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gen_dataset_ops as core_gen_dataset_ops -from tensorflow.python.ops import resource_variable_ops - - -def function_buffering_resource(string_arg, - target_device, - f, - buffer_size, - output_types, - container="", - shared_name=None, - name=None): - """Creates a FunctionBufferingResource. - - A FunctionBufferingResource fills up a buffer by calling a function `f` on - `target_device`. `f` should take in only a single string argument as input. - - Args: - string_arg: The single string argument to the function. - target_device: The device to run `f` on. - f: The function to be executed. - buffer_size: Size of the buffer to be populated. - output_types: The output types generated by the function. - container: (Optional) string. Defaults to "". - shared_name: (Optional) string. - name: (Optional) string to name the op. - - Returns: - Handle to a FunctionBufferingResource. - """ - if shared_name is None: - shared_name = "" - return gen_dataset_ops.function_buffering_resource( - string_arg=string_arg, - target_device=target_device, - shared_name=shared_name, - f=f, - buffer_size=buffer_size, - container=container, - name=name, - output_types=output_types) - - -def function_buffering_resource_get_next(function_buffer_resource, - output_types, - name=None): - return gen_dataset_ops.function_buffering_resource_get_next( - function_buffer_resource=function_buffer_resource, - output_types=output_types, - name=name) - - -def function_buffering_resource_reset(function_buffer_resource, name=None): - return gen_dataset_ops.function_buffering_resource_reset( - function_buffer_resource=function_buffer_resource, name=name) - - -# pylint: disable=protected-access -class _PrefetchToDeviceIterator(object): - """A replacement for `tf.data.Iterator` that prefetches to another device. - - Args: - input_dataset: The input dataset - one_shot: If true, we make a one shot iterator that's already initialized. - device: A fully specified device string where we want to prefetch to - buffer_size: Size of the prefetching buffer. - shared_name: (Optional.) If non-empty, the returned iterator will be - shared under the given name across multiple sessions that share the - same devices (e.g. when using a remote server). - - Returns: - An Iterator type object. - """ - - def __init__(self, - input_dataset, - one_shot, - device, - buffer_size, - shared_name=None): - self._input_dataset = input_dataset - self._get_next_call_count = 0 - self._one_shot = one_shot - if shared_name is None: - shared_name = "" - - if self._one_shot: - self._input_iterator = input_dataset.make_one_shot_iterator() - else: - self._input_iterator = iterator_ops.Iterator.from_structure( - self._input_dataset.output_types, self._input_dataset.output_shapes, - shared_name, self._input_dataset.output_classes) - input_iterator_handle = self._input_iterator.string_handle() - - @function.Defun(dtypes.string) - def _prefetch_fn(handle): - """Prefetches one element from `input_iterator`.""" - remote_iterator = iterator_ops.Iterator.from_string_handle( - handle, self._input_iterator.output_types, - self._input_iterator.output_shapes, - self._input_iterator.output_classes) - ret = remote_iterator.get_next() - return nest.flatten(sparse.serialize_sparse_tensors(ret)) - - iterator_device = gen_dataset_ops.iterator_get_device( - self._input_iterator._iterator_resource) - - with ops.device(device): - self._buffering_resource = function_buffering_resource( - f=_prefetch_fn, - target_device=iterator_device, - string_arg=input_iterator_handle, - buffer_size=buffer_size, - shared_name=shared_name, - output_types=nest.flatten( - sparse.as_dense_types(self._input_dataset.output_types, - self._input_dataset.output_classes))) - - if not self._one_shot: - reset_op = function_buffering_resource_reset(self._buffering_resource) - with ops.control_dependencies([reset_op]): - self._initializer = self._input_iterator.make_initializer( - self._input_dataset) - - def get_next(self, name=None): - """See `tf.data.Iterator.get_next`.""" - self._get_next_call_count += 1 - if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD: - warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE) - - flat_ret = gen_dataset_ops.function_buffering_resource_get_next( - self._buffering_resource, - output_types=nest.flatten(sparse.as_dense_types( - self.output_types, self.output_classes)), name=name) - - ret = sparse.deserialize_sparse_tensors( - nest.pack_sequence_as(self.output_types, flat_ret), - self.output_types, self.output_shapes, self.output_classes) - - for tensor, shape in zip( - nest.flatten(ret), nest.flatten(self.output_shapes)): - if isinstance(tensor, ops.Tensor): - tensor.set_shape(shape) - - return ret - - @property - def initializer(self): - if self._one_shot: - raise NotImplementedError("Can't initialize a one_shot_iterator") - return self._initializer - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_types(self): - return self._input_dataset.output_types - - -class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator): - """A replacement for `tf.data.Iterator` that prefetches to another device. - - Args: - input_dataset: The input dataset - one_shot: If true, we make a one shot iterator that's already initialized. - device: A fully specified device string where we want to prefetch to - buffer_size: Size of the prefetching buffer. - shared_name: (Optional.) If non-empty, the returned iterator will be - shared under the given name across multiple sessions that share the - same devices (e.g. when using a remote server). - - Returns: - An Iterator type object. - """ - - def __init__(self, - input_dataset, - device, - buffer_size): - with ops.device("/device:CPU:0"): - super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset) - input_iterator_handle = core_gen_dataset_ops.iterator_to_string_handle( - self._resource) - - self._device = device - - @function.Defun(dtypes.string) - def _prefetch_fn(handle): - """Prefetches one element from `input_iterator`.""" - remote_iterator = iterator_ops.Iterator.from_string_handle( - handle, self.output_types, self.output_shapes, self.output_classes) - ret = remote_iterator.get_next() - return nest.flatten(sparse.serialize_sparse_tensors(ret)) - - _prefetch_fn.add_to_graph(None) - - with ops.device(device): - self._buffering_resource = function_buffering_resource( - f=_prefetch_fn, - output_types=self._flat_output_types, - target_device=gen_dataset_ops.iterator_get_device(self._resource), - string_arg=input_iterator_handle, - buffer_size=buffer_size, - shared_name=iterator_ops._generate_shared_name( - "function_buffer_resource")) - - def _next_internal(self): - """Returns a nested structure of `tf.Tensor`s containing the next element. - """ - # This runs in sync mode as iterators use an error status to communicate - # that there is no more data to iterate over. - # TODO(b/77291417): Fix - with context.execution_mode(context.SYNC): - with ops.device(self._device): - ret = gen_dataset_ops.function_buffering_resource_get_next( - function_buffer_resource=self._buffering_resource, - output_types=self._flat_output_types) - return sparse.deserialize_sparse_tensors( - nest.pack_sequence_as(self._output_types, ret), self._output_types, - self._output_shapes, self._output_classes) -# pylint: enable=protected-access - - -class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset): - """A `Dataset` whose iterator prefetches elements to another device.""" - - def __init__(self, input_dataset, device, buffer_size): - super(_PrefetchToDeviceDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - self._device = device - self._buffer_size = buffer_size if buffer_size is not None else 1 - - # The static analysis cannot tell that the eager iterator's superclass has - # a `next()` method. - # pylint: disable=non-iterator-returned - def __iter__(self): - """Creates an `Iterator` for enumerating the elements of this dataset. - - The returned iterator implements the Python iterator protocol and therefore - can only be used in eager mode. - - Returns: - An `Iterator` over the elements of this dataset. - - Raises: - RuntimeError: If eager execution is enabled. - """ - if context.executing_eagerly(): - return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device, - self._buffer_size) - else: - raise RuntimeError("dataset.__iter__() is only supported when eager " - "execution is enabled.") - # pylint: enable=non-iterator-returned - - def make_one_shot_iterator(self): - if context.executing_eagerly(): - return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device, - self._buffer_size) - else: - return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True, - device=self._device, - buffer_size=self._buffer_size) - - def make_initializable_iterator(self, shared_name=None): - return _PrefetchToDeviceIterator( - self._input_dataset, - one_shot=False, - device=self._device, - buffer_size=self._buffer_size, - shared_name=shared_name) - - def _as_variant_tensor(self): - # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset - # transformation methods is called. - # TODO(mrry): Investigate support for chaining further transformations after - # the prefetch, including GPU support. - raise NotImplementedError("`prefetch_to_device()` must be the last " - "transformation in a dataset pipeline.") - - @property - def output_types(self): - return self._input_dataset.output_types - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_classes(self): - return self._input_dataset.output_classes +from tensorflow.python.data.experimental.ops import prefetching_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.prefetch_to_device(...)`.") def prefetch_to_device(device, buffer_size=None): """A transformation that prefetches dataset values to the given `device`. @@ -346,12 +38,10 @@ def prefetch_to_device(device, buffer_size=None): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - def _apply_fn(dataset): - return _PrefetchToDeviceDataset(dataset, device, buffer_size) - - return _apply_fn + return prefetching_ops.prefetch_to_device(device, buffer_size) +@deprecation.deprecated(None, "Use `tf.data.experimental.copy_to_device(...)`.") def copy_to_device(target_device, source_device="/cpu:0"): """A transformation that copies dataset elements to the given `target_device`. @@ -363,165 +53,4 @@ def copy_to_device(target_device, source_device="/cpu:0"): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - return _CopyToDeviceDataset( - dataset, target_device=target_device, source_device=source_device) - - return _apply_fn - - -# TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate -# all inputs to the Op are in host memory, thereby avoiding some unnecessary -# Sends and Recvs. -class _CopyToDeviceDataset(dataset_ops.UnaryDataset): - """A `Dataset` that copies elements to another device.""" - - def __init__(self, input_dataset, target_device, source_device="/cpu:0"): - """Constructs a _CopyToDeviceDataset. - - Args: - input_dataset: `Dataset` to be copied - target_device: The name of the device to which elements would be copied. - source_device: Device where input_dataset would be placed. - """ - super(_CopyToDeviceDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - self._target_device = target_device - spec = framework_device.DeviceSpec().from_string(self._target_device) - self._is_gpu_target = (spec.device_type == "GPU") - self._source_device_string = source_device - self._source_device = ops.convert_to_tensor(source_device) - - self._flat_output_shapes = nest.flatten( - sparse.as_dense_shapes(self._input_dataset.output_shapes, - self._input_dataset.output_classes)) - self._flat_output_types = nest.flatten( - sparse.as_dense_types(self._input_dataset.output_types, - self._input_dataset.output_classes)) - - @function.Defun() - def _init_func(): - """Creates an iterator for the input dataset. - - Returns: - A `string` tensor that encapsulates the iterator created. - """ - # pylint: disable=protected-access - ds_variant = self._input_dataset._as_variant_tensor() - resource = core_gen_dataset_ops.anonymous_iterator( - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - with ops.control_dependencies( - [core_gen_dataset_ops.make_iterator(ds_variant, resource)]): - return core_gen_dataset_ops.iterator_to_string_handle(resource) - - @function.Defun() - def _remote_init_func(): - return functional_ops.remote_call( - target=self._source_device, - args=_init_func.captured_inputs, - Tout=[dtypes.string], - f=_init_func) - - self._init_func = _remote_init_func - self._init_captured_args = _remote_init_func.captured_inputs - - @function.Defun(dtypes.string) - def _next_func(string_handle): - """Calls get_next for created iterator. - - Args: - string_handle: An iterator string handle created by _init_func - Returns: - The elements generated from `input_dataset` - """ - with ops.device(self._source_device_string): - iterator = iterator_ops.Iterator.from_string_handle( - string_handle, self.output_types, self.output_shapes, - self.output_classes) - ret = iterator.get_next() - return nest.flatten(sparse.serialize_sparse_tensors(ret)) - - @function.Defun(dtypes.string) - def _remote_next_func(string_handle): - return functional_ops.remote_call( - target=self._source_device, - args=[string_handle] + _next_func.captured_inputs, - Tout=self._flat_output_types, - f=_next_func) - - self._next_func = _remote_next_func - self._next_captured_args = _remote_next_func.captured_inputs - - @function.Defun(dtypes.string) - def _finalize_func(string_handle): - """Destroys the iterator resource created. - - Args: - string_handle: An iterator string handle created by _init_func - Returns: - Tensor constant 0 - """ - iterator_resource = core_gen_dataset_ops.iterator_from_string_handle_v2( - string_handle, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - with ops.control_dependencies([ - resource_variable_ops.destroy_resource_op( - iterator_resource, ignore_lookup_error=True)]): - return array_ops.constant(0, dtypes.int64) - - @function.Defun(dtypes.string) - def _remote_finalize_func(string_handle): - return functional_ops.remote_call( - target=self._source_device, - args=[string_handle] + _finalize_func.captured_inputs, - Tout=[dtypes.int64], - f=_finalize_func) - - self._finalize_func = _remote_finalize_func - self._finalize_captured_args = _remote_finalize_func.captured_inputs - - g = ops.get_default_graph() - _remote_init_func.add_to_graph(g) - _remote_next_func.add_to_graph(g) - _remote_finalize_func.add_to_graph(g) - # pylint: enable=protected-scope - - # The one_shot_iterator implementation needs a 0 arg _make_dataset function - # that thereby captures all the inputs required to create the dataset. Since - # there are strings that are inputs to the GeneratorDataset which can't be - # placed on a GPU, this fails for the GPU case. Therefore, disabling it for - # GPU - def make_one_shot_iterator(self): - if self._is_gpu_target: - raise ValueError("Cannot create a one shot iterator when using " - "`tf.contrib.data.copy_to_device()` on GPU. Please use " - "`Dataset.make_initializable_iterator()` instead.") - else: - return super(_CopyToDeviceDataset, self).make_one_shot_iterator() - - def _as_variant_tensor(self): - with ops.device(self._target_device): - return core_gen_dataset_ops.generator_dataset( - self._init_captured_args, - self._next_captured_args, - self._finalize_captured_args, - init_func=self._init_func, - next_func=self._next_func, - finalize_func=self._finalize_func, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - - @property - def output_types(self): - return self._input_dataset.output_types - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_classes(self): - return self._input_dataset.output_classes + return prefetching_ops.copy_to_device(target_device, source_device) diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py index 344a0763c839f4a769177f7b76534326548bb543..2c951256368a5ffdbc2be424cef12eafc6ecd782 100644 --- a/tensorflow/contrib/data/python/ops/random_ops.py +++ b/tensorflow/contrib/data/python/ops/random_ops.py @@ -17,36 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import random_seed -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.data.experimental.ops import random_ops +from tensorflow.python.util import deprecation -class RandomDataset(dataset_ops.DatasetSource): +class RandomDataset(random_ops.RandomDataset): """A `Dataset` of pseudorandom values.""" + @deprecation.deprecated( + None, "Use `tf.data.experimental.RandomDataset(...)`.") def __init__(self, seed=None): - """A `Dataset` of pseudorandom values.""" - super(RandomDataset, self).__init__() - self._seed, self._seed2 = random_seed.get_seed(seed) - - def _as_variant_tensor(self): - return gen_dataset_ops.random_dataset( - seed=self._seed, - seed2=self._seed2, - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return ops.Tensor - - @property - def output_shapes(self): - return tensor_shape.scalar() - - @property - def output_types(self): - return dtypes.int64 + super(RandomDataset, self).__init__(seed) diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index d9d06e270311a385c2b952d8f3d78341b877e635..4601376dff47e161962e92678883039c4b88bab7 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -17,295 +17,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections -import csv - -import numpy as np - -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.data.python.ops import optimization -from tensorflow.contrib.data.python.ops import parsing_ops -from tensorflow.contrib.data.python.ops import shuffle_ops +from tensorflow.python.data.experimental.ops import optimization +from tensorflow.python.data.experimental.ops import readers from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers as core_readers -from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.lib.io import file_io -from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.platform import gfile +from tensorflow.python.ops import gen_experimental_dataset_ops from tensorflow.python.util import deprecation -_ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32, - dtypes.int64, dtypes.string) - - -def _is_valid_int32(str_val): - try: - # Checks equality to prevent int32 overflow - return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype( - str_val) - except (ValueError, OverflowError): - return False - - -def _is_valid_int64(str_val): - try: - dtypes.int64.as_numpy_dtype(str_val) - return True - except (ValueError, OverflowError): - return False - - -def _is_valid_float(str_val, float_dtype): - try: - return float_dtype.as_numpy_dtype(str_val) < np.inf - except ValueError: - return False - - -def _infer_type(str_val, na_value, prev_type): - """Given a string, infers its tensor type. - - Infers the type of a value by picking the least 'permissive' type possible, - while still allowing the previous type inference for this column to be valid. - - Args: - str_val: String value to infer the type of. - na_value: Additional string to recognize as a NA/NaN CSV value. - prev_type: Type previously inferred based on values of this column that - we've seen up till now. - Returns: - Inferred dtype. - """ - if str_val in ("", na_value): - # If the field is null, it gives no extra information about its type - return prev_type - - type_list = [ - dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string - ] # list of types to try, ordered from least permissive to most - - type_functions = [ - _is_valid_int32, - _is_valid_int64, - lambda str_val: _is_valid_float(str_val, dtypes.float32), - lambda str_val: _is_valid_float(str_val, dtypes.float64), - lambda str_val: True, - ] # Corresponding list of validation functions - - for i in range(len(type_list)): - validation_fn = type_functions[i] - if validation_fn(str_val) and (prev_type is None or - prev_type in type_list[:i + 1]): - return type_list[i] - - -def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header): - """Generator that yields rows of CSV file(s) in order.""" - for fn in filenames: - with file_io.FileIO(fn, "r") as f: - rdr = csv.reader( - f, - delimiter=field_delim, - quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE) - if header: - next(rdr) # Skip header lines - - for csv_row in rdr: - if len(csv_row) != num_cols: - raise ValueError( - "Problem inferring types: CSV row has different number of fields " - "than expected.") - yield csv_row - - -def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim, - na_value, header, num_rows_for_inference, - select_columns): - """Infers column types from the first N valid CSV records of files.""" - if select_columns is None: - select_columns = range(num_cols) - inferred_types = [None] * len(select_columns) - - for i, csv_row in enumerate( - _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header)): - if num_rows_for_inference is not None and i >= num_rows_for_inference: - break - - for j, col_index in enumerate(select_columns): - inferred_types[j] = _infer_type(csv_row[col_index], na_value, - inferred_types[j]) - - # Replace None's with a default type - inferred_types = [t or dtypes.string for t in inferred_types] - # Default to 0 or '' for null values - return [ - constant_op.constant([0 if t is not dtypes.string else ""], dtype=t) - for t in inferred_types - ] - - -def _infer_column_names(filenames, field_delim, use_quote_delim): - """Infers column names from first rows of files.""" - csv_kwargs = { - "delimiter": field_delim, - "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE - } - with file_io.FileIO(filenames[0], "r") as f: - try: - column_names = next(csv.reader(f, **csv_kwargs)) - except StopIteration: - raise ValueError(("Received StopIteration when reading the header line " - "of %s. Empty file?") % filenames[0]) - - for name in filenames[1:]: - with file_io.FileIO(name, "r") as f: - try: - if next(csv.reader(f, **csv_kwargs)) != column_names: - raise ValueError( - "Files have different column names in the header row.") - except StopIteration: - raise ValueError(("Received StopIteration when reading the header line " - "of %s. Empty file?") % filenames[0]) - return column_names - - -def _get_sorted_col_indices(select_columns, column_names): - """Transforms select_columns argument into sorted column indices.""" - names_to_indices = {n: i for i, n in enumerate(column_names)} - num_cols = len(column_names) - for i, v in enumerate(select_columns): - if isinstance(v, int): - if v < 0 or v >= num_cols: - raise ValueError( - "Column index %d specified in select_columns out of valid range." % - v) - continue - if v not in names_to_indices: - raise ValueError( - "Value '%s' specified in select_columns not a valid column index or " - "name." % v) - select_columns[i] = names_to_indices[v] - - # Sort and ensure there are no duplicates - result = sorted(set(select_columns)) - if len(result) != len(select_columns): - raise ValueError("select_columns contains duplicate columns") - return result - - -def _maybe_shuffle_and_repeat( - dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed): - """Optionally shuffle and repeat dataset, as requested.""" - if num_epochs != 1 and shuffle: - # Use shuffle_and_repeat for perf - return dataset.apply( - shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs, - shuffle_seed)) - elif shuffle: - return dataset.shuffle(shuffle_buffer_size, shuffle_seed) - elif num_epochs != 1: - return dataset.repeat(num_epochs) - return dataset - - -def make_tf_record_dataset(file_pattern, - batch_size, - parser_fn=None, - num_epochs=None, - shuffle=True, - shuffle_buffer_size=None, - shuffle_seed=None, - prefetch_buffer_size=optimization.AUTOTUNE, - num_parallel_reads=None, - num_parallel_parser_calls=None, - drop_final_batch=False): - """Reads and optionally parses TFRecord files into a dataset. - - Provides common functionality such as batching, optional parsing, shuffling, - and performant defaults. - - Args: - file_pattern: List of files or patterns of TFRecord file paths. - See `tf.gfile.Glob` for pattern rules. - batch_size: An int representing the number of records to combine - in a single batch. - parser_fn: (Optional.) A function accepting string input to parse - and process the record contents. This function must map records - to components of a fixed shape, so they may be batched. By - default, uses the record contents unmodified. - num_epochs: (Optional.) An int specifying the number of times this - dataset is repeated. If None (the default), cycles through the - dataset forever. - shuffle: (Optional.) A bool that indicates whether the input - should be shuffled. Defaults to `True`. - shuffle_buffer_size: (Optional.) Buffer size to use for - shuffling. A large buffer size ensures better shuffling, but - increases memory usage and startup time. - shuffle_seed: (Optional.) Randomization seed to use for shuffling. - prefetch_buffer_size: (Optional.) An int specifying the number of - feature batches to prefetch for performance improvement. - Defaults to auto-tune. Set to 0 to disable prefetching. - num_parallel_reads: (Optional.) Number of threads used to read - records from files. By default or if set to a value >1, the - results will be interleaved. - num_parallel_parser_calls: (Optional.) Number of parallel - records to parse in parallel. Defaults to an automatic selection. - drop_final_batch: (Optional.) Whether the last batch should be - dropped in case its size is smaller than `batch_size`; the - default behavior is not to drop the smaller batch. - - Returns: - A dataset, where each element matches the output of `parser_fn` - except it will have an additional leading `batch-size` dimension, - or a `batch_size`-length 1-D tensor of strings if `parser_fn` is - unspecified. - """ - files = dataset_ops.Dataset.list_files( - file_pattern, shuffle=shuffle, seed=shuffle_seed) - - if num_parallel_reads is None: - # Note: We considered auto-tuning this value, but there is a concern - # that this affects the mixing of records from different files, which - # could affect training convergence/accuracy, so we are defaulting to - # a constant for now. - num_parallel_reads = 24 - dataset = core_readers.TFRecordDataset( - files, num_parallel_reads=num_parallel_reads) - - if shuffle_buffer_size is None: - # TODO(josh11b): Auto-tune this value when not specified - shuffle_buffer_size = 10000 - dataset = _maybe_shuffle_and_repeat( - dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed) - - # NOTE(mrry): We set `drop_final_batch=True` when `num_epochs is None` to - # improve the shape inference, because it makes the batch dimension static. - # It is safe to do this because in that case we are repeating the input - # indefinitely, and all batches will be full-sized. - drop_final_batch = drop_final_batch or num_epochs is None - - if parser_fn is None: - dataset = dataset.batch(batch_size, drop_remainder=drop_final_batch) - else: - # TODO(josh11b): if num_parallel_parser_calls is None, use some function - # of num cores instead of map_and_batch's default behavior of one batch. - dataset = dataset.apply(batching.map_and_batch( - parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls, - drop_remainder=drop_final_batch)) - - if prefetch_buffer_size == 0: - return dataset - else: - return dataset.prefetch(buffer_size=prefetch_buffer_size) - +@deprecation.deprecated(None, + "Use `tf.data.experimental.make_csv_dataset(...)`.") def make_csv_dataset( file_pattern, batch_size, @@ -387,7 +112,6 @@ def make_csv_dataset( prefetch_buffer_size: An int specifying the number of feature batches to prefetch for performance improvement. Recommended value is the number of batches consumed per training step. Defaults to auto-tune. - num_parallel_reads: Number of threads used to read CSV records from files. If >1, the results will be interleaved. sloppy: If `True`, reading performance will be improved at @@ -411,106 +135,18 @@ def make_csv_dataset( Raises: ValueError: If any of the arguments is malformed. """ - # Create dataset of all matching filenames - filenames = _get_file_names(file_pattern, False) - dataset = dataset_ops.Dataset.from_tensor_slices(filenames) - if shuffle: - dataset = dataset.shuffle(len(filenames), shuffle_seed) - - # Clean arguments; figure out column names and defaults + return readers.make_csv_dataset( + file_pattern, batch_size, column_names, column_defaults, label_name, + select_columns, field_delim, use_quote_delim, na_value, header, + num_epochs, shuffle, shuffle_buffer_size, shuffle_seed, + prefetch_buffer_size, num_parallel_reads, sloppy, num_rows_for_inference, + compression_type) - if column_names is None: - if not header: - raise ValueError("Cannot infer column names without a header line.") - # If column names are not provided, infer from the header lines - column_names = _infer_column_names(filenames, field_delim, use_quote_delim) - if len(column_names) != len(set(column_names)): - raise ValueError("Cannot have duplicate column names.") - if select_columns is not None: - select_columns = _get_sorted_col_indices(select_columns, column_names) - - if column_defaults is not None: - column_defaults = [ - constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x - for x in column_defaults - ] - else: - # If column defaults are not provided, infer from records at graph - # construction time - column_defaults = _infer_column_defaults( - filenames, len(column_names), field_delim, use_quote_delim, na_value, - header, num_rows_for_inference, select_columns) - - if select_columns is not None and len(column_defaults) != len(select_columns): - raise ValueError( - "If specified, column_defaults and select_columns must have same " - "length." - ) - if select_columns is not None and len(column_names) > len(select_columns): - # Pick the relevant subset of column names - column_names = [column_names[i] for i in select_columns] - - if label_name is not None and label_name not in column_names: - raise ValueError("`label_name` provided must be one of the columns.") - - def filename_to_dataset(filename): - return CsvDataset( - filename, - record_defaults=column_defaults, - field_delim=field_delim, - use_quote_delim=use_quote_delim, - na_value=na_value, - select_cols=select_columns, - header=header, - compression_type=compression_type, - ) - - def map_fn(*columns): - """Organizes columns into a features dictionary. - - Args: - *columns: list of `Tensor`s corresponding to one csv record. - Returns: - An OrderedDict of feature names to values for that particular record. If - label_name is provided, extracts the label feature to be returned as the - second element of the tuple. - """ - features = collections.OrderedDict(zip(column_names, columns)) - if label_name is not None: - label = features.pop(label_name) - return features, label - return features - - # Read files sequentially (if num_parallel_reads=1) or in parallel - dataset = dataset.apply( - interleave_ops.parallel_interleave( - filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy)) - - dataset = _maybe_shuffle_and_repeat( - dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed) - - # Apply batch before map for perf, because map has high overhead relative - # to the size of the computation in each map. - # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to - # improve the shape inference, because it makes the batch dimension static. - # It is safe to do this because in that case we are repeating the input - # indefinitely, and all batches will be full-sized. - dataset = dataset.batch(batch_size=batch_size, - drop_remainder=num_epochs is None) - dataset = dataset_ops.MapDataset( - dataset, map_fn, use_inter_op_parallelism=False) - dataset = dataset.prefetch(prefetch_buffer_size) - - return dataset - - -_DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024 # 4 MB - - -class CsvDataset(dataset_ops.DatasetSource): +class CsvDataset(readers.CsvDataset): """A Dataset comprising lines from one or more CSV files.""" + @deprecation.deprecated(None, "Use `tf.data.experimental.CsvDataset(...)`.") def __init__(self, filenames, record_defaults, @@ -521,140 +157,13 @@ class CsvDataset(dataset_ops.DatasetSource): use_quote_delim=True, na_value="", select_cols=None): - """Creates a `CsvDataset` by reading and decoding CSV files. - - The elements of this dataset correspond to records from the file(s). - RFC 4180 format is expected for CSV files - (https://tools.ietf.org/html/rfc4180) - Note that we allow leading and trailing spaces with int or float field. - - - For example, suppose we have a file 'my_file0.csv' with four CSV columns of - different data types: - ``` - abcdefg,4.28E10,5.55E6,12 - hijklmn,-5.3E14,,2 - ``` - - We can construct a CsvDataset from it as follows: - ```python - dataset = tf.contrib.data.CsvDataset( - "my_file*.csv", - [tf.float32, # Required field, use dtype or empty tensor - tf.constant([0.0], dtype=tf.float32), # Optional field, default to 0.0 - tf.int32, # Required field, use dtype or empty tensor - ], - select_cols=[1,2,3] # Only parse last three columns - ) - ``` - - The expected output of its iterations is: - ```python - next_element = dataset.make_one_shot_iterator().get_next() - with tf.Session() as sess: - while True: - try: - print(sess.run(next_element)) - except tf.errors.OutOfRangeError: - break - - >> (4.28e10, 5.55e6, 12) - >> (-5.3e14, 0.0, 2) - ``` - - Args: - filenames: A `tf.string` tensor containing one or more filenames. - record_defaults: A list of default values for the CSV fields. Each item in - the list is either a valid CSV `DType` (float32, float64, int32, int64, - string), or a `Tensor` object with one of the above types. One per - column of CSV data, with either a scalar `Tensor` default value for the - column if it is optional, or `DType` or empty `Tensor` if required. If - both this and `select_columns` are specified, these must have the same - lengths, and `column_defaults` is assumed to be sorted in order of - increasing column index. - compression_type: (Optional.) A `tf.string` scalar evaluating to one of - `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no - compression. - buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes - to buffer while reading files. Defaults to 4MB. - header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s) - have header line(s) that should be skipped when parsing. Defaults to - `False`. - field_delim: (Optional.) A `tf.string` scalar containing the delimiter - character that separates fields in a record. Defaults to `","`. - use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats - double quotation marks as regular characters inside of string fields - (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`. - na_value: (Optional.) A `tf.string` scalar indicating a value that will - be treated as NA/NaN. - select_cols: (Optional.) A sorted list of column indices to select from - the input data. If specified, only this subset of columns will be - parsed. Defaults to parsing all columns. - """ - super(CsvDataset, self).__init__() - self._filenames = ops.convert_to_tensor( - filenames, dtype=dtypes.string, name="filenames") - self._compression_type = convert.optional_param_to_tensor( - "compression_type", - compression_type, - argument_default="", - argument_dtype=dtypes.string) - record_defaults = [ - constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x - for x in record_defaults - ] - self._record_defaults = ops.convert_n_to_tensor( - record_defaults, name="record_defaults") - self._buffer_size = convert.optional_param_to_tensor( - "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES) - self._header = ops.convert_to_tensor( - header, dtype=dtypes.bool, name="header") - self._field_delim = ops.convert_to_tensor( - field_delim, dtype=dtypes.string, name="field_delim") - self._use_quote_delim = ops.convert_to_tensor( - use_quote_delim, dtype=dtypes.bool, name="use_quote_delim") - self._na_value = ops.convert_to_tensor( - na_value, dtype=dtypes.string, name="na_value") - self._select_cols = convert.optional_param_to_tensor( - "select_cols", - select_cols, - argument_default=[], - argument_dtype=dtypes.int64, - ) - self._output_shapes = tuple( - tensor_shape.scalar() for _ in range(len(record_defaults))) - self._output_types = tuple(d.dtype for d in self._record_defaults) - self._output_classes = tuple( - ops.Tensor for _ in range(len(record_defaults))) - - def _as_variant_tensor(self): - # Constructs graph node for the dataset op. - return contrib_gen_dataset_ops.csv_dataset( - filenames=self._filenames, - record_defaults=self._record_defaults, - buffer_size=self._buffer_size, - header=self._header, - output_shapes=self._output_shapes, - field_delim=self._field_delim, - use_quote_delim=self._use_quote_delim, - na_value=self._na_value, - select_cols=self._select_cols, - compression_type=self._compression_type, - ) - - @property - def output_types(self): - return self._output_types - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_classes(self): - return self._output_classes + super(CsvDataset, self).__init__( + filenames, record_defaults, compression_type, buffer_size, header, + field_delim, use_quote_delim, na_value, select_cols) +@deprecation.deprecated( + None, "Use `tf.data.experimental.make_batched_features_dataset(...)`.") def make_batched_features_dataset(file_pattern, batch_size, features, @@ -759,57 +268,15 @@ def make_batched_features_dataset(file_pattern, Raises: ValueError: If `label_key` is not one of the `features` keys. """ - # Create dataset of all matching filenames - filenames = _get_file_names(file_pattern, False) - dataset = dataset_ops.Dataset.from_tensor_slices(filenames) - if shuffle: - dataset = dataset.shuffle(len(filenames), shuffle_seed) - - # Read `Example` records from files as tensor objects. - if reader_args is None: - reader_args = [] + return readers.make_batched_features_dataset( + file_pattern, batch_size, features, reader, label_key, reader_args, + num_epochs, shuffle, shuffle_buffer_size, shuffle_seed, + prefetch_buffer_size, reader_num_threads, parser_num_threads, + sloppy_ordering, drop_final_batch) - # Read files sequentially (if reader_num_threads=1) or in parallel - dataset = dataset.apply( - interleave_ops.parallel_interleave( - lambda filename: reader(filename, *reader_args), - cycle_length=reader_num_threads, - sloppy=sloppy_ordering)) - # Extract values if the `Example` tensors are stored as key-value tuples. - if dataset.output_types == (dtypes.string, dtypes.string): - dataset = dataset_ops.MapDataset( - dataset, lambda _, v: v, use_inter_op_parallelism=False) - - # Apply dataset repeat and shuffle transformations. - dataset = _maybe_shuffle_and_repeat( - dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed) - - # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to - # improve the shape inference, because it makes the batch dimension static. - # It is safe to do this because in that case we are repeating the input - # indefinitely, and all batches will be full-sized. - dataset = dataset.batch( - batch_size, drop_remainder=drop_final_batch or num_epochs is None) - - # Parse `Example` tensors to a dictionary of `Feature` tensors. - dataset = dataset.apply( - parsing_ops.parse_example_dataset( - features, num_parallel_calls=parser_num_threads)) - - if label_key: - if label_key not in features: - raise ValueError( - "The `label_key` provided (%r) must be one of the `features` keys." % - label_key) - dataset = dataset.map(lambda x: (x, x.pop(label_key))) - - dataset = dataset.prefetch(prefetch_buffer_size) - return dataset - - -@deprecation.deprecated(None, - "Use `tf.contrib.data.make_batched_features_dataset`") +@deprecation.deprecated( + None, "Use `tf.data.experimental.make_batched_features_dataset(...)`") def read_batch_features(file_pattern, batch_size, features, @@ -879,7 +346,7 @@ def read_batch_features(file_pattern, Returns: A dict from keys in features to `Tensor` or `SparseTensor` objects. """ - dataset = make_batched_features_dataset( + dataset = readers.make_batched_features_dataset( file_pattern, batch_size, features, @@ -893,96 +360,13 @@ def read_batch_features(file_pattern, return outputs -def _get_file_names(file_pattern, shuffle): - """Parse list of file names from pattern, optionally shuffled. - - Args: - file_pattern: File glob pattern, or list of glob patterns. - shuffle: Whether to shuffle the order of file names. - - Returns: - List of file names matching `file_pattern`. - - Raises: - ValueError: If `file_pattern` is empty, or pattern matches no files. - """ - if isinstance(file_pattern, list): - if not file_pattern: - raise ValueError("File pattern is empty.") - file_names = [] - for entry in file_pattern: - file_names.extend(gfile.Glob(entry)) - else: - file_names = list(gfile.Glob(file_pattern)) - - if not file_names: - raise ValueError("No files match %s." % file_pattern) - - # Sort files so it will be deterministic for unit tests. - if not shuffle: - file_names = sorted(file_names) - return file_names - - -class SqlDataset(dataset_ops.DatasetSource): +class SqlDataset(readers.SqlDataset): """A `Dataset` consisting of the results from a SQL query.""" + @deprecation.deprecated(None, "Use `tf.data.experimental.SqlDataset(...)`.") def __init__(self, driver_name, data_source_name, query, output_types): - """Creates a `SqlDataset`. - - `SqlDataset` allows a user to read data from the result set of a SQL query. - For example: - - ```python - dataset = tf.contrib.data.SqlDataset("sqlite", "/foo/bar.sqlite3", - "SELECT name, age FROM people", - (tf.string, tf.int32)) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - # Prints the rows of the result set of the above query. - while True: - try: - print(sess.run(next_element)) - except tf.errors.OutOfRangeError: - break - ``` - - Args: - driver_name: A 0-D `tf.string` tensor containing the database type. - Currently, the only supported value is 'sqlite'. - data_source_name: A 0-D `tf.string` tensor containing a connection string - to connect to the database. - query: A 0-D `tf.string` tensor containing the SQL query to execute. - output_types: A tuple of `tf.DType` objects representing the types of the - columns returned by `query`. - """ - super(SqlDataset, self).__init__() - self._driver_name = ops.convert_to_tensor( - driver_name, dtype=dtypes.string, name="driver_name") - self._data_source_name = ops.convert_to_tensor( - data_source_name, dtype=dtypes.string, name="data_source_name") - self._query = ops.convert_to_tensor( - query, dtype=dtypes.string, name="query") - self._output_types = output_types - - def _as_variant_tensor(self): - return gen_dataset_ops.sql_dataset(self._driver_name, - self._data_source_name, self._query, - nest.flatten(self.output_types), - nest.flatten(self.output_shapes)) - - @property - def output_classes(self): - return nest.map_structure(lambda _: ops.Tensor, self._output_types) - - @property - def output_shapes(self): - return nest.map_structure(lambda _: tensor_shape.TensorShape([]), - self._output_types) - - @property - def output_types(self): - return self._output_types + super(SqlDataset, self).__init__( + driver_name, data_source_name, query, output_types) class LMDBDataset(dataset_ops.DatasetSource): @@ -1013,7 +397,7 @@ class LMDBDataset(dataset_ops.DatasetSource): filenames, dtype=dtypes.string, name="filenames") def _as_variant_tensor(self): - return contrib_gen_dataset_ops.lmdb_dataset( + return gen_experimental_dataset_ops.experimental_lmdb_dataset( self._filenames, output_types=nest.flatten(self.output_types), output_shapes=nest.flatten(self.output_shapes)) diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 75642f143e19c3d77e675384362c4dab94e10932..29d77528d95ba62783c1f7c1c0df530ed3929c9e 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -17,22 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - -from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.data.python.ops import scan_ops -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import logging_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops +from tensorflow.python.data.experimental.ops import resampling +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.rejection_resample(...)`.") def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): """A transformation that resamples a dataset to achieve a target distribution. @@ -52,243 +42,5 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - def _apply_fn(dataset): - """Function from `Dataset` to `Dataset` that applies the transformation.""" - target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") - class_values_ds = dataset.map(class_func) - - # Get initial distribution. - if initial_dist is not None: - initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") - acceptance_dist, prob_of_original = ( - _calculate_acceptance_probs_with_mixing(initial_dist_t, - target_dist_t)) - initial_dist_ds = dataset_ops.Dataset.from_tensors( - initial_dist_t).repeat() - acceptance_dist_ds = dataset_ops.Dataset.from_tensors( - acceptance_dist).repeat() - prob_of_original_ds = dataset_ops.Dataset.from_tensors( - prob_of_original).repeat() - else: - initial_dist_ds = _estimate_initial_dist_ds( - target_dist_t, class_values_ds) - acceptance_and_original_prob_ds = initial_dist_ds.map( - lambda initial: _calculate_acceptance_probs_with_mixing( - initial, target_dist_t)) - acceptance_dist_ds = acceptance_and_original_prob_ds.map( - lambda accept_prob, _: accept_prob) - prob_of_original_ds = acceptance_and_original_prob_ds.map( - lambda _, prob_original: prob_original) - filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, - class_values_ds, seed) - # Prefetch filtered dataset for speed. - filtered_ds = filtered_ds.prefetch(3) - - prob_original_static = _get_prob_original_static( - initial_dist_t, target_dist_t) if initial_dist is not None else None - if prob_original_static == 1: - return dataset_ops.Dataset.zip((class_values_ds, dataset)) - elif prob_original_static == 0: - return filtered_ds - else: - return interleave_ops.sample_from_datasets( - [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds], - weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]), - seed=seed) - - return _apply_fn - - -def _get_prob_original_static(initial_dist_t, target_dist_t): - """Returns the static probability of sampling from the original. - - `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters - an Op that it isn't defined for. We have some custom logic to avoid this. - - Args: - initial_dist_t: A tensor of the initial distribution. - target_dist_t: A tensor of the target distribution. - - Returns: - The probability of sampling from the original distribution as a constant, - if it is a constant, or `None`. - """ - init_static = tensor_util.constant_value(initial_dist_t) - target_static = tensor_util.constant_value(target_dist_t) - - if init_static is None or target_static is None: - return None - else: - return np.min(target_static / init_static) - - -def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds, - seed): - """Filters a dataset based on per-class acceptance probabilities. - - Args: - dataset: The dataset to be filtered. - acceptance_dist_ds: A dataset of acceptance probabilities. - initial_dist_ds: A dataset of the initial probability distribution, given or - estimated. - class_values_ds: A dataset of the corresponding classes. - seed: (Optional.) Python integer seed for the resampler. - - Returns: - A dataset of (class value, data) after filtering. - """ - def maybe_warn_on_large_rejection(accept_dist, initial_dist): - proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist) - return control_flow_ops.cond( - math_ops.less(proportion_rejected, .5), - lambda: accept_dist, - lambda: logging_ops.Print( # pylint: disable=g-long-lambda - accept_dist, [proportion_rejected, initial_dist, accept_dist], - message="Proportion of examples rejected by sampler is high: ", - summarize=100, - first_n=10)) - - acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds, - initial_dist_ds)) - .map(maybe_warn_on_large_rejection)) - - def _gather_and_copy(class_val, acceptance_prob, data): - return class_val, array_ops.gather(acceptance_prob, class_val), data - - current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) - filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) - return filtered_ds.map(lambda class_value, _, data: (class_value, data)) - - -def _estimate_initial_dist_ds( - target_dist_t, class_values_ds, dist_estimation_batch_size=32, - smoothing_constant=10): - num_classes = (target_dist_t.shape[0].value or - array_ops.shape(target_dist_t)[0]) - initial_examples_per_class_seen = array_ops.fill( - [num_classes], np.int64(smoothing_constant)) - - def update_estimate_and_tile(num_examples_per_class_seen, c): - updated_examples_per_class_seen, dist = _estimate_data_distribution( - c, num_examples_per_class_seen) - tiled_dist = array_ops.tile( - array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) - return updated_examples_per_class_seen, tiled_dist - - initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) - .apply(scan_ops.scan(initial_examples_per_class_seen, - update_estimate_and_tile)) - .apply(batching.unbatch())) - - return initial_dist_ds - - -def _get_target_to_initial_ratio(initial_probs, target_probs): - # Add tiny to initial_probs to avoid divide by zero. - denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny) - return target_probs / denom - - -def _estimate_data_distribution(c, num_examples_per_class_seen): - """Estimate data distribution as labels are seen. - - Args: - c: The class labels. Type `int32`, shape `[batch_size]`. - num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, - containing counts. - - Returns: - num_examples_per_lass_seen: Updated counts. Type `int64`, shape - `[num_classes]`. - dist: The updated distribution. Type `float32`, shape `[num_classes]`. - """ - num_classes = num_examples_per_class_seen.get_shape()[0].value - # Update the class-count based on what labels are seen in batch. - num_examples_per_class_seen = math_ops.add( - num_examples_per_class_seen, math_ops.reduce_sum( - array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) - init_prob_estimate = math_ops.truediv( - num_examples_per_class_seen, - math_ops.reduce_sum(num_examples_per_class_seen)) - dist = math_ops.cast(init_prob_estimate, dtypes.float32) - return num_examples_per_class_seen, dist - - -def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs): - """Calculates the acceptance probabilities and mixing ratio. - - In this case, we assume that we can *either* sample from the original data - distribution with probability `m`, or sample from a reshaped distribution - that comes from rejection sampling on the original distribution. This - rejection sampling is done on a per-class basis, with `a_i` representing the - probability of accepting data from class `i`. - - This method is based on solving the following analysis for the reshaped - distribution: - - Let F be the probability of a rejection (on any example). - Let p_i be the proportion of examples in the data in class i (init_probs) - Let a_i is the rate the rejection sampler should *accept* class i - Let t_i is the target proportion in the minibatches for class i (target_probs) - - ``` - F = sum_i(p_i * (1-a_i)) - = 1 - sum_i(p_i * a_i) using sum_i(p_i) = 1 - ``` - - An example with class `i` will be accepted if `k` rejections occur, then an - example with class `i` is seen by the rejector, and it is accepted. This can - be written as follows: - - ``` - t_i = sum_k=0^inf(F^k * p_i * a_i) - = p_i * a_j / (1 - F) using geometric series identity, since 0 <= F < 1 - = p_i * a_i / sum_j(p_j * a_j) using F from above - ``` - - Note that the following constraints hold: - ``` - 0 <= p_i <= 1, sum_i(p_i) = 1 - 0 <= a_i <= 1 - 0 <= t_i <= 1, sum_i(t_i) = 1 - ``` - - A solution for a_i in terms of the other variables is the following: - ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` - - If we try to minimize the amount of data rejected, we get the following: - - M_max = max_i [ t_i / p_i ] - M_min = min_i [ t_i / p_i ] - - The desired probability of accepting data if it comes from class `i`: - - a_i = (t_i/p_i - m) / (M_max - m) - - The desired probability of pulling a data element from the original dataset, - rather than the filtered one: - - m = M_min - - Args: - initial_probs: A Tensor of the initial probability distribution, given or - estimated. - target_probs: A Tensor of the corresponding classes. - - Returns: - (A 1D Tensor with the per-class acceptance probabilities, the desired - probability of pull from the original distribution.) - """ - ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs) - max_ratio = math_ops.reduce_max(ratio_l) - min_ratio = math_ops.reduce_min(ratio_l) - - # Target prob to sample from original distribution. - m = min_ratio - - # TODO(joelshor): Simplify fraction, if possible. - a_i = (ratio_l - m) / (max_ratio - m) - return a_i, m + return resampling.rejection_resample(class_func, target_dist, initial_dist, + seed) diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index c52582cd35565bbfab9f49760684608656f77223..0ca9fddb23b20995bdcd4d45aa675537111c4552 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -17,137 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import gen_dataset_ops - - -class _ScanDataset(dataset_ops.UnaryDataset): - """A dataset that scans a function across its input.""" - - def __init__(self, input_dataset, initial_state, scan_func): - """See `scan()` for details.""" - super(_ScanDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - - with ops.name_scope("initial_state"): - # Convert any `SparseTensorValue`s to `SparseTensor`s and all other - # values to tensors. - self._initial_state = nest.pack_sequence_as(initial_state, [ - sparse_tensor.SparseTensor.from_value(t) - if sparse_tensor.is_sparse(t) else ops.convert_to_tensor( - t, name="component_%d" % i) - for i, t in enumerate(nest.flatten(initial_state)) - ]) - - # Compute initial values for the state classes, shapes and types based on - # the initial state. The shapes may be refined by running `tf_scan_func` one - # or more times below. - self._state_classes = sparse.get_classes(self._initial_state) - self._state_shapes = nest.pack_sequence_as( - self._initial_state, - [t.get_shape() for t in nest.flatten(self._initial_state)]) - self._state_types = nest.pack_sequence_as( - self._initial_state, - [t.dtype for t in nest.flatten(self._initial_state)]) - - # Will be populated by calling `tf_scan_func`. - self._output_classes = None - self._output_shapes = None - self._output_types = None - - # Iteratively rerun the scan function until reaching a fixed point on - # `self._state_shapes`. - need_to_rerun = True - while need_to_rerun: - - wrapped_func = dataset_ops.StructuredFunctionWrapper( - scan_func, "tf.contrib.data.scan()", - input_classes=(self._state_classes, input_dataset.output_classes), - input_shapes=(self._state_shapes, input_dataset.output_shapes), - input_types=(self._state_types, input_dataset.output_types), - add_to_graph=False) - if not ( - isinstance(wrapped_func.output_types, collections.Sequence) and - len(wrapped_func.output_types) == 2): - raise TypeError("The scan function must return a pair comprising the " - "new state and the output value.") - - new_state_classes, self._output_classes = wrapped_func.output_classes - - # Extract and validate class information from the returned values. - for new_state_class, state_class in zip( - nest.flatten(new_state_classes), - nest.flatten(self._state_classes)): - if not issubclass(new_state_class, state_class): - raise TypeError( - "The element classes for the new state must match the initial " - "state. Expected %s; got %s." % - (self._state_classes, new_state_classes)) - - # Extract and validate type information from the returned values. - new_state_types, self._output_types = wrapped_func.output_types - for new_state_type, state_type in zip( - nest.flatten(new_state_types), nest.flatten(self._state_types)): - if new_state_type != state_type: - raise TypeError( - "The element types for the new state must match the initial " - "state. Expected %s; got %s." % - (self._state_types, new_state_types)) - - # Extract shape information from the returned values. - new_state_shapes, self._output_shapes = wrapped_func.output_shapes - - flat_state_shapes = nest.flatten(self._state_shapes) - flat_new_state_shapes = nest.flatten(new_state_shapes) - weakened_state_shapes = [ - original.most_specific_compatible_shape(new) - for original, new in zip(flat_state_shapes, flat_new_state_shapes) - ] - - need_to_rerun = False - for original_shape, weakened_shape in zip(flat_state_shapes, - weakened_state_shapes): - if original_shape.ndims is not None and ( - weakened_shape.ndims is None or - original_shape.as_list() != weakened_shape.as_list()): - need_to_rerun = True - break - - if need_to_rerun: - self._state_shapes = nest.pack_sequence_as(self._state_shapes, - weakened_state_shapes) - - self._scan_func = wrapped_func.function - self._scan_func.add_to_graph(ops.get_default_graph()) - - def _as_variant_tensor(self): - input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access - return gen_dataset_ops.scan_dataset( - input_t, - nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)), - self._scan_func.captured_inputs, - f=self._scan_func, - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types +from tensorflow.python.data.experimental.ops import scan_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, "Use `tf.data.experimental.scan(...)`.") def scan(initial_state, scan_func): """A transformation that scans a function across an input dataset. @@ -168,7 +42,4 @@ def scan(initial_state, scan_func): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - def _apply_fn(dataset): - return _ScanDataset(dataset, initial_state, scan_func) - - return _apply_fn + return scan_ops.scan(initial_state, scan_func) diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py index 985d1d87d0834a17f0a4f147ab8ace3e2db05f67..329b34fdfecf026688c3ebd210d3400a427940a8 100644 --- a/tensorflow/contrib/data/python/ops/shuffle_ops.py +++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py @@ -17,54 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import random_seed -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops - - -class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset): - """A `Dataset` that fuses `shuffle` and `repeat`.""" - - def __init__(self, input_dataset, buffer_size, count=None, seed=None): - super(_ShuffleAndRepeatDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - self._buffer_size = ops.convert_to_tensor( - buffer_size, dtype=dtypes.int64, name="buffer_size") - if count is None: - self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count") - else: - self._count = ops.convert_to_tensor( - count, dtype=dtypes.int64, name="count") - self._seed, self._seed2 = random_seed.get_seed(seed) - - def _as_variant_tensor(self): - # pylint: disable=protected-access - input_resource = self._input_dataset._as_variant_tensor() - return gen_dataset_ops.shuffle_and_repeat_dataset( - input_resource, - buffer_size=self._buffer_size, - count=self._count, - seed=self._seed, - seed2=self._seed2, - **dataset_ops.flat_structure(self)) - # pylint: enable=protected-access - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_types(self): - return self._input_dataset.output_types +from tensorflow.python.data.experimental.ops import shuffle_ops +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, + "Use `tf.data.experimental.shuffle_and_repeat(...)`.") def shuffle_and_repeat(buffer_size, count=None, seed=None): """Shuffles and repeats a Dataset returning a new permutation for each epoch. @@ -93,8 +51,4 @@ def shuffle_and_repeat(buffer_size, count=None, seed=None): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): # pylint: disable=missing-docstring - return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed) - - return _apply_fn + return shuffle_ops.shuffle_and_repeat(buffer_size, count, seed) diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py index 9d165ad52a55376041c8e5d76abc32d8b39f7f99..20cceb4647ae6d5f80a9dbac3baed72d50254f09 100644 --- a/tensorflow/contrib/data/python/ops/threadpool.py +++ b/tensorflow/contrib/data/python/ops/threadpool.py @@ -17,89 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import threading - -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.eager import context -from tensorflow.python.ops import resource_variable_ops - -_uid_counter = 0 -_uid_lock = threading.Lock() - - -def _generate_shared_name(prefix): - with _uid_lock: - global _uid_counter - uid = _uid_counter - _uid_counter += 1 - return "{}{}".format(prefix, uid) - - -# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable -# or make private / remove. -class PrivateThreadPool(object): - """A stateful resource that represents a private thread pool.""" - - def __init__(self, num_threads, display_name=None, - max_intra_op_parallelism=1): - """Creates a `PrivateThreadPool` with the given number of threads.""" - if context.executing_eagerly(): - shared_name = _generate_shared_name("privatethreadpool") - self._resource = gen_dataset_ops.thread_pool_handle( - num_threads=num_threads, - max_intra_op_parallelism=max_intra_op_parallelism, - display_name=display_name, - shared_name=shared_name) - self._resource_deleter = resource_variable_ops.EagerResourceDeleter( - handle=self._resource, handle_device=context.context().device_name) - else: - self._resource = gen_dataset_ops.thread_pool_handle( - num_threads=num_threads, - max_intra_op_parallelism=max_intra_op_parallelism, - display_name=display_name) - - -class _ThreadPoolDataset(dataset_ops.UnaryDataset): - """A `Dataset` that acts as an identity, and sets a custom threadpool.""" - - def __init__(self, input_dataset, thread_pool): - super(_ThreadPoolDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - self._thread_pool = thread_pool - - def _as_variant_tensor(self): - return gen_dataset_ops.thread_pool_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._thread_pool._resource, # pylint: disable=protected-access - **dataset_ops.flat_structure(self)) - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_types(self): - return self._input_dataset.output_types - - @property - def output_classes(self): - return self._input_dataset.output_classes - - -# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable -# or make private / remove. -def override_threadpool(dataset, thread_pool): - """Returns a new dataset that uses the given thread pool for its operations. - - Args: - dataset: A `tf.data.Dataset` object. - thread_pool: A `PrivateThreadPool` object. - - Returns: - A dataset containing the same values as `dataset`, but which uses - `thread_pool` to compute any of its parallel operations (such as - `tf.data.Dataset.map`). - """ - return _ThreadPoolDataset(dataset, thread_pool) +# pylint: disable=unused-import +from tensorflow.python.data.experimental.ops.threadpool import override_threadpool +from tensorflow.python.data.experimental.ops.threadpool import PrivateThreadPool diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py index bad67a580d6be5add1151a19e90097ea5ce0b581..909d06c677ea29733966e0c19a7543b149d2fe74 100644 --- a/tensorflow/contrib/data/python/ops/unique.py +++ b/tensorflow/contrib/data/python/ops/unique.py @@ -17,12 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes +from tensorflow.python.data.experimental.ops import unique as experimental_unique +from tensorflow.python.util import deprecation +@deprecation.deprecated(None, "Use `tf.data.experimental.unique()`.") def unique(): """Creates a `Dataset` from another `Dataset`, discarding duplicates. @@ -40,39 +39,4 @@ def unique(): A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ - - def _apply_fn(dataset): - return _UniqueDataset(dataset) - - return _apply_fn - - -class _UniqueDataset(dataset_ops.UnaryDataset): - """A `Dataset` contains the unique elements from its input.""" - - def __init__(self, input_dataset): - """See `unique()` for details.""" - super(_UniqueDataset, self).__init__(input_dataset) - self._input_dataset = input_dataset - if input_dataset.output_types not in (dtypes.int32, dtypes.int64, - dtypes.string): - raise TypeError( - "`tf.contrib.data.unique()` only supports inputs with a single " - "`tf.int32`, `tf.int64`, or `tf.string` component.") - - def _as_variant_tensor(self): - return gen_dataset_ops.unique_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - return self._input_dataset.output_shapes - - @property - def output_types(self): - return self._input_dataset.output_types + return experimental_unique.unique() diff --git a/tensorflow/contrib/data/python/ops/writers.py b/tensorflow/contrib/data/python/ops/writers.py index c455fdcba673853079ff0d162c4799e72bc8e627..42fb69bf077afbd2094f6eb1bf3fe7b17f761910 100644 --- a/tensorflow/contrib/data/python/ops/writers.py +++ b/tensorflow/contrib/data/python/ops/writers.py @@ -17,42 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import convert -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.data.experimental.ops import writers +from tensorflow.python.util import deprecation -class TFRecordWriter(object): +class TFRecordWriter(writers.TFRecordWriter): """Writes data to a TFRecord file.""" + @deprecation.deprecated( + None, "Use `tf.data.experimental.TFRecordWriter(...)`.") def __init__(self, filename, compression_type=None): - self._filename = ops.convert_to_tensor( - filename, dtypes.string, name="filename") - self._compression_type = convert.optional_param_to_tensor( - "compression_type", - compression_type, - argument_default="", - argument_dtype=dtypes.string) - - def write(self, dataset): - """Returns a `tf.Operation` to write a dataset to a file. - - Args: - dataset: a `tf.data.Dataset` whose elements are to be written to a file - - Returns: - A `tf.Operation` that, when run, writes contents of `dataset` to a file. - """ - if not isinstance(dataset, dataset_ops.Dataset): - raise TypeError("`dataset` must be a `tf.data.Dataset` object.") - if (dataset.output_types != dtypes.string or - dataset.output_shapes != tensor_shape.scalar()): - raise TypeError( - "`dataset` must produce scalar `DT_STRING` tensors whereas it " - "produces shape {0} and types {1}".format(dataset.output_shapes, - dataset.output_types)) - return gen_dataset_ops.dataset_to_tf_record( - dataset._as_variant_tensor(), self._filename, self._compression_type) # pylint: disable=protected-access + super(TFRecordWriter, self).__init__(filename, compression_type) diff --git a/tensorflow/contrib/decision_trees/proto/BUILD b/tensorflow/contrib/decision_trees/proto/BUILD index 3b50a48336d77ebd9327fa24e5612a95d5d0c372..06940a90d5cf5c193b026a20c3a5fa41e778b0a9 100644 --- a/tensorflow/contrib/decision_trees/proto/BUILD +++ b/tensorflow/contrib/decision_trees/proto/BUILD @@ -17,7 +17,6 @@ tf_proto_library( name = "generic_tree_model", srcs = ["generic_tree_model.proto"], cc_api_version = 2, - java_api_version = 2, visibility = ["//visibility:public"], ) diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md index 91a27f97b7f75511db4b377220a353787beca30e..2e025765e4aaab7114aa6e3e79336e48a71b5b55 100644 --- a/tensorflow/contrib/distribute/README.md +++ b/tensorflow/contrib/distribute/README.md @@ -231,7 +231,8 @@ The same `input_fn` will be used for all workers if you use important to shuffle your dataset in your `input_fn`. `MirroredStrategy` will insert a `tf.dataset.Dataset.shard` call in you -`input_fn`. As a result, each worker gets a fraction of your input data. +`input_fn` if `auto_shard_dataset` is set to `True`. As a result, each worker +gets a fraction of your input data. ### Performance Tips diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 7eead6e4729dc81f42821c26d2fda694dbf88a43..76d5b59ce17279b7c6d2d930504153fc31deb8e2 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -22,14 +22,15 @@ py_library( visibility = ["//tensorflow:internal"], deps = [ ":input_ops", + ":prefetching_ops_v2", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:device_util", "//tensorflow/python:distribute", "//tensorflow/python:framework_ops", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python:training", "//tensorflow/python:util", - "//tensorflow/python/data/ops:multi_device_iterator_ops", "//tensorflow/python/eager:context", "//tensorflow/python/training/checkpointable:base", "@six_archive//:six", @@ -410,6 +411,24 @@ cuda_py_test( ], ) +cuda_py_test( + name = "moving_averages_test", + srcs = ["moving_averages_test.py"], + additional_deps = [ + ":combinations", + "@absl_py//absl/testing:parameterized", + "//tensorflow/python/eager:test", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + ], + tags = [ + "no_pip", + ], +) + cuda_py_test( name = "optimizer_v2_test", srcs = ["optimizer_v2_test.py"], @@ -453,7 +472,7 @@ cuda_py_test( cuda_py_test( name = "estimator_training_test", - size = "large", + size = "enormous", srcs = ["estimator_training_test.py"], additional_deps = [ ":combinations", @@ -647,6 +666,32 @@ cuda_py_test( ], ) +py_library( + name = "prefetching_ops_v2", + srcs = ["prefetching_ops_v2.py"], + deps = [ + "//tensorflow/contrib/data/python/ops:prefetching_ops", + "//tensorflow/python:experimental_dataset_ops_gen", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +cuda_py_test( + name = "prefetching_ops_v2_test", + srcs = ["prefetching_ops_v2_test.py"], + additional_deps = [ + ":prefetching_ops_v2", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + ], +) + py_library( name = "input_ops", srcs = ["input_ops.py"], @@ -701,6 +746,7 @@ cuda_py_test( additional_deps = [ ":keras_test_lib", ], + shard_count = 16, tags = [ "multi_and_single_gpu", "no_pip", @@ -709,18 +755,27 @@ cuda_py_test( ], ) -cuda_py_test( - name = "metrics_v1_test", +py_library( + name = "metrics_v1_test_lib", + testonly = 1, srcs = ["metrics_v1_test.py"], - additional_deps = [ + deps = [ ":combinations", - "@absl_py//absl/testing:parameterized", "//tensorflow/contrib/data/python/ops:batching", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", "//tensorflow/python:variables", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/eager:test", + "@absl_py//absl/testing:parameterized", + ], +) + +cuda_py_test( + name = "metrics_v1_test", + srcs = ["metrics_v1_test.py"], + additional_deps = [ + ":metrics_v1_test_lib", ], tags = [ "multi_and_single_gpu", diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py index c900b41e149d3e87fca698f3e94ad24ec1541bac..9809204f8f107270b5a7b51e65e06afdae7d96b8 100644 --- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py +++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py @@ -216,7 +216,7 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy): """Configures the object. Args: - session_config: a @{tf.ConfigProto} + session_config: a `tf.ConfigProto` cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the cluster configurations. task_type: the current task type, such as "worker". diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py index 33ffbf6abe1ef0cbd018d96d505482de8c76fd11..6796a23d464d344554ae9654e0992e30df5ad213 100644 --- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py +++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py @@ -128,7 +128,8 @@ class CollectiveAllReduceStrategyTestBase( # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.reduce( variable_scope.VariableAggregation.SUM, g, destinations=v) - with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + with ops.control_dependencies( + d.update(v, update, g, grouped=False)): after_list.append(d.read_var(v)) return before_list, after_list diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 244d1fcec8ba481337afeede181c29d0552e3c44..63a163e76cdd99c73399c657cbe9bc3d010369d2 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -59,6 +59,7 @@ from tensorflow.python.training import adagrad from tensorflow.python.training import adam from tensorflow.python.training import distribution_strategy_context from tensorflow.python.training import gradient_descent +from tensorflow.python.training import rmsprop from tensorflow.python.util import tf_inspect @@ -328,10 +329,10 @@ one_device_strategy = NamedDistribution( required_gpus=None) tpu_strategy = NamedDistribution( "TPU", lambda: tpu_lib.TPUStrategy( - TPUClusterResolver(""), steps_per_run=5), + TPUClusterResolver(""), steps_per_run=2), required_tpu=True) tpu_strategy_one_step = NamedDistribution( - "TPU", lambda: tpu_lib.TPUStrategy( + "TPUOneStep", lambda: tpu_lib.TPUStrategy( TPUClusterResolver(""), steps_per_run=1), required_tpu=True) # Note that we disable prefetching for testing since prefetching makes @@ -348,24 +349,26 @@ mirrored_strategy_with_two_gpus = NamedDistribution( required_gpus=2) -adam_optimizer_v1_fn = NamedObject( - "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1)) gradient_descent_optimizer_v1_fn = NamedObject( "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) adagrad_optimizer_v1_fn = NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) -optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn, - adagrad_optimizer_v1_fn] +adam_optimizer_v1_fn = NamedObject("AdamV1", + lambda: adam.AdamOptimizer(0.001, epsilon=1)) +rmsprop_optimizer_v1_fn = NamedObject( + "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) + +optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] -adam_optimizer_v2_fn = NamedObject( - "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1)) gradient_descent_optimizer_v2_fn = NamedObject( "GradientDescentV2", lambda: gradient_descent_v2.GradientDescentOptimizer(0.2)) adagrad_optimizer_v2_fn = NamedObject( "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001)) -optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn, - adagrad_optimizer_v2_fn] +adam_optimizer_v2_fn = NamedObject( + "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1)) + +optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn] graph_and_eager_modes = ["graph", "eager"] diff --git a/tensorflow/contrib/distribute/python/examples/keras_mnist.py b/tensorflow/contrib/distribute/python/examples/keras_mnist.py index a84ef041960e389c08246fc8a16df2300856d968..da7f8c548f94972b6ec0a67848e1520386d1e28b 100644 --- a/tensorflow/contrib/distribute/python/examples/keras_mnist.py +++ b/tensorflow/contrib/distribute/python/examples/keras_mnist.py @@ -113,7 +113,7 @@ def main(_): distribute=strategy) # Train the model with the train dataset. - model.fit(x=train_ds, epochs=20, steps_per_epoch=310) + model.fit(x=train_ds, epochs=20, steps_per_epoch=468) # Evaluate the model with the eval dataset. score = model.evaluate(eval_ds, steps=10, verbose=0) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index a0b8bde132a17179a44e726d0075697153d73949..3511b7761ff4d8c995bfa40a1098b8e803f2a1b3 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -173,13 +173,50 @@ def batch_wrapper(dataset, batch_size, distribution): return dataset.batch(batch_size) -def all_combinations(): +def get_model(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + return model + + +def get_dataset(distribution): + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = batch_wrapper(dataset, 10, distribution) + return dataset + + +def get_predict_dataset(distribution): + inputs = np.zeros((10, 3), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices(inputs) + dataset = dataset.repeat(100) + dataset = batch_wrapper(dataset, 10, distribution) + return dataset + + +strategies = [combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus, + combinations.tpu_strategy_one_step] + + +def strategy_combinations(): + return combinations.combine( + distribution=strategies, + mode=['graph']) + + +def strategy_and_optimizer_combinations(): return combinations.combine( - distribution=[combinations.default_strategy, - combinations.one_device_strategy, - combinations.mirrored_strategy_with_gpu_and_cpu, - combinations.mirrored_strategy_with_two_gpus, - combinations.tpu_strategy_one_step], + distribution=strategies, + optimizer=[combinations.adagrad_optimizer_v1_fn, + combinations.adam_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v1_fn, + combinations.rmsprop_optimizer_v1_fn], mode=['graph']) @@ -318,58 +355,27 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase): gfile.DeleteRecursively(self._config.model_dir) -class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): +class TestDistributionStrategyWithNumpyArrays(test.TestCase, + parameterized.TestCase): - def test_validating_dataset_input_tensors_with_shape_mismatch(self): + @combinations.generate(strategy_combinations()) + def test_creating_var_with_numpy_arrays(self, distribution): with self.cached_session(): - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', - '/device:CPU:0']) - a = constant_op.constant([1, 2], shape=(1, 2)) - b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2)) - x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b}) - y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a}) - with strategy.scope(): - # Removed device and input tensor shape details from the error message - # since the order of the device and the corresponding input tensor shape - # is not deterministic over different runs. - with self.assertRaisesRegexp(ValueError, - 'Input tensor shapes do not match for ' - 'distributed tensor inputs ' - 'DistributedValues:.+'): - distributed_training_utils.validate_distributed_dataset_inputs( - strategy, x, y) - - def test_validating_dataset_input_tensors_with_dtype_mismatch(self): + x = np.asarray(np.random.random((64, 3)), dtype=np.float32) + var_x = distributed_training_utils.get_var_for_numpy(distribution, x) + val = self.evaluate(var_x.value()) + # Verify that the numpy value is copied to the variable. + self.assertAllEqual(x, val) + + @combinations.generate(strategy_combinations()) + def test_calling_model_with_numpy_arrays(self, distribution): with self.cached_session(): - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', - '/device:CPU:0']) - a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32) - b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64) - x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b}) - y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a}) - with strategy.scope(): - # Removed device and input tensor dtype details from the error message - # since the order of the device and the corresponding input tensor dtype - # is not deterministic over different runs. - with self.assertRaisesRegexp(ValueError, - 'Input tensor dtypes do not match for ' - 'distributed tensor inputs ' - 'DistributedValues:.+'): - distributed_training_utils.validate_distributed_dataset_inputs( - strategy, x, y) - - def test_calling_model_with_numpy_arrays(self): - with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' - metrics = ['mae', keras.metrics.CategoricalAccuracy()] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', - '/device:GPU:0']) - model.compile(optimizer, loss, metrics=metrics, distribute=strategy) + metrics = ['mae'] + model.compile(optimizer, loss, metrics=metrics, distribute=distribution) inputs = np.zeros((64, 3), dtype=np.float32) targets = np.zeros((64, 4), dtype=np.float32) @@ -392,30 +398,70 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): # with batch_size model.predict(inputs, batch_size=8) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) + def test_calling_model_with_nested_numpy_arrays(self, distribution): + with self.cached_session(): + a = keras.layers.Input(shape=(3,), name='input_a') + b = keras.layers.Input(shape=(3,), name='input_b') + + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) + + model = keras.models.Model([a, b], [d, e]) + + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001) + loss = 'mse' + model.compile(optimizer, loss, distribute=distribution) + + input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32) + input_b_np = np.asarray(np.random.random((64, 3)), dtype=np.float32) + inputs = [input_a_np, input_b_np] + + output_d_np = np.asarray(np.random.random((64, 4)), dtype=np.float32) + output_e_np = np.asarray(np.random.random((64, 4)), dtype=np.float32) + targets = [output_d_np, output_e_np] + + # Call fit with validation data + model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0) + + # TODO(anjalisridhar): We need tests for when the batch size and steps are + # smaller and results in a 0 batch_size and steps value. + model.evaluate(inputs, targets) + # with steps + model.evaluate(inputs, targets, steps=2) + # with batch_size + model.evaluate(inputs, targets, batch_size=8) + + model.predict(inputs) + # with steps + model.predict(inputs, steps=2) + # with batch_size + model.predict(inputs, batch_size=8) + + +class TestDistributionStrategyWithDatasets(test.TestCase, + parameterized.TestCase): + + @combinations.generate(strategy_combinations()) def test_calling_model_on_same_dataset(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) + dataset = get_dataset(distribution) # Call fit with validation data model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) - model.predict(dataset, steps=2) + model.predict(get_predict_dataset(distribution), steps=2) # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work # as clone_model's input_tensors argument only seems to accept list and not @@ -461,128 +507,39 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_fit_eval_and_predict_methods_on_dataset(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) + dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) - model.predict(dataset, steps=2) - # Test with validation data - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, - validation_data=dataset, validation_steps=2) + model.predict(get_predict_dataset(distribution), steps=2) - def test_unsupported_features(self): + @combinations.generate(strategy_and_optimizer_combinations()) + def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() - optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' - metrics = ['mae'] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', - '/device:GPU:0']) + model.compile(optimizer(), loss, distribute=distribution) - model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) + dataset = get_dataset(distribution) - # Test with validation split - with self.assertRaisesRegexp( - ValueError, '`validation_split` argument is not ' - 'supported when input `x` is a dataset or a ' - 'dataset iterator.+'): - model.fit(dataset, - epochs=1, steps_per_epoch=2, verbose=0, - validation_split=0.5, validation_steps=2) - - # Test with sample weight. - sample_weight = np.random.random((10,)) - with self.assertRaisesRegexp( - NotImplementedError, '`sample_weight` is currently not supported ' - 'when using DistributionStrategy.'): - model.fit( - dataset, - epochs=1, - steps_per_epoch=2, - verbose=0, - sample_weight=sample_weight) - - # Test with not specifying the `steps` argument. - with self.assertRaisesRegexp( - ValueError, 'you should specify the `steps_per_epoch` argument'): - model.fit(dataset, epochs=1, verbose=0) - with self.assertRaisesRegexp(ValueError, - 'you should specify the `steps` argument'): - model.evaluate(dataset, verbose=0) - - with self.assertRaisesRegexp(ValueError, - 'you should specify the `steps` argument'): - model.predict(dataset, verbose=0) - - def test_calling_with_unsupported_predefined_callbacks(self): - with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) - - optimizer = gradient_descent.GradientDescentOptimizer(0.001) - loss = 'mse' - metrics = ['mae'] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', - '/device:GPU:0']) - model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) - - def schedule(_): - return 0.001 - with self.assertRaisesRegexp(ValueError, - 'LearningRateScheduler callback is not ' - 'supported with DistributionStrategy.'): - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, - callbacks=[keras.callbacks.LearningRateScheduler(schedule)]) - - with self.assertRaisesRegexp(ValueError, - 'ReduceLROnPlateau callback is not ' - 'supported with DistributionStrategy.'): - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, - callbacks=[keras.callbacks.ReduceLROnPlateau()]) - with self.assertRaisesRegexp(ValueError, - 'histogram_freq in the TensorBoard callback ' - 'is not supported when using ' - 'DistributionStrategy.'): - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, - callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)]) + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(get_predict_dataset(distribution), steps=2) def test_dataset_input_shape_validation(self): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' @@ -616,17 +573,13 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): mode=['graph'])) def test_dataset_input_shape_fully_defined(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = get_dataset(distribution) # Input shapes are not fully known. Batch dimension is unknown as we are # not using the drop_remainder argument. dataset = dataset.repeat(100).batch(10) @@ -668,7 +621,128 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): self.assertNotEqual(np.mean(predict_output), 0) -class LossMaskingWithDistributionStrategyTest(test.TestCase): +class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): + + def test_validating_dataset_input_tensors_with_shape_mismatch(self): + with self.cached_session(): + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', + '/device:CPU:0']) + a = constant_op.constant([1, 2], shape=(1, 2)) + b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2)) + x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b}) + y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a}) + with strategy.scope(): + # Removed device and input tensor shape details from the error message + # since the order of the device and the corresponding input tensor shape + # is not deterministic over different runs. + with self.assertRaisesRegexp(ValueError, + 'Input tensor shapes do not match for ' + 'distributed tensor inputs ' + 'DistributedValues:.+'): + distributed_training_utils.validate_distributed_dataset_inputs( + strategy, x, y) + + def test_validating_dataset_input_tensors_with_dtype_mismatch(self): + with self.cached_session(): + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', + '/device:CPU:0']) + a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32) + b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64) + x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b}) + y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a}) + with strategy.scope(): + # Removed device and input tensor dtype details from the error message + # since the order of the device and the corresponding input tensor dtype + # is not deterministic over different runs. + with self.assertRaisesRegexp(ValueError, + 'Input tensor dtypes do not match for ' + 'distributed tensor inputs ' + 'DistributedValues:.+'): + distributed_training_utils.validate_distributed_dataset_inputs( + strategy, x, y) + + def test_unsupported_features(self): + with self.cached_session(): + model = get_model() + + optimizer = gradient_descent.GradientDescentOptimizer(0.001) + loss = 'mse' + metrics = ['mae'] + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', + '/device:GPU:0']) + + model.compile(optimizer, loss, metrics=metrics, distribute=strategy) + + dataset = get_dataset(strategy) + + # Test with validation split + with self.assertRaisesRegexp( + ValueError, '`validation_split` argument is not ' + 'supported when input `x` is a dataset or a ' + 'dataset iterator.+'): + model.fit(dataset, + epochs=1, steps_per_epoch=2, verbose=0, + validation_split=0.5, validation_steps=2) + + # Test with sample weight. + sample_weight = np.random.random((10,)) + with self.assertRaisesRegexp( + NotImplementedError, '`sample_weight` is currently not supported ' + 'when using DistributionStrategy.'): + model.fit( + dataset, + epochs=1, + steps_per_epoch=2, + verbose=0, + sample_weight=sample_weight) + + # Test with not specifying the `steps` argument. + with self.assertRaisesRegexp( + ValueError, 'you should specify the `steps_per_epoch` argument'): + model.fit(dataset, epochs=1, verbose=0) + with self.assertRaisesRegexp(ValueError, + 'you should specify the `steps` argument'): + model.evaluate(dataset, verbose=0) + + with self.assertRaisesRegexp(ValueError, + 'you should specify the `steps` argument'): + model.predict(dataset, verbose=0) + + def test_calling_with_unsupported_predefined_callbacks(self): + with self.cached_session(): + model = get_model() + + optimizer = gradient_descent.GradientDescentOptimizer(0.001) + loss = 'mse' + metrics = ['mae'] + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1', + '/device:GPU:0']) + model.compile(optimizer, loss, metrics=metrics, distribute=strategy) + + dataset = get_dataset(strategy) + + def schedule(_): + return 0.001 + with self.assertRaisesRegexp(ValueError, + 'LearningRateScheduler callback is not ' + 'supported with DistributionStrategy.'): + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + callbacks=[keras.callbacks.LearningRateScheduler(schedule)]) + + with self.assertRaisesRegexp(ValueError, + 'ReduceLROnPlateau callback is not ' + 'supported with DistributionStrategy.'): + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + callbacks=[keras.callbacks.ReduceLROnPlateau()]) + with self.assertRaisesRegexp(ValueError, + 'histogram_freq in the TensorBoard callback ' + 'is not supported when using ' + 'DistributionStrategy.'): + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, + callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)]) + + +class TestDistributionStrategyWithLossMasking(test.TestCase): # TODO(priyag): Enable all strategies for this test. Currently it does not # work for TPU due to some invalid datatype. @@ -695,10 +769,10 @@ class LossMaskingWithDistributionStrategyTest(test.TestCase): self.assertEqual(hist.history['loss'][0], 0) -class NormalizationLayerWithDistributionStrategyTest( +class TestDistributionStrategyWithNormalizationLayer( test.TestCase, parameterized.TestCase): - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_batchnorm_correctness(self, distribution): with self.cached_session(): model = keras.models.Sequential() @@ -715,18 +789,22 @@ class NormalizationLayerWithDistributionStrategyTest( dataset = dataset.repeat(100) dataset = batch_wrapper(dataset, 32, distribution) + predict_dataset = dataset_ops.Dataset.from_tensor_slices(x) + predict_dataset = predict_dataset.repeat(100) + predict_dataset = batch_wrapper(predict_dataset, 32, distribution) + model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10) - out = model.predict(dataset, steps=2) + out = model.predict(predict_dataset, steps=2) out -= keras.backend.eval(norm.beta) out /= keras.backend.eval(norm.gamma) np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1) np.testing.assert_allclose(out.std(), 1.0, atol=1e-1) -class CorrectnessWithDistributionStrategyTest(test.TestCase, - parameterized.TestCase): +class TestDistributionStrategyCorrectness(test.TestCase, + parameterized.TestCase): - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_metric_correctness(self, distribution): with self.cached_session(): keras.backend.set_image_data_format('channels_last') @@ -756,7 +834,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase, history = model.fit(x=train_dataset, epochs=1, steps_per_epoch=10) self.assertEqual(history.history['binary_accuracy'], [1.0]) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_correctness(self, distribution): with self.cached_session(): keras.backend.set_image_data_format('channels_last') @@ -800,8 +878,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase, predict_batch_size = 4 if with_distribution: predict_batch_size //= with_distribution.num_towers - predict_dataset = dataset_ops.Dataset.from_tensor_slices((x_predict, - x_predict)) + predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict) predict_dataset = batch_wrapper(predict_dataset, predict_batch_size, distribution) predict_result = model.predict(predict_dataset, steps=1) diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py index f7773aff4fff97b6437b2c9c31925a35a1e6766c..ae4189eb1cb217f8a209b57f91a0ddb82e63dcd9 100644 --- a/tensorflow/contrib/distribute/python/metrics_v1_test.py +++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from absl.testing import parameterized from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import tpu_strategy from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import test from tensorflow.python.framework import ops @@ -35,7 +36,8 @@ def _labeled_dataset_fn(): # 8: 3, 2 -> False; 9: 4, 0 -> False; 10: 0, 1 -> False; 11: 1, 2 -> False # 12: 2, 0 -> False; 13: 3, 1 -> False; 14: 4, 2 -> False; 15: 0, 0 -> True return dataset_ops.Dataset.range(1000).map( - lambda x: {"labels": x % 5, "predictions": x % 3}).batch(4) + lambda x: {"labels": x % 5, "predictions": x % 3}).batch( + 4, drop_remainder=True) def _boolean_dataset_fn(): @@ -47,7 +49,8 @@ def _boolean_dataset_fn(): # F, T -> FP; T, F -> FN; F, F -> TN return dataset_ops.Dataset.from_tensor_slices({ "labels": [True, False, True, False], - "predictions": [True, True, False, False]}).repeat().batch(3) + "predictions": [True, True, False, False]}).repeat().batch( + 3, drop_remainder=True) def _threshold_dataset_fn(): @@ -59,7 +62,8 @@ def _threshold_dataset_fn(): # False, .75 -> FP; True, .25 -> FN; False, 0.0 -> TN return dataset_ops.Dataset.from_tensor_slices({ "labels": [True, False, True, False], - "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch(3) + "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch( + 3, drop_remainder=True) def _regression_dataset_fn(): @@ -79,6 +83,12 @@ def all_combinations(): mode=["graph"]) +def tpu_combinations(): + return combinations.combine(distribution=[combinations.tpu_strategy_one_step, + combinations.tpu_strategy], + mode=["graph"]) + + # TODO(josh11b): Test metrics.recall_at_top_k, metrics.average_precision_at_k, # metrics.precision_at_k class MetricsV1Test(test.TestCase, parameterized.TestCase): @@ -86,44 +96,51 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): def _test_metric(self, distribution, dataset_fn, metric_fn, expected_fn): with ops.Graph().as_default(), distribution.scope(): iterator = distribution.distribute_dataset( - dataset_fn).make_initializable_iterator() - value, update = distribution.call_for_each_tower( - metric_fn, iterator.get_next()) - update = distribution.group(update) - self.evaluate(iterator.initializer) + dataset_fn).make_one_shot_iterator() + if isinstance(distribution, tpu_strategy.TPUStrategy): + def step_fn(ctx, inputs): + value, update = distribution.call_for_each_tower( + metric_fn, inputs) + ctx.set_non_tensor_output(name="value", output=value) + return distribution.group(update) + + ctx = distribution.run_steps_on_dataset( + step_fn, iterator, iterations=distribution.steps_per_run) + update = ctx.run_op + value = ctx.non_tensor_outputs["value"] + # In each run, we run multiple steps, and each steps consumes as many + # batches as number of towers. + batches_per_update = ( + distribution.num_towers * distribution.steps_per_run) + else: + value, update = distribution.call_for_each_tower( + metric_fn, iterator.get_next()) + update = distribution.group(update) + # TODO(josh11b): Once we switch to using a global batch size for input, + # replace "distribution.num_towers" with "1". + batches_per_update = distribution.num_towers + + self.evaluate(distribution.initialize()) self.evaluate(variables.local_variables_initializer()) - # TODO(josh11b): Once we switch to using a global batch size for input, - # replace "distribution.num_towers" with "1". - batches_per_update = distribution.num_towers - - # Update variables using the first `num_towers` batches. - self.evaluate(update) - self.assertAllClose(expected_fn(batches_per_update), self.evaluate(value), - 0.001, msg="After first update") - - # Update variables using the second `num_towers` batches. - self.evaluate(update) - self.assertAllClose(expected_fn(2 * batches_per_update), - self.evaluate(value), - 0.001, - msg="After second update") - - if batches_per_update == 1: # Consume 4 input batches - self.evaluate(update) - self.assertAllClose(expected_fn(3 * batches_per_update), - self.evaluate(value), - 0.001, - msg="After third update") + + batches_consumed = 0 + for i in range(4): self.evaluate(update) - self.assertAllClose(expected_fn(4 * batches_per_update), + batches_consumed += batches_per_update + self.assertAllClose(expected_fn(batches_consumed), self.evaluate(value), 0.001, - msg="After fourth update") + msg="After update #" + str(i+1)) + if batches_consumed >= 4: # Consume 4 input batches in total. + break - @combinations.generate(all_combinations()) + self.evaluate(distribution.finalize()) + + @combinations.generate(all_combinations() + tpu_combinations()) def testMean(self, distribution): def _dataset_fn(): - return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch(4) + return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch( + 4, drop_remainder=True) def _expected_fn(num_batches): # Mean(0..3) = 1.5, Mean(0..7) = 3.5, Mean(0..11) = 5.5, etc. @@ -131,7 +148,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric(distribution, _dataset_fn, metrics.mean, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testAccuracy(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -144,6 +161,8 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _labeled_dataset_fn, _metric_fn, _expected_fn) + # TODO(priyag, jhseu): Enable TPU for this test once scatter_add is added + # for TPUMirroredVariable. @combinations.generate(all_combinations()) def testMeanPerClassAccuracy(self, distribution): def _metric_fn(x): @@ -162,6 +181,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _labeled_dataset_fn, _metric_fn, _expected_fn) + # NOTE(priyag): This metric doesn't work on TPUs yet. @combinations.generate(all_combinations()) def testMeanIOU(self, distribution): def _metric_fn(x): @@ -180,7 +200,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _labeled_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testMeanTensor(self, distribution): def _dataset_fn(): dataset = dataset_ops.Dataset.range(1000).map(math_ops.to_float) @@ -199,7 +219,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _dataset_fn, metrics.mean_tensor, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testAUCROC(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -213,7 +233,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testAUCPR(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -227,7 +247,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testFalseNegatives(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -240,7 +260,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testFalseNegativesAtThresholds(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -253,7 +273,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testTrueNegatives(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -266,7 +286,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testTrueNegativesAtThresholds(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -279,7 +299,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testFalsePositives(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -292,7 +312,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testFalsePositivesAtThresholds(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -305,7 +325,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testTruePositives(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -318,7 +338,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testTruePositivesAtThresholds(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -331,7 +351,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testPrecision(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -344,7 +364,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testPrecisionAtThreshold(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -357,7 +377,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testRecall(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -370,7 +390,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _boolean_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testRecallAtThreshold(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -383,7 +403,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _threshold_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testMeanSquaredError(self, distribution): def _metric_fn(x): labels = x["labels"] @@ -396,7 +416,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase): self._test_metric( distribution, _regression_dataset_fn, _metric_fn, _expected_fn) - @combinations.generate(all_combinations()) + @combinations.generate(all_combinations() + tpu_combinations()) def testRootMeanSquaredError(self, distribution): def _metric_fn(x): labels = x["labels"] diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py index d082d5c419ae3ae947285abdd1636e1dec564e73..60e134055ff3bd65ce717c5eb48168b07de4515c 100644 --- a/tensorflow/contrib/distribute/python/minimize_loss_test.py +++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py @@ -41,14 +41,6 @@ from tensorflow.python.ops.losses import losses_impl class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): - def _get_iterator(self, ds): - if context.executing_eagerly(): - iterator = ds.make_one_shot_iterator() - else: - iterator = ds.make_initializable_iterator() - self.evaluate(iterator.initializer) - return iterator - @combinations.generate( combinations.times( combinations.distributions_and_v1_optimizers(), @@ -70,7 +62,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): distribution.call_for_each_tower( model_fn, *inputs, run_concurrently=layer.built)) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( @@ -106,7 +99,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( @@ -165,7 +159,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): distribution.call_for_each_tower( model_fn, *inputs, run_concurrently=layer.built)) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( @@ -184,11 +179,6 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): def get_expected_variables(optimizer_fn, num_parameter_devices): variables_map = { "GradientDescent": ["dense/kernel", "dense/bias"], - "Adam": [ - "dense/kernel", "dense/bias", "beta1_power", "beta2_power", - "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam", - "dense/bias/Adam_1" - ], "Adagrad": [ "dense/kernel/Adagrad", "dense/kernel", "dense/bias/Adagrad", "dense/bias" @@ -249,7 +239,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS) return control_flow_ops.group(fetches) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( @@ -342,7 +333,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): distribution.call_for_each_tower( model_fn, x, y, run_concurrently=False)) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.run_steps_on_dataset( @@ -435,7 +427,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): output=loss) return distribution.group(train_op) - iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn)) + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): initial_loss = lambda: constant_op.constant(1e7) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 945f45038731050b6ca50c33b0177ab0daa58601..a32424b316b003cc58ccf28fd968acb6a764a542 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -318,12 +318,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed). The distribution strategy inherits these concepts as well and in addition to that we also clarify several more concepts: - * **In-graph replication**: the `client` creates a single `tf.Graph` that + + * **In-graph replication**: the `client` creates a single `tf.Graph` that specifies tasks for devices on all workers. The `client` then creates a client session which will talk to the `master` service of a `worker`. Then the `master` will partition the graph and distribute the work to all participating workers. - * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one + * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one physical machine. We will have multiple `worker`s with different `task` index. They all do similar things except for one worker checkpointing model variables, writing summaries, etc. in addition to its ordinary work. @@ -347,6 +348,8 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): set, the `configure` method will try to find the best one. prefetch_on_device: optional boolean to specify whether to prefetch input data to devices. + auto_shard_dataset: whether to auto-shard the dataset when there are + multiple workers. """ def __init__(self, @@ -354,11 +357,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): num_gpus=None, num_gpus_per_worker=None, cross_tower_ops=None, - prefetch_on_device=None): + prefetch_on_device=None, + auto_shard_dataset=False): super(MirroredStrategy, self).__init__() self._cross_tower_ops = cross_tower_ops self._prefetch_on_device = prefetch_on_device + self._auto_shard_dataset = auto_shard_dataset # Rememeber num GPUs which might be needed by `configure` method. if num_gpus is not None and num_gpus_per_worker is not None: raise ValueError( @@ -477,13 +482,11 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): if self._cluster_spec: return values.MultiWorkerDataset( partial(self._call_dataset_fn, dataset_fn), self._worker_device_map, - self._prefetch_on_device) + self._prefetch_on_device, self._auto_shard_dataset) else: return values.PerDeviceDataset( - self._call_dataset_fn(dataset_fn), - self._devices, - self._prefetch_on_device, - source_device=device_util.resolve("/device:CPU:0")) + self._call_dataset_fn(dataset_fn), self._devices, + self._prefetch_on_device) # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed. def _run_steps_on_dataset(self, fn, iterator, iterations, @@ -625,9 +628,11 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): return self._get_cross_tower_ops().batch_reduce(aggregation, value_destination_pairs) - def _update(self, var, fn, *args, **kwargs): + def _update(self, var, options, fn, *args, **kwargs): # TODO(josh11b): In eager mode, use one thread per device. assert isinstance(var, values.DistributedVariable) + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. updates = {} for d, v in var._index.items(): # pylint: disable=protected-access name = "update_%d" % self._device_index.get(d) @@ -636,10 +641,12 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): updates[d] = fn(v, *values.select_device_mirrored(d, args), **values.select_device_mirrored(d, kwargs)) - return values.regroup(updates, values.Mirrored) + return values.update_regroup(self, updates, should_group) - def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs): assert isinstance(colocate_with, list) + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. # TODO(josh11b): In eager mode, use one thread per device. updates = {} for d in colocate_with: @@ -647,7 +654,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name): updates[d] = fn(*values.select_device_mirrored(d, args), **values.select_device_mirrored(d, kwargs)) - return values.regroup(updates, values.Mirrored) + return values.update_regroup(self, updates, should_group) def read_var(self, tower_local_var): """Read the aggregate value of a tower-local variable.""" diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index 04c712ce1db2b04682efc2693ae09db166568181..eeac528329a239f6a8a68a72c44272566b1d83d1 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -300,15 +300,9 @@ class MirroredStrategyVariableCreationTest(test.TestCase): dist = mirrored_strategy.MirroredStrategy( ["/device:GPU:0", "/device:CPU:0"]) - ds = dist.distribute_dataset( - lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)) - if context.executing_eagerly(): - iterator = ds.make_one_shot_iterator() - else: - iterator = ds.make_initializable_iterator() - self.evaluate([iterator.initializer]) - - features = iterator.get_next() + features = dist.distribute_dataset( + lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + ).make_one_shot_iterator().get_next() with dist.scope(): result = dist.call_for_each_tower( @@ -832,7 +826,7 @@ class MirroredStrategyVariableCreationTest(test.TestCase): with dist.scope(): ret_v_sum = dist.call_for_each_tower(model_fn, run_concurrently=False) - update_ops = dist.unwrap(dist.update(ret_v_sum, update, 5.0)) + update_ops = dist.update(ret_v_sum, update, 5.0, grouped=False) # Initialize variables. self.evaluate(variables.global_variables_initializer()) diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py index 17b7ab74f63f42e1ee14a82d3bffdd1df9b25857..7644acedc99361d7287a91832d76bc68cbc6ac0a 100644 --- a/tensorflow/contrib/distribute/python/monitor.py +++ b/tensorflow/contrib/distribute/python/monitor.py @@ -51,7 +51,6 @@ class Monitor(object): else: if session is None: raise ValueError("Should provide a `session` in Graph mode.") - session.run(step_callable._iterator.initializer) # pylint: disable=protected-access self._run_step = session.make_callable(step_callable()) session.run(variables.global_variables_initializer()) diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py new file mode 100644 index 0000000000000000000000000000000000000000..119352ad9195dc51201863f34aef19cb3289e635 --- /dev/null +++ b/tensorflow/contrib/distribute/python/moving_averages_test.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for training.moving_averages when using a DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.training import moving_averages + + +all_combinations = combinations.combine( + distribution=[combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu], + mode=["graph"]) + + +class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase): + + @combinations.generate(all_combinations) + def testTowerModeWithoutZeroDebias(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + return var, assign + + with distribution.scope(), self.cached_session() as sess: + var, assign = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(distribution.unwrap(assign)) + # Mean of val across calls to tower_fn(). + average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testTowerMode(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([0.0, 0.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average(var, val, decay) + return var, assign.op + + with distribution.scope(), self.cached_session() as sess: + var, assign_op = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(distribution.unwrap(assign_op)) + # Mean of val across calls to tower_fn(). + average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + self.assertAllClose(average_val, var.eval()) + + @combinations.generate(all_combinations) + def testCrossTowerWithoutZeroDebias(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0, 2.0]) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(assign) + average_val = [1.0, 2.0] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + # Also try assign.op. + sess.run(assign.op) + orig_weight = 0.25 * 0.25 + val_weight = 1.0 - orig_weight + self.assertAllClose( + [10.0 * orig_weight + average_val[0] * val_weight, + 11.0 * orig_weight + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testCrossTower(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([0.0, 0.0]) + val = array_ops.placeholder(dtypes.float32) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average(var, val, decay) + + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(assign, feed_dict={val: [1.0, 2.0]}) + self.assertAllClose([1.0, 2.0], var.eval()) + + # Also try assign.op. + sess.run(assign.op, feed_dict={val: [10.0, 0.0]}) + self.assertAllClose( + [(1.0 * 0.25 + 10.0) / (1.0 * 0.25 + 1.0), + (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)], + var.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py index 23b220f64b843a83aba3f9867b61415b70f19668..f5259190485e701c190beb49220caff743f8fdcb 100644 --- a/tensorflow/contrib/distribute/python/one_device_strategy.py +++ b/tensorflow/contrib/distribute/python/one_device_strategy.py @@ -141,14 +141,21 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy): else: assert False - def _update(self, var, fn, *args, **kwargs): - with ops.device(self._device), distribute_lib.UpdateContext(self._device): - return fn(var, *args, **kwargs) + def _update(self, var, options, fn, *args, **kwargs): + # The implementations of _update() and _update_non_slot() are identical + # except _update() passes `var` as the first argument to `fn()`. + return self._update_non_slot(var, options, fn, var, *args, **kwargs) - def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs): del colocate_with + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. with ops.device(self._device), distribute_lib.UpdateContext(self._device): - return fn(*args, **kwargs) + result = fn(*args, **kwargs) + if should_group: + return result + else: + return nest.map_structure(self._unwrap, result) def read_var(self, tower_local_var): """Read the aggregate value of a tower-local variable.""" diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py index 3064433129865703f574ba79e843880fc4390e74..6e9ba37a198fc8038c086d2672251adfac30fdcf 100644 --- a/tensorflow/contrib/distribute/python/optimizer_v2_test.py +++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py @@ -42,11 +42,8 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase): model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) - ds = distribution.distribute_dataset(dataset_fn) - if context.executing_eagerly(): - iterator = ds.make_one_shot_iterator() - else: - iterator = ds.make_initializable_iterator() + iterator = distribution.distribute_dataset( + dataset_fn).make_one_shot_iterator() def run_step(): return control_flow_ops.group(distribution.unwrap( @@ -55,7 +52,6 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase): if not context.executing_eagerly(): with self.cached_session() as sess: - sess.run(iterator.initializer) run_step = sess.make_callable(run_step()) self.evaluate(variables.global_variables_initializer()) diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py index 1125d027f64420863386d4fbd9db5564a5847825..6ddd91507bf86e8b0cf710a2340fd61abcdebe71 100644 --- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py +++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py @@ -343,21 +343,33 @@ class ParameterServerStrategy(distribute_lib.DistributionStrategy): return nest.map_structure(_select_fn, structured) - def _update(self, var, fn, *args, **kwargs): + def _update(self, var, options, fn, *args, **kwargs): if isinstance(var, values.AggregatingVariable): var = var.get() if not isinstance(var, resource_variable_ops.ResourceVariable): raise ValueError( "You can not update `var` %r. It must be a Variable." % var) + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. with ops.colocate_with(var), distribute_lib.UpdateContext(var.device): - return fn(var, *self._select_single_value(args), - **self._select_single_value(kwargs)) + result = fn(var, *self._select_single_value(args), + **self._select_single_value(kwargs)) + if should_group: + return result + else: + return nest.map_structure(self._unwrap, result) # TODO(yuefengz): does it need to call _select_single_value? - def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs): + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. with ops.device( colocate_with.device), distribute_lib.UpdateContext(colocate_with): - return fn(*args, **kwargs) + result = fn(*args, **kwargs) + if should_group: + return result + else: + return nest.map_structure(self._unwrap, result) def _unwrap(self, val): if isinstance(val, values.DistributedValues): diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py index 12789e0bc9f1c89ef8d57c40a978e2bb9471997b..9c112e4f851b5e5e6f65c0bd9d9564420f8d4446 100644 --- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py +++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py @@ -262,7 +262,9 @@ class ParameterServerStrategyTestBase( h = f + 1.0 self.assertEqual( device_util.canonicalize(u.device), tower_variable_device) - self.assertEqual(device_util.canonicalize(x.device), h.device) + self.assertEqual( + device_util.canonicalize(x.device), + device_util.canonicalize(h.device)) return y_add, z_add, f y, z, f = d.call_for_each_tower(model_fn) @@ -395,7 +397,8 @@ class ParameterServerStrategyTestBase( # TODO(yuefengz): support non-Mirrored variable as destinations. g = d.reduce( variable_scope.VariableAggregation.SUM, g, destinations=v) - with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + with ops.control_dependencies( + d.update(v, update, g, grouped=False)): after_list.append(d.read_var(v)) return before_list, after_list diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..d48aa9c89bc894a6afc4aab8b60fabc52a06b198 --- /dev/null +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py @@ -0,0 +1,232 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Extension of prefetching_ops to support more than one device.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import warnings + +from tensorflow.python.data.experimental.ops import prefetching_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import nest as data_nest +from tensorflow.python.data.util import sparse +from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops +from tensorflow.python.util import nest + + +# pylint: disable=protected-access +class _PrefetchToDeviceIterator(object): + """A replacement for `tf.data.Iterator` that prefetches to another device. + + Args: + input_dataset: The input dataset. + one_shot: If true, we make a one shot iterator that's already initialized. + devices: Devices on which to prefetch. + buffer_size: Size of the prefetching buffer. + shared_name: (Optional.) If non-empty, the returned iterator will be shared + under the given name across multiple sessions that share the same devices + (e.g. when using a remote server). Only used if one_shot is False. + + Returns: + An Iterator type object. + """ + + def __init__(self, + input_dataset, + one_shot, + devices, + buffer_size, + shared_name=None): + self._input_dataset = input_dataset + self._get_next_call_count = 0 + self._one_shot = one_shot + if shared_name is None: + shared_name = "" + self._devices = devices + + if self._one_shot: + self._input_iterator = input_dataset.make_one_shot_iterator() + else: + self._input_iterator = iterator_ops.Iterator.from_structure( + self._input_dataset.output_types, self._input_dataset.output_shapes, + shared_name, self._input_dataset.output_classes) + input_iterator_handle = self._input_iterator.string_handle() + + @function.Defun(dtypes.string) + def _prefetch_fn(handle): + """Prefetches one element from `input_iterator`.""" + remote_iterator = iterator_ops.Iterator.from_string_handle( + handle, self._input_iterator.output_types, + self._input_iterator.output_shapes, + self._input_iterator.output_classes) + ret = remote_iterator.get_next() + return nest.flatten(sparse.serialize_sparse_tensors(ret)) + + target_device = ged_ops.experimental_iterator_get_device( + self._input_iterator._iterator_resource) + self._buffering_resources = [] + for device in nest.flatten(self._devices): + with ops.device(device): + buffer_resource_handle = prefetching_ops.function_buffering_resource( + f=_prefetch_fn, + output_types=data_nest.flatten( + sparse.as_dense_types(self._input_dataset.output_types, + self._input_dataset.output_classes)), + target_device=target_device, + string_arg=input_iterator_handle, + buffer_size=buffer_size, + shared_name=shared_name) + self._buffering_resources.append(buffer_resource_handle) + + if not self._one_shot: + reset_ops = [] + for buffer_resource in self._buffering_resources: + reset_ops.append( + ged_ops.experimental_function_buffering_resource_reset( + buffer_resource)) + with ops.control_dependencies(reset_ops): + self._initializer = self._input_iterator.make_initializer( + self._input_dataset) + + def get_next(self, name=None): + """See `tf.data.Iterator.get_next`.""" + self._get_next_call_count += 1 + if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD: + warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE) + + flat_result = [] + # TODO(priyag): This will fail if the input size (typically number of + # batches) is not divisible by number of devices. + # How do we handle that more gracefully / let the user know? + for buffer_resource in self._buffering_resources: + flat_ret = ged_ops.experimental_function_buffering_resource_get_next( + buffer_resource, + output_types=data_nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + name=name) + + ret = sparse.deserialize_sparse_tensors( + data_nest.pack_sequence_as(self.output_types, flat_ret), + self.output_types, self.output_shapes, self.output_classes) + + for tensor, shape in zip( + data_nest.flatten(ret), data_nest.flatten(self.output_shapes)): + if isinstance(tensor, ops.Tensor): + tensor.set_shape(shape) + flat_result.append(ret) + + return nest.pack_sequence_as(self._devices, flat_result) + + @property + def initializer(self): + if self._one_shot: + raise NotImplementedError("Can't initialize a one_shot_iterator") + return self._initializer + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types + + +# pylint: enable=protected-access + + +class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset): + """A `Dataset` whose iterator prefetches elements to other device(s).""" + + def __init__(self, input_dataset, devices, buffer_size): + super(_PrefetchToDeviceDataset, self).__init__(input_dataset) + self._input_dataset = input_dataset + self._devices = devices + self._buffer_size = buffer_size if buffer_size is not None else 1 + + def make_one_shot_iterator(self): + return _PrefetchToDeviceIterator( + self._input_dataset, + one_shot=True, + devices=self._devices, + buffer_size=self._buffer_size) + + def make_initializable_iterator(self, shared_name=None): + if context.executing_eagerly(): + raise RuntimeError( + "make_initializable_iterator is not supported when eager " + "execution is enabled.") + + return _PrefetchToDeviceIterator( + self._input_dataset, + one_shot=False, + devices=self._devices, + buffer_size=self._buffer_size, + shared_name=shared_name) + + def _as_variant_tensor(self): + # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset + # transformation methods is called. + # TODO(mrry): Investigate support for chaining further transformations after + # the prefetch, including GPU support. + raise NotImplementedError("`prefetch_to_devices()` must be the last " + "transformation in a dataset pipeline.") + + # TODO(priyag): Fix the output types, shapes and classes to match the result + # of get_next (which has the additional nesting layer of devices now). + @property + def output_types(self): + return self._input_dataset.output_types + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_classes(self): + return self._input_dataset.output_classes + + +def prefetch_to_devices(devices, buffer_size=None): + """A transformation that prefetches dataset values to the given `devices`. + + NOTE: Although the transformation creates a `tf.data.Dataset`, the + transformation must be the final `Dataset` in the input pipeline. + + Args: + devices: A nested structure of devices on which to prefetch the data. It can + be a single device name, or a tuple or list of device names. + buffer_size: (Optional.) The number of elements to buffer on each device. + Defaults to an automatically chosen value. + + Returns: + A `Dataset` transformation function, which can be passed to + `tf.data.Dataset.apply`. + """ + + def _apply_fn(dataset): + return _PrefetchToDeviceDataset(dataset, devices, buffer_size) + + return _apply_fn diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..16799104e8112f4391152c0cf2a15af81f8c2c9d --- /dev/null +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py @@ -0,0 +1,90 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for prefetching_ops_v2.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import prefetching_ops_v2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class PrefetchingOpsV2Test(test.TestCase): + + def testPrefetchToOneDevice(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices("/gpu:0")) + + iterator = device_dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.cached_session() as sess: + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testPrefetchToTwoDevicesInAList(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"])) + + iterator = device_dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + output = [] + # TODO(rohanj): Modify test to go till the end of the dataset when we + # switch to MultiDeviceIterator. + with self.cached_session() as sess: + for _ in range(4): + result = sess.run(next_element) + self.assertEqual(2, len(result)) + output.extend(result) + self.assertEquals(set(range(8)), set(output)) + + def testPrefetchToTwoDevicesWithReinit(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"])) + + iterator = device_dataset.make_initializable_iterator() + next_element = iterator.get_next() + + # TODO(rohanj): Modify test to go till the end of the dataset when we + # switch to MultiDeviceIterator. + with self.cached_session() as sess: + sess.run(iterator.initializer) + for _ in range(4): + sess.run(next_element) + sess.run(iterator.initializer) + for _ in range(4): + sess.run(next_element) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py index 23bf36184fa61105b43c71ada6c343b30dce8376..1b5a4f64e5bb1ffabfe1b87c150f713c755bb682 100644 --- a/tensorflow/contrib/distribute/python/step_fn.py +++ b/tensorflow/contrib/distribute/python/step_fn.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python.eager import backprop -from tensorflow.python.eager import context from tensorflow.python.training import optimizer as optimizer_lib @@ -51,11 +50,7 @@ class StandardInputStep(Step): def __init__(self, dataset_fn, distribution): super(StandardInputStep, self).__init__(distribution) self._distributed_input = distribution.distribute_dataset(dataset_fn) - if context.executing_eagerly(): - self._iterator = self._distributed_input.make_one_shot_iterator() - else: - # TODO(priyag): Expose initializer via some initializer property. - self._iterator = self._distributed_input.make_initializable_iterator() + self._iterator = self._distributed_input.make_one_shot_iterator() class StandardSingleLossStep(StandardInputStep): diff --git a/tensorflow/contrib/distribute/python/step_fn_test.py b/tensorflow/contrib/distribute/python/step_fn_test.py index 1ff9b9ceec13351b098d47ed3ff62f689a625a31..f1ada49fa378358f112fb75a4bcdbe9a8a09cd13 100644 --- a/tensorflow/contrib/distribute/python/step_fn_test.py +++ b/tensorflow/contrib/distribute/python/step_fn_test.py @@ -50,7 +50,6 @@ class SingleLossStepTest(test.TestCase, parameterized.TestCase): run_step = single_loss_step else: with self.cached_session() as sess: - sess.run(single_loss_step._iterator.initializer) run_step = sess.make_callable(single_loss_step()) self.evaluate(variables.global_variables_initializer()) diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py index 5d498fb629d4a381f56aa7b2db95b09da9010a78..fd280f5754b34170cdd6b948236138d0e77dd8bc 100644 --- a/tensorflow/contrib/distribute/python/strategy_test_lib.py +++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py @@ -115,7 +115,8 @@ class DistributionTestBase(test.TestCase): with ops.control_dependencies([fetched]): g = d.reduce( variable_scope.VariableAggregation.SUM, g, destinations=v) - with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + with ops.control_dependencies(d.update( + v, update, g, grouped=False)): after_list.append(d.read_var(v)) return before_list, after_list @@ -169,7 +170,8 @@ class DistributionTestBase(test.TestCase): with ops.control_dependencies([fetched]): g = d.reduce( variable_scope.VariableAggregation.SUM, g, destinations=v) - with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + with ops.control_dependencies(d.update( + v, update, g, grouped=False)): after_list.append(d.read_var(v)) return before_list, after_list diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py index a6762e5e87bfc84c375fade1634f84f8e76aed6f..1d9e299b38409b874610765e54fa0052fafd5f4b 100644 --- a/tensorflow/contrib/distribute/python/tpu_strategy.py +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -29,6 +29,7 @@ from tensorflow.contrib.tpu.python.tpu import tpu from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib from tensorflow.contrib.tpu.python.tpu import training_loop from tensorflow.python.eager import context +from tensorflow.python.eager import tape from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -37,9 +38,13 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as variables_lib from tensorflow.python.training import device_util +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.util import nest +_TPU_INITIALIZE_SYSTEM_COLLECTION = "TPU_STRATEGY_INITIALIZE" + + def get_tpu_system_metadata(tpu_cluster_resolver): """Retrieves TPU system metadata given a TPUClusterResolver.""" master = tpu_cluster_resolver.master() @@ -56,6 +61,58 @@ def get_tpu_system_metadata(tpu_cluster_resolver): return tpu_system_metadata +# TODO(jhseu): Deduplicate with MirroredStrategy? +def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args, + **kwargs): # pylint: disable=g-missing-docstring + # Figure out what collections this variable should be added to. + # We'll add the TPUMirroredVariable to those collections instead. + collections = kwargs.pop("collections", None) + if collections is None: + collections = [ops.GraphKeys.GLOBAL_VARIABLES] + kwargs["collections"] = [] + + # TODO(jhseu): Should we have different behavior for different + # synchronization settings? + + # Get aggregation value + # TODO(jhseu): Support aggregation in a tower context. + aggregation = kwargs.pop("aggregation", vs.VariableAggregation.NONE) + if aggregation not in [ + vs.VariableAggregation.NONE, + vs.VariableAggregation.SUM, + vs.VariableAggregation.MEAN, + vs.VariableAggregation.ONLY_FIRST_TOWER, + ]: + raise ValueError("Invalid variable aggregation mode: {} for variable: {}" + .format(aggregation, kwargs["name"])) + + # Ignore user-specified caching device, not needed for mirrored variables. + kwargs.pop("caching_device", None) + + # TODO(josh11b,apassos): It would be better if variable initialization + # was never recorded on the tape instead of having to do this manually + # here. + with tape.stop_recording(): + index = real_mirrored_creator(devices, *args, **kwargs) + result = values.TPUMirroredVariable(index, index[devices[0]], aggregation) + + if not context.executing_eagerly(): + g = ops.get_default_graph() + # If "trainable" is True, next_creator() will add the member variables + # to the TRAINABLE_VARIABLES collection, so we manually remove + # them and replace with the MirroredVariable. We can't set + # "trainable" to False for next_creator() since that causes functions + # like implicit_gradients to skip those variables. + if kwargs.get("trainable", True): + collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) + l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) + for v in index.values(): + l.remove(v) + g.add_to_collections(collections, result) + return result + + +# TODO(jhseu): Stop inheriting from OneDeviceStrategy. class TPUStrategy(one_device_strategy.OneDeviceStrategy): """Experimental TPU distribution strategy implementation.""" @@ -75,17 +132,28 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): """ # TODO(sourabhbajaj): OneDeviceStrategy should be initialized with the # master node fetched from the cluster resolver. - super(TPUStrategy, self).__init__('/device:CPU:0') + super(TPUStrategy, self).__init__("/device:CPU:0") self._tpu_cluster_resolver = tpu_cluster_resolver self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver) # TODO(sourabhbajaj): Change this from num_cores to metadata_override self._num_cores_override = num_cores + # TODO(jhseu): Switch to DeviceAssignment to support pods and model + # parallelism. + device_map = {d.name: i for i, d in enumerate(self._tpu_metadata.devices) + if "device:TPU:" in d.name} + self._device_index = values.PerDevice(device_map) + self._tpu_devices = sorted(device_map.keys()) + # Only create variables for the number of towers we're running. + self._tpu_devices = self._tpu_devices[:self.num_towers] + # TODO(sourabhbajaj): Remove this once performance of running one step # at a time is comparable to multiple steps. self.steps_per_run = steps_per_run + self._require_static_shapes = True + def _get_enqueue_op_per_host(self, host_id, iterator, input_shapes, iterations): """Create an enqueue op for a single host identified using host_id. @@ -231,6 +299,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): # For outputs that have already been aggregated, take the first value # from the list as each value should be the same. Else return the full # list of values. + # TODO(josh11b): If aggregation is NONE, we should return a PerDevice value. if aggregation is not variables_lib.VariableAggregation.NONE: # TODO(priyag): Should this return the element or a list with 1 element last_step_tensor_outputs_dict[name] = output[0] @@ -239,6 +308,8 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): return ctx def _call_for_each_tower(self, fn, *args, **kwargs): + # TODO(jhseu): Consider making it so call_for_each_tower implies that we're + # in a tpu.rewrite(), and update TPUMirroredVariable accordingly. kwargs.pop('run_concurrently', None) with one_device_strategy._OneDeviceTowerContext(self): # pylint: disable=protected-access return fn(*args, **kwargs) @@ -248,7 +319,15 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): # TODO(priyag): Add appopriate call here when eager is supported for TPUs. raise NotImplementedError('Eager mode not supported in TPUStrategy.') else: - return [tpu.initialize_system()] + # TODO(jhseu): We need this hack because DistributionStrategies must be + # pickleable for copy.deepcopy(). Remove when initialize_system goes away. + graph = ops.get_default_graph() + tpu_init = graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION) + if tpu_init: + return tpu_init + graph.add_to_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION, + tpu.initialize_system()) + return graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION) def finalize(self): if context.executing_eagerly(): @@ -257,21 +336,53 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): else: return [tpu.shutdown_system()] + def _get_devices_from(self, colocate_with=None): + # TODO(jhseu): Change this when we support model parallelism. + return self._tpu_devices + + def _create_variable(self, next_creator, *args, **kwargs): + """Create a TPUMirroredVariable. See `DistributionStrategy.scope`.""" + colocate_with = kwargs.pop("colocate_with", None) + devices = self._get_devices_from(colocate_with) + + def _real_mirrored_creator(devices, *args, **kwargs): # pylint: disable=g-missing-docstring + index = {} + for i, d in enumerate(devices): + with ops.device(d): + if i > 0: + # Give replicas meaningful distinct names: + var0name = index[devices[0]].name.split(":")[0] + # We append a / to variable names created on towers with id > 0 to + # ensure that we ignore the name scope and instead use the given + # name as the absolute name of the variable. + kwargs["name"] = "%s/replica_%d/" % (var0name, i) + # Initialize replicas with the same value: + if context.executing_eagerly(): + kwargs["initial_value"] = array_ops.identity( + index[devices[0]].value()) + else: + def initial_value_fn(device=d): + with ops.device(device): + return array_ops.identity(index[devices[0]].initial_value) + kwargs["initial_value"] = initial_value_fn + with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + v = next_creator(*args, **kwargs) + assert not isinstance(v, values.TPUMirroredVariable) + index[d] = v + return index + + return _create_tpu_mirrored_variable(devices, _real_mirrored_creator, *args, + **kwargs) + def _reduce(self, aggregation, value, destinations): - graph = ops.get_default_graph() - cf_context = graph._get_control_flow_context() # pylint: disable=protected-access - # If we're inside the ReplicateContext, reduction should be done using - # CrossReplicaSum while outside we can directly use an add_n op. - while cf_context: - if isinstance(cf_context, tpu.TPUReplicateContext): - if aggregation == vs.VariableAggregation.MEAN: - # TODO(jhseu): Revisit once we support model-parallelism. - value *= (1. / self.num_towers) - elif aggregation != vs.VariableAggregation.SUM: - raise NotImplementedError( - 'Currently only support sum & mean in TPUStrategy.') - return tpu_ops.cross_replica_sum(value) - cf_context = cf_context.outer_context + if values._enclosing_tpu_context() is not None: # pylint: disable=protected-access + if aggregation == vs.VariableAggregation.MEAN: + # TODO(jhseu): Revisit once we support model-parallelism. + value *= (1. / self.num_towers) + elif aggregation != vs.VariableAggregation.SUM: + raise NotImplementedError( + "Currently only support sum & mean in TPUStrategy.") + return tpu_ops.cross_replica_sum(value) # Validate that the destination is same as the host device # Note we don't do this when in replicate context as the reduction is @@ -290,10 +401,46 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): return output * (1. / len(value)) return output - def _unwrap(self, value): - if isinstance(value, list): - return value - return [value] + def _update(self, var, options, fn, *args, **kwargs): + assert isinstance(var, values.TPUMirroredVariable) + should_group = options.pop("grouped") + assert not options # Validate that we are processing all of the options. + + if values._enclosing_tpu_context() is not None: # pylint: disable=protected-access + if should_group: + return fn(var, *args, **kwargs) + else: + return [fn(var, *args, **kwargs)] + + # Otherwise, we revert to MirroredStrategy behavior and update each variable + # directly. + updates = {} + for d, v in var._index.items(): # pylint: disable=protected-access + name = "update_%d" % self._device_index.get(d) + with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name): + # If args and kwargs are not mirrored, the value is returned as is. + updates[d] = fn(v, + *values.select_device_mirrored(d, args), + **values.select_device_mirrored(d, kwargs)) + return values.update_regroup(self, updates, should_group) + + # TODO(josh11b): Need to implement _update_non_slot()! + + def read_var(self, var): + assert isinstance(var, values.TPUMirroredVariable) + return var.read_value() + + def _unwrap(self, val): + if isinstance(val, values.DistributedValues): + # Return in a deterministic order. + return [val.get(device=d) for d in sorted(val.devices)] + elif isinstance(val, list): + # TODO(josh11b): We need to remove this case; per device values should + # be represented using a PerDevice wrapper instead of a list with + # one entry per device. + return val + return [val] + @property def num_towers(self): @@ -323,6 +470,14 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): def should_save_summary(self): return True + @property + def worker_devices(self): + return self._tpu_devices + + @property + def parameter_devices(self): + return self._tpu_devices + def get_host_cpu_device(self, host_id): if self._tpu_cluster_resolver.get_master() in ('', 'local'): return '/replica:0/task:0/device:CPU:0' diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index a0cd029f51551cf8ac5aad8626dd01ca18194317..0dd78ba185bcbf126c2e1880a94f67a2781f229d 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -22,17 +22,20 @@ from __future__ import division from __future__ import print_function import collections +import contextlib import weakref import six from tensorflow.contrib.distribute.python import input_ops -from tensorflow.python.data.ops import multi_device_iterator_ops +from tensorflow.contrib.distribute.python import prefetching_ops_v2 from tensorflow.python.eager import context +from tensorflow.python.eager import tape from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as variables_lib @@ -363,18 +366,7 @@ class MirroredVariable(DistributedVariable, Mirrored, # We are calling assign on the mirrored variable in cross tower context, # use update to update the variable. strategy = distribution_strategy_context.get_distribution_strategy() - updates = strategy.update(self, f, *args, **kwargs) - grouped = strategy.group(updates) - if isinstance(updates, DistributedValues) and updates.is_tensor_like: - # Make sure we run all updates. Without this, something like - # session.run(mirrored_var.assign*(...)) may only update one tower. - index = {} - for d in updates.devices: - with ops.device(d), ops.control_dependencies([grouped]): - index[d] = array_ops.identity(updates.get(d)) - return Mirrored(index) - else: - return grouped + return strategy.update(self, f, *args, **kwargs) else: _assert_tower_context() # We are calling an assign function on the mirrored variable in tower @@ -453,6 +445,388 @@ ops.register_tensor_conversion_function(MirroredVariable, _tensor_conversion_mirrored) +def _enclosing_tpu_context(): + # pylint: disable=protected-access + tpu_context = ops.get_default_graph()._get_control_flow_context() + # pylint: enable=protected-access + while tpu_context is not None and not isinstance( + tpu_context, control_flow_ops.XLAControlFlowContext): + tpu_context = tpu_context.outer_context + return tpu_context + + +# TODO(jhseu): Deduplicate code. We copy code because we don't want to +# inherit from DistributedDelegate. DistributedDelegate will not work in a +# tpu.replicate() because it assumes that you're in a device context where you +# can operate on a single version of the variable, but a tpu.replicate() +# operates on all variables and is replicated during a rewrite pass. +class TPUMirroredVariable(checkpointable.CheckpointableBase): + """Holds a map from device to TPU variables whose values are kept in sync.""" + + def __init__(self, index, primary_var, aggregation): + # Use a weakref to make it easy to map from the contained values + # to the container without introducing a reference cycle. + for v in six.itervalues(index): + v._mirrored_container = weakref.ref(self) # pylint: disable=protected-access + self._index = {device_util.canonicalize(key): value + for key, value in six.iteritems(index)} + self._primary_var = primary_var + self._common_name = self._primary_var.name.split(":")[0] + self._aggregation = aggregation + # Needed for GradientTape + self._trainable = self._primary_var.trainable + + def _get(self, device=None): + """Returns the value for the current device or raises a ValueError.""" + if device is None: + tower_context = distribution_strategy_context.get_tower_context() + if tower_context: + device = tower_context.device + else: + device = distribute_lib.get_update_device() + if device is None: + return self._get_cross_tower() + device = device_util.canonicalize(device) + try: + return self._index[device] + except KeyError as e: + six.raise_from( + ValueError("Device %s not found in %s (current device %s)" % + (device, self._index.keys(), device_util.current())), e) + + # pylint: disable=multiple-statements + def __add__(self, o): return self.read_value() + o + def __radd__(self, o): return o + self.read_value() + def __sub__(self, o): return self.read_value() - o + def __rsub__(self, o): return o - self.read_value() + def __mul__(self, o): return self.read_value() * o + def __rmul__(self, o): return o * self.read_value() + def __truediv__(self, o): return self.read_value() / o + def __rtruediv__(self, o): return o / self.read_value() + def __floordiv__(self, o): return self.read_value() // o + def __rfloordiv__(self, o): return o // self.read_value() + def __mod__(self, o): return self.read_value() % o + def __rmod__(self, o): return o % self.read_value() + def __lt__(self, o): return self.read_value() < o + def __le__(self, o): return self.read_value() <= o + def __gt__(self, o): return self.read_value() > o + def __ge__(self, o): return self.read_value() >= o + def __and__(self, o): return self.read_value() & o + def __rand__(self, o): return o & self.read_value() + def __or__(self, o): return self.read_value() | o + def __ror__(self, o): return o | self.read_value() + def __xor__(self, o): return self.read_value() ^ o + def __rxor__(self, o): return o ^ self.read_value() + def __getitem__(self, o): return self.read_value()[o] + def __pow__(self, o, modulo=None): return pow(self.read_value(), o, modulo) + def __rpow__(self, o): return pow(o, self.read_value()) + def __invert__(self): return ~self.read_value() + def __neg__(self): return -self.read_value() + def __abs__(self): return abs(self.read_value()) + + def __div__(self, o): + try: + return self.read_value().__div__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rdiv__(self, o): + try: + return self.read_value().__rdiv__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __matmul__(self, o): + try: + return self.read_value().__matmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rmatmul__(self, o): + try: + return self.read_value().__rmatmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + @property + def handle(self): + # If we're in a tpu.rewrite(), return the replicated handle. + tpu_context = _enclosing_tpu_context() + if tpu_context is not None: + return tpu_context.get_replicated_var_handle( + self._common_name, nest.flatten(self._index)) + + device = distribute_lib.get_update_device() + if device is None: + return self._primary_var.handle + device = device_util.canonicalize(device) + try: + return self._index[device].handle + except KeyError as e: + six.raise_from( + ValueError("Device %s not found in %s (current device %s)" % + (device, self._index.keys(), device_util.current())), e) + + @property + def device(self): + return self._get().device + + # The arguments to update() are automatically unwrapped so the update() + # function would normally see regular variables, not MirroredVariables. + # However, the update function can still operate on wrapped MirroredVariables + # through object members, captured arguments, etc. This is more likely in an + # update_non_slot() function (like OptimizerV2._finish), which can + # update several non-slot variables in one call. + def _assign_func(self, *args, **kwargs): + if distribution_strategy_context.get_distribution_strategy().__class__.__name__ != "TPUStrategy": + raise ValueError("You may only assign to a TPUMirroredVariable within a " + "TPUStrategy.") + f = kwargs.pop("f") + if distribution_strategy_context.get_cross_tower_context(): + if _enclosing_tpu_context() is not None: + return distribution_strategy_context.get_distribution_strategy().update( + self, f, *args, **kwargs) + + update_device = distribute_lib.get_update_device() + # We are calling update on the mirrored variable in cross tower context. + if update_device is not None: + # We are calling an assign function on the mirrored variable in cross + # tower context. + v = self._get(device=update_device) + return f(v, *args, **kwargs) + + return distribution_strategy_context.get_distribution_strategy().update( + self, f, *args, **kwargs) + else: + _assert_tower_context() + # We are calling an assign function on the mirrored variable in tower + # context. + # We reduce the value we want to assign/add/sub. More details about how we + # handle the different use cases can be found in the _reduce method. + # We call the function on each of the mirrored variables with the reduced + # value. + if self._aggregation == vs.VariableAggregation.NONE: + raise ValueError("You must specify an aggregation method to update a " + "TPUMirroredVariable in Tower Context.") + + def merge_fn(strategy, value, *other_args, **other_kwargs): + return strategy.update( + self, f, + strategy.reduce( + aggregation=self._aggregation, value=value, destinations=self), + *other_args, **other_kwargs) + + return distribution_strategy_context.get_tower_context().merge_call( + merge_fn, *args, **kwargs) + + @contextlib.contextmanager + def _handle_graph(self, handle): + # Note: might have an eager tensor but not be executing eagerly when + # building functions. + if (context.executing_eagerly() or isinstance(handle, ops.EagerTensor) + or ops.has_default_graph()): + yield + else: + with handle.graph.as_default(): + yield + + @property + def trainable(self): + return self._trainable + + def _read_variable_op(self, parent_op=None): + if self.trainable: + tape.variable_accessed(self) + if parent_op is not None: + with ops.control_dependencies([parent_op]): + return gen_resource_variable_ops.read_variable_op( + self.handle, self.dtype) + + return gen_resource_variable_ops.read_variable_op( + self.handle, self.dtype) + + def read_value(self): + return self._read_variable_op() + + def assign_sub(self, *args, **kwargs): + def assign_sub_fn(var, delta, **kw): + name = kw.pop("name", None) + read_value = kw.pop("read_value", True) + with self._handle_graph(var.handle): + op = gen_resource_variable_ops.assign_sub_variable_op( + var.handle, ops.convert_to_tensor(delta, dtype=self.dtype), + name=name) + if read_value: + return self._read_variable_op(parent_op=op) + return op + + return self._assign_func(f=assign_sub_fn, *args, **kwargs) + + def assign_add(self, *args, **kwargs): + def assign_add_fn(var, delta, **kw): + name = kw.pop("name", None) + read_value = kw.pop("read_value", True) + with self._handle_graph(var.handle): + op = gen_resource_variable_ops.assign_add_variable_op( + var.handle, ops.convert_to_tensor(delta, dtype=self.dtype), + name=name) + if read_value: + return self._read_variable_op(parent_op=op) + return op + + return self._assign_func(f=assign_add_fn, *args, **kwargs) + + def assign(self, *args, **kwargs): + def assign_fn(var, value, **kw): + name = kw.pop("name", None) + read_value = kw.pop("read_value", True) + with self._handle_graph(var.handle): + op = gen_resource_variable_ops.assign_variable_op( + var.handle, ops.convert_to_tensor(value, dtype=self.dtype), + name=name) + if read_value: + return self._read_variable_op(parent_op=op) + return op + + return self._assign_func(f=assign_fn, *args, **kwargs) + + @property + def aggregation(self): + return self._aggregation + + @property + def constraint(self): + return None + + @property + def initializer(self): + return control_flow_ops.group( + [v.initializer for v in nest.flatten(self._index)]) + + @property + def graph(self): + return self._primary_var.graph + + @property + def _shared_name(self): + return self._common_name + + @property + def _unique_id(self): + return self._primary_var._unique_id # pylint: disable=protected-access + + @property + def name(self): + return self._primary_var.name + + @property + def dtype(self): + return self._primary_var.dtype + + @property + def shape(self): + return self._primary_var.shape + + def get_shape(self): + return self._primary_var.get_shape() + + def to_proto(self, export_scope=None): + return self._primary_var.to_proto(export_scope=export_scope) + + def _get_cross_tower(self): + device = device_util.canonicalize(device_util.current()) + if device in self._index: + return self._index[device] + return self._primary_var + + def _as_graph_element(self): + # pylint: disable=protected-access + if distribution_strategy_context.get_cross_tower_context(): + return self._primary_var._as_graph_element() + return self._read_variable_op() + + def _gather_saveables_for_checkpoint(self): + """Overrides CheckpointableBase method. + + This allows both name-based and object-based save and restore of + MirroredVariables. + + Returns: + A dictionary mapping attribute names to `SaveableObject` factories. + """ + def _saveable_factory(name=self._common_name): + return _MirroredSaveable(self, self._primary_var, name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} + + def _should_act_as_resource_variable(self): + """Pass resource_variable_ops.is_resource_variable check.""" + pass + + # Needed to pass ResourceVariable checks. + @property + def op(self): + return self._primary_var.op + + @property + def _in_graph_mode(self): + return self._primary_var._in_graph_mode # pylint: disable=protected-access + + def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): + """Converts a variable to a tensor.""" + # pylint: disable=protected-access + if _enclosing_tpu_context() is None: + return self._get()._dense_var_to_tensor(dtype, name, as_ref) + # pylint: enable=protected-access + if dtype is not None and dtype != self.dtype: + raise NotImplementedError + if as_ref: + return self.handle + else: + return self.read_value() + + def is_initialized(self, name=None): + """Identifies if all the component variables are initialized. + + Args: + name: Name of the final `logical_and` op. + + Returns: + The op that evaluates to True or False depending on if all the + component variables are initialized. + """ + # TODO(jhseu): Do we need TPU context implementation? + + # We have to cast the self._index.values() to a `list` because when we + # use `model_to_estimator` to run tf.keras models, self._index.values() is + # of type `dict_values` and not `list`. + values_list = nest.flatten(self._index) + result = values_list[0].is_initialized() + # We iterate through the list of values except the last one to allow us to + # name the final `logical_and` op the same name that is passed by the user + # to the `is_initialized` op. For distributed variables, the + # `is_initialized` op is a `logical_and` op. + for v in values_list[1:-1]: + result = math_ops.logical_and(result, v.is_initialized()) + result = math_ops.logical_and(result, values_list[-1].is_initialized(), + name=name) + return result + + +# Register a conversion function which reads the value of the variable, +# allowing instances of the class to be used as tensors. +def _tensor_conversion_tpu_mirrored(var, dtype=None, name=None, as_ref=False): + return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref) # pylint: disable=protected-access + + +ops.register_tensor_conversion_function(TPUMirroredVariable, + _tensor_conversion_tpu_mirrored) +ops.register_dense_tensor_like_type(TPUMirroredVariable) + + class _TowerLocalSaveable(saver.BaseSaverBuilder.SaveableObject): """Class for defining how to restore a TowerLocalVariable.""" @@ -668,6 +1042,29 @@ def select_device_mirrored(device, structured): return nest.map_structure(_get_mirrored, structured) +def update_regroup(strategy, updates, should_group): + """Regroup for an update, with dependencies to ensure all updates execute.""" + regrouped = regroup(updates, Mirrored) + if not should_group: + return nest.map_structure(strategy.unwrap, regrouped) + grouped_flat = [] + for u in nest.flatten(regrouped): + if isinstance(u, DistributedValues): + g = strategy.group(u) + if u.is_tensor_like: + # Make sure we run all updates. Without this, something like + # session.run(strategy.update(...)) may only update one tower. + index = {} + for d in u.devices: + with ops.device(d), ops.control_dependencies([g]): + index[d] = array_ops.identity(u.get(d)) + g = Mirrored(index) + else: + g = u + grouped_flat.append(g) + return nest.pack_sequence_as(regrouped, grouped_flat) + + class PerDeviceDataIterator(object): """An iterator (like `tf.data.Iterator`) into a `PerDeviceDataset`.""" @@ -683,7 +1080,7 @@ class PerDeviceDataIterator(object): def get_next(self, name=None): """Scatter the input across devices.""" if self._prefetch_on_device: - data_list = self._iterator.get_next() + data_list = self._iterator.get_next(name=name) index = dict(zip(self._devices, data_list)) else: batch = self._iterator.get_next(name=name) @@ -703,26 +1100,21 @@ class PerDeviceDataIterator(object): class PerDeviceDataset(object): """Like `tf.data.Dataset` split devices, producing `PerDevice` data.""" - def __init__( - self, - dataset, - devices, - prefetch_on_device=None, - source_device="/cpu:0", - ): + def __init__(self, dataset, devices, prefetch_on_device=None): self._devices = devices - self._source_device = source_device if source_device is not None else "/cpu:0" # Default to using prefetching in graph mode, unless specified. - # TODO(rohanj): Enable prefetching in eager mode. + # TODO(priyag): Enable prefetching in eager mode. self._prefetch_on_device = prefetch_on_device if self._prefetch_on_device is None: self._prefetch_on_device = not context.executing_eagerly() assert not (self._prefetch_on_device and context.executing_eagerly()), ( "Prefetching is only supported in graph mode currently") - self._dataset = dataset - if not self._prefetch_on_device: + if self._prefetch_on_device: + self._dataset = dataset.apply( + prefetching_ops_v2.prefetch_to_devices(self._devices)) + else: # TODO(priyag): If dropping remainder is not appropriate, find another # approach to distributing the dataset when not possible to divide evenly. # Possibly not an issue when we start using PartitionedDataset. @@ -730,33 +1122,15 @@ class PerDeviceDataset(object): def make_one_shot_iterator(self): """Get a one time use iterator for the distributed PerDeviceDataset.""" - # Graph mode prefetching with one shot iterator is disabled. - if not context.executing_eagerly(): - raise ValueError("Cannot create a one shot iterator. Please use " - "`make_initializable_iterator()` instead.") - # Eager mode prefetching would error out in constructor. Only remaining - # cases are non-prefetching eager / graph mode. We delegate to - # PerDeviceDataIterator to handle them. dataset_iterator = self._dataset.make_one_shot_iterator() - return PerDeviceDataIterator( - dataset_iterator, self._devices, prefetch_on_device=False) + return PerDeviceDataIterator(dataset_iterator, self._devices, + self._prefetch_on_device) def make_initializable_iterator(self): """Get an initializable iterator for the distributed PerDeviceDataset.""" - # Eager mode generates already initialized iterators. Hence we cannot create - # an initializable iterator. - if context.executing_eagerly(): - raise ValueError("Cannot create initializable iterator in Eager mode. " - "Please use `make_one_shot_iterator` instead.") - if self._prefetch_on_device: - dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( - self._dataset, self._devices, source_device=self._source_device) - else: - dataset_iterator = self._dataset.make_initializable_iterator() - return PerDeviceDataIterator( - dataset_iterator, - self._devices, - prefetch_on_device=self._prefetch_on_device) + dataset_iterator = self._dataset.make_initializable_iterator() + return PerDeviceDataIterator(dataset_iterator, self._devices, + self._prefetch_on_device) class MultiWorkerDataIterator(object): @@ -816,7 +1190,8 @@ class MultiWorkerDataset(object): eager mode. """ - def __init__(self, dataset_fn, worker_device_map, prefetch_on_device=None): + def __init__(self, dataset_fn, worker_device_map, prefetch_on_device=None, + auto_shard=False): """Initialize the MultiWorkerDataset object. Args: @@ -824,6 +1199,7 @@ class MultiWorkerDataset(object): worker_device_map: a dict mapping from each worker to a list of devices that belong to this worker. prefetch_on_device: whether to prefetch to devices. + auto_shard: whether to auto-shard the dataset. """ self._worker_device_map = worker_device_map self._datasets = {} @@ -833,13 +1209,11 @@ class MultiWorkerDataset(object): six.iteritems(worker_device_map)): with ops.device(worker): worker_input = dataset_fn() - worker_input = input_ops.auto_shard_dataset( - worker_input, len(worker_device_map), i) + if auto_shard: + worker_input = input_ops.auto_shard_dataset( + worker_input, len(worker_device_map), i) self._datasets[worker] = PerDeviceDataset( - worker_input, - worker_devices, - source_device=worker, - prefetch_on_device=prefetch_on_device) + worker_input, worker_devices, prefetch_on_device=prefetch_on_device) def make_one_shot_iterator(self): iterators = {} diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py index 002d61f46e7e4a34f1d6bcb1baaf6ff5e0bd5d03..121d2fbb3fbccd913599a581b3de9850ab33eae0 100644 --- a/tensorflow/contrib/distribute/python/values_test.py +++ b/tensorflow/contrib/distribute/python/values_test.py @@ -349,11 +349,7 @@ class PerDeviceDatasetTest(test.TestCase): def _test_iterator_no_prefetch(self, devices, dataset, expected_values): per_device_dataset = values.PerDeviceDataset( dataset, devices, prefetch_on_device=False) - if context.executing_eagerly(): - iterator = per_device_dataset.make_one_shot_iterator() - else: - iterator = per_device_dataset.make_initializable_iterator() - self.evaluate([iterator.initializer]) + iterator = per_device_dataset.make_one_shot_iterator() for expected_value in expected_values: next_element = iterator.get_next() @@ -370,14 +366,21 @@ class PerDeviceDatasetTest(test.TestCase): if not context.executing_eagerly(): per_device_dataset = values.PerDeviceDataset( dataset, devices, prefetch_on_device=True) - iterator = per_device_dataset.make_initializable_iterator() - self.evaluate([iterator.initializer]) + iterator = per_device_dataset.make_one_shot_iterator() + # With prefetching, we cannot guarantee which input ends up on which + # device, so we verify that the complete set seen on all devices is + # correct, and equal numbers are distributed to each device. + combined_actual = [] + combined_expected = [] for expected_value in expected_values: next_element = iterator.get_next() - computed_value = self.evaluate( - [values.select_device(d, next_element) for d in devices]) - self.assertEqual(expected_value, computed_value) + combined_actual.extend( + self.evaluate( + [values.select_device(d, next_element) for d in devices])) + combined_expected.extend(expected_value) + + self.assertEqual(set(combined_expected), set(combined_actual)) with self.assertRaises(errors.OutOfRangeError): next_element = iterator.get_next() @@ -638,7 +641,7 @@ class MirroredVariableTest(test.TestCase): if context.num_gpus() < 1 and context.executing_eagerly(): self.skipTest("A GPU is not available for this test in eager mode.") - with self.test_session() as sess: + with self.cached_session(config=self.config) as sess: v, devices, mirrored = _make_mirrored() # Overwrite the initial values. @@ -741,7 +744,7 @@ class MirroredVariableTest(test.TestCase): if context.num_gpus() < 1 or context.executing_eagerly(): self.skipTest("A GPU is not available for this test or it's eager mode.") - with self.test_session( + with self.session( graph=ops.Graph()) as sess, mirrored_strategy.MirroredStrategy( ["/device:GPU:0"]).scope(): with ops.device("/device:GPU:0"): @@ -824,7 +827,7 @@ class TowerLocalVariableTest(test.TestCase): if context.num_gpus() < 1 and context.executing_eagerly(): self.skipTest("A GPU is not available for this test in eager mode.") - with self.test_session() as sess: + with self.cached_session(config=self.config) as sess: v, tower_local = _make_tower_local(variable_scope.VariableAggregation.SUM) # Overwrite the initial values. @@ -847,7 +850,7 @@ class TowerLocalVariableTest(test.TestCase): if context.num_gpus() < 1 and context.executing_eagerly(): self.skipTest("A GPU is not available for this test in eager mode.") - with self.test_session() as sess: + with self.cached_session(config=self.config) as sess: v, tower_local = _make_tower_local( variable_scope.VariableAggregation.MEAN) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 3ff7da4f89c1145b323c52d671675343a4f5e98c..60f6b90edcb71f04bca29b90744db201e83cd545 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -299,7 +299,7 @@ cuda_py_test( cuda_py_test( name = "mvn_diag_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/mvn_diag_test.py"], additional_deps = [ ":distributions_py", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py index 3c988dad8a256a00531dbd7d7f609dac5b9e5b1e..be7c756bea5f47aa32e667326db551a2eec0b125 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py @@ -38,8 +38,8 @@ class MovingReduceMeanVarianceTest(test.TestCase): true_stddev = np.array([[1.1, 0.5]]) with self.cached_session() as sess: # Start "x" out with this mean. - mean_var = variables.Variable(array_ops.zeros_like(true_mean)) - variance_var = variables.Variable(array_ops.ones_like(true_stddev)) + mean_var = variables.VariableV1(array_ops.zeros_like(true_mean)) + variance_var = variables.VariableV1(array_ops.ones_like(true_stddev)) x = random_ops.random_normal(shape, dtype=np.float64, seed=0) x = true_stddev * x + true_mean ema, emv = moving_stats.assign_moving_mean_variance( @@ -115,7 +115,7 @@ class MovingLogExponentialMovingMeanExpTest(test.TestCase): # Start "x" out with this mean. x = random_ops.random_normal(shape, dtype=np.float64, seed=0) x = true_stddev * x + true_mean - log_mean_exp_var = variables.Variable(array_ops.zeros_like(true_mean)) + log_mean_exp_var = variables.VariableV1(array_ops.zeros_like(true_mean)) variables.global_variables_initializer().run() log_mean_exp = moving_stats.assign_log_moving_mean_exp( log_mean_exp_var, x, decay=decay) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 135095a97980da8988b976948fb18492526e390c..3aed121233be1268531495a2fa83fd323412e1fd 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import prefetching_ops +from tensorflow.python.data.experimental.ops import prefetching_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import context from tensorflow.python.framework import ops @@ -54,7 +54,7 @@ class Iterator(iterator_ops.EagerIterator): """ if isinstance(dataset, prefetching_ops._PrefetchToDeviceDataset): # pylint: disable=protected-access raise TypeError( - "`tf.contrib.data.prefetch_to_device()` is not compatible with " + "`tf.data.experimental.prefetch_to_device()` is not compatible with " "`tf.contrib.eager.Iterator`. Use `for ... in dataset:` to iterate " "over the dataset instead.") diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py index a753d77580758af9de8410de4a08f7ea278c4c79..6a508fc6ba98740c4d441a064dc8a3e2b321f585 100644 --- a/tensorflow/contrib/eager/python/datasets_test.py +++ b/tensorflow/contrib/eager/python/datasets_test.py @@ -24,11 +24,11 @@ import time import numpy as np from tensorflow.contrib import lookup -from tensorflow.contrib.data.python.ops import prefetching_ops -from tensorflow.contrib.data.python.ops import threadpool -from tensorflow.contrib.data.python.ops import unique from tensorflow.contrib.eager.python import datasets from tensorflow.python.data import Dataset +from tensorflow.python.data.experimental.ops import prefetching_ops +from tensorflow.python.data.experimental.ops import threadpool +from tensorflow.python.data.experimental.ops import unique from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes diff --git a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py index c38a1597b87971ff619225eca08db6b847bb9674..1c925e455b9ce9f52b9e1a32fe0b0110ae8f4f41 100644 --- a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py +++ b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py @@ -45,6 +45,17 @@ def step(dynamics, optimizer, samples): return loss, samples +# To be defunnable, the function cannot return an Operation, so the above +# function is used for defun or eager, and this function is used in graph to be +# able to run the gradient updates. +def graph_step(dynamics, optimizer, samples): + loss, grads, samples, _ = l2hmc.loss_and_grads( + dynamics, samples, loss_fn=l2hmc.compute_loss) + train_op = optimizer.apply_gradients(zip(grads, dynamics.variables)) + + return train_op, loss, samples + + def warmup(dynamics, optimizer, n_iters=1, @@ -134,51 +145,48 @@ class L2hmcBenchmark(tf.test.Benchmark): """Benchmark Graph performance.""" hparams = get_default_hparams() - tf.reset_default_graph() - with tf.Graph().as_default(): - energy_fn, _, _ = l2hmc.get_scg_energy_fn() - dynamics = l2hmc.Dynamics( - x_dim=hparams.x_dim, - minus_loglikelihood_fn=energy_fn, - n_steps=hparams.n_steps, - eps=hparams.eps) - x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim]) - loss, x_out, _ = l2hmc.compute_loss(dynamics, x) - - global_step = tf.Variable(0., name="global_step", trainable=False) - learning_rate = tf.train.exponential_decay( - hparams.learning_rate, global_step, 1000, 0.96, staircase=True) - optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) - train_op = optimizer.minimize(loss, global_step=global_step) - - # Single thread; fairer comparison against eager - session_conf = tf.ConfigProto( - intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) - - with tf.Session(config=session_conf) as sess: - sess.run(tf.global_variables_initializer()) - - # Warmup to reduce initialization effect when timing - samples = npr.normal(size=[hparams.n_samples, hparams.x_dim]) - for _ in range(hparams.n_warmup_iters): - _, _, _, _ = sess.run( - [x_out, loss, train_op, learning_rate], feed_dict={x: samples}) - - # Training - start_time = time.time() - for i in range(hparams.n_iters): - samples, loss_np, _, _ = sess.run( - [x_out, loss, train_op, learning_rate], feed_dict={x: samples}) - print("Iteration %d: loss %.4f" % (i, loss_np)) - wall_time = time.time() - start_time - examples_per_sec = hparams.n_samples / wall_time - - self.report_benchmark( - name="graph_train_%s" % ("gpu" - if tf.test.is_gpu_available() else "cpu"), - iters=hparams.n_iters, - extras={"examples_per_sec": examples_per_sec}, - wall_time=wall_time) + tf.enable_resource_variables() + for sample_size in [10, 25, 50, 100, 200]: + hparams.n_samples = sample_size + tf.reset_default_graph() + with tf.Graph().as_default(): + energy_fn, _, _ = l2hmc.get_scg_energy_fn() + x = tf.random_normal([hparams.n_samples, hparams.x_dim], + dtype=tf.float32) + dynamics = l2hmc.Dynamics( + x_dim=hparams.x_dim, + minus_loglikelihood_fn=energy_fn, + n_steps=hparams.n_steps, + eps=hparams.eps) + loss, _, _ = l2hmc.compute_loss(dynamics, x) + + optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) + train_op, loss, _ = graph_step(dynamics, optimizer, x) + + # Single thread; fairer comparison against eager + session_conf = tf.ConfigProto(inter_op_parallelism_threads=1) + + with tf.Session(config=session_conf) as sess: + sess.run(tf.global_variables_initializer()) + + # Warmup to reduce initialization effect when timing + for _ in range(hparams.n_warmup_iters): + _, _ = sess.run([train_op, loss]) + + # Training + start_time = time.time() + for i in range(hparams.n_iters): + _, loss_np = sess.run([train_op, loss]) + print("Iteration %d: loss %.4f" % (i, loss_np)) + wall_time = (time.time() - start_time) / hparams.n_iters + examples_per_sec = hparams.n_samples / wall_time + + self.report_benchmark( + name="graph_train_%s_%d" % + ("gpu" if tf.test.is_gpu_available() else "cpu", sample_size), + iters=hparams.n_iters, + extras={"examples_per_sec": examples_per_sec}, + wall_time=wall_time) def benchmark_eager(self): self._benchmark_eager() @@ -190,32 +198,44 @@ class L2hmcBenchmark(tf.test.Benchmark): """Benchmark Eager performance.""" hparams = get_default_hparams() - energy_fn, _, _ = l2hmc.get_scg_energy_fn() - dynamics = l2hmc.Dynamics( - x_dim=hparams.x_dim, - minus_loglikelihood_fn=energy_fn, - n_steps=hparams.n_steps, - eps=hparams.eps) - optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) - step_fn = tfe.defun(step) if defun else step - - # Warmup to reduce initialization effect when timing - warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, step_fn=step_fn) - - # Training - samples = tf.random_normal( - shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32) - start_time = time.time() - fit(dynamics, samples, optimizer, step_fn=step_fn, n_iters=hparams.n_iters) - wall_time = time.time() - start_time - examples_per_sec = hparams.n_samples / wall_time - - self.report_benchmark( - name="eager_train_%s%s" % ("gpu" if tf.test.is_gpu_available() else - "cpu", "_defun" if defun else ""), - iters=hparams.n_iters, - extras={"examples_per_sec": examples_per_sec}, - wall_time=wall_time) + for sample_size in [10, 25, 50, 100, 200]: + hparams.n_samples = sample_size + energy_fn, _, _ = l2hmc.get_scg_energy_fn() + dynamics = l2hmc.Dynamics( + x_dim=hparams.x_dim, + minus_loglikelihood_fn=energy_fn, + n_steps=hparams.n_steps, + eps=hparams.eps) + optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) + step_fn = tfe.defun(step) if defun else step + + # Warmup to reduce initialization effect when timing + warmup( + dynamics, + optimizer, + n_iters=hparams.n_warmup_iters, + n_samples=hparams.n_samples, + step_fn=step_fn) + + # Training + samples = tf.random_normal( + shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32) + start_time = time.time() + fit(dynamics, + samples, + optimizer, + step_fn=step_fn, + n_iters=hparams.n_iters) + wall_time = (time.time() - start_time) / hparams.n_iters + examples_per_sec = hparams.n_samples / wall_time + + self.report_benchmark( + name="eager_train_%s%s_%d" % + ("gpu" if tf.test.is_gpu_available() else "cpu", + "_defun" if defun else "", sample_size), + iters=hparams.n_iters, + extras={"examples_per_sec": examples_per_sec}, + wall_time=wall_time) del dynamics diff --git a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb index 8fae622e12864ddeee0cedd3cf99be8ea5e4bc48..446e3401184ded6bc34ed64cdd720e29a2851855 100644 --- a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb +++ b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb @@ -65,7 +65,7 @@ "\u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\n", " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", "\u003c/td\u003e\u003ctd\u003e\n", - "\u003ca target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" + "\u003ca target=\"_blank\" href=\"https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e" ] } ], diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py index 551c76b0df71c88919df9cd6d81b4176b23b0ba3..f3bb978875e226f58d6a00e09154191673a97415 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py @@ -51,7 +51,9 @@ def random_batch(batch_size): class ResNet50GraphTest(tf.test.TestCase): def testApply(self): - batch_size = 64 + # Use small batches for tests because the OSS version runs + # in constrained GPU environment with 1-2GB of memory. + batch_size = 8 with tf.Graph().as_default(): images = tf.placeholder(tf.float32, image_shape(None)) model = resnet50.ResNet50(data_format()) @@ -63,7 +65,7 @@ class ResNet50GraphTest(tf.test.TestCase): sess.run(init) np_images, _ = random_batch(batch_size) out = sess.run(predictions, feed_dict={images: np_images}) - self.assertAllEqual([64, 1000], out.shape) + self.assertAllEqual([batch_size, 1000], out.shape) def testTrainWithSummary(self): with tf.Graph().as_default(): @@ -87,7 +89,9 @@ class ResNet50GraphTest(tf.test.TestCase): init = tf.global_variables_initializer() self.assertEqual(321, len(tf.global_variables())) - batch_size = 32 + # Use small batches for tests because the OSS version runs + # in constrained GPU environment with 1-2GB of memory. + batch_size = 2 with tf.Session() as sess: sess.run(init) sess.run(tf.contrib.summary.summary_writer_initializer_op()) diff --git a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py index 34a9984b0ecc527ad1991c28146246b716e96c98..d85188de030af2bbab1c141b5c090371248110b9 100644 --- a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py +++ b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py @@ -169,11 +169,11 @@ class ImageNetInput(object): # Read the data from disk in parallel dataset = dataset.apply( - tf.contrib.data.parallel_interleave( + tf.data.experimental.parallel_interleave( fetch_dataset, cycle_length=self.num_parallel_calls, sloppy=True)) if self.cache: dataset = dataset.cache().apply( - tf.contrib.data.shuffle_and_repeat(1024 * 16)) + tf.data.experimental.shuffle_and_repeat(1024 * 16)) else: dataset = dataset.shuffle(1024) @@ -188,9 +188,11 @@ class ImageNetInput(object): # batch size. As long as this validation is done with consistent batch size, # exactly the same images will be used. dataset = dataset.apply( - tf.contrib.data.map_and_batch( - self.dataset_parser, batch_size=batch_size, - num_parallel_batches=self.num_cores, drop_remainder=True)) + tf.data.experimental.map_and_batch( + self.dataset_parser, + batch_size=batch_size, + num_parallel_batches=self.num_cores, + drop_remainder=True)) # Transpose for performance on TPU if self.transpose_input: diff --git a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py index 6a921e19978fdf6e3c20974b2c349bd6923b5782..4f4cc3af6f1d5c626b3e2ea7939ecad0ee2d41f1 100644 --- a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py +++ b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py @@ -50,6 +50,9 @@ class RevNetTest(tf.test.TestCase): # Reconstruction could cause numerical error, use double precision for tests config.dtype = tf.float64 config.fused = False # Fused batch norm does not support tf.float64 + # Reduce the batch size for tests because the OSS version runs + # in constrained GPU environment with 1-2GB of memory. + config.batch_size = 2 shape = (config.batch_size,) + config.input_shape self.model = revnet.RevNet(config=config) self.x = tf.random_normal(shape=shape, dtype=tf.float64) diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py index ba6fe9701df74361f2160195606efbe5bbcb6857..7aa4b598b833c3419af501b49f1509d18f3530d5 100644 --- a/tensorflow/contrib/eager/python/remote_test.py +++ b/tensorflow/contrib/eager/python/remote_test.py @@ -47,8 +47,9 @@ def run_sync_and_async(f): @functools.wraps(f) def decorator(self, *args, **kwargs): - with context.execution_mode(context.ASYNC): - f(self, *args, **kwargs) + # TODO(b/117110239): Re-enable. + # with context.execution_mode(context.ASYNC): + # f(self, *args, **kwargs) with context.execution_mode(context.SYNC): f(self, *args, **kwargs) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index a1f1c5f3d7a25ad28c58e9c215b862b6d51f4cd8..b131ed4f12a01a0087390b5bb65f3ac2d5aec657 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -75,7 +75,7 @@ class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase): # pylint: layer. head: the `Head` instance defined for Estimator. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing @@ -199,7 +199,7 @@ def boosted_trees_classifier_train_in_memory( the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: number of label classes. Default is binary classification. Multiclass support is not yet implemented. @@ -345,7 +345,7 @@ def boosted_trees_regressor_train_in_memory( the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. label_dimension: Number of regression targets per example. Multi-dimensional support is not yet implemented. diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py index 724bc2c82f8289bbaa19a1dbbc1dc81b6e158e02..4e7965ef265022214f88ed74f4c8502fc8a4c897 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py @@ -118,7 +118,7 @@ class DNNLinearCombinedEstimator(estimator.Estimator): head: A `_Head` instance constructed with a method such as `tf.contrib.estimator.multi_label_head`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. linear_feature_columns: An iterable containing all the feature columns used by linear part of the model. All items in the set must be diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py index 5faf0aacfe57b3fcd716bfb0f73842e4e8180cbc..40a91175b71f27bb9ca72a238a5aea172cf4c360 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -151,7 +151,7 @@ def make_input_layer_with_layer_annotations(original_input_layer): # spec and looking at the keys. spec = feature_column_lib.make_parse_example_spec(feature_columns) for key in spec.keys(): - tensor = ops.convert_to_tensor(features[key]) + tensor = ops.convert_to_tensor_or_indexed_slices(features[key]) ops.add_to_collection( LayerAnnotationsCollectionNames.keys( LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key) @@ -248,7 +248,7 @@ def DNNClassifierWithLayerAnnotations( # pylint: disable=invalid-name model. All items in the set should be instances of classes derived from `_FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can also - be used to load checkpoints from the directory into a estimator to + be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: Number of label classes. Defaults to 2, namely binary classification. Must be > 1. diff --git a/tensorflow/contrib/estimator/python/estimator/hooks_test.py b/tensorflow/contrib/estimator/python/estimator/hooks_test.py index c6c6cad95a7575224c47bb5ec36e243691fed371..62ffad56da324ea3765dfdac64f3ef00d9b17a38 100644 --- a/tensorflow/contrib/estimator/python/estimator/hooks_test.py +++ b/tensorflow/contrib/estimator/python/estimator/hooks_test.py @@ -294,7 +294,7 @@ class InMemoryEvaluatorHookTest(test.TestCase): def model_fn(features, labels, mode): _, _ = features, labels - w = variables.Variable( + w = variables.VariableV1( initial_value=[0.], trainable=False, collections=[ops.GraphKeys.SAVEABLE_OBJECTS]) diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index ce758992140d43529037b14cbbf958d5aa763fb4..6e793c830244e64cd11c4054918c18a8251be7ac 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -233,6 +233,22 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access self, features, mode, logits, labels=None, optimizer=None, train_op_fn=None): """See `_Head`.""" + return self._create_estimator_spec( + features=features, mode=mode, logits=logits, labels=labels, + optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=False) + + def _create_tpu_estimator_spec( + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None): + """See `_Head`.""" + return self._create_estimator_spec( + features=features, mode=mode, logits=logits, labels=labels, + optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=True) + + def _create_estimator_spec( + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, use_tpu=False): + """Returns `EstimatorSpec` or `TPUEstimatorSpec`.""" if isinstance(logits, dict): logits_dict = logits else: @@ -255,14 +271,15 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access spec = self._merge_train( all_estimator_spec=all_estimator_spec, optimizer=optimizer, - train_op_fn=train_op_fn) + train_op_fn=train_op_fn, + use_tpu=use_tpu) with ops.name_scope(''): summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss) return spec if mode == model_fn.ModeKeys.PREDICT: - return self._merge_predict(all_estimator_spec) + return self._merge_predict(all_estimator_spec, use_tpu=use_tpu) if mode == model_fn.ModeKeys.EVAL: - return self._merge_eval(all_estimator_spec) + return self._merge_eval(all_estimator_spec, use_tpu=use_tpu) raise ValueError('mode={} unrecognized'.format(mode)) def _split_logits(self, logits): @@ -284,28 +301,28 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access begin_idx += head.logits_dimension return logits_dict - def _merge_train(self, all_estimator_spec, optimizer, train_op_fn): - """Merges list of `EstimatorSpec` for training. + def _merge_train( + self, all_estimator_spec, optimizer, train_op_fn, use_tpu=False): + """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for training. Args: - all_estimator_spec: list of `EstimatorSpec` for the individual heads. + all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the + individual heads. optimizer: `Optimizer` instance to create train op. See `create_estimator_spec` documentation for more details. train_op_fn: Function to create train op. Used if `optimizer` is `None`. + use_tpu: If `True`, returns `TPUEstimatorSpec`. Returns: - `EstimatorSpec` that merges all heads for TRAIN. + `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for TRAIN. Raises: ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN mode. """ losses = [] - metrics = {} for spec in all_estimator_spec: losses.append(spec.loss) - # Metric keys already contain head.name. - metrics.update(spec.eval_metric_ops or {}) loss = _merge_losses(losses, self._head_weights) if optimizer is not None: if train_op_fn is not None: @@ -317,20 +334,23 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access else: raise ValueError('train_op_fn and optimizer cannot both be None.') - return model_fn.EstimatorSpec( + spec_type = ( + model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec) # pylint:disable=protected-access + return spec_type( mode=model_fn.ModeKeys.TRAIN, loss=loss, - train_op=train_op, - eval_metric_ops=metrics) + train_op=train_op) - def _merge_predict(self, all_estimator_spec): - """Merges list of `EstimatorSpec` for prediction. + def _merge_predict(self, all_estimator_spec, use_tpu=False): + """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for prediction. Args: - all_estimator_spec: list of `EstimatorSpec` for the individual heads. + all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the + individual heads. + use_tpu: If `True`, returns `TPUEstimatorSpec`. Returns: - `EstimatorSpec` that merges all heads for PREDICT. + `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for PREDICT. """ predictions = {} export_outputs = { @@ -357,20 +377,29 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access export_outputs[head_lib._PREDICT_SERVING_KEY] = ( # pylint:disable=protected-access export_output_lib.PredictOutput(merged_predict_outputs)) - return model_fn.EstimatorSpec( + spec_type = ( + model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec) # pylint:disable=protected-access + return spec_type( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs=export_outputs) - def _merge_eval(self, all_estimator_spec): + def _merge_eval(self, all_estimator_spec, use_tpu=False): """Merges list of `EstimatorSpec` for eval. Args: all_estimator_spec: list of `EstimatorSpec` for the individual heads. + use_tpu: If `True`, will raise `NotImplementedError`, because TPU is not + yet supported for eval. Returns: `EstimatorSpec` that merges all heads for EVAL. + Raises: + NotImplementedError: If `use_tpu` is `True`. """ + if use_tpu: + raise NotImplementedError( + 'TPU evaluation is not implemented for multi_head.') predictions = {} metrics = {} losses = [] diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 2b4d5f526199c500ad77a0422215381ac3a1cf69..a602f87b4a2b4062efddf819522fb2d1eeceaabe 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -106,7 +106,7 @@ class MultiHeadTest(test.TestCase): multi_head = multi_head_lib.multi_head([head1, head2]) self.assertEqual('head1_head2', multi_head.name) - def test_predict_two_heads_logits_dict(self): + def _test_predict_two_heads_logits_dict(self, use_tpu): """Tests predict with logits as dict.""" head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') @@ -121,10 +121,16 @@ class MultiHeadTest(test.TestCase): 'head2': _sigmoid(logits['head2']), } - spec = multi_head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.PREDICT, - logits=logits) + if use_tpu: + spec = multi_head._create_tpu_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits).as_estimator_spec() + else: + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) self.assertItemsEqual( (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification', @@ -175,6 +181,12 @@ class MultiHeadTest(test.TestCase): sess.run( spec.export_outputs['head2/predict'].outputs['probabilities'])) + def test_predict_two_heads_logits_dict(self): + self._test_predict_two_heads_logits_dict(use_tpu=False) + + def test_predict_two_heads_logits_dict_tpu(self): + self._test_predict_two_heads_logits_dict(use_tpu=True) + def test_predict_two_heads_logits_tensor(self): """Tests predict with logits as Tensor.""" head1 = head_lib.multi_label_head(n_classes=2, name='head1') @@ -350,6 +362,31 @@ class MultiHeadTest(test.TestCase): rtol=tol, atol=tol) + def test_eval_tpu(self): + head1 = head_lib.multi_label_head(n_classes=2, name='head1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2') + multi_head = multi_head_lib.multi_head( + [head1, head2], head_weights=[1., 2.]) + + logits = { + 'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32), + 'head2': np.array([[20., -20., 20.], [-30., 20., -20.]], + dtype=np.float32), + } + labels = { + 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), + 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), + } + + with self.assertRaisesRegexp( + NotImplementedError, + r'TPU evaluation is not implemented for multi_head\.'): + multi_head._create_tpu_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.EVAL, + logits=logits, + labels=labels) + def test_train_create_loss_one_head(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') multi_head = multi_head_lib.multi_head([head1]) @@ -587,7 +624,7 @@ class MultiHeadTest(test.TestCase): six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), train_result) - def test_train_two_heads_with_weights(self): + def _test_train_two_heads_with_weights(self, use_tpu): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') multi_head = multi_head_lib.multi_head( @@ -619,12 +656,20 @@ class MultiHeadTest(test.TestCase): [constant_op.constant(expected_train_result), string_ops.as_string(loss, precision=3)]) - spec = multi_head.create_estimator_spec( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels, - train_op_fn=_train_op_fn) + if use_tpu: + spec = multi_head._create_tpu_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn).as_estimator_spec() + else: + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) self.assertIsNotNone(spec.loss) self.assertEqual({}, spec.eval_metric_ops) @@ -649,6 +694,12 @@ class MultiHeadTest(test.TestCase): metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2, }, summary_str, tol) + def test_train_two_heads_with_weights(self): + self._test_train_two_heads_with_weights(use_tpu=False) + + def test_train_two_heads_with_weights_tpu(self): + self._test_train_two_heads_with_weights(use_tpu=True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py index 98660bb7317ae76a7da7c90a5c890ab8e69037fe..c595f473950e28cd75cd1b56c1b3d409333dbc74 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn.py @@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.layers import core as core_layers from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables @@ -92,55 +91,6 @@ def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'): return rnn_cell_fn -def _concatenate_context_input(sequence_input, context_input): - """Replicates `context_input` across all timesteps of `sequence_input`. - - Expands dimension 1 of `context_input` then tiles it `sequence_length` times. - This value is appended to `sequence_input` on dimension 2 and the result is - returned. - - Args: - sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, - padded_length, d0]`. - context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. - - Returns: - A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, - d0 + d1]`. - - Raises: - ValueError: If `sequence_input` does not have rank 3 or `context_input` does - not have rank 2. - """ - seq_rank_check = check_ops.assert_rank( - sequence_input, - 3, - message='sequence_input must have rank 3', - data=[array_ops.shape(sequence_input)]) - seq_type_check = check_ops.assert_type( - sequence_input, - dtypes.float32, - message='sequence_input must have dtype float32; got {}.'.format( - sequence_input.dtype)) - ctx_rank_check = check_ops.assert_rank( - context_input, - 2, - message='context_input must have rank 2', - data=[array_ops.shape(context_input)]) - ctx_type_check = check_ops.assert_type( - context_input, - dtypes.float32, - message='context_input must have dtype float32; got {}.'.format( - context_input.dtype)) - with ops.control_dependencies( - [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): - padded_length = array_ops.shape(sequence_input)[1] - tiled_context_input = array_ops.tile( - array_ops.expand_dims(context_input, 1), - array_ops.concat([[1], [padded_length], [1]], 0)) - return array_ops.concat([sequence_input, tiled_context_input], 2) - - def _select_last_activations(activations, sequence_lengths): """Selects the nth set of activations for each n in `sequence_length`. @@ -222,8 +172,8 @@ def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns, context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) - sequence_input = _concatenate_context_input(sequence_input, - context_input) + sequence_input = seq_fc.concatenate_context_input( + context_input, sequence_input) cell = rnn_cell_fn(mode) # Ignore output state. diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py index 1aebed348dcacf8fbe90421bdc7ff25f5b7bcc4a..89506ee6615cd838b0fe651e13eb3e7dd35d2aef 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py @@ -25,12 +25,12 @@ import tempfile import numpy as np import six -from tensorflow.contrib.data.python.ops import readers from tensorflow.contrib.estimator.python.estimator import head as head_lib from tensorflow.contrib.estimator.python.estimator import rnn from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.experimental.ops import readers from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.canned import parsing_utils diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index e076631bc16fd379a2ad31af9055a7388d98c7ca..d365ad111760247fc18b730657390f07ba6b865e 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -154,10 +154,10 @@ class GmmAlgorithm(object): def _create_variables(self): """Initializes GMM algorithm.""" init_value = array_ops.constant([], dtype=dtypes.float32) - self._means = variables.Variable(init_value, - name=self.CLUSTERS_VARIABLE, - validate_shape=False) - self._covs = variables.Variable( + self._means = variables.VariableV1(init_value, + name=self.CLUSTERS_VARIABLE, + validate_shape=False) + self._covs = variables.VariableV1( init_value, name=self.CLUSTERS_COVS_VARIABLE, validate_shape=False) # Mixture weights, representing the probability that a randomly # selected unobservable data (in EM terms) was generated by component k. @@ -165,9 +165,9 @@ class GmmAlgorithm(object): array_ops.tile([1.0 / self._num_classes], [self._num_classes]), name=self.CLUSTERS_WEIGHT, validate_shape=False) - self._cluster_centers_initialized = variables.Variable(False, - dtype=dtypes.bool, - name='initialized') + self._cluster_centers_initialized = variables.VariableV1(False, + dtype=dtypes.bool, + name='initialized') def _initialize_variables(self, data, initial_means=None): """Initializes variables. diff --git a/tensorflow/contrib/factorization/python/ops/wals_test.py b/tensorflow/contrib/factorization/python/ops/wals_test.py index 9bdbd050152261daff803e6e71abea93406402ed..75d577f42958d97ccb2632798e86ae059c399cb4 100644 --- a/tensorflow/contrib/factorization/python/ops/wals_test.py +++ b/tensorflow/contrib/factorization/python/ops/wals_test.py @@ -420,13 +420,13 @@ class WALSMatrixFactorizationUnsupportedTest(test.TestCase): class SweepHookTest(test.TestCase): def test_sweeps(self): - is_row_sweep_var = variables.Variable(True) - is_sweep_done_var = variables.Variable(False) - init_done = variables.Variable(False) - row_prep_done = variables.Variable(False) - col_prep_done = variables.Variable(False) - row_train_done = variables.Variable(False) - col_train_done = variables.Variable(False) + is_row_sweep_var = variables.VariableV1(True) + is_sweep_done_var = variables.VariableV1(False) + init_done = variables.VariableV1(False) + row_prep_done = variables.VariableV1(False) + col_prep_done = variables.VariableV1(False) + row_train_done = variables.VariableV1(False) + col_train_done = variables.VariableV1(False) init_op = state_ops.assign(init_done, True) row_prep_op = state_ops.assign(row_prep_done, True) @@ -486,7 +486,7 @@ class StopAtSweepHookTest(test.TestCase): def test_stop(self): hook = wals_lib._StopAtSweepHook(last_sweep=10) - completed_sweeps = variables.Variable( + completed_sweeps = variables.VariableV1( 8, name=wals_lib.WALSMatrixFactorization.COMPLETED_SWEEPS) train_op = state_ops.assign_add(completed_sweeps, 1) hook.begin() diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index aab7d0c9e8874269bfa5f33193b0dc0ba4bbc9cd..a926ffd5982116a21dc7a0fd1ff957d4ecc6bf94 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -27,6 +27,7 @@ py_library( "//tensorflow/python:check_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:tensor_shape", @@ -46,9 +47,29 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python/feature_column", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "sequence_feature_column_integration_test", + srcs = ["python/feature_column/sequence_feature_column_integration_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequence_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python/feature_column", + "//tensorflow/python/keras:layers", ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 05bcdac2caa77062f9a8a44a948d2897b439ea1f..dd6da35ed009c07ad3819e7860a283c7837c1f83 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope # pylint: disable=protected-access -# TODO(b/73827486): Support SequenceExample. def sequence_input_layer( @@ -110,6 +109,7 @@ def sequence_input_layer( output_tensors = [] sequence_lengths = [] ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): ordered_columns.append(column) with variable_scope.variable_scope( @@ -121,17 +121,67 @@ def sequence_input_layer( # Flattens the final dimension to produce a 3D Tensor. num_elements = column._variable_shape.num_elements() shape = array_ops.shape(dense_tensor) + target_shape = [shape[0], shape[1], num_elements] output_tensors.append( - array_ops.reshape( - dense_tensor, - shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + array_ops.reshape(dense_tensor, shape=target_shape)) sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns) sequence_length = _assert_all_equal_and_return(sequence_lengths) + return array_ops.concat(output_tensors, -1), sequence_length +def concatenate_context_input(context_input, sequence_input): + """Replicates `context_input` across all timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. + sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: If `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. + """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat([[1], [padded_length], [1]], 0)) + return array_ops.concat([sequence_input, tiled_context_input], 2) + + def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): """Returns a feature column that represents sequences of integers. @@ -453,9 +503,17 @@ class _SequenceNumericColumn( [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], axis=0) dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) - sequence_length = fc._sequence_length_from_sparse_tensor( - sp_tensor, num_elements=self._variable_shape.num_elements()) + + # Get the number of timesteps per example + # For the 2D case, the raw values are grouped according to num_elements; + # for the 3D case, the grouping happens in the third dimension, and + # sequence length is not affected. + num_elements = (self._variable_shape.num_elements() + if sp_tensor.shape.ndims == 2 else 1) + seq_length = fc._sequence_length_from_sparse_tensor( + sp_tensor, num_elements=num_elements) + return fc._SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=seq_length) # pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d8ca363627eace15e039679545366648df174c33 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py @@ -0,0 +1,280 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Integration test for sequence feature columns with SequenceExamples.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import string +import tempfile + +from google.protobuf import text_format + +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.keras.layers import recurrent +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class SequenceFeatureColumnIntegrationTest(test.TestCase): + + def _make_sequence_example(self): + example = example_pb2.SequenceExample() + example.context.feature['int_ctx'].int64_list.value.extend([5]) + example.context.feature['float_ctx'].float_list.value.extend([123.6]) + for val in range(0, 10, 2): + feat = feature_pb2.Feature() + feat.int64_list.value.extend([val] * val) + example.feature_lists.feature_list['int_list'].feature.extend([feat]) + for val in range(1, 11, 2): + feat = feature_pb2.Feature() + feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val) + example.feature_lists.feature_list['str_list'].feature.extend([feat]) + + return example + + def _build_feature_columns(self): + col = fc.categorical_column_with_identity( + 'int_ctx', num_buckets=100) + ctx_cols = [ + fc.embedding_column(col, dimension=10), + fc.numeric_column('float_ctx')] + + identity_col = sfc.sequence_categorical_column_with_identity( + 'int_list', num_buckets=10) + bucket_col = sfc.sequence_categorical_column_with_hash_bucket( + 'bytes_list', hash_bucket_size=100) + seq_cols = [ + fc.embedding_column(identity_col, dimension=10), + fc.embedding_column(bucket_col, dimension=20)] + + return ctx_cols, seq_cols + + def test_sequence_example_into_input_layer(self): + examples = [_make_sequence_example().SerializeToString()] * 100 + ctx_cols, seq_cols = self._build_feature_columns() + + def _parse_example(example): + ctx, seq = parsing_ops.parse_single_sequence_example( + example, + context_features=fc.make_parse_example_spec(ctx_cols), + sequence_features=fc.make_parse_example_spec(seq_cols)) + ctx.update(seq) + return ctx + + ds = dataset_ops.Dataset.from_tensor_slices(examples) + ds = ds.map(_parse_example) + ds = ds.batch(20) + + # Test on a single batch + features = ds.make_one_shot_iterator().get_next() + + # Tile the context features across the sequence features + seq_layer, _ = sfc.sequence_input_layer(features, seq_cols) + ctx_layer = fc.input_layer(features, ctx_cols) + input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer) + + rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10)) + output = rnn_layer(input_layer) + + with self.cached_session() as sess: + sess.run(variables.global_variables_initializer()) + features_r = sess.run(features) + self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6]) + + output_r = sess.run(output) + self.assertAllEqual(output_r.shape, [20, 10]) + + +class SequenceExampleParsingTest(test.TestCase): + + def test_seq_ex_in_sequence_categorical_column_with_identity(self): + self._test_parsed_sequence_example( + 'int_list', sfc.sequence_categorical_column_with_identity, + 10, [3, 6], [2, 4, 6]) + + def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self): + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_hash_bucket, + 10, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self): + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list, + list(string.ascii_lowercase), [3, 4], + [compat.as_bytes(x) for x in 'acg']) + + def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self): + _, fname = tempfile.mkstemp() + with open(fname, 'w') as f: + f.write(string.ascii_lowercase) + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file, + fname, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def _test_parsed_sequence_example( + self, col_name, col_fn, col_arg, shape, values): + """Helper function to check that each FeatureColumn parses correctly. + + Args: + col_name: string, name to give to the feature column. Should match + the name that the column will parse out of the features dict. + col_fn: function used to create the feature column. For example, + sequence_numeric_column. + col_arg: second arg that the target feature column is expecting. + shape: the expected dense_shape of the feature after parsing into + a SparseTensor. + values: the expected values at index [0, 2, 6] of the feature + after parsing into a SparseTensor. + """ + example = _make_sequence_example() + columns = [ + fc.categorical_column_with_identity('int_ctx', num_buckets=100), + fc.numeric_column('float_ctx'), + col_fn(col_name, col_arg) + ] + context, seq_features = parsing_ops.parse_single_sequence_example( + example.SerializeToString(), + context_features=fc.make_parse_example_spec(columns[:2]), + sequence_features=fc.make_parse_example_spec(columns[2:])) + + with self.cached_session() as sess: + ctx_result, seq_result = sess.run([context, seq_features]) + self.assertEqual(list(seq_result[col_name].dense_shape), shape) + self.assertEqual( + list(seq_result[col_name].values[[0, 2, 6]]), values) + self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1]) + self.assertEqual(ctx_result['int_ctx'].values[0], 5) + self.assertEqual(list(ctx_result['float_ctx'].shape), [1]) + self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1) + + +_SEQ_EX_PROTO = """ +context { + feature { + key: "float_ctx" + value { + float_list { + value: 123.6 + } + } + } + feature { + key: "int_ctx" + value { + int64_list { + value: 5 + } + } + } +} +feature_lists { + feature_list { + key: "bytes_list" + value { + feature { + bytes_list { + value: "a" + } + } + feature { + bytes_list { + value: "b" + value: "c" + } + } + feature { + bytes_list { + value: "d" + value: "e" + value: "f" + value: "g" + } + } + } + } + feature_list { + key: "float_list" + value { + feature { + float_list { + value: 1.0 + } + } + feature { + float_list { + value: 3.0 + value: 3.0 + value: 3.0 + } + } + feature { + float_list { + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + } + } + } + } + feature_list { + key: "int_list" + value { + feature { + int64_list { + value: 2 + value: 2 + } + } + feature { + int64_list { + value: 4 + value: 4 + value: 4 + value: 4 + } + } + feature { + int64_list { + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + } + } + } + } +} +""" + + +def _make_sequence_example(): + example = example_pb2.SequenceExample() + return text_format.Parse(_SEQ_EX_PROTO, example) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 45d7b740462ca21139e2e93e34b43668f1e08a94..929e83523a8645475d67b10f1489bb648049e2e5 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +from absl.testing import parameterized import numpy as np from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc @@ -28,28 +29,61 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session -class SequenceInputLayerTest(test.TestCase): +class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)), + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],], + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + indices=( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 0, 0, 1), + dense_shape=(2, 2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[2], [0]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 1, 1, 2, 0), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]], + # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_embedding_column( + self, sparse_input_a, sparse_input_b, expected_input_layer, + expected_sequence_length): - def test_embedding_column(self): vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - embedding_dimension_a = 2 embedding_values_a = ( (1., 2.), # id 0 @@ -70,14 +104,6 @@ class SequenceInputLayerTest(test.TestCase): return embedding_values return _initializer - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], - ] - expected_sequence_length = [1, 2] - categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column_a = fc.embedding_column( @@ -233,29 +259,53 @@ class SequenceInputLayerTest(test.TestCase): }, feature_columns=shared_embedding_columns) - def test_indicator_column(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [1, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 1, 0), + dense_shape=(2, 2)), + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [1, 0] + [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'sparse_input_a': sparse_tensor.SparseTensorValue( + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + indices=( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 0, 0, 1), + dense_shape=(2, 2, 2)), + 'sparse_input_b': sparse_tensor.SparseTensorValue( + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[1], [0]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 1, 1, 1, 0), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]], + # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -] + [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_indicator_column( + self, sparse_input_a, sparse_input_b, expected_input_layer, + expected_sequence_length): vocabulary_size_a = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) vocabulary_size_b = 2 - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [1, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 1, 0), - dense_shape=(2, 2)) - - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [1, 0] - [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]], - ] - expected_sequence_length = [1, 2] categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size_a) @@ -298,18 +348,32 @@ class SequenceInputLayerTest(test.TestCase): features={'aaa': sparse_input}, feature_columns=[indicator_column_a]) - def test_numeric_column(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_input_layer = [ - [[0.], [1.]], - [[10.], [0.]], - ] - expected_sequence_length = [2, 1] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1] + # example 1, [10.] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)), + 'expected_input_layer': [ + [[0.], [1.]], + [[10.], [0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(20, 3, 5., 3., 8.), + dense_shape=(2, 2, 2)), + 'expected_input_layer': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_numeric_column( + self, sparse_input, expected_input_layer, expected_sequence_length): numeric_column = sfc.sequence_numeric_column('aaa') input_layer, sequence_length = sfc.sequence_input_layer( @@ -321,21 +385,38 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) - def test_numeric_column_multi_dim(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] + # example 1, [10., 11., 12., 13.] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 4)), + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + ) + def test_numeric_column_multi_dim( + self, sparse_input, expected_input_layer, expected_sequence_length): """Tests sequence_input_layer for multi-dimensional numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - # The output of numeric_column._get_dense_tensor should be flattened. - expected_input_layer = [ - [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]], - ] - expected_sequence_length = [2, 1] numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) input_layer, sequence_length = sfc.sequence_input_layer( @@ -377,6 +458,134 @@ class SequenceInputLayerTest(test.TestCase): r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'): sess.run(sequence_length) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_shape': [2, 2, 4]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 4)), + 'expected_shape': [2, 2, 4]}, + ) + def test_static_shape_from_tensors_numeric( + self, sparse_input, expected_shape): + """Tests that we return a known static shape when we have one.""" + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected_shape': [4, 2, 3]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 0, 2), + dense_shape=(4, 2, 2)), + 'expected_shape': [4, 2, 3]} + ) + def test_static_shape_from_tensors_indicator( + self, sparse_input, expected_shape): + """Tests that we return a known static shape when we have one.""" + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=3) + indicator_column = fc.indicator_column(categorical_column) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, feature_columns=[indicator_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + +class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): + """Tests the utility fn concatenate_context_input.""" + + def test_concatenate_context_input(self): + seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2)) + context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + input_layer = sfc.concatenate_context_input(context_input, seq_input) + + expected = np.array([ + [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]], + [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]] + ], dtype=np.float32) + with monitored_session.MonitoredSession() as sess: + output = sess.run(input_layer) + self.assertAllEqual(expected, output) + + @parameterized.named_parameters( + {'testcase_name': 'rank_lt_3', + 'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))}, + {'testcase_name': 'rank_gt_3', + 'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))} + ) + def test_sequence_input_throws_error(self, seq_input): + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'): + sfc.concatenate_context_input(context_input, seq_input) + + @parameterized.named_parameters( + {'testcase_name': 'rank_lt_2', + 'context_input': ops.convert_to_tensor(np.arange(100))}, + {'testcase_name': 'rank_gt_2', + 'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))} + ) + def test_context_input_throws_error(self, context_input): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_seq_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 'sequence_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_context_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 'context_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + class InputLayerTest(test.TestCase): """Tests input_layer with sequence feature columns.""" @@ -443,75 +652,79 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual): test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) -class SequenceCategoricalColumnWithIdentityTest(test.TestCase): - - def test_get_sparse_tensors(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((1, 2, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) +class SequenceCategoricalColumnWithIdentityTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=(6, 7, 8), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=(6, 7, 8), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): + column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - def test_get_sparse_tensors_inputs3d(self): - """Tests _get_sparse_tensors when the input is already 3D Tensor.""" - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 2, 0), - dense_shape=(2, 2, 1)) - - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'Column aaa expected ID tensor of rank 2\.\s*' - r'id_tensor shape:\s*\[2 2 1\]'): - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({'aaa': inputs})) - with monitored_session.MonitoredSession() as sess: - id_weight_pair.id_tensor.eval(session=sess) - - -class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithHashBucketTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + # Ignored to avoid hash dependence in test. + values=np.array((0, 0, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + # Ignored to avoid hash dependence in test. + values=np.array((0, 0, 0), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_hash_bucket( 'aaa', hash_bucket_size=10) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2)) - - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - # Ignored to avoid hash dependence in test. - values=np.array((0, 0, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_indices_shape( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) + self, expected, id_weight_pair.id_tensor.eval(session=sess)) -class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): +class SequenceCategoricalColumnWithVocabularyFileTest( + test.TestCase, parameterized.TestCase): def _write_vocab(self, vocab_strings, file_name): vocab_file = os.path.join(self.get_temp_dir(), file_name) @@ -527,68 +740,120 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): 'wire_vocabulary.txt') self._wire_vocabulary_size = 3 - def test_get_sparse_tensors(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'skywalker', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=np.array((0, -1, 2), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=self._wire_vocabulary_size) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithVocabularyListTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1))}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=('omar', 'skywalker', 'marlo'), + dense_shape=(2, 2, 2)), + 'expected': sparse_tensor.SparseTensorValue( + indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)), + values=np.array((0, -1, 2), dtype=np.int64), + dense_shape=(2, 2, 2))} + ) + def test_get_sparse_tensors(self, inputs, expected): column = sfc.sequence_categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceEmbeddingColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceEmbeddingColumnTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected': [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 0, 2), + dense_shape=(4, 2, 2)), + 'expected': [ + # example 0, ids [[2]] + [[7., 11.], [0., 0.]], + # example 1, ids [[0, 1], [2]] + [[2, 3.5], [7., 11.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [[1], [0, 2]] + [[3., 5.], [4., 6.5]]]} + ) + def test_get_sequence_dense_tensor(self, inputs, expected): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 @@ -601,17 +866,6 @@ class SequenceEmbeddingColumnTest(test.TestCase): self.assertIsNone(partition_info) return embedding_values - expected_lookups = [ - # example 0, ids [2] - [[7., 11.], [0., 0.]], - # example 1, ids [0, 1] - [[1., 2.], [3., 5.]], - # example 2, ids [] - [[0., 0.], [0., 0.]], - # example 3, ids [1] - [[3., 5.], [0., 0.]], - ] - categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column( @@ -619,24 +873,35 @@ class SequenceEmbeddingColumnTest(test.TestCase): initializer=_initializer) embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertItemsEqual( ('embedding_weights:0',), tuple([v.name for v in global_vars])) with monitored_session.MonitoredSession() as sess: self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) - self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, embedding_lookup.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 2), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs, expected_sequence_length): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) @@ -644,7 +909,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column, dimension=2) _, sequence_length = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -855,56 +1120,87 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase): expected_sequence_length_b, sequence_length_b.eval(session=sess)) -class SequenceIndicatorColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): +class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)), + 'expected': [ + # example 0, ids [2] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [0, 1] + [[1., 0., 0.], [0., 1., 0.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [1] + [[0., 1., 0.], [0., 0., 0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [2, 2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + values=(2, 0, 1, 2, 1, 2, 2), + dense_shape=(4, 2, 2)), + 'expected': [ + # example 0, ids [[2]] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [[0, 1], [2]] + [[1., 1., 0.], [0., 0., 1.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [[1], [2, 2]] + [[0., 1., 0.], [0., 0., 2.]]]} + ) + def test_get_sequence_dense_tensor(self, inputs, expected): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - - expected_lookups = [ - # example 0, ids [2] - [[0., 0., 1.], [0., 0., 0.]], - # example 1, ids [0, 1] - [[1., 0., 0.], [0., 1., 0.]], - # example 2, ids [] - [[0., 0., 0.], [0., 0., 0.]], - # example 3, ids [1] - [[0., 1., 0.], [0., 0., 0.]], - ] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) indicator_tensor, _ = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, indicator_tensor.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2, 0, 1, 2), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs, expected_sequence_length): vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) _, sequence_length = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -938,7 +1234,7 @@ class SequenceIndicatorColumnTest(test.TestCase): expected_sequence_length, sequence_length.eval(session=sess)) -class SequenceNumericColumnTest(test.TestCase): +class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): def test_defaults(self): a = sfc.sequence_numeric_column('aaa') @@ -971,25 +1267,36 @@ class SequenceNumericColumnTest(test.TestCase): with self.assertRaisesRegexp(TypeError, 'must be a callable'): sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') - def test_get_sequence_dense_tensor(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_dense_tensor = [ - [[0.], [1.]], - [[10.], [0.]], - ] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, values [0., 1] + # example 1, [10.] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)), + 'expected': [ + [[0.], [1.]], + [[10.], [0.]]]}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + values=(20, 3, 5., 3., 8.), + dense_shape=(2, 2, 2)), + 'expected': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]]}, + ) + def test_get_sequence_dense_tensor(self, inputs, expected): numeric_column = sfc.sequence_numeric_column('aaa') dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) + self.assertAllEqual(expected, dense_tensor.eval(session=sess)) def test_get_sequence_dense_tensor_with_normalizer_fn(self): @@ -1026,41 +1333,34 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_get_sequence_dense_tensor_with_shape(self): - """Tests get_sequence_dense_tensor with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_dense_tensor = [ - [[0., 1., 2.], [3., 4., 5.]], - [[10., 11., 12.], [0., 0., 0.]], - ] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - - def test_get_dense_tensor_multi_dim(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input': sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)), + 'expected_dense_tensor': [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]}, + {'testcase_name': '3D', + 'sparse_input': sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6), + (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6), + (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 2, 8)), + 'expected_dense_tensor': [ + [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]], + [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]], + [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]], + [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]}, + ) + def test_get_dense_tensor_multi_dim( + self, sparse_input, expected_dense_tensor): """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - expected_dense_tensor = [ - [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], - [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], - ] numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) dense_tensor, _ = numeric_column._get_sequence_dense_tensor( @@ -1070,43 +1370,55 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_sequence_length(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2., 0., 1.), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '3D', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2., 0., 1., 2.), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '2D_with_shape', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2., 0., 1.), + dense_shape=(2, 2)), + 'expected_sequence_length': [1, 1], + 'shape': (2,)}, + {'testcase_name': '3D_with_shape', + 'inputs': sparse_tensor.SparseTensorValue( + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + values=(2., 0., 1., 2.), + dense_shape=(2, 2, 2)), + 'expected_sequence_length': [1, 2], + 'shape': (2,)}, + ) + def test_sequence_length(self, inputs, expected_sequence_length, shape): + numeric_column = sfc.sequence_numeric_column('aaa', shape=shape) _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) self.assertAllEqual(expected_sequence_length, sequence_length) self.assertEqual(np.int64, sequence_length.dtype) - def test_sequence_length_with_shape(self): - """Tests _sequence_length with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa') - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - def test_sequence_length_with_empty_rows(self): """Tests _sequence_length when some examples do not have ids.""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py index f9b0efd1daaee42be1043b100edeb327d253d6f8..c223df5b6e944a19aa949b726e89daa9f0cb6cc8 100644 --- a/tensorflow/contrib/framework/python/ops/variables_test.py +++ b/tensorflow/contrib/framework/python/ops/variables_test.py @@ -192,7 +192,7 @@ class GlobalStepTest(test.TestCase): def test_invalid_dtype(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - variables_lib.Variable( + variables_lib.VariableV1( 0.0, trainable=False, dtype=dtypes.float32, @@ -205,7 +205,7 @@ class GlobalStepTest(test.TestCase): def test_invalid_shape(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - variables_lib.Variable( + variables_lib.VariableV1( [0], trainable=False, dtype=dtypes.int32, @@ -229,7 +229,7 @@ class GlobalStepTest(test.TestCase): def test_get_global_step(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - variables_lib.Variable( + variables_lib.VariableV1( 0, trainable=False, dtype=dtypes.int32, @@ -607,10 +607,10 @@ class ModelVariablesTest(test.TestCase): with self.cached_session(): with variable_scope.variable_scope('A'): variables_lib2.local_variable([5]) - a = variables_lib.Variable([5]) + a = variables_lib.VariableV1([5]) with variable_scope.variable_scope('B'): variables_lib2.local_variable([5]) - b = variables_lib.Variable([5]) + b = variables_lib.VariableV1([5]) self.assertEquals([a], variables_lib2.get_trainable_variables('A')) self.assertEquals([b], variables_lib2.get_trainable_variables('B')) @@ -953,7 +953,7 @@ class AssignFromCheckpointTest(test.TestCase): # Create a set of variables to save in the checkpoint. for var_name in var_names_to_values: var_value = var_names_to_values[var_name] - var_list.append(variables_lib.Variable(var_value, name=var_name)) + var_list.append(variables_lib.VariableV1(var_value, name=var_name)) saver = saver_lib.Saver(var_list) init_op = variables_lib.variables_initializer(var_list) sess.run(init_op) @@ -1106,7 +1106,7 @@ class AssignFromCheckpointFnTest(test.TestCase): # Create a set of variables to save in the checkpoint. for var_name in var_names_to_values: var_value = var_names_to_values[var_name] - var_list.append(variables_lib.Variable(var_value, name=var_name)) + var_list.append(variables_lib.VariableV1(var_value, name=var_name)) saver = saver_lib.Saver(var_list) init_op = variables_lib.variables_initializer(var_list) sess.run(init_op) @@ -1297,7 +1297,7 @@ class AssignFromCheckpointFnTest(test.TestCase): class ZeroInitializerOpTest(test.TestCase): def _testZeroInitializer(self, shape, initializer, use_init): - var = variables_lib.Variable(initializer) + var = variables_lib.VariableV1(initializer) var_zero = variables_lib2.zero_initializer(var) with self.cached_session() as sess: with self.assertRaisesOpError('Attempting to use uninitialized value'): @@ -1350,12 +1350,12 @@ class FilterVariablesTest(test.TestCase): g = ops.Graph() with g.as_default(): var_list = [] - var_list.append(variables_lib.Variable(0, name='conv1/weights')) - var_list.append(variables_lib.Variable(0, name='conv1/biases')) - var_list.append(variables_lib.Variable(0, name='conv2/weights')) - var_list.append(variables_lib.Variable(0, name='conv2/biases')) - var_list.append(variables_lib.Variable(0, name='clfs/weights')) - var_list.append(variables_lib.Variable(0, name='clfs/biases')) + var_list.append(variables_lib.VariableV1(0, name='conv1/weights')) + var_list.append(variables_lib.VariableV1(0, name='conv1/biases')) + var_list.append(variables_lib.VariableV1(0, name='conv2/weights')) + var_list.append(variables_lib.VariableV1(0, name='conv2/biases')) + var_list.append(variables_lib.VariableV1(0, name='clfs/weights')) + var_list.append(variables_lib.VariableV1(0, name='clfs/biases')) self._var_list = var_list def _test_filter_variables(self, diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD index 9725233e7f89a9f24451484c82f5793cbf77d351..57a5bfbf43c915775c6b0ef05baac19581213a09 100644 --- a/tensorflow/contrib/fused_conv/BUILD +++ b/tensorflow/contrib/fused_conv/BUILD @@ -17,11 +17,14 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load( + "//tensorflow:tensorflow.bzl", + "tf_kernel_library", + "tf_custom_op_library", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", +) load("//tensorflow:tensorflow.bzl", "cuda_py_test") -load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") tf_custom_op_py_library( @@ -109,12 +112,13 @@ tf_gen_op_wrapper_py( deps = [":fused_conv2d_bias_activation_op_op_lib"], ) -cuda_py_test( - name = "fused_conv2d_bias_activation_op_test", - srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"], - additional_deps = [ +py_library( + name = "fused_conv2d_bias_activation_op_test_base", + testonly = 1, + srcs = ["python/ops/fused_conv2d_bias_activation_op_test_base.py"], + visibility = ["//tensorflow/compiler/tf2xla:internal"], + deps = [ ":fused_conv_py", - "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -127,8 +131,22 @@ cuda_py_test( "//tensorflow/python:random_ops", "//tensorflow/python:training", "//tensorflow/python:variables", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +cuda_py_test( + name = "fused_conv2d_bias_activation_op_test", + size = "large", + srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"], + additional_deps = [ + ":fused_conv2d_bias_activation_op_test_base", + "//tensorflow/python:client_testlib", ], tags = [ + "manual", # TODO(b/117128481): re-enable after fixing OSS build + "no_pip", "requires-gpu-sm70", ], ) @@ -152,6 +170,7 @@ cuda_py_test( ], main = "python/ops/fused_conv2d_bias_activation_benchmark.py", tags = [ + "manual", # TODO(b/117128481): re-enable after fixing OSS build "requires-gpu-sm70", ], ) diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index e9e6464d06658ad7abf012e2c4e579bfbff7448c..93b1aaa85e88e00c1b12a388321a4d6fb10f1611 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -111,8 +111,8 @@ class FusedConv2DBiasActivationOp : public OpKernel { context, (GetTensorDim(strides, data_format_, 'N') == 1 && GetTensorDim(strides, data_format_, 'C') == 1), - errors::InvalidArgument("Convolutional strides are not supported in " - "the batch or depth dimensions.")); + errors::Unimplemented("Convolutional strides are not supported in " + "the batch and depth dimensions.")); // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here. constexpr bool is_int8x4 = std::is_same::value; diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index 4894298694bbfd1ed15be963e7dab9818d9aed18..e5c8a34fc14b01d6f6c9bdca065e96332ed87556 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -12,896 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Functional tests for fused conv2d bias and activation operation.""" + +"""Tests for fused convolutions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - -from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops +from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op_test_base as test_base from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging - - -def GetShrunkInceptionShapes(shrink=10): - """Iterator for smaller versions of convolution shapes in 2015 Inception. - - Relative to inception, each depth value is `depth // shrink`. - - Args: - shrink: Factor to shrink each depth value by relative to Inception. - - Yields: - Tuple (input_size, filter_size, out_size, stride, padding), the convolution - parameters of Inception layers. - """ - input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [ - 4, 8, 8, 2048 - ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [ - 4, 8, 8, 1760 - ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [ - 4, 17, 17, 192 - ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [ - 4, 17, 17, 160 - ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024], - [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [ - 4, 17, 17, 768 - ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768], - [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [ - 4, 35, 35, 64 - ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [ - 4, 35, 35, 256 - ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [ - 4, 35, 35, 192 - ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]] - filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [ - 1, 1, 2048, 192 - ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384], - [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [ - 1, 1, 1760, 320 - ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [ - 3, 3, 128, 320 - ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [ - 1, 3, 192, 256 - ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [ - 3, 3, 192, 224 - ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [ - 3, 1, 192, 192 - ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [ - 1, 3, 128, 192 - ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [ - 3, 1, 128, 128 - ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [ - 1, 1, 768, 128 - ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [ - 3, 3, 64, 96 - ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 64], - [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [ - 1, 1, 192, 64 - ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, - 64], [1, 1, 24, 64]] - out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [ - 4, 8, 8, 384 - ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [ - 4, 8, 8, 192 - ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [ - 4, 17, 17, 192 - ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [ - 4, 17, 17, 256 - ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [ - 4, 17, 17, 160 - ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [ - 4, 17, 17, 256 - ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [ - 4, 17, 17, 128 - ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [ - 4, 35, 35, 64 - ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96], - [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48], - [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]] - strides = [ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - ] - # Shrink sizes to make the test faster - for i in input_sizes: - i[3] //= shrink - for f in filter_sizes: - f[2] //= shrink - f[3] //= shrink - for o in out_sizes: - o[3] //= shrink - # pylint: disable=invalid-name - VALID = "VALID" - SAME = "SAME" - # pylint: enable=invalid-name - paddings = [ - SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, VALID, VALID, VALID - ] - for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides, - paddings): - yield i, f, o, s, p - - -def GetTestConfigs(): - """Get all the valid tests configs to run. - - Returns: - all the valid test configs as tuples of data_format and use_gpu. - """ - test_configs = [("NCHW", True), ("NHWC", True)] - return test_configs - - -class FusedConv2DBiasActivationTest(test.TestCase): - - def _DtypesToTest(self, use_gpu): - return [dtypes.float32] - - def _FilterFormatsToTest(self, use_gpu): - return ["HWIO", "OIHW"] - - def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias, - strides, padding, activation_mode, data_format, - filter_format, dtype): - """Verifies the output values of the convolution function. - - Args: - tensor_in_sizes: Input tensor dimensions in - [batch, input_rows, input_cols, input_depth]. - filter_in_sizes: Filter tensor dimensions in - [kernel_rows, kernel_cols, input_depth, output_depth]. - bias: 1-D bias tensor of length output_depth. - strides: Stride: [col_stride, row_stride] - padding: Padding type. - activation_mode: Activation mode. - data_format: Format of the data tensors. - filter_format: Filter format to use for the fused convolution. - dtype: Data type for inputs and outputs. - Returns: - Symbolic tensor value and reference value that can be used to - execute the computation and verify the results. - """ - input_size = np.prod(tensor_in_sizes) - filter_size = np.prod(filter_in_sizes) - bias_size = filter_in_sizes[-1] # equals to output depth - # Initializes the input tensor with array containing incrementing - # numbers from 1. - x1 = [f * 1.0 for f in range(1, input_size + 1)] - x2 = [f * 1.0 for f in range(1, filter_size + 1)] - # This is to guarantee that there is always negative values after - # bias add so that we can test whether relu works correctly. - x3 = bias - with self.test_session(use_gpu=True): - t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) - t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) - fused_t2 = t2 - if filter_format == "OIHW": - fused_t2 = HwioToOihw(t2) - t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype) - strides = [1] + strides + [1] - if data_format == "NCHW": - t1 = test_util.NHWCToNCHW(t1) - strides = test_util.NHWCToNCHW(strides) - output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - t1, - fused_t2, - t3, - strides=strides, - padding=padding, - data_format=data_format, - filter_format=filter_format, - activation_mode=activation_mode) - ref_conv_output = nn_ops.conv2d( - t1, t2, strides=strides, padding=padding, data_format=data_format) - ref_bias_output = nn_ops.bias_add( - ref_conv_output, t3, data_format=data_format) - ref_output = nn_ops.relu(ref_bias_output) - if data_format == "NCHW": - output = test_util.NCHWToNHWC(output) - ref_output = test_util.NCHWToNHWC(ref_output) - - return output, ref_output - - def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides, - padding): - """Verifies that CPU and GPU produce the same values. - - Args: - tensor_in_sizes: Input tensor dimensions in - [batch, input_rows, input_cols, input_depth]. - filter_in_sizes: Filter tensor dimensions in - [kernel_rows, kernel_cols, input_depth, output_depth]. - conv_strides: [row_stride, col_stride] for the convolution; - padding: Padding type. - """ - x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) - x2 = np.random.rand(*filter_in_sizes).astype(np.float32) - x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32) - - def _SetupVal(data_format, use_gpu): - with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes) - t2 = constant_op.constant(x2, shape=filter_in_sizes) - t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]]) - strides = [1] + conv_strides + [1] - if data_format == "NCHW": - t1 = test_util.NHWCToNCHW(t1) - strides = test_util.NHWCToNCHW(strides) - output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - t1, - t2, - t3, - strides=strides, - padding=padding, - data_format=data_format, - activation_mode="Relu") - - if data_format == "NCHW": - output = test_util.NCHWToNHWC(output) - return output - - tensors = [] - for (data_format, use_gpu) in GetTestConfigs(): - tensors.append(_SetupVal(data_format, use_gpu)) - with self.cached_session() as sess: - values = sess.run(tensors) - for i in range(1, len(values)): - self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3) - - def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides, - padding): - tensors = [] - ref_tensors = [] - for (data_format, use_gpu) in GetTestConfigs(): - for dtype in self._DtypesToTest(use_gpu): - for filter_format in self._FilterFormatsToTest(use_gpu): - result, expected = self._SetupValuesForDevice( - tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu", - data_format, filter_format, dtype) - tensors.append(result) - ref_tensors.append(expected) - with self.cached_session() as sess: - values = sess.run(tensors) - ref_values = sess.run(ref_tensors) - for i in range(len(tensors)): - conv = tensors[i] - value = values[i] - ref_value = ref_values[i] - tf_logging.info("expected = ", ref_value) - tf_logging.info("actual = ", value) - tol = 1e-5 - if value.dtype == np.float16: - tol = 1e-3 - self.assertAllClose( - np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol) - self.assertShapeEqual(value, conv) - - def testConv2D1x1Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D1x1Filter test.") - return - # expected_output = [ - # 0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0, - # 86.0, 43.0, 165.0, 131.0, 97.0 - # ] - medians = [-45.0, -130.0, -215.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[1, 1, 3, 3], - bias=medians, - strides=[1, 1], - padding="VALID") - - def testConv2DEmpty(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DEmpty test.") - return - # expected_output = [] - self._VerifyValues( - tensor_in_sizes=[0, 2, 3, 3], - filter_in_sizes=[1, 1, 3, 3], - bias=[0.0, 0.0, 0.0], - strides=[1, 1], - padding="VALID") - - def testConv2D2x2Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2Filter test.") - return - # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - bias=[-2500.0, -2500.0, -2500.0], - strides=[1, 1], - padding="VALID") - - def testConv2D1x2Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D1x2Filter test.") - return - # expected_output = [ - # 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0 - # ] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[1, 2, 3, 3], - bias=[-500.0, -500.0, -500.0], - strides=[1, 1], - padding="VALID") - - def testConv2D2x2FilterStride2(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride2 test.") - return - # expected_output = [0.0, 67.0, 163.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - bias=[-2300.0, -2300.0, -2300.0], - strides=[2, 2], - padding="VALID") - - def testConv2D2x2FilterStride2Same(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.") - return - # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - bias=[-2300.0, -1000.0, -1000.0], - strides=[2, 2], - padding="SAME") - - def testConv2D2x2FilterStride1x2(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.") - return - # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0] - self._VerifyValues( - tensor_in_sizes=[1, 3, 6, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-90.0], - strides=[1, 2], - padding="VALID") - - def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.") - return - # expected_output = [0, 0, 175, 205] - self._VerifyValues( - tensor_in_sizes=[1, 7, 7, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-100.0], - strides=[3, 3], - padding="VALID") - - def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.") - return - # expected = [0, 0, 2, 4] - self._VerifyValues( - tensor_in_sizes=[1, 3, 3, 1], - filter_in_sizes=[1, 1, 1, 1], - bias=[-5.0], - strides=[2, 2], - padding="SAME") - - # expected = [0, 0, 4, 6] - self._VerifyValues( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[1, 1, 1, 1], - bias=[-5.0], - strides=[2, 2], - padding="SAME") - - # expected = [4, 0, 1, 0] - self._VerifyValues( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-40.0], - strides=[3, 3], - padding="SAME") - - def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.") - return - # expected = [0, 5] - self._VerifyValues( - tensor_in_sizes=[1, 2, 2, 1], - filter_in_sizes=[2, 2, 1, 2], - bias=[-50.0, -55.0], - strides=[1, 1], - padding="VALID") - - # expected = [0, 2, 282, 322] - self._VerifyValues( - tensor_in_sizes=[1, 8, 8, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-200.0], - strides=[4, 4], - padding="SAME") - - def testShapeFunctionEdgeCases(self): - # All shapes unknown. - c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - self.assertEqual([None, None, None, None], c1.get_shape().as_list()) - - # Incorrect input shape. - with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[1, 3]), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - # Incorrect filter shape. - with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32, shape=[1, 3]), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - # Depth mismatch. - with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - def testOpEdgeCases(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping OpEdgeCases tests.") - return - with self.cached_session() as sess: - # Illegal strides. - with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, - "Convolutional strides are not supported in " - "the batch or depth dimensions."): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[2, 1, 1, 1], - padding="SAME", - activation_mode="Relu")) - with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, - "Convolutional strides are not supported in " - "the batch or depth dimensions."): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 2], - padding="SAME", - activation_mode="Relu")) - - # Illegal activation mode. - with self.assertRaisesRegexp(ValueError, - "Op passed string 'Tanh' not in:"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Tanh")) - - # Filter larger than input. - with self.assertRaisesRegexp(ValueError, "Negative dimension size"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[20, 21, 3, 2]), - array_ops.placeholder(dtypes.float32, shape=[2]), - strides=[1, 1, 1, 1], - padding="VALID", - activation_mode="Relu")) - with self.assertRaisesRegexp(ValueError, "Negative dimension size"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[21, 20, 3, 2]), - array_ops.placeholder(dtypes.float32, shape=[2]), - strides=[1, 1, 1, 1], - padding="VALID", - activation_mode="Relu")) - - -def GetInceptionFwdTest(input_size, filter_size, stride, padding, - gpu_only=True): - - def Test(self): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping InceptionFwd %s", (input_size, filter_size, - stride, padding)) - return - tf_logging.info("Testing InceptionFwd %s", (input_size, filter_size, stride, - padding)) - self._CompareFwdValues(input_size, filter_size, [stride, stride], padding) - - return Test - - -def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): - """Calculates the size of an output dimension of a strided convolution. - - Given the sizes of the corresponding dimension of the input and filter shapes, - and the stride and padding_types, calculates the size of the output dimension. - This function can be called separately for each input dimension. - - Args: - input_dim: An `int` specifying the size of the input dimension. - filter_dim: An `int` specifying the size of the filter dimension. - stride: An `int` specifying the step size of the convolution along the - input dimension. - padding_type: either 'VALID' or 'SAME'. - - Returns: - The size of the output dimension. - """ - if padding_type == "VALID": - return (input_dim - filter_dim + stride) // stride - else: # padding_type == 'SAME' - return (input_dim + stride - 1) // stride - - -def NchwVectCToNchw(in_tensor): - # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W] - t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3]) - n = in_tensor.shape.dims[0].value - c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value - h = in_tensor.shape.dims[2].value - w = in_tensor.shape.dims[3].value - return array_ops.reshape(t, [n, c, h, w]) - - -def OihwVectIToHwio(in_tensor): - # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W] - t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0]) - o = in_tensor.shape.dims[0].value - i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value - h = in_tensor.shape.dims[2].value - w = in_tensor.shape.dims[3].value - return array_ops.reshape(t, [h, w, i, o]) - - -def NchwToNchwVectC(in_tensor): - n, c, h, w = in_tensor.shape.as_list() - assert c % 4 == 0 - t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w]) - return array_ops.transpose(t, [0, 1, 3, 4, 2]) - - -def HwioToOihw(in_tensor): - return array_ops.transpose(in_tensor, [3, 2, 0, 1]) - - -def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel, - padding, strides, side_input_scale, - side_input, biases, apply_relu): - """Simulates the int8 fused 2-D convolution op using separate float ops. - - The arguments and return values have the same format, meanings and - restrictions as the actual op. - Args: - conv_input_scale: A scalar 'float'. - conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. - kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout. - padding: A `string` from: `"SAME", "VALID"`. - strides: A list of `ints`. - side_input_scale: A scalar 'float'. - side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. - biases: A `Tensor` of type `float32` in NCHW layout. - apply_relu: A boolean to specify whether to apply "Relu" activation function - that clips outputs to the range [0, 127], or "None" activation that clips - to the range [-128, 127]. - Returns: - A `Tensor` of type `qint8` in NCHW_VECT_C layout. - """ - conv_result = nn_ops.conv2d( - NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)), - OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)), - strides=strides, - padding=padding, - data_format="NCHW") * conv_input_scale - - conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw( - gen_array_ops.dequantize(side_input, -128, 127)) - - output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW") - if apply_relu: - output = nn_ops.relu(output) - - result, _, _ = gen_array_ops.quantize_v2( - NchwToNchwVectC(output), -128, 127, dtypes.qint8) - return result - - -class FusedConvInt8Tests(test.TestCase): - _test_params = [ - { - "batch_size": 1, - "input_channels": 4, - "output_channels": 4, - "input_height": 8, - "input_width": 8, - "filter_height": 6, - "filter_width": 6, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 1, - "input_channels": 4, - "output_channels": 4, - "input_height": 6, - "input_width": 6, - "filter_height": 6, - "filter_width": 6, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "VALID" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "VALID" - }, - { - "batch_size": 2, - "input_channels": 16, - "output_channels": 16, - "input_height": 9, - "input_width": 9, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.001, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 5, - "filter_width": 5, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.001, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 7, - "filter_width": 1, - "vertical_stride": 2, - "horizontal_stride": 1, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 1, - "filter_width": 7, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - ] - - def runTest(self, test_param, apply_relu): - batch_size = test_param["batch_size"] - input_channels = test_param["input_channels"] - output_channels = test_param["output_channels"] - input_height = test_param["input_height"] - input_width = test_param["input_width"] - filter_height = test_param["filter_height"] - filter_width = test_param["filter_width"] - vertical_stride = test_param["vertical_stride"] - horizontal_stride = test_param["horizontal_stride"] - conv_input_scale = test_param["conv_input_scale"] - side_input_scale = test_param["side_input_scale"] - bias_scale = test_param["bias_scale"] - padding_type = test_param["padding_type"] - - conv_input, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [batch_size, input_channels // 4, input_height, input_width, 4], - minval=-0.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - kernel, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [ - output_channels, input_channels // 4, filter_height, - filter_width, 4 - ], - minval=-1.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - output_height = CalculateConvolvedOutputDim(input_height, filter_height, - vertical_stride, padding_type) - output_width = CalculateConvolvedOutputDim(input_width, filter_width, - horizontal_stride, padding_type) - tf_logging.info("output_height=", output_height, ", output_width=", - output_width) - - side_input, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [batch_size, output_channels // 4, output_height, output_width, 4], - minval=0.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - biases = random_ops.random_uniform( - [output_channels], - minval=-10 * bias_scale, - maxval=20 * bias_scale, - dtype=dtypes.float32) - - strides = [1, 1, vertical_stride, horizontal_stride] - - actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - conv_input, - kernel, - biases, - strides=strides, - padding=padding_type, - conv_input_scale=conv_input_scale, - side_input_scale=side_input_scale, - side_input=side_input, - activation_mode="Relu" if apply_relu else "None", - data_format="NCHW_VECT_C", - filter_format="OIHW_VECT_I") - expected = SimulateFusedConv2dBiasActivationInt8( - conv_input_scale, conv_input, kernel, padding_type, strides, - side_input_scale, side_input, biases, apply_relu) - with self.test_session(use_gpu=True) as sess: - actual_y, expected_y = sess.run([actual, expected]) - self.assertAllClose(actual_y, expected_y, rtol=0, atol=1) +# Instantiate the two test suites from test_base, mixing in test.TestCase as +# the test framework. +class FusedConv2DBiasActivationTest(test_base.FusedConv2DBiasActivationTest, + test.TestCase): + pass - def testFusedConvInt8(self): - if not test.is_gpu_available( - cuda_only=True, min_cuda_compute_capability=(6, 1)): - tf_logging.info("int8 test skipped because not run with --config=cuda or " - "no GPUs with compute capability >= 6.1 are available.") - return - for apply_relu in [True, False]: - for test_param in self._test_params: - self.runTest(test_param, apply_relu) +class FusedConvInt8Tests(test_base.FusedConvInt8Tests, test.TestCase): + pass -if __name__ == "__main__": - for index, (input_size_, filter_size_, output_size_, stride_, - padding_) in enumerate(GetShrunkInceptionShapes()): - setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index), - GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_)) - # TODO(b/35359731) - # Fwd, BckInput, and BackFilter to test that for certain input parameter - # set, winograd nonfused algorithm will be excluded from conv autotune. If - # in such case, winograd nonfused algorithm is added as one option of the - # conv autotune, and cuDNN version is smaller than 7, the following tests - # will fail. - ishape = [1, 400, 400, 1] - fshape = [1, 1, 1, 256] - oshape = [1, 400, 400, 256] - setattr(FusedConv2DBiasActivationTest, - "testInceptionFwd_No_Winograd_Nonfused", - GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)) +if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..35fc65e4ba8ff5f38f0024930213468b3dc0bed6 --- /dev/null +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py @@ -0,0 +1,945 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Provides test suites that can be run to test fused convolutions. + +Each of the two test suites in this module, FusedConv2DBiasActivationTest and +FusedConvInt8Tests, should be "instantiated" by declaring a class which inherits +from the FusedConv test and a class that provides the standard test.TestCase +API. + +See e.g. fused_conv2d_bias_activation_op_test.py in this folder. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import numpy as np + +from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def _GetShrunkInceptionShapes(shrink=10): + """Iterator for smaller versions of convolution shapes in 2015 Inception. + + Relative to inception, each depth value is `depth // shrink`. + + Args: + shrink: Factor to shrink each depth value by relative to Inception. + + Yields: + Tuple (input_size, filter_size, out_size, stride, padding), the convolution + parameters of Inception layers. + """ + input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [ + 4, 8, 8, 2048 + ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [ + 4, 8, 8, 1760 + ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [ + 4, 17, 17, 192 + ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [ + 4, 17, 17, 160 + ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024], + [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [ + 4, 17, 17, 768 + ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768], + [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [ + 4, 35, 35, 64 + ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [ + 4, 35, 35, 256 + ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [ + 4, 35, 35, 192 + ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]] + filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [ + 1, 1, 2048, 192 + ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384], + [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [ + 1, 1, 1760, 320 + ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [ + 3, 3, 128, 320 + ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [ + 1, 3, 192, 256 + ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [ + 3, 3, 192, 224 + ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [ + 3, 1, 192, 192 + ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [ + 1, 3, 128, 192 + ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [ + 3, 1, 128, 128 + ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [ + 1, 1, 768, 128 + ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [ + 3, 3, 64, 96 + ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 64], + [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [ + 1, 1, 192, 64 + ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, + 64], [1, 1, 24, 64]] + out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [ + 4, 8, 8, 384 + ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [ + 4, 8, 8, 192 + ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [ + 4, 17, 17, 192 + ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [ + 4, 17, 17, 256 + ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [ + 4, 17, 17, 160 + ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [ + 4, 17, 17, 256 + ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [ + 4, 17, 17, 128 + ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [ + 4, 35, 35, 64 + ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96], + [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48], + [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]] + strides = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + ] + # Shrink sizes to make the test faster + for i in input_sizes: + i[3] //= shrink + for f in filter_sizes: + f[2] //= shrink + f[3] //= shrink + for o in out_sizes: + o[3] //= shrink + # pylint: disable=invalid-name + VALID = "VALID" + SAME = "SAME" + # pylint: enable=invalid-name + paddings = [ + SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, VALID, VALID, VALID + ] + for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides, + paddings): + yield i, f, o, s, p + + +def _GetTestConfigs(): + """Get all the valid tests configs to run. + + Returns: + all the valid test configs as tuples of data_format and use_gpu. + """ + test_configs = [("NCHW", True), ("NHWC", True)] + return test_configs + + +def _IotaNdF32Constant(dim_sizes): + + def MakeList(dims): + if len(dims) == 1: + return [float(1 + f) for f in range(dims[0])] + return [MakeList(dims[1:]) for _ in range(dims[0])] + + return constant_op.constant(MakeList(dim_sizes), dtype=dtypes.float32) + + +def _GetInceptionFwdTest(input_size, + filter_size, + stride, + padding, + gpu_only=True): + + def Test(self): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping InceptionFwd %s", + (input_size, filter_size, stride, padding)) + return + tf_logging.info("Testing InceptionFwd %s", + (input_size, filter_size, stride, padding)) + self.CompareFwdValues(input_size, filter_size, [stride, stride], padding) + + return Test + + +class FusedConv2DBiasActivationTest(object): + + @contextlib.contextmanager + def test_scope(self): # pylint: disable=invalid-name + """Can be overridden in base classes to provide a test scope.""" + yield + + def _DtypesToTest(self, use_gpu): + return [dtypes.float32] + + def _FilterFormatsToTest(self, use_gpu): + return ["HWIO", "OIHW"] + + def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias, + strides, padding, activation_mode, data_format, + filter_format, dtype): + """Verifies the output values of the convolution function. + + Args: + tensor_in_sizes: Input tensor dimensions in + [batch, input_rows, input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in + [kernel_rows, kernel_cols, input_depth, output_depth]. + bias: 1-D bias tensor of length output_depth. + strides: Stride: [col_stride, row_stride] + padding: Padding type. + activation_mode: Activation mode. + data_format: Format of the data tensors. + filter_format: Filter format to use for the fused convolution. + dtype: Data type for inputs and outputs. + Returns: + Symbolic tensor value and reference value that can be used to + execute the computation and verify the results. + """ + input_size = np.prod(tensor_in_sizes) + filter_size = np.prod(filter_in_sizes) + bias_size = filter_in_sizes[-1] # equals to output depth + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, input_size + 1)] + x2 = [f * 1.0 for f in range(1, filter_size + 1)] + # This is to guarantee that there are always negative values after + # bias add so that we can test whether relu works correctly. + x3 = bias + with self.cached_session(use_gpu=True), self.test_scope(): + t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) + t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) + fused_t2 = t2 + if filter_format == "OIHW": + fused_t2 = _HwioToOihw(t2) + t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype) + strides = [1] + strides + [1] + if data_format == "NCHW": + t1 = test_util.NHWCToNCHW(t1) + strides = test_util.NHWCToNCHW(strides) + output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + t1, + fused_t2, + t3, + strides=strides, + padding=padding, + data_format=data_format, + filter_format=filter_format, + activation_mode=activation_mode) + ref_conv_output = nn_ops.conv2d( + t1, t2, strides=strides, padding=padding, data_format=data_format) + ref_bias_output = nn_ops.bias_add( + ref_conv_output, t3, data_format=data_format) + ref_output = nn_ops.relu(ref_bias_output) + if data_format == "NCHW": + output = test_util.NCHWToNHWC(output) + ref_output = test_util.NCHWToNHWC(ref_output) + + return output, ref_output + + def CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides, + padding): + """Verifies that CPU and GPU produce the same values. + + Args: + tensor_in_sizes: Input tensor dimensions in + [batch, input_rows, input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in + [kernel_rows, kernel_cols, input_depth, output_depth]. + conv_strides: [row_stride, col_stride] for the convolution; + padding: Padding type. + """ + x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) + x2 = np.random.rand(*filter_in_sizes).astype(np.float32) + x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32) + + def _SetupVal(data_format, use_gpu): + with self.cached_session(use_gpu=use_gpu), self.test_scope(): + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) + t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]]) + strides = [1] + conv_strides + [1] + if data_format == "NCHW": + t1 = test_util.NHWCToNCHW(t1) + strides = test_util.NHWCToNCHW(strides) + output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + t1, + t2, + t3, + strides=strides, + padding=padding, + data_format=data_format, + activation_mode="Relu") + + if data_format == "NCHW": + output = test_util.NCHWToNHWC(output) + return output + + tensors = [] + for (data_format, use_gpu) in _GetTestConfigs(): + tensors.append(_SetupVal(data_format, use_gpu)) + with self.cached_session() as sess, self.test_scope(): + values = sess.run(tensors) + for i in range(1, len(values)): + self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3) + + def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides, + padding): + tensors = [] + ref_tensors = [] + for (data_format, use_gpu) in _GetTestConfigs(): + for dtype in self._DtypesToTest(use_gpu): + for filter_format in self._FilterFormatsToTest(use_gpu): + result, expected = self._SetupValuesForDevice( + tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu", + data_format, filter_format, dtype) + tensors.append(result) + ref_tensors.append(expected) + with self.cached_session() as sess, self.test_scope(): + values = sess.run(tensors) + ref_values = sess.run(ref_tensors) + for i in range(len(tensors)): + conv = tensors[i] + value = values[i] + ref_value = ref_values[i] + tf_logging.info("expected = %s", ref_value) + tf_logging.info("actual = %s", value) + tol = 1e-5 + if value.dtype == np.float16: + tol = 1e-3 + self.assertAllClose( + np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol) + self.assertShapeEqual(value, conv) + + def testConv2D1x1Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D1x1Filter test.") + return + # expected_output = [ + # 0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0, + # 86.0, 43.0, 165.0, 131.0, 97.0 + # ] + medians = [-45.0, -130.0, -215.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + bias=medians, + strides=[1, 1], + padding="VALID") + + def testConv2DEmpty(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DEmpty test.") + return + # expected_output = [] + self._VerifyValues( + tensor_in_sizes=[0, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + bias=[0.0, 0.0, 0.0], + strides=[1, 1], + padding="VALID") + + def testConv2D2x2Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2Filter test.") + return + # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2500.0, -2500.0, -2500.0], + strides=[1, 1], + padding="VALID") + + def testConv2D1x2Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D1x2Filter test.") + return + # expected_output = [ + # 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0 + # ] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 2, 3, 3], + bias=[-500.0, -500.0, -500.0], + strides=[1, 1], + padding="VALID") + + def testConv2D2x2FilterStride2(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride2 test.") + return + # expected_output = [0.0, 67.0, 163.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2300.0, -2300.0, -2300.0], + strides=[2, 2], + padding="VALID") + + def testConv2D2x2FilterStride2Same(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.") + return + # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2300.0, -1000.0, -1000.0], + strides=[2, 2], + padding="SAME") + + def testConv2D2x2FilterStride1x2(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.") + return + # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0] + self._VerifyValues( + tensor_in_sizes=[1, 3, 6, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-90.0], + strides=[1, 2], + padding="VALID") + + def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.") + return + # expected_output = [0, 0, 175, 205] + self._VerifyValues( + tensor_in_sizes=[1, 7, 7, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-100.0], + strides=[3, 3], + padding="VALID") + + def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.") + return + # expected = [0, 0, 2, 4] + self._VerifyValues( + tensor_in_sizes=[1, 3, 3, 1], + filter_in_sizes=[1, 1, 1, 1], + bias=[-5.0], + strides=[2, 2], + padding="SAME") + + # expected = [0, 0, 4, 6] + self._VerifyValues( + tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[1, 1, 1, 1], + bias=[-5.0], + strides=[2, 2], + padding="SAME") + + # expected = [4, 0, 1, 0] + self._VerifyValues( + tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-40.0], + strides=[3, 3], + padding="SAME") + + def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.") + return + # expected = [0, 5] + self._VerifyValues( + tensor_in_sizes=[1, 2, 2, 1], + filter_in_sizes=[2, 2, 1, 2], + bias=[-50.0, -55.0], + strides=[1, 1], + padding="VALID") + + # expected = [0, 2, 282, 322] + self._VerifyValues( + tensor_in_sizes=[1, 8, 8, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-200.0], + strides=[4, 4], + padding="SAME") + + def testShapeFunctionEdgeCases(self): + # All shapes unknown. + c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + self.assertEqual([None, None, None, None], c1.get_shape().as_list()) + + # Incorrect input shape. + with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32, shape=[1, 3]), + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + # Incorrect filter shape. + with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32, shape=[1, 3]), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + # Depth mismatch. + with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), + array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + def testOpEdgeCases(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping OpEdgeCases tests.") + return + with self.cached_session() as sess, self.test_scope(): + # Illegal strides. + with self.assertRaisesRegexp( + errors_impl.UnimplementedError, + ".*strides.*in the batch and depth dimensions"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[2, 1, 1, 1], + padding="SAME", + activation_mode="Relu")) + with self.assertRaisesRegexp( + errors_impl.UnimplementedError, + ".*strides.*in the batch and depth dimensions"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[1, 1, 1, 2], + padding="SAME", + activation_mode="Relu")) + + # Illegal activation mode. + with self.assertRaisesRegexp(ValueError, + "Op passed string 'Tanh' not in:"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Tanh")) + + # Filter larger than input. + with self.assertRaisesRegexp(ValueError, "Negative dimension size"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([32, 20, 20, 3]), + _IotaNdF32Constant([20, 21, 3, 2]), + _IotaNdF32Constant([2]), + strides=[1, 1, 1, 1], + padding="VALID", + activation_mode="Relu")) + with self.assertRaisesRegexp(ValueError, "Negative dimension size"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([32, 20, 20, 3]), + _IotaNdF32Constant([21, 20, 3, 2]), + _IotaNdF32Constant([2]), + strides=[1, 1, 1, 1], + padding="VALID", + activation_mode="Relu")) + + +# Add InceptionFwd tests to FusedConv2DBiasActivationTest. +for index, (input_size_, filter_size_, output_size_, stride_, + padding_) in enumerate(_GetShrunkInceptionShapes()): + setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index), + _GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_)) + +# TODO(b/35359731) +# Fwd, BckInput, and BackFilter to test that for certain input parameter +# set, winograd nonfused algorithm will be excluded from conv autotune. If +# in such case, winograd nonfused algorithm is added as one option of the +# conv autotune, and cuDNN version is smaller than 7, the following tests +# will fail. +ishape = [1, 400, 400, 1] +fshape = [1, 1, 1, 256] +oshape = [1, 400, 400, 256] +setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_No_Winograd_Nonfused", + _GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)) + + +def _CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): + """Calculates the size of an output dimension of a strided convolution. + + Given the sizes of the corresponding dimension of the input and filter shapes, + and the stride and padding_types, calculates the size of the output dimension. + This function can be called separately for each input dimension. + + Args: + input_dim: An `int` specifying the size of the input dimension. + filter_dim: An `int` specifying the size of the filter dimension. + stride: An `int` specifying the step size of the convolution along the + input dimension. + padding_type: either 'VALID' or 'SAME'. + + Returns: + The size of the output dimension. + """ + if padding_type == "VALID": + return (input_dim - filter_dim + stride) // stride + else: # padding_type == 'SAME' + return (input_dim + stride - 1) // stride + + +def _NchwVectCToNchw(in_tensor): + # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W] + t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3]) + n = in_tensor.shape.dims[0].value + c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value + h = in_tensor.shape.dims[2].value + w = in_tensor.shape.dims[3].value + return array_ops.reshape(t, [n, c, h, w]) + + +def _OihwVectIToHwio(in_tensor): + # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W] + t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0]) + o = in_tensor.shape.dims[0].value + i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value + h = in_tensor.shape.dims[2].value + w = in_tensor.shape.dims[3].value + return array_ops.reshape(t, [h, w, i, o]) + + +def _NchwToNchwVectC(in_tensor): + n, c, h, w = in_tensor.shape.as_list() + assert c % 4 == 0 + t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w]) + return array_ops.transpose(t, [0, 1, 3, 4, 2]) + + +def _HwioToOihw(in_tensor): + return array_ops.transpose(in_tensor, [3, 2, 0, 1]) + + +def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel, + padding, strides, side_input_scale, + side_input, biases, apply_relu): + """Simulates the int8 fused 2-D convolution op using separate float ops. + + The arguments and return values have the same format, meanings and + restrictions as the actual op. + Args: + conv_input_scale: A scalar 'float'. + conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. + kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout. + padding: A `string` from: `"SAME", "VALID"`. + strides: A list of `ints`. + side_input_scale: A scalar 'float'. + side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. + biases: A `Tensor` of type `float32` in NCHW layout. + apply_relu: A boolean to specify whether to apply "Relu" activation function + that clips outputs to the range [0, 127], or "None" activation that clips + to the range [-128, 127]. + Returns: + A `Tensor` of type `qint8` in NCHW_VECT_C layout. + """ + conv_result = nn_ops.conv2d( + _NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)), + _OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)), + strides=strides, + padding=padding, + data_format="NCHW") * conv_input_scale + + conv_and_side_inputs = conv_result + side_input_scale * _NchwVectCToNchw( + gen_array_ops.dequantize(side_input, -128, 127)) + + output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW") + if apply_relu: + output = nn_ops.relu(output) + + result, _, _ = gen_array_ops.quantize_v2( + _NchwToNchwVectC(output), -128, 127, dtypes.qint8) + return result + + +# TODO(b/114580749): XLA:CPU/GPU don't support int8 at the moment, so this test +# doesn't currently use XLA. +class FusedConvInt8Tests(object): + _test_params = [ + { + "batch_size": 1, + "input_channels": 4, + "output_channels": 4, + "input_height": 8, + "input_width": 8, + "filter_height": 6, + "filter_width": 6, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 1, + "input_channels": 4, + "output_channels": 4, + "input_height": 6, + "input_width": 6, + "filter_height": 6, + "filter_width": 6, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "VALID" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "VALID" + }, + { + "batch_size": 2, + "input_channels": 16, + "output_channels": 16, + "input_height": 9, + "input_width": 9, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.001, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + "input_height": 9, + "input_width": 9, + "filter_height": 5, + "filter_width": 5, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.001, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + "input_height": 9, + "input_width": 9, + "filter_height": 7, + "filter_width": 1, + "vertical_stride": 2, + "horizontal_stride": 1, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + "input_height": 9, + "input_width": 9, + "filter_height": 1, + "filter_width": 7, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + ] + + @contextlib.contextmanager + def test_scope(self): # pylint: disable=invalid-name + """Can be overridden in base classes to provide a test scope.""" + yield + + def runTest(self, test_param, apply_relu): + batch_size = test_param["batch_size"] + input_channels = test_param["input_channels"] + output_channels = test_param["output_channels"] + input_height = test_param["input_height"] + input_width = test_param["input_width"] + filter_height = test_param["filter_height"] + filter_width = test_param["filter_width"] + vertical_stride = test_param["vertical_stride"] + horizontal_stride = test_param["horizontal_stride"] + conv_input_scale = test_param["conv_input_scale"] + side_input_scale = test_param["side_input_scale"] + bias_scale = test_param["bias_scale"] + padding_type = test_param["padding_type"] + + with self.cached_session(use_gpu=True) as sess, self.test_scope(): + conv_input, _, _ = gen_array_ops.quantize_v2( + random_ops.random_uniform( + [batch_size, input_channels // 4, input_height, input_width, 4], + minval=-0.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) + + kernel, _, _ = gen_array_ops.quantize_v2( + random_ops.random_uniform([ + output_channels, input_channels // 4, filter_height, filter_width, + 4 + ], + minval=-1.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, + dtypes.qint8) + + output_height = _CalculateConvolvedOutputDim( + input_height, filter_height, vertical_stride, padding_type) + output_width = _CalculateConvolvedOutputDim( + input_width, filter_width, horizontal_stride, padding_type) + tf_logging.info("output_height=%s, output_width=%s", output_height, + output_width) + + side_input, _, _ = gen_array_ops.quantize_v2( + random_ops.random_uniform([ + batch_size, output_channels // 4, output_height, output_width, 4 + ], + minval=0.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, + dtypes.qint8) + + biases = random_ops.random_uniform([output_channels], + minval=-10 * bias_scale, + maxval=20 * bias_scale, + dtype=dtypes.float32) + + strides = [1, 1, vertical_stride, horizontal_stride] + + actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + conv_input, + kernel, + biases, + strides=strides, + padding=padding_type, + conv_input_scale=conv_input_scale, + side_input_scale=side_input_scale, + side_input=side_input, + activation_mode="Relu" if apply_relu else "None", + data_format="NCHW_VECT_C", + filter_format="OIHW_VECT_I") + + expected = _SimulateFusedConv2dBiasActivationInt8( + conv_input_scale, conv_input, kernel, padding_type, strides, + side_input_scale, side_input, biases, apply_relu) + + actual_y, expected_y = sess.run([actual, expected]) + self.assertAllClose(actual_y, expected_y, rtol=0, atol=1) + + def testFusedConvInt8(self): + if not test.is_gpu_available( + cuda_only=True, min_cuda_compute_capability=(6, 1)): + tf_logging.info("int8 test skipped because not run with --config=cuda or " + "no GPUs with compute capability >= 6.1 are available.") + return + for apply_relu in [True, False]: + for test_param in self._test_params: + self.runTest(test_param, apply_relu) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index bb06f1c41c1d60f3c3b3639e3b32ea85161510b2..3549cedb70a6104ff3d3829d1b94cb5f08c5119c 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include #include -#include #include #include @@ -30,19 +29,17 @@ limitations under the License. #include #include "tensorflow/contrib/gdr/gdr.pb.h" -#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/common_runtime/process_state.h" #if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #endif // GOOGLE_CUDA -#include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" namespace tensorflow { @@ -70,14 +67,11 @@ bool IsGDRAvailable() { int TryToReadNumaNode(ibv_device* device) { #if defined(__APPLE__) LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0"; - return 0; + return port::kNUMANoAffinity; #elif defined(PLATFORM_WINDOWS) // Windows support for NUMA is not currently implemented. Return node 0. - return 0; + return port::kNUMANoAffinity; #else - VLOG(2) << "Trying to read NUMA node for device: " << device->name; - static const int kUnknownNumaNode = -1; - auto filename = string(device->ibdev_path) + "/device/numa_node"; std::ifstream ifs(filename.c_str()); @@ -91,12 +85,12 @@ int TryToReadNumaNode(ibv_device* device) { << value << "), but there must be at least one NUMA node" ", so returning NUMA node zero"; - return 0; + return port::kNUMANoAffinity; } LOG(INFO) << "NUMA node for device: " << device->name << " is " << value; return value; } - return kUnknownNumaNode; + return port::kNUMANoAffinity; #endif } @@ -138,8 +132,6 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; - static void RegMemVisitors(); - protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -150,7 +142,8 @@ class GdrMemoryManager : public RemoteMemoryManager { ibv_mr* FindMemoryRegion(void* addr, size_t length); - void InsertMemoryRegion(void* addr, size_t length); + void InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name); void EvictMemoryRegion(void* addr, size_t length); @@ -160,6 +153,7 @@ class GdrMemoryManager : public RemoteMemoryManager { RdmaEndpointPtr listening_; std::atomic stopped_; int epfd_; + int numa_node_; // Server side endpoints // Accessed sequentially in Run() so not protected by lock @@ -190,46 +184,10 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) { - static std::once_flag flag; - std::call_once(flag, []() { RegMemVisitors(); }); -} + next_key_(0) {} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } -/*static*/ void GdrMemoryManager::RegMemVisitors() { - SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("CPU:", numa_node)); - }; - SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); - }; - ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); - ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); - -#if GOOGLE_CUDA - if (IsGDRAvailable()) { - int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - - // Note we don't free allocated GPU memory so there is no free visitor - SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); - }; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, - alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA -} - Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -289,6 +247,42 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } + numa_node_ = TryToReadNumaNode(listening_->verbs->device); + + SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on numa_node " + << numa_node; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "De-registering RDMA capable memory region on numa_node " + << numa_node; + EvictMemoryRegion(ptr, num_bytes); + }; + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + LOG(INFO) << "Instrumenting CPU allocator(s)"; + +#if GOOGLE_CUDA + if (IsGDRAvailable()) { + int bus_id = numa_node_ + 1; + + SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); + LOG(INFO) << "Instrumenting GPU allocator(s) with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA + return Status::OK(); } @@ -405,7 +399,7 @@ void GdrMemoryManager::TransportOptionsFromTensor( ibv_mr* mr = FindMemoryRegion(addr, length); #if GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape()); GPUUtil::CopyGPUTensorToCPU( @@ -456,11 +450,27 @@ void GdrMemoryManager::TransportOptionsFromTensor( #endif if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + Tensor host_copy(alloc, tensor.dtype(), tensor.shape()); + + std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } + + buffer->Ref(); + } else { + buffer->Ref(); } - buffer->Ref(); TensorKey tensor_key = next_key_++; { mutex_lock l(server_mu_); @@ -470,7 +480,7 @@ void GdrMemoryManager::TransportOptionsFromTensor( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, tensor); } else { checksum = GPUUtil::Checksum(tensor); @@ -508,7 +518,8 @@ void GdrMemoryManager::TensorFromTransportOptions( Tensor host_copy; #if GOOGLE_CUDA if (mr == nullptr && !on_host) { - Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); + Allocator* alloc = + GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_); host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); buffer = DMAHelper::buffer(&host_copy); addr = buffer->data(); @@ -518,8 +529,18 @@ void GdrMemoryManager::TensorFromTransportOptions( #endif // GOOGLE_CUDA if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } } decltype(clients_)::iterator iter; @@ -568,7 +589,8 @@ void GdrMemoryManager::TensorFromTransportOptions( } #if GOOGLE_CUDA - if (host_copy.NumElements() > 0) { + if (device->tensorflow_gpu_device_info() && !on_host && + host_copy.NumElements() > 0) { uint64_t checksum = 0; if (VLOG_IS_ON(2)) { checksum = GPUUtil::Checksum(host_copy); @@ -598,6 +620,12 @@ void GdrMemoryManager::TensorFromTransportOptions( } #endif // GOOGLE_CUDA + if ((on_host || !device->tensorflow_gpu_device_info()) && + host_copy.NumElements() > 0) { + std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + } + uint64_t end = Env::Default()->NowMicros(); VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey() @@ -607,7 +635,7 @@ void GdrMemoryManager::TensorFromTransportOptions( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (device->tensorflow_gpu_device_info() && (!on_host)) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, *tensor); } else { checksum = GPUUtil::Checksum(*tensor); @@ -668,7 +696,8 @@ ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) { } } -void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { +void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name) { if (length == 0) return; ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length); if (mr != nullptr) { @@ -676,7 +705,8 @@ void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); mrs_.insert(iter, {mr, &MRDeleter}); } else { - LOG(WARNING) << "Cannot register memory region"; + LOG(WARNING) << "Cannot register memory region allocated by " + << allocator_name; } } diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index 97f38c923f4a19cedf3e16203ca1e66b7e5e45d2..0ebcdc26889bf8a574b1967cd8a6b1c466817127 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -214,7 +214,7 @@ class TransformTest(test.TestCase): def test_graph_replace_gradients(self): ops.reset_default_graph() - w = variables.Variable(0.0, name="w") + w = variables.VariableV1(0.0, name="w") y = math_ops.multiply(math_ops.multiply(w, w, name="mul1"), w, name="mul2") g = gradients_impl.gradients(y, w, name="grad")[0] diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..9393b702d11a2ef84586f712d30c26fe2a8972bb --- /dev/null +++ b/tensorflow/contrib/ignite/BUILD @@ -0,0 +1,139 @@ +package(default_visibility = ["//tensorflow:internal"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load( + "//tensorflow:tensorflow.bzl", + "if_not_windows", + "if_windows", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_py_test", +) + +py_library( + name = "ignite", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", + srcs = [ + "kernels/ignite_dataset_ops.cc", + "kernels/ignite_client.h", + "kernels/ignite_byte_swapper.h", + "kernels/ignite_plain_client.h", + "kernels/ignite_ssl_wrapper.h", + "kernels/ignite_ssl_wrapper.cc", + "kernels/ignite_binary_object_parser.h", + "kernels/ignite_binary_object_parser.cc", + "kernels/ignite_dataset.h", + "kernels/ignite_dataset.cc", + "kernels/ignite_dataset_iterator.h", + "kernels/ignite_dataset_iterator.cc", + ] + if_not_windows([ + "kernels/ignite_plain_client_unix.cc", + ]) + if_windows([ + "kernels/ignite_plain_client_windows.cc", + ]), + copts = if_windows([ + "-DWIN32_LEAN_AND_MEAN", + ]), + deps = [ + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + "@boringssl//:ssl", + "@protobuf_archive//:protobuf_headers", + ], + alwayslink = 1, +) + +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/ignite_dataset_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":ignite_op_loader", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/ignite:dataset_ops_op_lib"], +) + +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "ignite_op_loader", + srcs = ["python/ops/ignite_op_loader.py"], + dso = ["//tensorflow/contrib/ignite:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/ignite:dataset_ops_op_lib", + ], + srcs_version = "PY2AND3", + deps = [ + ":gen_dataset_ops", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + ], +) + +# The Apache Ignite servers have to setup before the test and tear down +# after the test manually. The docker engine has to be installed. +# +# To setup Apache Ignite servers: +# $ bash ./python/tests/start_ignite.sh +# +# To tear down Apache Ignite servers: +# $ bash ./python/tests/stop_ignite.sh +tf_py_test( + name = "ignite_dataset_test", + srcs = ["python/tests/ignite_dataset_test.py"], + additional_deps = [ + ":ignite", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + tags = [ + "manual", + "no_windows", + "notap", + ], +) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..55c89d27996318dabb29bb15372411005301ebd9 --- /dev/null +++ b/tensorflow/contrib/ignite/README.md @@ -0,0 +1,167 @@ +# Ignite Dataset + +- [Overview](#overview) +- [Features](#features) + * [Distributed In-Memory Datasource](#distributed-in-memory-datasource) + * [Structured Objects](#structured-objects) + * [Distributed Training](#distributed-training) + * [SSL Connection](#ssl-connection) + * [Windows Support](#windows-support) +- [Try it out](#try-it-out) +- [Limitations](#limitations) + +## Overview + +[Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for +transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a data source for neural network training, inference and all other computations supported by TensorFlow. + +## Features + +Ignite Dataset provides features that that you can use in a wide range of cases. The most important and interesting features are described below. + +### Distributed In-Memory Datasource +[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that provides fast data access. It allows you to avoid limitations of hard drive and store and operate with as much data as you need in distributed cluster. You can utilize +these benefits of Apache Ignite by using Ignite Dataset. Moreover, Ignite Dataset can be used for the following use-cases: +- If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations. +- If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations. +- If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow. + +Note that Apache Ignite is not just a step of ETL pipeline between a database or a data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference. + +```bash +$ apache-ignite-fabric/bin/ignite.sh +$ apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://localhost:10800/" + +jdbc:ignite:thin://localhost/> CREATE TABLE KITTEN_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (1, 'WARM KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (2, 'SOFT KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL OF FUR'); +``` + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="SQL_PUBLIC_KITTEN_CACHE") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> for _ in range(3): +>>> print(sess.run(next_obj)) + +{'key': 1, 'val': {'NAME': b'WARM KITTY'}} +{'key': 2, 'val': {'NAME': b'SOFT KITTY'}} +{'key': 3, 'val': {'NAME': b'LITTLE BALL OF FUR'}} +``` + +### Structured Objects +[Apache Ignite](https://ignite.apache.org/) allows to store any type of objects. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +{ + 'key': 'kitten.png', + 'val': { + 'metadata': { + 'file_name': b'kitten.png', + 'label': b'little ball of fur', + width: 800, + height: 600 + }, + 'pixels': [0, 0, 0, 0, ..., 0] + } +} +``` + Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES").map(lambda obj: obj['val']['pixels']) +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +[0, 0, 0, 0, ..., 0] +``` + +### Distributed Training + +TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is the ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. + + + +Using this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottlenecks. + +Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL), we can specify the number of partitions the data will be partitioned on. For example, if an Apache Ignite cluster consists of 10 machines and we create cache with 10 partitions, then every machine will maintain approximately one data partition. + +Ignite Dataset allows using these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that can be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach, we can assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset("IMAGES") +>>> +>>> # Compute gradients locally on every worker node. +>>> gradients = [] +>>> for i in range(5): +>>> with tf.device("/job:WORKER/task:%d" % i): +>>> device_iterator = dataset.make_one_shot_iterator() +>>> device_next_obj = device_iterator.get_next() +>>> gradient = compute_gradient(device_next_obj) +>>> gradients.append(gradient) +>>> +>>> # Aggregate them on master node. +>>> result_gradient = tf.reduce_sum(gradients) +>>> +>>> with tf.Session("grpc://localhost:10000") as sess: +>>> print(sess.run(result_gradient)) +``` + +High-level TensorFlow API for [distributed training](https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy) is supported as well. + +### SSL Connection + +Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information, please refer to the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES", certfile="client.pem", cert_password="password", username="ignite", password="ignite") +>>> ... +``` + +### Windows Support + +Ignite Dataset is fully compatible with Windows. You can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. + +## Try it out + +The simplest way to try Ignite Dataset is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and after start interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine: + +``` +docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist +``` + +After that you will be able to work with it following way: + +![ignite-dataset-mnist](https://s3.amazonaws.com/helloworld23423423ew23/ignite-dataset-mnist.png "Ignite Dataset Mnist") + +## Limitations + +Presently, Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of an object structure. diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f42947696f76e168f77b2316758209f1f71a7915 --- /dev/null +++ b/tensorflow/contrib/ignite/__init__.py @@ -0,0 +1,42 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""IgniteDataset that allows to get data from Apache Ignite. + +Apache Ignite is a memory-centric distributed database, caching, and +processing platform for transactional, analytical, and streaming workloads, +delivering in-memory speeds at petabyte scale. This contrib package +contains an integration between Apache Ignite and TensorFlow. The +integration is based on tf.data from TensorFlow side and Binary Client +Protocol from Apache Ignite side. It allows to use Apache Ignite as a +datasource for neural network training, inference and all other +computations supported by TensorFlow. Ignite Dataset is based on Apache +Ignite Binary Client Protocol: +https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. + +@@IgniteDataset +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops import IgniteDataset +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "IgniteDataset", +] + +remove_undocumented(__name__) diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc new file mode 100644 index 0000000000000000000000000000000000000000..2c8a7d44b07b43f788bcbc0850b5162cc14dd951 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -0,0 +1,334 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +BinaryObjectParser::BinaryObjectParser() : byte_swapper_(ByteSwapper(false)) {} + +Status BinaryObjectParser::Parse(uint8_t** ptr, + std::vector* out_tensors, + std::vector* types) const { + uint8_t object_type_id = ParseByte(ptr); + + // Skip non-leaf nodes. + if (object_type_id != WRAPPED_OBJ && object_type_id != COMPLEX_OBJ) + types->push_back(object_type_id); + + switch (object_type_id) { + case BYTE: { + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, TensorShape({})); + out_tensors->back().scalar()() = ParseByte(ptr); + break; + } + case SHORT: { + out_tensors->emplace_back(cpu_allocator(), DT_INT16, TensorShape({})); + out_tensors->back().scalar()() = ParseShort(ptr); + break; + } + case USHORT: { + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, TensorShape({})); + out_tensors->back().scalar()() = ParseUnsignedShort(ptr); + break; + } + case INT: { + out_tensors->emplace_back(cpu_allocator(), DT_INT32, TensorShape({})); + out_tensors->back().scalar()() = ParseInt(ptr); + break; + } + case LONG: { + out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); + break; + } + case FLOAT: { + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, TensorShape({})); + out_tensors->back().scalar()() = ParseFloat(ptr); + break; + } + case DOUBLE: { + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, TensorShape({})); + out_tensors->back().scalar()() = ParseDouble(ptr); + break; + } + case BOOL: { + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, TensorShape({})); + out_tensors->back().scalar()() = ParseBool(ptr); + break; + } + case STRING: { + out_tensors->emplace_back(cpu_allocator(), DT_STRING, TensorShape({})); + out_tensors->back().scalar()() = ParseString(ptr); + break; + } + case DATE: { + out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); + break; + } + case BYTE_ARR: { + int32_t length = ParseInt(ptr); + uint8_t* arr = ParseByteArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case SHORT_ARR: { + int32_t length = ParseInt(ptr); + int16_t* arr = ParseShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case USHORT_ARR: { + int32_t length = ParseInt(ptr); + uint16_t* arr = ParseUnsignedShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case INT_ARR: { + int32_t length = ParseInt(ptr); + int32_t* arr = ParseIntArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT32, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case LONG_ARR: { + int32_t length = ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case FLOAT_ARR: { + int32_t length = ParseInt(ptr); + float* arr = ParseFloatArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case DOUBLE_ARR: { + int32_t length = ParseInt(ptr); + double* arr = ParseDoubleArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case BOOL_ARR: { + int32_t length = ParseInt(ptr); + bool* arr = ParseBoolArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case STRING_ARR: { + int32_t length = ParseInt(ptr); + out_tensors->emplace_back(cpu_allocator(), DT_STRING, + TensorShape({length})); + for (int32_t i = 0; i < length; i++) + out_tensors->back().vec()(i) = ParseString(ptr); + break; + } + case DATE_ARR: { + int32_t length = ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case WRAPPED_OBJ: { + int32_t byte_arr_size = ParseInt(ptr); + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); + int32_t offset = ParseInt(ptr); + + break; + } + case COMPLEX_OBJ: { + uint8_t version = ParseByte(ptr); + int16_t flags = ParseShort(ptr); + int32_t type_id = ParseInt(ptr); + int32_t hash_code = ParseInt(ptr); + int32_t length = ParseInt(ptr); + int32_t schema_id = ParseInt(ptr); + int32_t schema_offset = ParseInt(ptr); + + // 24 is size of header just read. + uint8_t* end = *ptr + schema_offset - 24; + int32_t i = 0; + while (*ptr < end) { + i++; + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); + } + + *ptr += (length - schema_offset); + + break; + } + default: { + return errors::Unknown("Unknowd binary type (type id ", + (int)object_type_id, ")"); + } + } + + return Status::OK(); +} + +uint8_t BinaryObjectParser::ParseByte(uint8_t** ptr) const { + uint8_t res = **ptr; + *ptr += 1; + + return res; +} + +int16_t BinaryObjectParser::ParseShort(uint8_t** ptr) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16(res); + *ptr += 2; + + return *res; +} + +uint16_t BinaryObjectParser::ParseUnsignedShort(uint8_t** ptr) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16(res); + *ptr += 2; + + return *res; +} + +int32_t BinaryObjectParser::ParseInt(uint8_t** ptr) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32(res); + *ptr += 4; + + return *res; +} + +int64_t BinaryObjectParser::ParseLong(uint8_t** ptr) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64(res); + *ptr += 8; + + return *res; +} + +float BinaryObjectParser::ParseFloat(uint8_t** ptr) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloat(res); + *ptr += 4; + + return *res; +} + +double BinaryObjectParser::ParseDouble(uint8_t** ptr) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDouble(res); + *ptr += 8; + + return *res; +} + +bool BinaryObjectParser::ParseBool(uint8_t** ptr) const { + bool res = **reinterpret_cast(ptr); + *ptr += 1; + + return res; +} + +string BinaryObjectParser::ParseString(uint8_t** ptr) const { + int32_t length = ParseInt(ptr); + string res(*reinterpret_cast(ptr), length); + *ptr += length; + + return res; +} + +uint8_t* BinaryObjectParser::ParseByteArr(uint8_t** ptr, int length) const { + uint8_t* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + +int16_t* BinaryObjectParser::ParseShortArr(uint8_t** ptr, int length) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +uint16_t* BinaryObjectParser::ParseUnsignedShortArr(uint8_t** ptr, + int length) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +int32_t* BinaryObjectParser::ParseIntArr(uint8_t** ptr, int length) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32Arr(res, length); + *ptr += length * 4; + + return res; +} + +int64_t* BinaryObjectParser::ParseLongArr(uint8_t** ptr, int length) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64Arr(res, length); + *ptr += length * 8; + + return res; +} + +float* BinaryObjectParser::ParseFloatArr(uint8_t** ptr, int length) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloatArr(res, length); + *ptr += length * 4; + + return res; +} + +double* BinaryObjectParser::ParseDoubleArr(uint8_t** ptr, int length) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDoubleArr(res, length); + *ptr += length * 8; + + return res; +} + +bool* BinaryObjectParser::ParseBoolArr(uint8_t** ptr, int length) const { + bool* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..eb1f856643a790de6acaa82d4b8ad894fd364376 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -0,0 +1,81 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ + +#include +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class BinaryObjectParser { + public: + BinaryObjectParser(); + Status Parse(uint8_t** ptr, std::vector* out_tensors, + std::vector* types) const; + + private: + uint8_t ParseByte(uint8_t** ptr) const; + int16_t ParseShort(uint8_t** ptr) const; + uint16_t ParseUnsignedShort(uint8_t** ptr) const; + int32_t ParseInt(uint8_t** ptr) const; + int64_t ParseLong(uint8_t** ptr) const; + float ParseFloat(uint8_t** ptr) const; + double ParseDouble(uint8_t** ptr) const; + bool ParseBool(uint8_t** ptr) const; + string ParseString(uint8_t** ptr) const; + uint8_t* ParseByteArr(uint8_t** ptr, int length) const; + int16_t* ParseShortArr(uint8_t** ptr, int length) const; + uint16_t* ParseUnsignedShortArr(uint8_t** ptr, int length) const; + int32_t* ParseIntArr(uint8_t** ptr, int length) const; + int64_t* ParseLongArr(uint8_t** ptr, int length) const; + float* ParseFloatArr(uint8_t** ptr, int length) const; + double* ParseDoubleArr(uint8_t** ptr, int length) const; + bool* ParseBoolArr(uint8_t** ptr, int length) const; + + const ByteSwapper byte_swapper_; +}; + +enum ObjectType { + BYTE = 1, + SHORT = 2, + INT = 3, + LONG = 4, + FLOAT = 5, + DOUBLE = 6, + USHORT = 7, + BOOL = 8, + STRING = 9, + DATE = 11, + BYTE_ARR = 12, + SHORT_ARR = 13, + INT_ARR = 14, + LONG_ARR = 15, + FLOAT_ARR = 16, + DOUBLE_ARR = 17, + USHORT_ARR = 18, + BOOL_ARR = 19, + STRING_ARR = 20, + DATE_ARR = 22, + WRAPPED_OBJ = 27, + COMPLEX_OBJ = 103 +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h new file mode 100644 index 0000000000000000000000000000000000000000..46df3e39dc4ec6dd4ef5730a184264eaa9fc5872 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -0,0 +1,126 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ + +#include +#include "tensorflow/core/platform/byte_order.h" + +namespace tensorflow { + +class ByteSwapper { + public: + ByteSwapper(bool big_endian) { swap_ = big_endian == port::kLittleEndian; } + + inline void SwapIfRequiredInt16(int16_t *x) const { + if (swap_) { + Swap16(x); + } + } + + inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const { + if (swap_) { + Swap16(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt32(int32_t *x) const { + if (swap_) { + Swap32(x); + } + } + + inline void SwapIfRequiredFloat(float *x) const { + if (swap_) { + Swap32(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt64(int64_t *x) const { + if (swap_) { + Swap64(x); + } + } + + inline void SwapIfRequiredDouble(double *x) const { + if (swap_) { + Swap64(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt16Arr(int16_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap16(&x[i]); + } + } + + inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x, + int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap16(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt32Arr(int32_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap32(&x[i]); + } + } + + inline void SwapIfRequiredFloatArr(float *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap32(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt64Arr(int64_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap64(&x[i]); + } + } + + inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap64(reinterpret_cast(&x[i])); + } + } + + private: + inline void Swap16(int16_t *x) const { + *x = ((*x & 0xFF) << 8) | ((*x >> 8) & 0xFF); + } + + inline void Swap32(int32_t *x) const { + *x = ((*x & 0xFF) << 24) | (((*x >> 8) & 0xFF) << 16) | + (((*x >> 16) & 0xFF) << 8) | ((*x >> 24) & 0xFF); + } + + inline void Swap64(int64_t *x) const { + *x = ((*x & 0xFF) << 56) | (((*x >> 8) & 0xFF) << 48) | + (((*x >> 16) & 0xFF) << 40) | (((*x >> 24) & 0xFF) << 32) | + (((*x >> 32) & 0xFF) << 24) | (((*x >> 40) & 0xFF) << 16) | + (((*x >> 48) & 0xFF) << 8) | ((*x >> 56) & 0xFF); + } + + bool swap_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h new file mode 100644 index 0000000000000000000000000000000000000000..459b50b48fd95ad105bccaca4076160e0ef152ee --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -0,0 +1,84 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class Client { + public: + Client(bool big_endian) : byte_swapper_(ByteSwapper(big_endian)) {} + virtual Status Connect() = 0; + virtual Status Disconnect() = 0; + virtual bool IsConnected() = 0; + virtual int GetSocketDescriptor() = 0; + virtual Status ReadData(uint8_t *buf, const int32_t length) = 0; + virtual Status WriteData(const uint8_t *buf, const int32_t length) = 0; + + inline Status ReadByte(uint8_t *data) { return ReadData(data, 1); } + + inline Status ReadShort(int16_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 2)); + byte_swapper_.SwapIfRequiredInt16(data); + + return Status::OK(); + } + + inline Status ReadInt(int32_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 4)); + byte_swapper_.SwapIfRequiredInt32(data); + + return Status::OK(); + } + + inline Status ReadLong(int64_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 8)); + byte_swapper_.SwapIfRequiredInt64(data); + + return Status::OK(); + } + + inline Status WriteByte(const uint8_t data) { return WriteData(&data, 1); } + + inline Status WriteShort(const int16_t data) { + int16_t tmp = data; + byte_swapper_.SwapIfRequiredInt16(&tmp); + return WriteData((uint8_t *)&tmp, 2); + } + + inline Status WriteInt(const int32_t data) { + int32_t tmp = data; + byte_swapper_.SwapIfRequiredInt32(&tmp); + return WriteData((uint8_t *)&tmp, 4); + } + + inline Status WriteLong(const int64_t data) { + int64_t tmp = data; + byte_swapper_.SwapIfRequiredInt64(&tmp); + return WriteData((uint8_t *)&tmp, 8); + } + + private: + const ByteSwapper byte_swapper_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc new file mode 100644 index 0000000000000000000000000000000000000000..c4a7d3c513a796c9d95b371bedc609fd75188817 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -0,0 +1,81 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +IgniteDataset::IgniteDataset(OpKernelContext* ctx, string cache_name, + string host, int32 port, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, + string cert_password, std::vector schema, + std::vector permutation, + DataTypeVector dtypes, + std::vector shapes) + : DatasetBase(DatasetContext(ctx)), + cache_name_(std::move(cache_name)), + host_(std::move(host)), + port_(port), + local_(local), + part_(part), + page_size_(page_size), + username_(std::move(username)), + password_(std::move(password)), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), + dtypes_(dtypes), + shapes_(shapes) { + LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name_ + << "', host='" << host_ << "', port=" << port_ + << ", local=" << local_ << ", part=" << part_ + << ", page_size=" << page_size_ << ", username='" << username_ + << "', certfile='" << certfile_ << "', keyfile='" + << keyfile_ + "']"; +} + +IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } + +std::unique_ptr IgniteDataset::MakeIteratorInternal( + const string& prefix) const { + return std::unique_ptr(new IgniteDatasetIterator( + {this, strings::StrCat(prefix, "::Ignite")}, std::move(this->host_), + this->port_, std::move(this->cache_name_), this->local_, this->part_, + this->page_size_, std::move(this->username_), std::move(this->password_), + std::move(this->certfile_), std::move(this->keyfile_), + std::move(this->cert_password_), std::move(this->schema_), + std::move(this->permutation_))); +} + +const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; } + +const std::vector& IgniteDataset::output_shapes() const { + return shapes_; +} + +string IgniteDataset::DebugString() const { return "IgniteDatasetOp::Dataset"; } + +Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const { + return errors::Unimplemented( + "IgniteDataset does not support 'AsGraphDefInternal'"); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h new file mode 100644 index 0000000000000000000000000000000000000000..66bfdf2e2a168e59cd2fec8e2ac5b8fd482d5c15 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -0,0 +1,63 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ + +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { + +class IgniteDataset : public DatasetBase { + public: + IgniteDataset(OpKernelContext* ctx, string cache_name, string host, + int32 port, bool local, int32 part, int32 page_size, + string username, string password, string certfile, + string keyfile, string cert_password, std::vector schema, + std::vector permutation, DataTypeVector dtypes, + std::vector shapes); + ~IgniteDataset(); + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override; + const DataTypeVector& output_dtypes() const override; + const std::vector& output_shapes() const override; + string DebugString() const override; + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override; + + private: + const string cache_name_; + const string host_; + const int32 port_; + const bool local_; + const int32 part_; + const int32 page_size_; + const string username_; + const string password_; + const string certfile_; + const string keyfile_; + const string cert_password_; + const std::vector schema_; + const std::vector permutation_; + const DataTypeVector dtypes_; + const std::vector shapes_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc new file mode 100644 index 0000000000000000000000000000000000000000..5da9127aa6a3a4bc16347e6890cc1ba44406c0d5 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -0,0 +1,422 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" + +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +IgniteDatasetIterator::IgniteDatasetIterator( + const Params& params, string host, int32 port, string cache_name, + bool local, int32 part, int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, + std::vector schema, std::vector permutation) + : DatasetIterator(params), + cache_name_(std::move(cache_name)), + local_(local), + part_(part), + page_size_(page_size), + username_(std::move(username)), + password_(std::move(password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), + remainder_(-1), + cursor_id_(-1), + last_page_(false), + valid_state_(true) { + Client* p_client = new PlainClient(std::move(host), port, false); + + if (certfile.empty()) + client_ = std::unique_ptr(p_client); + else + client_ = std::unique_ptr( + new SslWrapper(std::unique_ptr(p_client), std::move(certfile), + std::move(keyfile), std::move(cert_password), false)); + + LOG(INFO) << "Ignite Dataset Iterator created"; +} + +IgniteDatasetIterator::~IgniteDatasetIterator() { + Status status = CloseConnection(); + if (!status.ok()) LOG(ERROR) << status.ToString(); + + LOG(INFO) << "Ignite Dataset Iterator destroyed"; +} + +Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) { + mutex_lock l(mutex_); + + if (valid_state_) { + Status status = + GetNextInternalWithValidState(ctx, out_tensors, end_of_sequence); + + if (!status.ok()) valid_state_ = false; + + return status; + } + + return errors::Unknown("Iterator is invalid"); +} + +Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'SaveInternal'"); +} + +Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'RestoreInternal')"); +} + +Status IgniteDatasetIterator::GetNextInternalWithValidState( + IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) { + if (remainder_ == 0 && last_page_) { + cursor_id_ = -1; + *end_of_sequence = true; + + return Status::OK(); + } else { + TF_RETURN_IF_ERROR(EstablishConnection()); + + if (remainder_ == -1) { + TF_RETURN_IF_ERROR(ScanQuery()); + } else if (remainder_ == 0) { + TF_RETURN_IF_ERROR(LoadNextPage()); + } + + uint8_t* initial_ptr = ptr_; + std::vector tensors; + std::vector types; + + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse key + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse val + + remainder_ -= (ptr_ - initial_ptr); + + TF_RETURN_IF_ERROR(CheckTypes(types)); + + for (size_t i = 0; i < tensors.size(); i++) + out_tensors->push_back(tensors[permutation_[i]]); + + *end_of_sequence = false; + + return Status::OK(); + } + + *end_of_sequence = true; + + return Status::OK(); +} + +Status IgniteDatasetIterator::EstablishConnection() { + if (!client_->IsConnected()) { + TF_RETURN_IF_ERROR(client_->Connect()); + + Status status = Handshake(); + if (!status.ok()) { + Status disconnect_status = client_->Disconnect(); + if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); + + return status; + } + } + + return Status::OK(); +} + +Status IgniteDatasetIterator::CloseConnection() { + if (cursor_id_ != -1 && !last_page_) { + TF_RETURN_IF_ERROR(EstablishConnection()); + + TF_RETURN_IF_ERROR(client_->WriteInt(kCloseConnectionReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kCloseConnectionOpcode)); + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Resource ID + + int32_t res_len; + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + if (res_len < kMinResLength) + return errors::Unknown("Close Resource Response is corrupted"); + + int64_t req_id; + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); + int32_t status; + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); + if (status != 0) { + uint8_t err_msg_header; + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); + if (err_msg_header == kStringVal) { + int32_t err_msg_length; + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); + + return errors::Unknown("Close Resource Error [status=", status, + ", message=", err_msg, "]"); + } + return errors::Unknown("Close Resource Error [status=", status, "]"); + } + + cursor_id_ = -1; + + return client_->Disconnect(); + } else { + LOG(INFO) << "Query Cursor " << cursor_id_ << " is already closed"; + } + + return client_->IsConnected() ? client_->Disconnect() : Status::OK(); +} + +Status IgniteDatasetIterator::Handshake() { + int32_t msg_len = kHandshakeReqDefaultLength; + + if (username_.empty()) + msg_len += 1; + else + msg_len += 5 + username_.length(); // 1 byte header, 4 bytes length. + + if (password_.empty()) + msg_len += 1; + else + msg_len += 5 + password_.length(); // 1 byte header, 4 bytes length. + + TF_RETURN_IF_ERROR(client_->WriteInt(msg_len)); + TF_RETURN_IF_ERROR(client_->WriteByte(1)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMajorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMinorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolPatchVersion)); + TF_RETURN_IF_ERROR(client_->WriteByte(2)); + if (username_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); + } else { + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); + TF_RETURN_IF_ERROR(client_->WriteInt(username_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData(reinterpret_cast(username_.c_str()), + username_.length())); + } + + if (password_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); + } else { + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); + TF_RETURN_IF_ERROR(client_->WriteInt(password_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData(reinterpret_cast(password_.c_str()), + password_.length())); + } + + int32_t handshake_res_len; + TF_RETURN_IF_ERROR(client_->ReadInt(&handshake_res_len)); + uint8_t handshake_res; + TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res)); + + if (handshake_res != 1) { + int16_t serv_ver_major; + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major)); + int16_t serv_ver_minor; + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_minor)); + int16_t serv_ver_patch; + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_patch)); + uint8_t header; + TF_RETURN_IF_ERROR(client_->ReadByte(&header)); + + if (header == kStringVal) { + int32_t length; + TF_RETURN_IF_ERROR(client_->ReadInt(&length)); + + uint8_t* err_msg_c = new uint8_t[length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length)); + string err_msg(reinterpret_cast(err_msg_c), length); + + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, ", message='", err_msg, "']"); + } else if (header == kNullVal) { + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); + } else { + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); + } + } + + return Status::OK(); +} + +Status IgniteDatasetIterator::ScanQuery() { + TF_RETURN_IF_ERROR(client_->WriteInt(kScanQueryReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kScanQueryOpcode)); + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR( + client_->WriteInt(JavaHashCode(cache_name_))); // Cache name + TF_RETURN_IF_ERROR(client_->WriteByte(0)); // Flags + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); // Filter object + TF_RETURN_IF_ERROR(client_->WriteInt(page_size_)); // Cursor page size + TF_RETURN_IF_ERROR(client_->WriteInt(part_)); // part_ition to query + TF_RETURN_IF_ERROR(client_->WriteByte(local_)); // local_ flag + + uint64 wait_start = Env::Default()->NowMicros(); + int32_t res_len; + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + int64_t wait_stop = Env::Default()->NowMicros(); + + LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) / 1000 << " ms"; + + if (res_len < kMinResLength) + return errors::Unknown("Scan Query Response is corrupted"); + + int64_t req_id; + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); + + int32_t status; + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); + + if (status != 0) { + uint8_t err_msg_header; + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); + + if (err_msg_header == kStringVal) { + int32_t err_msg_length; + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); + + return errors::Unknown("Scan Query Error [status=", status, + ", message=", err_msg, "]"); + } + return errors::Unknown("Scan Query Error [status=", status, "]"); + } + + TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_)); + + int32_t row_cnt; + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); + + int32_t page_size = res_len - kScanQueryResHeaderLength; + + return ReceivePage(page_size); +} + +Status IgniteDatasetIterator::LoadNextPage() { + TF_RETURN_IF_ERROR(client_->WriteInt(kLoadNextPageReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kLoadNextPageOpcode)); + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Cursor ID + + uint64 wait_start = Env::Default()->NowMicros(); + int32_t res_len; + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + uint64 wait_stop = Env::Default()->NowMicros(); + + LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) / 1000 + << " ms"; + + if (res_len < kMinResLength) + return errors::Unknown("Load Next Page Response is corrupted"); + + int64_t req_id; + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); + + int32_t status; + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); + + if (status != 0) { + uint8_t err_msg_header; + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); + + if (err_msg_header == kStringVal) { + int32_t err_msg_length; + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); + + return errors::Unknown("Load Next Page Error [status=", status, + ", message=", err_msg, "]"); + } + return errors::Unknown("Load Next Page Error [status=", status, "]"); + } + + int32_t row_cnt; + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); + + int32_t page_size = res_len - kLoadNextPageResHeaderLength; + + return ReceivePage(page_size); +} + +Status IgniteDatasetIterator::ReceivePage(int32_t page_size) { + remainder_ = page_size; + page_ = std::unique_ptr(new uint8_t[remainder_]); + ptr_ = page_.get(); + + uint64 start = Env::Default()->NowMicros(); + TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); + uint64 stop = Env::Default()->NowMicros(); + + double size_in_mb = 1.0 * remainder_ / 1024 / 1024; + double time_in_s = 1.0 * (stop - start) / 1000 / 1000; + LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 + << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; + + uint8_t last_page_b; + TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); + + last_page_ = !last_page_b; + + return Status::OK(); +} + +Status IgniteDatasetIterator::CheckTypes(const std::vector& types) { + if (schema_.size() != types.size()) + return errors::Unknown("Object has unexpected schema"); + + for (size_t i = 0; i < schema_.size(); i++) { + if (schema_[i] != types[permutation_[i]]) + return errors::Unknown("Object has unexpected schema"); + } + + return Status::OK(); +} + +int32_t IgniteDatasetIterator::JavaHashCode(string str) const { + int32_t h = 0; + for (char& c : str) { + h = 31 * h + c; + } + return h; +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h new file mode 100644 index 0000000000000000000000000000000000000000..c499e2c9ccfac5c15db08c8fd8b26c37aa0404f3 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -0,0 +1,99 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +class IgniteDatasetIterator : public DatasetIterator { + public: + IgniteDatasetIterator(const Params& params, string host, int32 port, + string cache_name, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, + std::vector schema, + std::vector permutation); + ~IgniteDatasetIterator(); + Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) override; + + protected: + Status SaveInternal(IteratorStateWriter* writer) override; + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override; + + private: + Status GetNextInternalWithValidState(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence); + + Status EstablishConnection(); + Status CloseConnection(); + Status Handshake(); + Status ScanQuery(); + Status LoadNextPage(); + Status ReceivePage(int32_t page_size); + Status CheckTypes(const std::vector& types); + int32_t JavaHashCode(string str) const; + + std::unique_ptr client_; + BinaryObjectParser parser_; + + const string cache_name_; + const bool local_; + const int32 part_; + const int32 page_size_; + const string username_; + const string password_; + const std::vector schema_; + const std::vector permutation_; + + int32_t remainder_; + int64_t cursor_id_; + bool last_page_; + + bool valid_state_; + + mutex mutex_; + + std::unique_ptr page_; + uint8_t* ptr_; +}; + +constexpr uint8_t kNullVal = 101; +constexpr uint8_t kStringVal = 9; +constexpr uint8_t kProtocolMajorVersion = 1; +constexpr uint8_t kProtocolMinorVersion = 1; +constexpr uint8_t kProtocolPatchVersion = 0; +constexpr int16_t kScanQueryOpcode = 2000; +constexpr int16_t kLoadNextPageOpcode = 2001; +constexpr int16_t kCloseConnectionOpcode = 0; +constexpr int32_t kScanQueryReqLength = 25; +constexpr int32_t kScanQueryResHeaderLength = 25; +constexpr int32_t kLoadNextPageReqLength = 18; +constexpr int32_t kLoadNextPageResHeaderLength = 17; +constexpr int32_t kCloseConnectionReqLength = 18; +constexpr int32_t kHandshakeReqDefaultLength = 8; +constexpr int32_t kMinResLength = 12; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..f75b1c5ff55ca9ee493148ff79c2edd4b15ac42a --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -0,0 +1,198 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/strings/numbers.h" + +namespace tensorflow { +namespace { + +Status SchemaToTypes(const std::vector& schema, DataTypeVector* dtypes) { + for (auto e : schema) { + if (e == BYTE || e == BYTE_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == SHORT || e == SHORT_ARR) { + dtypes->push_back(DT_INT16); + } else if (e == INT || e == INT_ARR) { + dtypes->push_back(DT_INT32); + } else if (e == LONG || e == LONG_ARR) { + dtypes->push_back(DT_INT64); + } else if (e == FLOAT || e == FLOAT_ARR) { + dtypes->push_back(DT_FLOAT); + } else if (e == DOUBLE || e == DOUBLE_ARR) { + dtypes->push_back(DT_DOUBLE); + } else if (e == USHORT || e == USHORT_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == BOOL || e == BOOL_ARR) { + dtypes->push_back(DT_BOOL); + } else if (e == STRING || e == STRING_ARR) { + dtypes->push_back(DT_STRING); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + +Status SchemaToShapes(const std::vector& schema, + std::vector* shapes) { + for (auto e : schema) { + if (e >= 1 && e < 10) { + shapes->push_back(PartialTensorShape({})); + } else if (e >= 12 && e < 21) { + shapes->push_back(PartialTensorShape({-1})); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + +class IgniteDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + string cache_name = ""; + string host = ""; + int32 port = -1; + bool local = false; + int32 part = -1; + int32 page_size = -1; + string username = ""; + string password = ""; + string certfile = ""; + string keyfile = ""; + string cert_password = ""; + + const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME"); + const char* env_host = std::getenv("IGNITE_DATASET_HOST"); + const char* env_port = std::getenv("IGNITE_DATASET_PORT"); + const char* env_local = std::getenv("IGNITE_DATASET_LOCAL"); + const char* env_part = std::getenv("IGNITE_DATASET_PART"); + const char* env_page_size = std::getenv("IGNITE_DATASET_PAGE_SIZE"); + const char* env_username = std::getenv("IGNITE_DATASET_USERNAME"); + const char* env_password = std::getenv("IGNITE_DATASET_PASSWORD"); + const char* env_certfile = std::getenv("IGNITE_DATASET_CERTFILE"); + const char* env_keyfile = std::getenv("IGNITE_DATASET_KEYFILE"); + const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD"); + + if (env_cache_name) { + cache_name = string(env_cache_name); + } else { + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "cache_name", &cache_name)); + } + + if (env_host) { + host = string(env_host); + } else { + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); + } + + if (env_port) { + OP_REQUIRES(ctx, strings::safe_strto32(env_port, &port), + errors::InvalidArgument("IGNITE_DATASET_PORT environment " + "variable is not a valid integer: ", + env_port)); + } else { + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "port", &port)); + } + + if (env_local) { + local = true; + } else { + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "local", &local)); + } + + if (env_part) { + OP_REQUIRES(ctx, strings::safe_strto32(env_part, &part), + errors::InvalidArgument("IGNITE_DATASET_PART environment " + "variable is not a valid integer: ", + env_part)); + } else { + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "part", &part)); + } + + if (env_page_size) { + OP_REQUIRES(ctx, strings::safe_strto32(env_page_size, &page_size), + errors::InvalidArgument("IGNITE_DATASET_PAGE_SIZE " + "environment variable is not a valid " + "integer: ", + env_page_size)); + } else { + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "page_size", &page_size)); + } + + if (env_username) username = string(env_username); + + if (env_password) password = string(env_password); + + if (env_certfile) certfile = string(env_certfile); + + if (env_keyfile) keyfile = string(env_keyfile); + + if (env_cert_password) cert_password = string(env_cert_password); + + const Tensor* schema_tensor; + OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); + OP_REQUIRES(ctx, schema_tensor->dims() == 1, + errors::InvalidArgument("`schema` must be a vector.")); + + std::vector schema; + schema.reserve(schema_tensor->NumElements()); + for (int i = 0; i < schema_tensor->NumElements(); i++) { + schema.push_back(schema_tensor->flat()(i)); + } + + const Tensor* permutation_tensor; + OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor)); + OP_REQUIRES(ctx, permutation_tensor->dims() == 1, + errors::InvalidArgument("`permutation` must be a vector.")); + + std::vector permutation; + permutation.resize(permutation_tensor->NumElements()); + for (int i = 0; i < permutation_tensor->NumElements(); i++) { + // Inversed permutation. + permutation[permutation_tensor->flat()(i)] = i; + } + + DataTypeVector dtypes; + std::vector shapes; + + OP_REQUIRES_OK(ctx, SchemaToTypes(schema, &dtypes)); + OP_REQUIRES_OK(ctx, SchemaToShapes(schema, &shapes)); + + *output = new IgniteDataset( + ctx, std::move(cache_name), std::move(host), port, local, part, + page_size, std::move(username), std::move(password), + std::move(certfile), std::move(keyfile), std::move(cert_password), + std::move(schema), std::move(permutation), std::move(dtypes), + std::move(shapes)); + } +}; + +REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU), + IgniteDatasetOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h new file mode 100644 index 0000000000000000000000000000000000000000..75424c19ee4b7df5378aa23cb41db1752e8d0651 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" + +namespace tensorflow { + +class PlainClient : public Client { + public: + PlainClient(string host, int port, bool big_endian); + ~PlainClient(); + + Status Connect() override; + Status Disconnect() override; + bool IsConnected() override; + int GetSocketDescriptor() override; + Status ReadData(uint8_t* buf, const int32_t length) override; + Status WriteData(const uint8_t* buf, const int32_t length) override; + + private: + const string host_; + const int port_; + int sock_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc new file mode 100644 index 0000000000000000000000000000000000000000..cf672942c61e1239332711db12e62088737c4f41 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -0,0 +1,123 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" + +#include +#include +#include +#include + +#include +#include + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), host_(std::move(host)), port_(port), sock_(-1) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +Status PlainClient::Connect() { + if (sock_ == -1) { + sock_ = socket(AF_INET, SOCK_STREAM, 0); + if (sock_ == -1) return errors::Internal("Failed to create socket"); + } + + sockaddr_in server; + + server.sin_addr.s_addr = inet_addr(host_.c_str()); + if (server.sin_addr.s_addr == -1) { + hostent* he; + in_addr** addr_list; + + if ((he = gethostbyname(host_.c_str())) == NULL) + return errors::Internal("Failed to resolve hostname \"", host_, "\""); + + addr_list = (in_addr**)he->h_addr_list; + if (addr_list[0] != NULL) server.sin_addr = *addr_list[0]; + } + + server.sin_family = AF_INET; + server.sin_port = htons(port_); + + if (connect(sock_, (sockaddr*)&server, sizeof(server)) < 0) + return errors::Internal("Failed to connect to \"", host_, ":", port_, "\""); + + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; + + return Status::OK(); +} + +Status PlainClient::Disconnect() { + int close_res = close(sock_); + sock_ = -1; + + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" is closed"; + + return close_res == 0 + ? Status::OK() + : errors::Internal("Failed to correctly close connection"); +} + +bool PlainClient::IsConnected() { return sock_ != -1; } + +int PlainClient::GetSocketDescriptor() { return sock_; } + +Status PlainClient::ReadData(uint8_t* buf, const int32_t length) { + int received = 0; + + while (received < length) { + int res = recv(sock_, buf, length - received, 0); + + if (res < 0) + return errors::Internal("Error occurred while reading from socket: ", res, + ", ", string(strerror(errno))); + + if (res == 0) return errors::Internal("Server closed connection"); + + received += res; + buf += res; + } + + return Status::OK(); +} + +Status PlainClient::WriteData(const uint8_t* buf, const int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock_, buf, length - sent, 0); + + if (res < 0) + return errors::Internal("Error occurred while writing into socket: ", res, + ", ", string(strerror(errno))); + + sent += res; + buf += res; + } + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc new file mode 100644 index 0000000000000000000000000000000000000000..dad5aace5fabe1df58bb9579bf578f4c35324315 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -0,0 +1,142 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" + +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +#pragma comment(lib, "Ws2_32.lib") +#pragma comment(lib, "Mswsock.lib") +#pragma comment(lib, "AdvApi32.lib") + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), + host_(std::move(host)), + port_(port), + sock_(INVALID_SOCKET) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +Status PlainClient::Connect() { + WSADATA wsaData; + addrinfo *result = NULL, *ptr = NULL, hints; + + int res = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (res != 0) return errors::Internal("WSAStartup failed with error: ", res); + + ZeroMemory(&hints, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + res = getaddrinfo(host_.c_str(), std::to_string(port_).c_str(), &hints, + &result); + if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); + + auto clean = gtl::MakeCleanup([result] { freeaddrinfo(result); }); + + for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { + sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); + if (sock_ == INVALID_SOCKET) { + WSACleanup(); + return errors::Internal("Socket failed with error: ", WSAGetLastError()); + } + + res = connect(sock_, ptr->ai_addr, (int)ptr->ai_addrlen); + if (res == SOCKET_ERROR) { + closesocket(sock_); + sock_ = INVALID_SOCKET; + continue; + } + + break; + } + + if (sock_ == INVALID_SOCKET) { + WSACleanup(); + return errors::Internal("Unable to connect to server"); + } + + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; + + return Status::OK(); +} + +Status PlainClient::Disconnect() { + int res = shutdown(sock_, SD_SEND); + closesocket(sock_); + WSACleanup(); + + if (res == SOCKET_ERROR) + return errors::Internal("Shutdown failed with error: ", WSAGetLastError()); + else + return Status::OK(); +} + +bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; } + +int PlainClient::GetSocketDescriptor() { return sock_; } + +Status PlainClient::ReadData(uint8_t *buf, const int32_t length) { + int received = 0; + + while (received < length) { + int res = recv(sock_, (char *)buf, length - received, 0); + + if (res < 0) + return errors::Internal("Error occurred while reading from socket: ", + res); + + if (res == 0) return errors::Internal("Server closed connection"); + + received += res; + buf += res; + } + + return Status::OK(); +} + +Status PlainClient::WriteData(const uint8_t *buf, const int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock_, (char *)buf, length - sent, 0); + + if (res < 0) + return errors::Internal("Error occurred while writing into socket: ", + res); + + sent += res; + buf += res; + } + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc new file mode 100644 index 0000000000000000000000000000000000000000..ceb479b0846574a35d86002ebb9c3e8e1d3687ac --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -0,0 +1,151 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" + +#include +#include + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +static int PasswordCb(char *buf, int size, int rwflag, void *password) { + strncpy(buf, (char *)(password), size); + buf[size - 1] = '\0'; + return (strlen(buf)); +} + +SslWrapper::SslWrapper(std::shared_ptr client, string certfile, + string keyfile, string cert_password, bool big_endian) + : Client(big_endian), + client_(client), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + ctx_(nullptr), + ssl_(nullptr) {} + +SslWrapper::~SslWrapper() { + if (IsConnected()) { + Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } + + if (ctx_ != nullptr) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } + + if (ssl_ != nullptr) { + SSL_free(ssl_); + ssl_ = nullptr; + } +} + +Status SslWrapper::InitSslContext() { + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + + ctx_ = SSL_CTX_new(SSLv23_method()); + if (ctx_ == NULL) return errors::Internal("Couldn't create SSL context"); + + SSL_CTX_set_default_passwd_cb(ctx_, PasswordCb); + SSL_CTX_set_default_passwd_cb_userdata(ctx_, (void *)cert_password_.c_str()); + + if (SSL_CTX_use_certificate_chain_file(ctx_, certfile_.c_str()) != 1) + return errors::Internal("Couldn't load cetificate chain (file '", certfile_, + "')"); + + string private_key_file = keyfile_.empty() ? certfile_ : keyfile_; + if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(), + SSL_FILETYPE_PEM) != 1) + return errors::Internal("Couldn't load private key (file '", + private_key_file, "')"); + + return Status::OK(); +} + +Status SslWrapper::Connect() { + if (ctx_ == NULL) { + TF_RETURN_IF_ERROR(InitSslContext()); + } + + ssl_ = SSL_new(ctx_); + if (ssl_ == NULL) + return errors::Internal("Failed to establish SSL connection"); + + TF_RETURN_IF_ERROR(client_->Connect()); + + SSL_set_fd(ssl_, client_->GetSocketDescriptor()); + if (SSL_connect(ssl_) != 1) + return errors::Internal("Failed to establish SSL connection"); + + LOG(INFO) << "SSL connection established"; + + return Status::OK(); +} + +Status SslWrapper::Disconnect() { + SSL_free(ssl_); + ssl_ = nullptr; + + LOG(INFO) << "SSL connection closed"; + + return client_->Disconnect(); +} + +bool SslWrapper::IsConnected() { return client_->IsConnected(); } + +int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); } + +Status SslWrapper::ReadData(uint8_t *buf, const int32_t length) { + int received = 0; + + while (received < length) { + int res = SSL_read(ssl_, buf, length - received); + + if (res < 0) + return errors::Internal("Error occurred while reading from SSL socket: ", + res); + + if (res == 0) return errors::Internal("Server closed SSL connection"); + + received += res; + buf += res; + } + + return Status::OK(); +} + +Status SslWrapper::WriteData(const uint8_t *buf, const int32_t length) { + int sent = 0; + + while (sent < length) { + int res = SSL_write(ssl_, buf, length - sent); + + if (res < 0) + return errors::Internal("Error occurred while writing into socket: ", + res); + + sent += res; + buf += res; + } + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..0406644bbaab3de816540ce85e84b489ea9fff12 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" + +#include + +namespace tensorflow { + +class SslWrapper : public Client { + public: + SslWrapper(std::shared_ptr client, string certfile, string keyfile, + string cert_password, bool big_endian); + ~SslWrapper(); + + Status Connect() override; + Status Disconnect() override; + bool IsConnected() override; + int GetSocketDescriptor() override; + Status ReadData(uint8_t* buf, const int32_t length) override; + Status WriteData(const uint8_t* buf, const int32_t length) override; + + private: + Status InitSslContext(); + + std::shared_ptr client_; + string certfile_; + string keyfile_; + string cert_password_; + SSL_CTX* ctx_; + SSL* ssl_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..3d6fbe00e6296941b4ce77d1238a79099bb9a5aa --- /dev/null +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("IgniteDataset") + .Input("cache_name: string") + .Input("host: string") + .Input("port: int32") + .Input("local: bool") + .Input("part: int32") + .Input("page_size: int32") + .Input("schema: int32") + .Input("permutation: int32") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +IgniteDataset that allows to get data from Apache Ignite. + +Apache Ignite is a memory-centric distributed database, caching, and processing +platform for transactional, analytical, and streaming workloads, delivering +in-memory speeds at petabyte scale. This contrib package contains an +integration between Apache Ignite and TensorFlow. The integration is based on +tf.data from TensorFlow side and Binary Client Protocol from Apache Ignite side. +It allows to use Apache Ignite as a datasource for neural network training, +inference and all other computations supported by TensorFlow. Ignite Dataset +is based on Apache Ignite Binary Client Protocol. + +cache_name: Ignite Cache Name. +host: Ignite Thin Client Host. +port: Ignite Thin Client Port. +local: Local flag that defines that data should be fetched from local host only. +part: Partition data should be fetched from. +page_size: Page size for Ignite Thin Client. +schema: Internal structure that defines schema of cache objects. +permutation: Internal structure that defines permutation of cache objects. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..288d4853207176b215cd8a0cdcbfb2de5791ecb8 --- /dev/null +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -0,0 +1,772 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Ignite Dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import socket +import ssl +import struct + +from tensorflow.contrib.ignite.python.ops import gen_dataset_ops +from tensorflow.contrib.ignite.python.ops import ignite_op_loader # pylint: disable=unused-import +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape + + +class Readable(object): + """Readable abstract class that exposes methods to do reading-related + + operations. + """ + + @abc.abstractmethod + def __init__(self): + pass + + def read_byte(self): + """Reads and returnes byte.""" + return self._read("b", 1) + + def read_short(self): + """Reads and returns short (2 bytes, little-endian).""" + return self._read("h", 2) + + def read_int(self): + """Reads and returns int (4 bytes, little-endian).""" + return self._read("i", 4) + + def read_long(self): + """Reads and returns long (8 bytes, little-endian).""" + return self._read("q", 8) + + def skip(self, length): + """Skips the specified number of bytes.""" + self.read_data(length) + + @abc.abstractmethod + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + return None + + def _read(self, data_type, length): + """Reads, unpacks and returns specified type (little-endian).""" + data_buffer = self.read_data(length) + return struct.unpack("<" + data_type, data_buffer)[0] + + +class DataBuffer(Readable): + """DataBuffer class that exposes methods to read data from a byte buffer.""" + + def __init__(self, data_buffer): + """Constructs a new instance based on the specified byte buffer. + + Args: + data_buffer: Buffer to be read. + """ + Readable.__init__(self) + self.buffer = data_buffer + self.ptr = 0 + + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + data_buffer = self.buffer[self.ptr:][:length] + self.ptr += length + return data_buffer + + +class TcpClient(Readable): + """TcpClient class that exposes methods to read data from a socket.""" + + def __init__(self, host, port, certfile=None, keyfile=None, password=None): + """Constructs a new instance based on the specified host and port. + + Args: + host: Host to be connected. + port: Port to be connected. + certfile: File in PEM format containing the certificate as well as any + number of CA certificates needed to establish the certificate's + authenticity. + keyfile: File containing the private key (otherwise the private key will + be taken from certfile as well). + password: Password to be used if the private key is encrypted and a + password is necessary. + + Raises: + ValueError: If the wrong combination of arguments is provided. + """ + Readable.__init__(self) + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + if certfile is not None: + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.load_cert_chain(certfile, keyfile, password) + self.sock = context.wrap_socket(self.sock) + else: + if keyfile is not None: + raise ValueError("SSL is disabled, keyfile must not be specified " + "(to enable SSL specify certfile)") + if password is not None: + raise ValueError("SSL is disabled, password must not be specified " + "(to enable SSL specify certfile)") + + self.host = host + self.port = port + + def __enter__(self): + """Connects to host and port specified in the constructor.""" + self.sock.connect((self.host, self.port)) + return self + + def __exit__(self, t, v, traceback): + """Disconnects the socket.""" + self.sock.close() + + def write_byte(self, v): + """Writes the specified byte.""" + self._write(v, "b") + + def write_short(self, v): + """Writes the specified short (2 bytes, little-endian).""" + self._write(v, "h") + + def write_int(self, v): + """Writes the specified short (4 bytes, little-endian).""" + self._write(v, "i") + + def write_long(self, v): + """Writes the specified int (8 bytes, little-endian).""" + self._write(v, "q") + + def write_string(self, v): + """Writes the specified string.""" + self.sock.sendall(v.encode("UTF-8")) + + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + data_buffer = None + rem = length + while rem > 0: + buf = self.sock.recv(rem) + rem = rem - len(buf) + if data_buffer is None: + data_buffer = buf + else: + data_buffer += buf + return data_buffer + + def _write(self, value, data_type): + """Packs and writes data using the specified type (little-endian).""" + data_buffer = struct.pack("<" + data_type, value) + self.sock.sendall(data_buffer) + + +class BinaryType(object): + """BinaryType class that encapsulated type id, type name and fields.""" + + def __init__(self, type_id, type_name, fields): + """Constructs a new instance of BinaryType.""" + self.type_id = type_id + self.type_name = type_name + self.fields = fields + + +class BinaryField(object): + """BinaryField class that encapsulated field name, type id and field id.""" + + def __init__(self, field_name, type_id, field_id): + """Constructs a new instance of BinaryField.""" + self.field_name = field_name + self.type_id = type_id + self.field_id = field_id + + +# Binary types defined in Apache Ignite Thin client and supported by +# TensorFlow on Apache Ignite, see +# https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. +# True means that type is a vector, False means type is scalar. +types = { + 1: (dtypes.uint8, False), + 2: (dtypes.int16, False), + 3: (dtypes.int32, False), + 4: (dtypes.int64, False), + 5: (dtypes.float32, False), + 6: (dtypes.float64, False), + 7: (dtypes.uint16, False), + 8: (dtypes.bool, False), + 9: (dtypes.string, False), + 12: (dtypes.uint8, True), + 13: (dtypes.int16, True), + 14: (dtypes.int32, True), + 15: (dtypes.int64, True), + 16: (dtypes.float32, True), + 17: (dtypes.float64, True), + 18: (dtypes.uint16, True), + 19: (dtypes.bool, True), + 20: (dtypes.string, True) +} + + +class TypeTreeNode(object): + """TypeTreeNode class exposes methods to format object tree structure + + data. + """ + + def __init__(self, name, type_id, fields=None, permutation=None): + """Constructs a new instance of TypeTreeNode. + + Args: + name: Name of the object tree node. + type_id: Type id of the object tree node. + fields: List of fields (children of the object tree node). + permutation: Permutation that should be applied to order object children. + """ + self.name = name + self.type_id = type_id + self.fields = fields + self.permutation = permutation + + def to_output_classes(self): + """Formats the tree object as required by `Dataset.output_classes`.""" + if self.fields is None: + return ops.Tensor + output_classes = {} + for field in self.fields: + output_classes[field.name] = field.to_output_classes() + return output_classes + + def to_output_shapes(self): + """Formats the tree object as required by `Dataset.output_shapes`.""" + if self.fields is None: + if self.type_id in types: + object_type = types[self.type_id] + is_array = object_type[1] + if is_array: + return tensor_shape.TensorShape([None]) + return tensor_shape.TensorShape([]) + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) + output_shapes = {} + for field in self.fields: + output_shapes[field.name] = field.to_output_shapes() + return output_shapes + + def to_output_types(self): + """Formats the tree object as required by `Dataset.output_types`.""" + if self.fields is None: + if self.type_id in types: + object_type = types[self.type_id] + return object_type[0] + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) + else: + output_types = {} + for field in self.fields: + output_types[field.name] = field.to_output_types() + return output_types + + def to_flat(self): + """Returns a list of node types.""" + return self.to_flat_rec([]) + + def to_permutation(self): + """Returns a permutation that should be applied to order object leaves.""" + correct_order_dict = {} + self.traversal_rec(correct_order_dict, 0) + object_order = [] + self.traversal_permutation_rec(object_order) + return [correct_order_dict[o] for o in object_order] + + def to_flat_rec(self, flat): + """Formats a list of leaf node types in pre-order.""" + if self.fields is None: + flat.append(self.type_id) + else: + for field in self.fields: + field.to_flat_rec(flat) + return flat + + def traversal_permutation_rec(self, permutation): + """Collects nodes in accordance with permutation.""" + if self.fields is None: + permutation.append(self) + else: + for idx in self.permutation: + field = self.fields[idx] + field.traversal_permutation_rec(permutation) + + def traversal_rec(self, d, i): + """Collects nodes in pre-order traversal.""" + if self.fields is None: + d[self] = i + i += 1 + else: + for field in self.fields: + i = field.traversal_rec(d, i) + return i + + +class IgniteClient(TcpClient): + """IgniteClient enables working with Apache Ignite using a thin client. + + This client works with assumption that all object in the cache + have the same structure (homogeneous objects) and the cache contains at + least one object. + """ + + def __init__(self, + host, + port, + username=None, + password=None, + certfile=None, + keyfile=None, + cert_password=None): + """Constructs a new instance of IgniteClient. + + Args: + host: Apache Ignite Thin client host to be connected. + port: Apache Ignite Thin client port to be connected. + username: Apache Ignite Thin Client authentication username. + password: Apache Ignite Thin Client authentication password. + certfile: File in PEM format containing the certificate as well as any + number of CA certificates needed to establish the certificate's + authenticity. + keyfile: File containing the private key (otherwise the private key will + be taken from certfile as well). + cert_password: Password to be used if the private key is encrypted and a + password is necessary. + """ + TcpClient.__init__(self, host, port, certfile, keyfile, cert_password) + self.username = username + self.password = password + + def handshake(self): + """Makes a handshake after connect and before any other calls.""" + msg_len = 8 + + if self.username is None: + msg_len += 1 + else: + msg_len += 5 + len(self.username) + + if self.password is None: + msg_len += 1 + else: + msg_len += 5 + len(self.password) + + self.write_int(msg_len) # Message length + self.write_byte(1) # Handshake operation + self.write_short(1) # Version (1.1.0) + self.write_short(1) + self.write_short(0) + self.write_byte(2) # Thin client + + if self.username is None: # Username + self.write_byte(101) + else: + self.write_byte(9) + self.write_int(len(self.username)) + self.write_string(self.username) + + if self.password is None: # Password + self.write_byte(101) + else: + self.write_byte(9) + self.write_int(len(self.password)) + self.write_string(self.password) + + self.read_int() # Result length + res = self.read_byte() + + if res != 1: + serv_ver_major = self.read_short() + serv_ver_minor = self.read_short() + serv_ver_patch = self.read_short() + err_msg = self._parse_string() + if err_msg is None: + raise RuntimeError( + "Handshake Error [result=%d, version=%d.%d.%d]" % + (res, serv_ver_major, serv_ver_minor, serv_ver_patch)) + else: + raise RuntimeError( + "Handshake Error [result=%d, version=%d.%d.%d, message='%s']" % + (res, serv_ver_major, serv_ver_minor, serv_ver_patch, err_msg)) + + def get_cache_type(self, cache_name): + """Collects type information about objects stored in the specified cache.""" + cache_name_hash = self._java_hash_code(cache_name) + self.write_int(25) # Message length + self.write_short(2000) # Operation code + self.write_long(0) # Request ID + self.write_int(cache_name_hash) # Cache name + self.write_byte(0) # Flags + self.write_byte(101) # Filter (NULL) + self.write_int(1) # Cursor page size + self.write_int(-1) # Partition to query + self.write_byte(0) # Local flag + + result_length = self.read_int() + self.read_long() # Request id + status = self.read_int() + + if status != 0: + err_msg = self._parse_string() + if err_msg is None: + raise RuntimeError("Scan Query Error [status=%s]" % status) + else: + raise RuntimeError( + "Scan Query Error [status=%s, message='%s']" % (status, err_msg)) + + self.read_long() # Cursor id + row_count = self.read_int() + + if row_count == 0: + raise RuntimeError("Scan Query returned empty result, so it's " + "impossible to derive the cache type") + + payload = DataBuffer(self.read_data(result_length - 25)) + + self.read_byte() # Next page + + res = TypeTreeNode("root", 0, [ + self._collect_types("key", payload), + self._collect_types("val", payload) + ], [0, 1]) + + return res + + def _java_hash_code(self, s): + """Computes hash code of the specified string using Java code.""" + h = 0 + for c in s: + h = (31 * h + ord(c)) & 0xFFFFFFFF + return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000 + + def _collect_types(self, field_name, data): + """Extracts type information from the specified object.""" + type_id = data.read_byte() + + # Byte scalar. + if type_id == 1: + data.skip(1) + return TypeTreeNode(field_name, type_id) + + # Short scalar. + if type_id == 2: + data.skip(2) + return TypeTreeNode(field_name, type_id) + + # Integer scalar. + if type_id == 3: + data.skip(4) + return TypeTreeNode(field_name, type_id) + + # Long scalar. + if type_id == 4: + data.skip(8) + return TypeTreeNode(field_name, type_id) + + # Float scalar. + if type_id == 5: + data.skip(4) + return TypeTreeNode(field_name, type_id) + + # Double scalar. + if type_id == 6: + data.skip(8) + return TypeTreeNode(field_name, type_id) + + # Char scalar. + if type_id == 7: + data.skip(2) + return TypeTreeNode(field_name, type_id) + + # Bool scalar. + if type_id == 8: + data.skip(1) + return TypeTreeNode(field_name, type_id) + + # String scalar. + if type_id == 9: + length = data.read_int() + data.skip(length) + return TypeTreeNode(field_name, type_id) + + # UUID scalar. + if type_id == 10: + data.skip(16) + return TypeTreeNode(field_name, type_id) + + # Date scalar. + if type_id == 11: + data.skip(8) + return TypeTreeNode(field_name, type_id) + + # Byte array. + if type_id == 12: + length = data.read_int() + data.skip(length) + return TypeTreeNode(field_name, type_id) + + # Short array. + if type_id == 13: + length = data.read_int() + data.skip(length * 2) + return TypeTreeNode(field_name, type_id) + + # Integer array. + if type_id == 14: + length = data.read_int() + data.skip(length * 4) + return TypeTreeNode(field_name, type_id) + + # Long array. + if type_id == 15: + length = data.read_int() + data.skip(length * 8) + return TypeTreeNode(field_name, type_id) + + # Float array. + if type_id == 16: + length = data.read_int() + data.skip(length * 4) + return TypeTreeNode(field_name, type_id) + + # Double array. + if type_id == 17: + length = data.read_int() + data.skip(length * 8) + return TypeTreeNode(field_name, type_id) + + # Char array. + if type_id == 18: + length = data.read_int() + data.skip(length * 2) + return TypeTreeNode(field_name, type_id) + + # Bool array. + if type_id == 19: + length = data.read_int() + data.skip(length) + return TypeTreeNode(field_name, type_id) + + # String array. + if type_id == 20: + length = data.read_int() + for _ in range(length): + header = data.read_byte() + if header == 9: + str_length = data.read_int() + data.skip(str_length) + elif header == 101: + pass + else: + raise RuntimeError( + "Unknown binary type when expected string [type_id=%d]" % header) + return TypeTreeNode(field_name, type_id) + + # UUID array. + if type_id == 21: + length = data.read_int() + data.skip(length * 16) # TODO(dmitrievanthony): support NULL values. + return TypeTreeNode(field_name, type_id) + + # Date array. + if type_id == 22: + length = data.read_int() + data.skip(length * 8) + return TypeTreeNode(field_name, type_id) + + # Wrapped Binary Object. + if type_id == 27: + length = data.read_int() + inner_data = data.read_data(length) + data.read_int() # Offset + return self._collect_types(field_name, DataBuffer(inner_data)) + + # Complex Object. + if type_id == 103: + data.read_byte() # Object version + data.read_short() # Object flags + obj_type_id = data.read_int() + data.read_int() # Object hash code + obj_length = data.read_int() + data.read_int() # Object schema id + obj_schema_offset = data.read_int() + + obj_type = self._get_type(obj_type_id) + children = [] + + for obj_field in obj_type.fields: + child = self._collect_types(obj_field.field_name, data) + children.append(child) + + children_sorted = sorted(children, key=lambda child: child.name) + permutation = [children_sorted.index(child) for child in children] + children = children_sorted + + data.skip(obj_length - obj_schema_offset) + + return TypeTreeNode(field_name, type_id, children, permutation) + + raise RuntimeError("Unknown binary type [type_id=%d]" % type_id) + + def _get_type(self, type_id): + """Queries Apache Ignite information about type by type id.""" + self.write_int(14) # Message length + self.write_short(3002) # Operation code + self.write_long(0) # Request ID + self.write_int(type_id) # Type ID + + self.read_int() # Result length + self.read_long() # Request id + status = self.read_int() + + if status != 0: + err_msg = self._parse_string() + if err_msg is None: + raise RuntimeError("Get Binary Type Error [status=%d, message='%s']" % + (status, err_msg)) + else: + raise RuntimeError("Get Binary Type Error [status=%d]" % status) + + binary_type_exists = self.read_byte() + + if binary_type_exists == 0: + raise RuntimeError("Binary type not found [type_id=%d] " % type_id) + + binary_type_id = self.read_int() + binary_type_name = self._parse_string() + self._parse_string() # Affinity field name + + fields = [] + for _ in range(self.read_int()): + field_name = self._parse_string() + field_type_id = self.read_int() + field_id = self.read_int() + + field = BinaryField(field_name, field_type_id, field_id) + fields.append(field) + + is_enum = self.read_byte() + if is_enum == 1: + raise RuntimeError("Enum fields are not supported yet") + + schema_cnt = self.read_int() + for _ in range(schema_cnt): + self.read_int() # Schema id + field_cnt = self.read_int() + self.skip(field_cnt * 4) + + return BinaryType(binary_type_id, binary_type_name, fields) + + def _parse_string(self): + """Parses string.""" + header = self.read_byte() + if header == 9: + length = self.read_int() + return self.read_data(length).decode("utf-8") + if header == 101: + return None + raise RuntimeError( + "Unknown binary type when expected string [type_id=%d]" % header) + + +class IgniteDataset(dataset_ops.DatasetSource): + """Apache Ignite is a memory-centric distributed database, caching, and + + processing platform for transactional, analytical, and streaming workloads, + delivering in-memory speeds at petabyte scale. This contrib package + contains an integration between Apache Ignite and TensorFlow. The + integration is based on tf.data from TensorFlow side and Binary Client + Protocol from Apache Ignite side. It allows to use Apache Ignite as a + datasource for neural network training, inference and all other + computations supported by TensorFlow. Ignite Dataset is based on Apache + Ignite Binary Client Protocol. + """ + + def __init__(self, + cache_name, + host="localhost", + port=10800, + local=False, + part=-1, + page_size=100, + username=None, + password=None, + certfile=None, + keyfile=None, + cert_password=None): + """Create a IgniteDataset. + + Args: + cache_name: Cache name to be used as datasource. + host: Apache Ignite Thin Client host to be connected. + port: Apache Ignite Thin Client port to be connected. + local: Local flag that defines to query only local data. + part: Number of partitions to be queried. + page_size: Apache Ignite Thin Client page size. + username: Apache Ignite Thin Client authentication username. + password: Apache Ignite Thin Client authentication password. + certfile: File in PEM format containing the certificate as well as any + number of CA certificates needed to establish the certificate's + authenticity. + keyfile: File containing the private key (otherwise the private key will + be taken from certfile as well). + cert_password: Password to be used if the private key is encrypted and a + password is necessary. + """ + super(IgniteDataset, self).__init__() + + with IgniteClient(host, port, username, password, certfile, keyfile, + cert_password) as client: + client.handshake() + self.cache_type = client.get_cache_type(cache_name) + + self.cache_name = ops.convert_to_tensor( + cache_name, dtype=dtypes.string, name="cache_name") + self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host") + self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port") + self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local") + self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part") + self.page_size = ops.convert_to_tensor( + page_size, dtype=dtypes.int32, name="page_size") + self.schema = ops.convert_to_tensor( + self.cache_type.to_flat(), dtype=dtypes.int32, name="schema") + self.permutation = ops.convert_to_tensor( + self.cache_type.to_permutation(), + dtype=dtypes.int32, + name="permutation") + + def _as_variant_tensor(self): + return gen_dataset_ops.ignite_dataset(self.cache_name, self.host, self.port, + self.local, self.part, self.page_size, + self.schema, self.permutation) + + @property + def output_classes(self): + return self.cache_type.to_output_classes() + + @property + def output_shapes(self): + return self.cache_type.to_output_shapes() + + @property + def output_types(self): + return self.cache_type.to_output_types() diff --git a/tensorflow/contrib/data/python/ops/contrib_op_loader.py b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py similarity index 94% rename from tensorflow/contrib/data/python/ops/contrib_op_loader.py rename to tensorflow/contrib/ignite/python/ops/ignite_op_loader.py index 8f495a9dc9c82311435e71d2ac9ed35fd9aea794..c9af7386cf0a26ed1a950130aa36caa7fb831fd0 100644 --- a/tensorflow/contrib/data/python/ops/contrib_op_loader.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Python helper for loading contrib ops and kernels.""" +"""Python helper for loading Ignite ops and kernels.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh new file mode 100755 index 0000000000000000000000000000000000000000..f4607ce8adab38c27d040ad1118858d17b924a6a --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-plain.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh \ +-u "jdbc:ignite:thin://127.0.0.1/" \ +--run=/data/sql/init.sql + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml new file mode 100644 index 0000000000000000000000000000000000000000..d900174a8abb3987c380e4a1a193ed81295fb88c --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ef29b5f14a4b2fea2400ec4d56a7ad2cf44cf2cb --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py @@ -0,0 +1,82 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""Tests for IgniteDataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.ignite import IgniteDataset +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + + +class IgniteDatasetTest(test.TestCase): + """The Apache Ignite servers have to setup before the test and tear down + + after the test manually. The docker engine has to be installed. + + To setup Apache Ignite servers: + $ bash start_ignite.sh + + To tear down Apache Ignite servers: + $ bash stop_ignite.sh + """ + + def test_ignite_dataset_with_plain_client(self): + """Test Ignite Dataset with plain client. + + """ + self._clear_env() + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300) + self._check_dataset(ds) + + def _clear_env(self): + """Clears environment variables used by Ignite Dataset. + + """ + if "IGNITE_DATASET_USERNAME" in os.environ: + del os.environ["IGNITE_DATASET_USERNAME"] + if "IGNITE_DATASET_PASSWORD" in os.environ: + del os.environ["IGNITE_DATASET_PASSWORD"] + if "IGNITE_DATASET_CERTFILE" in os.environ: + del os.environ["IGNITE_DATASET_CERTFILE"] + if "IGNITE_DATASET_CERT_PASSWORD" in os.environ: + del os.environ["IGNITE_DATASET_CERT_PASSWORD"] + + def _check_dataset(self, dataset): + """Checks that dataset provides correct data.""" + self.assertEqual(dtypes.int64, dataset.output_types["key"]) + self.assertEqual(dtypes.string, dataset.output_types["val"]["NAME"]) + self.assertEqual(dtypes.int64, dataset.output_types["val"]["VAL"]) + + it = dataset.make_one_shot_iterator() + ne = it.get_next() + + with session.Session() as sess: + rows = [sess.run(ne), sess.run(ne), sess.run(ne)] + with self.assertRaises(errors.OutOfRangeError): + sess.run(ne) + + self.assertEqual({"key": 1, "val": {"NAME": b"TEST1", "VAL": 42}}, rows[0]) + self.assertEqual({"key": 2, "val": {"NAME": b"TEST2", "VAL": 43}}, rows[1]) + self.assertEqual({"key": 3, "val": {"NAME": b"TEST3", "VAL": 44}}, rows[2]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/ignite/python/tests/sql/init.sql b/tensorflow/contrib/ignite/python/tests/sql/init.sql new file mode 100644 index 0000000000000000000000000000000000000000..5a192aef17e22544e853cb78b4eb235beded42fe --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/sql/init.sql @@ -0,0 +1,20 @@ +-- Copyright 2018 The TensorFlow Authors. All Rights Reserved. +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- ============================================================================== + +CREATE TABLE TEST_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR, VAL LONG); + +INSERT INTO TEST_CACHE VALUES (1, 'TEST1', 42); +INSERT INTO TEST_CACHE VALUES (2, 'TEST2', 43); +INSERT INTO TEST_CACHE VALUES (3, 'TEST3', 44); diff --git a/tensorflow/contrib/ignite/python/tests/start_ignite.sh b/tensorflow/contrib/ignite/python/tests/start_ignite.sh new file mode 100755 index 0000000000000000000000000000000000000000..a67bd44f2fb0d654ba07f022a5070c68df8e2ede --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/start_ignite.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +IGNITE_VERSION=2.6.0 +SCRIPT_PATH="$( cd "$(dirname "$0")" ; pwd -P )" + +# Start Apache Ignite with plain client listener. +docker run -itd --name ignite-plain -p 42300:10800 \ +-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-plain.sh diff --git a/tensorflow/contrib/ignite/python/tests/stop_ignite.sh b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh new file mode 100755 index 0000000000000000000000000000000000000000..8f03dbd1ede61f548d3de9d9738f97667e75df3c --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +docker rm -f ignite-plain +docker rm -f ignite-ssl +docker rm -f ignite-ssl-auth diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index 370a8caf6a71cc09629a5e75fd9151ae3f0f3b6d..788bf04b28aaad5d532258e0946fd03111384c69 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -156,6 +156,7 @@ namespace functor { TF_CALL_uint8(DECLARE_FUNCTOR); TF_CALL_int32(DECLARE_FUNCTOR); TF_CALL_int64(DECLARE_FUNCTOR); +TF_CALL_half(DECLARE_FUNCTOR); TF_CALL_float(DECLARE_FUNCTOR); TF_CALL_double(DECLARE_FUNCTOR); @@ -175,6 +176,7 @@ TF_CALL_double(DECLARE_FUNCTOR); TF_CALL_uint8(REGISTER); TF_CALL_int32(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h index 6b63eed1303accc330293b3a44cdb9def7881666..7fac774d07fa8e07a0730ad018ba70e2c73a9cc5 100644 --- a/tensorflow/contrib/image/kernels/image_ops.h +++ b/tensorflow/contrib/image/kernels/image_ops.h @@ -71,14 +71,7 @@ class ProjectiveGenerator { (transform[3] * output_x + transform[4] * output_y + transform[5]) / projection; - // TODO(ringwalt): Add a fill value input. -#if (defined __CUDA_ARCH__) && (CUDART_VERSION < 8000) - // On CUDA versions previous to 8.0, only __shared__ variables - // could be declared as static in the device code. const T fill_value = T(0); -#else - static const T fill_value = T(0); -#endif switch (interpolation_) { case INTERPOLATION_NEAREST: // Switch the order of x and y again for indexing into the image. diff --git a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc index 8743a5ff724a5000ed0376045340f9ceaaccbfd2..36b9a236a6ea48e3b27dac956c93aecee321e2b7 100644 --- a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc @@ -32,6 +32,7 @@ typedef Eigen::GpuDevice GPUDevice; template class FillProjectiveTransform; template class FillProjectiveTransform; template class FillProjectiveTransform; +template class FillProjectiveTransform; template class FillProjectiveTransform; template class FillProjectiveTransform; diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 376c0751eebb4906920ed338647630798d509113..4997c31a7fc7f4243d03b22fc9c01fb13a2a25a4 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -272,6 +272,15 @@ class ImageOpsTest(test_util.TensorFlowTestCase): with self.cached_session(): self.assertAllEqual([[[[1], [0]], [[0], [1]]]], result.eval()) + def test_transform_data_types(self): + for dtype in _DTYPES: + image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) + value = image_ops.transform(image, [1] * 8) + with self.test_session(use_gpu=True): + self.assertAllEqual( + value.eval(), + np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + class BipartiteMatchTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 17ee8c0733d535a140baa942c09adf12b3765f84..60e1d85ea9c08a51763fdaf08853f8d9b67347e5 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -112,11 +112,9 @@ def safe_embedding_lookup_sparse(embedding_weights, dtype = sparse_weights.dtype if sparse_weights is not None else None if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) - if not isinstance(embedding_weights[0], - resource_variable_ops.ResourceVariable): - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 85af9de4e4f6b4a90dd51830a482db357541f466..3b7ae72e9c460ee7a38f72b03e1c1ad48e335f57 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -2360,7 +2360,7 @@ class BatchNormTest(test.TestCase): batch_size * height * width, expected_var) images = constant_op.constant( image_values, shape=image_shape, dtype=dtypes.float32) - is_training = variables_lib.Variable(True) + is_training = variables_lib.VariableV1(True) output = _layers.batch_norm( images, decay=0.1, @@ -2507,7 +2507,7 @@ class BatchNormTest(test.TestCase): batch_size * height * width, expected_var) images = constant_op.constant( image_values, shape=image_shape, dtype=dtypes.float32) - is_training = variables_lib.Variable(True) + is_training = variables_lib.VariableV1(True) output = _layers.batch_norm( images, decay=0.1, diff --git a/tensorflow/contrib/learn/python/learn/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py index 33180b778a002e35da8e634b3db77a23ff4f65df..a160cb54a3954c73d35f0d3fb5b437c5f9f08984 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions_test.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions_test.py @@ -162,9 +162,9 @@ class GraphActionsTest(test.TestCase): Tuple of 3 `Tensor` objects, 2 input and 1 output. """ variables_lib.create_global_step() - in0 = variables.Variable(1.0) + in0 = variables.VariableV1(1.0) in1 = variables_lib.local_variable(2.0) - fake_table = variables.Variable( + fake_table = variables.VariableV1( 3.0, trainable=False, collections=['fake_tables'], @@ -312,8 +312,8 @@ class GraphActionsTest(test.TestCase): def test_evaluate_ready_for_local_init(self): with ops.Graph().as_default() as g, self.session(g): variables_lib.create_global_step() - v = variables.Variable(1.0) - variables.Variable( + v = variables.VariableV1(1.0) + variables.VariableV1( v + 1, collections=[ops.GraphKeys.LOCAL_VARIABLES], trainable=False) ready_for_local_init_op = variables.report_uninitialized_variables( variables.global_variables()) @@ -456,9 +456,9 @@ class GraphActionsTrainTest(test.TestCase): Tuple of 3 `Tensor` objects, 2 input and 1 output. """ variables_lib.create_global_step() - in0 = variables.Variable(1.0) + in0 = variables.VariableV1(1.0) in1 = variables_lib.local_variable(2.0) - fake_table = variables.Variable( + fake_table = variables.VariableV1( 3.0, trainable=False, collections=['fake_tables'], diff --git a/tensorflow/contrib/learn/python/learn/monitors_test.py b/tensorflow/contrib/learn/python/learn/monitors_test.py index 83e48a36e71caae7474f6bb8a33379ab75f7abcf..d4a7169bb632f682a19268529fee56b01ab5fbcb 100644 --- a/tensorflow/contrib/learn/python/learn/monitors_test.py +++ b/tensorflow/contrib/learn/python/learn/monitors_test.py @@ -247,7 +247,7 @@ class MonitorsTest(test.TestCase): def test_logging_trainable(self): with ops.Graph().as_default() as g, self.session(g): - var = variables.Variable(constant_op.constant(42.0), name='foo') + var = variables.VariableV1(constant_op.constant(42.0), name='foo') var.initializer.run() cof = constant_op.constant(1.0) loss = math_ops.subtract( @@ -261,7 +261,7 @@ class MonitorsTest(test.TestCase): with ops.Graph().as_default() as g, self.session(g): log_dir = 'log/dir' summary_writer = testing.FakeSummaryWriter(log_dir, g) - var = variables.Variable(0.0) + var = variables.VariableV1(0.0) var.initializer.run() tensor = state_ops.assign_add(var, 1.0) summary_op = summary.scalar('my_summary', tensor) @@ -526,8 +526,8 @@ class MonitorsTest(test.TestCase): monitor0 = learn.monitors.GraphDump() monitor1 = learn.monitors.GraphDump() with ops.Graph().as_default() as g, self.session(g): - const_var = variables.Variable(42.0, name='my_const') - counter_var = variables.Variable(0.0, name='my_counter') + const_var = variables.VariableV1(42.0, name='my_const') + counter_var = variables.VariableV1(0.0, name='my_counter') assign_add = state_ops.assign_add(counter_var, 1.0, name='my_assign_add') variables.global_variables_initializer().run() @@ -569,7 +569,7 @@ class MonitorsTest(test.TestCase): monitor = learn.monitors.CaptureVariable( var_name='my_assign_add:0', every_n=8, first_n=2) with ops.Graph().as_default() as g, self.session(g): - var = variables.Variable(0.0, name='my_var') + var = variables.VariableV1(0.0, name='my_var') var.initializer.run() state_ops.assign_add(var, 1.0, name='my_assign_add') self._run_monitor(monitor, num_epochs=3, num_steps_per_epoch=10) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 9ecf023e03d92f10a44e89e27a9a017f184cd501..8466dc36d13e223aed4f1dfe8e39a6f91c99fa55 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -125,7 +125,7 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero): ], example_ids=[str(i) for i in range(num_examples)]) - weights = variables_lib.Variable( + weights = variables_lib.VariableV1( array_ops.zeros([dim], dtype=dtypes.float32)) variables_dict = dict( sparse_features_weights=[weights], @@ -184,7 +184,7 @@ def make_dense_examples_and_variables_dicts(dense_features_values, weights, dense_tensors.append(dense_tensor) # Add variables of shape [feature_column_dimension]. dense_weights.append( - variables_lib.Variable( + variables_lib.VariableV1( array_ops.zeros( [dense_tensor.get_shape().as_list()[1]], dtype=dtypes.float32))) @@ -341,7 +341,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): examples = make_example_dict(example_protos, example_weights) # Explicitly make age a [1]-shaped Variable (which cannot be # partitioned), while making gender a PartitionedVariable. - age_weights = variables_lib.Variable( + age_weights = variables_lib.VariableV1( array_ops.zeros([1], dtype=dtypes.float32)) with variable_scope.variable_scope( name_or_scope=('variables/shard_{}'.format(num_shards) @@ -801,7 +801,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): labels=[1.0, 0.0]) # Replace with a variable of size 1 instead of 2. variables['dense_features_weights'] = [ - variables_lib.Variable(array_ops.zeros( + variables_lib.VariableV1(array_ops.zeros( [1], dtype=dtypes.float32)) ] options = dict( diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index f320b53d940d3f2b76975fb1302aaf344e785aca..787a85644c35c807df84f74cbce06f80fd0b004d 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -4,6 +4,7 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops") exports_files(glob([ @@ -26,6 +27,14 @@ config_setting( }, ) +# Enables inclusion of TensorFlow kernels via the TF Lite Flex delegate. +# WARNING: This build flag is experimental and subject to change. +config_setting( + name = "with_tflite_flex", + define_values = {"with_tflite_flex": "true"}, + visibility = ["//visibility:public"], +) + cc_library( name = "schema_fbs_version", hdrs = ["version.h"], @@ -180,7 +189,12 @@ cc_library( "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/profiling:profiler", "//tensorflow/contrib/lite/schema:schema_fbs", - ], + ] + select({ + ":with_tflite_flex": [ + "//tensorflow/contrib/lite/delegates/flex:delegate", + ], + "//conditions:default": [], + }), ) cc_library( @@ -259,6 +273,7 @@ cc_test( "testdata/0_subgraphs.bin", "testdata/2_subgraphs.bin", "testdata/empty_model.bin", + "testdata/multi_add_flex.bin", "testdata/test_model.bin", "testdata/test_model_broken.bin", ], @@ -266,6 +281,26 @@ cc_test( ":framework", "//tensorflow/contrib/lite/c:c_api_internal", "//tensorflow/contrib/lite/core/api", + "//tensorflow/contrib/lite/kernels:builtin_ops", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + +# Test model framework with the flex library linked into the target. +tf_cc_test( + name = "model_flex_test", + size = "small", + srcs = ["model_flex_test.cc"], + data = [ + "testdata/multi_add_flex.bin", + ], + tags = ["no_windows"], # TODO(b/116667551): No weak symbols with MSVC. + deps = [ + ":framework", + "//tensorflow/contrib/lite/core/api", + "//tensorflow/contrib/lite/delegates/flex:delegate", + "//tensorflow/contrib/lite/kernels:builtin_ops", "//tensorflow/contrib/lite/testing:util", "@com_google_googletest//:gtest", ], diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 7f5c6bdc2f34a962b2991c3d7cc8dfac720e93f2..b9e933a8b6977bc2b26aff70c9f307a444bcb64f 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -212,7 +212,8 @@ def json_to_tflite(name, src, out): # This is the master list of generated examples that will be made into tests. A # function called make_XXX_tests() must also appear in generate_examples.py. -# Disable a test by commenting it out. If you do, add a link to a bug or issue. +# Disable a test by adding it to the blacklists specified in +# generated_test_models_failing(). def generated_test_models(): return [ "add", @@ -291,17 +292,43 @@ def generated_test_models(): "tile", "topk", "transpose", - #"transpose_conv", # disabled due to b/111213074 + "transpose_conv", "unpack", "where", "zeros_like", ] +# List of models that fail generated tests for the conversion mode. +# If you have to disable a test, please add here with a link to the appropriate +# bug or issue. +def generated_test_models_failing(conversion_mode): + if not conversion_mode: + return [ + "transpose_conv", # disabled due to b/111213074 + ] + + if conversion_mode == "toco-flex": + # TODO(b/117328698): Fix and enable the known flex failures. + return [ + "arg_min_max", + "div", + "floor_div", + "gather ", + "lstm ", + "resize_bilinear", + "space_to_batch_nd", + "split", + "transpose", + "unpack", + ] + + return [] + def generated_test_conversion_modes(): """Returns a list of conversion modes.""" # TODO(nupurgarg): Add "pb2lite" when it's in open source. b/113614050. - return ["toco-extended", ""] + return ["toco-flex", ""] def generated_test_models_all(): """Generates a list of all tests with the different converters. @@ -313,10 +340,14 @@ def generated_test_models_all(): tests = generated_test_models() options = [] for conversion_mode in conversion_modes: + failing_tests = generated_test_models_failing(conversion_mode) for test in tests: + tags = [] + if test in failing_tests: + tags.append("notap") if conversion_mode: test += "_%s" % conversion_mode - options.append((conversion_mode, test)) + options.append((conversion_mode, test, tags)) return options def gen_zip_test(name, test_name, conversion_mode, **kwargs): @@ -335,10 +366,7 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): # TODO(nupurgarg): Comment in when pb2lite is in open source. b/113614050. # if conversion_mode == "pb2lite": # toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite" - flags = "--ignore_toco_errors --run_with_extended" - kwargs["tags"].append("skip_already_failing") - kwargs["tags"].append("no_oss") - kwargs["tags"].append("notap") + flags = "--ignore_toco_errors --run_with_flex" gen_zipped_test_file( name = "zip_%s" % test_name, diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7809d114e2f72991be98bfa760f1f240864b5aa6..6117cbf9f15074766f90971dbf695eaa0b4678e3 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -120,6 +120,7 @@ typedef enum { kTfLiteBuiltinSquare = 92, kTfLiteBuiltinZerosLike = 93, kTfLiteBuiltinFill = 94, + kTfLiteBuiltinFloorMod = 95, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h index be9d551ee4d5e94284e1cdb38f464327361d73b7..1e65c3cee27798990eb9888e67306c6285925a1f 100644 --- a/tensorflow/contrib/lite/c/builtin_op_data.h +++ b/tensorflow/contrib/lite/c/builtin_op_data.h @@ -99,6 +99,12 @@ typedef struct { TfLiteFusedActivation activation; } TfLiteSequenceRNNParams; +typedef struct { + bool time_major; + TfLiteFusedActivation activation; + bool merge_outputs; +} TfLiteBidirectionalSequenceRNNParams; + typedef enum { kTfLiteFullyConnectedWeightsFormatDefault = 0, kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1, @@ -180,6 +186,23 @@ typedef struct { TfLiteLSTMKernelType kernel_type; } TfLiteLSTMParams; +typedef struct { + // Parameters for the LSTM kernel. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; +} TfLiteUnidirectionalSequenceLSTMParams; + +typedef struct { + // Parameters for the LSTM kernel. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; + + // If true, store the outputs of both directions in the first output. + bool merge_outputs; +} TfLiteBidirectionalSequenceLSTMParams; + typedef struct { bool align_corners; } TfLiteResizeBilinearParams; diff --git a/tensorflow/contrib/lite/c/builtin_op_data_test.cc b/tensorflow/contrib/lite/c/builtin_op_data_test.cc index 4d0ba75e68367c9a0a7a7c9c3ac1ea14a875c201..ba458b4252c53ebc91adcd0afbd16f783037dd42 100644 --- a/tensorflow/contrib/lite/c/builtin_op_data_test.cc +++ b/tensorflow/contrib/lite/c/builtin_op_data_test.cc @@ -73,6 +73,8 @@ TEST(IntArray, CanCompileStructs) { TfLiteFakeQuantParams fake_quant_params; TfLitePackParams pack_params; TfLiteOneHotParams one_hot_params; + TfLiteBidirectionalSequenceRNNParams bidi_sequence_rnn_params; + TfLiteBidirectionalSequenceLSTMParams bidi_sequence_lstm_params; } } // namespace tflite diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index e6900e0950305d5d482814c1f617a42231998cc0..890d9c04bb372fce2b86d503ec1f346a302786f2 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -224,10 +224,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: { - TfLiteSequenceRNNParams* params = - allocator->AllocatePOD(); + auto params = allocator->AllocatePOD(); if (auto* sequence_rnn_params = op->builtin_options_as_SequenceRNNOptions()) { params->activation = @@ -237,6 +235,19 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: { + auto params = + allocator->AllocatePOD(); + if (auto* bidi_sequence_rnn_params = + op->builtin_options_as_BidirectionalSequenceRNNOptions()) { + params->activation = parse_activation( + bidi_sequence_rnn_params->fused_activation_function()); + params->time_major = bidi_sequence_rnn_params->time_major(); + params->merge_outputs = bidi_sequence_rnn_params->merge_outputs(); + } + *builtin_data = reinterpret_cast(params); + break; + } case BuiltinOperator_RNN: { TfLiteRNNParams* params = allocator->AllocatePOD(); if (auto* rnn_params = op->builtin_options_as_RNNOptions()) { @@ -360,10 +371,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: - case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: case BuiltinOperator_LSTM: { - TfLiteLSTMParams* params = allocator->AllocatePOD(); + auto params = allocator->AllocatePOD(); if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) { params->activation = parse_activation(lstm_params->fused_activation_function()); @@ -381,6 +390,34 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: { + auto* params = + allocator->AllocatePOD(); + if (auto* seq_lstm_params = + op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) { + params->activation = + parse_activation(seq_lstm_params->fused_activation_function()); + params->cell_clip = seq_lstm_params->cell_clip(); + params->proj_clip = seq_lstm_params->proj_clip(); + } + *builtin_data = reinterpret_cast(params); + break; + } + + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: { + auto params = + allocator->AllocatePOD(); + if (auto* bidi_lstm_params = + op->builtin_options_as_BidirectionalSequenceLSTMOptions()) { + params->activation = + parse_activation(bidi_lstm_params->fused_activation_function()); + params->cell_clip = bidi_lstm_params->cell_clip(); + params->proj_clip = bidi_lstm_params->proj_clip(); + params->merge_outputs = bidi_lstm_params->merge_outputs(); + } + *builtin_data = reinterpret_cast(params); + break; + } case BuiltinOperator_RESIZE_BILINEAR: { auto* params = allocator->AllocatePOD(); if (auto* schema_params = @@ -614,6 +651,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SQUARE: case BuiltinOperator_ZEROS_LIKE: case BuiltinOperator_FILL: + case BuiltinOperator_FLOOR_MOD: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/delegates/eager/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD similarity index 94% rename from tensorflow/contrib/lite/delegates/eager/BUILD rename to tensorflow/contrib/lite/delegates/flex/BUILD index bf5d91899ca63142f69401229b9e06b27b6c2b0b..9b89ed4f849e224d36adae7c3a7581ac542d4f0f 100644 --- a/tensorflow/contrib/lite/delegates/eager/BUILD +++ b/tensorflow/contrib/lite/delegates/flex/BUILD @@ -2,7 +2,7 @@ # This is a TF Lite delegate that is powered by TensorFlow's Eager. # package(default_visibility = [ - "//visibility:public", + "//visibility:private", ]) licenses(["notice"]) # Apache 2.0 @@ -20,7 +20,7 @@ cc_library( "//tensorflow/contrib/lite:kernel_api", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite_no_runtime", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:framework", @@ -50,6 +50,7 @@ cc_library( hdrs = [ "delegate.h", ], + visibility = ["//visibility:public"], deps = [ ":buffer_map", ":delegate_data", @@ -60,12 +61,13 @@ cc_library( "//tensorflow/contrib/lite:util", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite_no_runtime", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:lib", ], }), + alwayslink = 1, ) tf_cc_test( @@ -178,7 +180,7 @@ cc_library( "//tensorflow/contrib/lite:kernel_api", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite_no_runtime", + "//tensorflow/core:android_tensorflow_lib", ], "//conditions:default": [ "//tensorflow/core:lib", diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc similarity index 95% rename from tensorflow/contrib/lite/delegates/eager/buffer_map.cc rename to tensorflow/contrib/lite/delegates/flex/buffer_map.cc index e5a19c39976969a0b05b28596c6d7d5ebe7c7782..63e39196d96a176eca105e7b11107ab52fe528dd 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include "tensorflow/c/c_api_internal.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/log_memory.h" namespace tflite { -namespace eager { +namespace flex { namespace { // A tensor buffer that is allocated, deallocated and populated by TF Lite. class TfLiteTensorBuffer : public tensorflow::TensorBuffer { @@ -107,5 +107,5 @@ void BufferMap::SetFromTensorFlow(int tensor_index, tensorflow::Tensor tensor) { id_to_tensor_[tensor_index] = std::move(tensor); } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.h b/tensorflow/contrib/lite/delegates/flex/buffer_map.h similarity index 86% rename from tensorflow/contrib/lite/delegates/eager/buffer_map.h rename to tensorflow/contrib/lite/delegates/flex/buffer_map.h index aaaa045840c4511f0018ad40414f8b36cea8994a..4ce886568a55773971bc0543ec973ec84c0aac1b 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map.h +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ #include @@ -21,12 +21,12 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" namespace tflite { -namespace eager { +namespace flex { // Maps a TF Lite tensor index into a TensorFlow tensor. // // The TF Lite interpreter assigns integer indices to each of its tensors, but -// the Eager delegate deals in terms of TensorFlow tensors. This class maps +// the Flex delegate deals in terms of TensorFlow tensors. This class maps // from indices to tensors and allows the creation of new tensors to be // associated with a given index. class BufferMap { @@ -55,7 +55,7 @@ class BufferMap { std::map id_to_tensor_; }; -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc similarity index 98% rename from tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc rename to tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc index a046943e56d2b80f2670b7fc3dd57b36dc4d2425..bb80e25e8076bb95782e4137945ad1c7cd178aee 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include #include @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/contrib/lite/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ElementsAre; @@ -164,7 +164,7 @@ TEST(BufferMapTest, TensorFlowOverwritesTfLite) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc similarity index 70% rename from tensorflow/contrib/lite/delegates/eager/delegate.cc rename to tensorflow/contrib/lite/delegates/flex/delegate.cc index 45fc158157b624ae99bd99ecfd136efcc69ca550..c72b0cf51383897ce3afec0c39ed6bfe178d88c1 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc @@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include #include "tensorflow/contrib/lite/context_util.h" -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/contrib/lite/util.h" #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { namespace delegate { TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { @@ -32,7 +32,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { TfLiteIntArray* plan; TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan)); - // Add all custom ops starting with "Eager" to list of supported nodes. + // Add all custom ops starting with "Flex" to list of supported nodes. std::vector supported_nodes; for (int node_index : TfLiteIntArrayView(plan)) { TfLiteNode* node; @@ -40,7 +40,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration( context, node_index, &node, ®istration)); - if (IsEagerOp(registration->custom_name)) { + if (IsFlexOp(registration->custom_name)) { supported_nodes.push_back(node_index); } } @@ -81,28 +81,37 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context, } } // namespace delegate -} // namespace eager +} // namespace flex + +// Corresponding weak declaration found in lite/model.cc. +std::unique_ptr +AcquireFlexDelegate() { + return std::unique_ptr( + tflite::FlexDelegate::Create().release(), [](TfLiteDelegate* delegate) { + delete reinterpret_cast(delegate); + }); +} -std::unique_ptr EagerDelegate::Create() { - std::unique_ptr delegate_data; - if (!eager::DelegateData::Create(&delegate_data).ok()) { +std::unique_ptr FlexDelegate::Create() { + std::unique_ptr delegate_data; + if (!flex::DelegateData::Create(&delegate_data).ok()) { fprintf(stderr, "Unable to initialize TensorFlow context.\n"); return nullptr; } - return std::unique_ptr( - new EagerDelegate(std::move(delegate_data))); + return std::unique_ptr( + new FlexDelegate(std::move(delegate_data))); } -EagerDelegate::EagerDelegate(std::unique_ptr delegate_data) +FlexDelegate::FlexDelegate(std::unique_ptr delegate_data) : TfLiteDelegate{ /*data_=*/delegate_data.get(), - /*nullptr,*/ &eager::delegate::Prepare, - /*CopyFromBufferHandle=*/&eager::delegate::CopyFromBufferHandle, + /*nullptr,*/ &flex::delegate::Prepare, + /*CopyFromBufferHandle=*/&flex::delegate::CopyFromBufferHandle, /*CopyToBufferHandle=*/nullptr, /*FreeBufferHandle=*/nullptr}, delegate_data_(std::move(delegate_data)) {} -EagerDelegate::~EagerDelegate() {} +FlexDelegate::~FlexDelegate() {} } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.h b/tensorflow/contrib/lite/delegates/flex/delegate.h similarity index 64% rename from tensorflow/contrib/lite/delegates/eager/delegate.h rename to tensorflow/contrib/lite/delegates/flex/delegate.h index 70f3c15af4af99380113e7404d7d487c2d9237cd..1017780dc75de1cd334e0cca901bbe20ddf0bf41 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate.h +++ b/tensorflow/contrib/lite/delegates/flex/delegate.h @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ #include "tensorflow/contrib/lite/c/c_api_internal.h" -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" namespace tflite { @@ -24,12 +24,12 @@ namespace tflite { // Delegate that can be used to extract parts of a graph that are designed to be // executed by TensorFlow's runtime via Eager. // -// The interpreter must be constructed after the EagerDelegate and destructed -// before the EagerDelegate. This delegate may be used with multiple +// The interpreter must be constructed after the FlexDelegate and destructed +// before the FlexDelegate. This delegate may be used with multiple // interpreters, but it is *not* thread-safe. // // Usage: -// auto delegate = EagerDelegate::Create(); +// auto delegate = FlexDelegate::Create(); // ... build interpreter ... // // if (delegate) { @@ -39,21 +39,21 @@ namespace tflite { // ... run inference ... // ... destroy interpreter ... // ... destroy delegate ... -class EagerDelegate : public TfLiteDelegate { +class FlexDelegate : public TfLiteDelegate { public: // Creates a delegate that supports TF ops. // - // If the underyling TF Eager context creation fails, returns null. - static std::unique_ptr Create(); + // If the underyling TF Flex context creation fails, returns null. + static std::unique_ptr Create(); - ~EagerDelegate(); + ~FlexDelegate(); private: - explicit EagerDelegate(std::unique_ptr delegate_data); + explicit FlexDelegate(std::unique_ptr delegate_data); - std::unique_ptr delegate_data_; + std::unique_ptr delegate_data_; }; } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc similarity index 94% rename from tensorflow/contrib/lite/delegates/eager/delegate_data.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_data.cc index 0fd5c976f8ca9be16f7e3c5e610573755b40c506..8f985f770cfba9fc6a7184cfdb0a35e9e6c754af 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { tensorflow::Status DelegateData::Create(std::unique_ptr* data) { std::vector devices; @@ -43,5 +43,5 @@ DelegateData::DelegateData(tensorflow::EagerContext* eager_context) DelegateData::~DelegateData() {} -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.h b/tensorflow/contrib/lite/delegates/flex/delegate_data.h similarity index 78% rename from tensorflow/contrib/lite/delegates/eager/delegate_data.h rename to tensorflow/contrib/lite/delegates/flex/delegate_data.h index 772d26f44e8b5b2b962c06f42b86df29ee1c1f8d..8d75f0b0efe758074d035f0ebcf0f5f12602323b 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data.h +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.h @@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include "tensorflow/core/common_runtime/eager/context.h" namespace tflite { -namespace eager { +namespace flex { -// Data kept by the Eager delegate for the lifetime of an Interpreter. +// Data kept by the Flex delegate for the lifetime of an Interpreter. class DelegateData { public: // Create a new DelegateData, initialized with a newly-created EagerContext. @@ -29,7 +29,7 @@ class DelegateData { ~DelegateData(); - // The EagerContext that is required for execution of Eager Ops. + // The EagerContext that is required for execution of Flex Ops. tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); } // Map from TF Lite tensor index to TensorFlow tensor for a given context. @@ -46,7 +46,7 @@ class DelegateData { std::unordered_map buffer_map_; }; -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc similarity index 93% rename from tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc index def063309fb8019b2835cfd8943dd1b7a7034a14..30b10f435a23785f88e2645714a414501bc2fab9 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" #include #include @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/contrib/lite/testing/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { TEST(DelegateDataTest, Basic) { @@ -39,7 +39,7 @@ TEST(DelegateDataTest, Basic) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc similarity index 95% rename from tensorflow/contrib/lite/delegates/eager/delegate_test.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_test.cc index 43ec5d53b81e385b2ca3a9614ccd7b8fdff2557d..1813952cef99ef10b638ade7bcfcca486b2b3b76 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc @@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include #include -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ContainsRegex; using ::testing::ElementsAre; -class DelegateTest : public testing::EagerModelTest { +class DelegateTest : public testing::FlexModelTest { public: DelegateTest() { - delegate_ = EagerDelegate::Create(); + delegate_ = FlexDelegate::Create(); interpreter_.reset(new Interpreter(&error_reporter_)); } @@ -46,7 +46,7 @@ class DelegateTest : public testing::EagerModelTest { } private: - std::unique_ptr delegate_; + std::unique_ptr delegate_; }; TEST_F(DelegateTest, FullGraph) { @@ -236,7 +236,7 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/flex/kernel.cc similarity index 91% rename from tensorflow/contrib/lite/delegates/eager/kernel.cc rename to tensorflow/contrib/lite/delegates/flex/kernel.cc index 48a2f56baf4aa1c1f6e9d2008cfd3223dc0bd4de..e4f1aea990da97da08a3e5adf2dd70307b20fe88 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel.cc +++ b/tensorflow/contrib/lite/delegates/flex/kernel.cc @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" #include "flatbuffers/flexbuffers.h" // TF:flatbuffers #include "tensorflow/contrib/lite/builtin_ops.h" #include "tensorflow/contrib/lite/c/c_api_internal.h" #include "tensorflow/contrib/lite/context_util.h" -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/string.h" #include "tensorflow/core/common_runtime/eager/context.h" @@ -28,10 +28,10 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" -// Note: this is part of TF Lite's Eager delegation code which is to be +// Note: this is part of TF Lite's Flex delegation code which is to be // completed soon. -// This is the TF Lite op that is created by the eager delegate to handle +// This is the TF Lite op that is created by the flex delegate to handle // execution of a supported subgraph. The usual flow is that the delegate // informs the interpreter of supported nodes in a graph, and each supported // subgraph is replaced with one instance of this kernel. @@ -46,7 +46,7 @@ limitations under the License. // corresponding TensorFlow/Eager Op. namespace tflite { -namespace eager { +namespace flex { namespace kernel { // Controls the lifetime of tensor handles in a vector. @@ -72,11 +72,11 @@ class VectorOfHandles { // Executes the TensorFlow op given by 'op_name', with the attributes specified // in 'nodedef'. Inputs and outputs are given as indices into the 'buffer_map'. -tensorflow::Status ExecuteEagerOp(tensorflow::EagerContext* eager_context, - BufferMap* buffer_map, const string& op_name, - const tensorflow::NodeDef& nodedef, - const std::vector& inputs, - const std::vector& outputs) { +tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context, + BufferMap* buffer_map, const string& op_name, + const tensorflow::NodeDef& nodedef, + const std::vector& inputs, + const std::vector& outputs) { const tensorflow::AttrTypeMap* attr_types; TF_RETURN_WITH_CONTEXT_IF_ERROR( tensorflow::AttrTypeMapForOp(op_name.c_str(), &attr_types), @@ -258,13 +258,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Execute the TensorFlow Ops sequentially. for (const auto& node_data : op_data->nodes) { if (node_data.nodedef.op().empty()) { - context->ReportError(context, "Invalid NodeDef in Eager op '%s'", + context->ReportError(context, "Invalid NodeDef in Flex op '%s'", node_data.name.c_str()); return kTfLiteError; } auto status = - ExecuteEagerOp(eager_context, buffer_map, node_data.name, - node_data.nodedef, node_data.inputs, node_data.outputs); + ExecuteFlexOp(eager_context, buffer_map, node_data.name, + node_data.nodedef, node_data.inputs, node_data.outputs); TF_LITE_ENSURE_OK(context, ConvertStatus(context, status)); } @@ -295,5 +295,5 @@ TfLiteRegistration GetKernel() { return registration; } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.h b/tensorflow/contrib/lite/delegates/flex/kernel.h similarity index 79% rename from tensorflow/contrib/lite/delegates/eager/kernel.h rename to tensorflow/contrib/lite/delegates/flex/kernel.h index 2478abccaa1d9466cbadad08e73ba58bb970b4a1..ac9313a37bd5a3f5e23057512f07674c44801989 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel.h +++ b/tensorflow/contrib/lite/delegates/flex/kernel.h @@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ #include "tensorflow/contrib/lite/c/c_api_internal.h" namespace tflite { -namespace eager { +namespace flex { // Return the registration object used to initialize and execute ops that will // be delegated to TensorFlow's Eager runtime. This TF Lite op is created by -// the eager delegate to handle execution of a supported subgraph. The usual +// the flex delegate to handle execution of a supported subgraph. The usual // flow is that the delegate informs the interpreter of supported nodes in a // graph, and each supported subgraph is replaced with one instance of this // kernel. TfLiteRegistration GetKernel(); -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc similarity index 94% rename from tensorflow/contrib/lite/delegates/eager/kernel_test.cc rename to tensorflow/contrib/lite/delegates/flex/kernel_test.cc index 66f2226626677fa26a8c0eb2ae8ef448ed35c141..94a6f8b61ad28144f6b8d0d462338ab4176af168 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" #include #include -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ContainsRegex; @@ -31,12 +31,12 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate, TfLiteIntArray* size_and_nodes = ConvertVectorToTfLiteIntArray(supported_nodes); TF_LITE_ENSURE_STATUS(context->ReplaceSubgraphsWithDelegateKernels( - context, eager::GetKernel(), size_and_nodes, delegate)); + context, flex::GetKernel(), size_and_nodes, delegate)); TfLiteIntArrayFree(size_and_nodes); return kTfLiteOk; } -class KernelTest : public testing::EagerModelTest { +class KernelTest : public testing::FlexModelTest { public: KernelTest() { CHECK(DelegateData::Create(&delegate_data_).ok()); @@ -167,7 +167,7 @@ TEST_F(KernelTest, WrongSetOfNodes) { ASSERT_FALSE(Invoke()); ASSERT_THAT(error_reporter().error_messages(), - ContainsRegex("Invalid NodeDef in Eager op")); + ContainsRegex("Invalid NodeDef in Flex op")); } TEST_F(KernelTest, MixedGraph) { @@ -220,7 +220,7 @@ TEST_F(KernelTest, SplitGraph) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/flex/test_util.cc similarity index 76% rename from tensorflow/contrib/lite/delegates/eager/test_util.cc rename to tensorflow/contrib/lite/delegates/flex/test_util.cc index d47be761fb191842b30884565b5d4d703d1644d7..69c336a01a57416bb331a897faba03ad75a38f95 100644 --- a/tensorflow/contrib/lite/delegates/eager/test_util.cc +++ b/tensorflow/contrib/lite/delegates/flex/test_util.cc @@ -13,25 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" #include "absl/memory/memory.h" #include "flatbuffers/flexbuffers.h" // TF:flatbuffers #include "tensorflow/contrib/lite/string.h" namespace tflite { -namespace eager { +namespace flex { namespace testing { -bool EagerModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; } +bool FlexModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; } -void EagerModelTest::SetShape(int tensor_index, - const std::vector& values) { +void FlexModelTest::SetShape(int tensor_index, const std::vector& values) { ASSERT_EQ(interpreter_->ResizeInputTensor(tensor_index, values), kTfLiteOk); ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); } -std::vector EagerModelTest::GetShape(int tensor_index) { +std::vector FlexModelTest::GetShape(int tensor_index) { std::vector result; auto* dims = interpreter_->tensor(tensor_index)->dims; result.reserve(dims->size); @@ -41,13 +40,13 @@ std::vector EagerModelTest::GetShape(int tensor_index) { return result; } -TfLiteType EagerModelTest::GetType(int tensor_index) { +TfLiteType FlexModelTest::GetType(int tensor_index) { return interpreter_->tensor(tensor_index)->type; } -void EagerModelTest::AddTensors(int num_tensors, const std::vector& inputs, - const std::vector& outputs, - TfLiteType type, const std::vector& dims) { +void FlexModelTest::AddTensors(int num_tensors, const std::vector& inputs, + const std::vector& outputs, TfLiteType type, + const std::vector& dims) { interpreter_->AddTensors(num_tensors); for (int i = 0; i < num_tensors; ++i) { TfLiteQuantizationParams quant; @@ -66,8 +65,8 @@ void EagerModelTest::AddTensors(int num_tensors, const std::vector& inputs, CHECK_EQ(interpreter_->SetOutputs(outputs), kTfLiteOk); } -void EagerModelTest::AddTfLiteMulOp(const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfLiteMulOp(const std::vector& inputs, + const std::vector& outputs) { static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; reg.builtin_code = BuiltinOperator_MUL; reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { @@ -90,8 +89,8 @@ void EagerModelTest::AddTfLiteMulOp(const std::vector& inputs, kTfLiteOk); } -void EagerModelTest::AddTfOp(TfOpType op, const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfOp(TfOpType op, const std::vector& inputs, + const std::vector& outputs) { auto attr = [](const string& key, const string& value) { return " attr{ key: '" + key + "' value {" + value + "}}"; }; @@ -107,28 +106,28 @@ void EagerModelTest::AddTfOp(TfOpType op, const std::vector& inputs, if (op == kUnpack) { string attributes = type_attribute + attr("num", "i: 2") + attr("axis", "i: 0"); - AddTfOp("EagerUnpack", "Unpack", attributes, inputs, outputs); + AddTfOp("FlexUnpack", "Unpack", attributes, inputs, outputs); } else if (op == kIdentity) { string attributes = type_attribute; - AddTfOp("EagerIdentity", "Identity", attributes, inputs, outputs); + AddTfOp("FlexIdentity", "Identity", attributes, inputs, outputs); } else if (op == kAdd) { string attributes = type_attribute; - AddTfOp("EagerAdd", "Add", attributes, inputs, outputs); + AddTfOp("FlexAdd", "Add", attributes, inputs, outputs); } else if (op == kMul) { string attributes = type_attribute; - AddTfOp("EagerMul", "Mul", attributes, inputs, outputs); + AddTfOp("FlexMul", "Mul", attributes, inputs, outputs); } else if (op == kNonExistent) { AddTfOp("NonExistentOp", "NonExistentOp", "", inputs, outputs); } else if (op == kIncompatibleNodeDef) { // "Cast" op is created without attributes - making it incompatible. - AddTfOp("EagerCast", "Cast", "", inputs, outputs); + AddTfOp("FlexCast", "Cast", "", inputs, outputs); } } -void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name, - const string& nodedef_str, - const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name, + const string& nodedef_str, + const std::vector& inputs, + const std::vector& outputs) { static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; reg.builtin_code = BuiltinOperator_CUSTOM; reg.custom_name = tflite_name; @@ -154,5 +153,5 @@ void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name, } } // namespace testing -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.h b/tensorflow/contrib/lite/delegates/flex/test_util.h similarity index 90% rename from tensorflow/contrib/lite/delegates/eager/test_util.h rename to tensorflow/contrib/lite/delegates/flex/test_util.h index 816db41931610c913cf406095c308a5f7682f7cb..a8c81b90a3b8dc49ae058adb172456fe4d6e7172 100644 --- a/tensorflow/contrib/lite/delegates/eager/test_util.h +++ b/tensorflow/contrib/lite/delegates/flex/test_util.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ #include "tensorflow/c/c_api_internal.h" #include "tensorflow/contrib/lite/kernels/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace testing { enum TfOpType { @@ -35,12 +35,12 @@ enum TfOpType { }; // This class creates models with TF and TFLite ops. In order to use this class -// to test the Eager delegate, implement a function that calls +// to test the Flex delegate, implement a function that calls // interpreter->ModifyGraphWithDelegate. -class EagerModelTest : public ::testing::Test { +class FlexModelTest : public ::testing::Test { public: - EagerModelTest() {} - ~EagerModelTest() {} + FlexModelTest() {} + ~FlexModelTest() {} bool Invoke(); @@ -104,7 +104,7 @@ class EagerModelTest : public ::testing::Test { private: // Helper method to add a TensorFlow op. tflite_names needs to start with - // "Eager" in order to work with the Eager delegate. + // "Flex" in order to work with the Flex delegate. void AddTfOp(const char* tflite_name, const string& tf_name, const string& nodedef_str, const std::vector& inputs, const std::vector& outputs); @@ -113,7 +113,7 @@ class EagerModelTest : public ::testing::Test { }; } // namespace testing -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/util.cc b/tensorflow/contrib/lite/delegates/flex/util.cc similarity index 96% rename from tensorflow/contrib/lite/delegates/eager/util.cc rename to tensorflow/contrib/lite/delegates/flex/util.cc index 051246bf866c1f1d479102e8c3dd335ab1b1ca1e..829bc388bf4f613e82600edfc7363d0774d49878 100644 --- a/tensorflow/contrib/lite/delegates/eager/util.cc +++ b/tensorflow/contrib/lite/delegates/flex/util.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" namespace tflite { -namespace eager { +namespace flex { TfLiteStatus ConvertStatus(TfLiteContext* context, const tensorflow::Status& status) { @@ -100,5 +100,5 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { } } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/util.h b/tensorflow/contrib/lite/delegates/flex/util.h similarity index 89% rename from tensorflow/contrib/lite/delegates/eager/util.h rename to tensorflow/contrib/lite/delegates/flex/util.h index 930cb99cb953982fc88ab82710a6ebeb0271a970..7f910e7316e67363a6e54389f1d0cc94b3e009a0 100644 --- a/tensorflow/contrib/lite/delegates/eager/util.h +++ b/tensorflow/contrib/lite/delegates/flex/util.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ #include "tensorflow/c/c_api_internal.h" #include "tensorflow/contrib/lite/c/c_api_internal.h" @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { // Converts a tensorflow:Status into a TfLiteStatus. If the original status // represented an error, reports it using the given 'context'. @@ -41,7 +41,7 @@ TF_DataType GetTensorFlowDataType(TfLiteType type); // Returns the TfLiteType that corresponds to the given TF C API Data type. TfLiteType GetTensorFlowLiteType(TF_DataType); -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/util_test.cc b/tensorflow/contrib/lite/delegates/flex/util_test.cc similarity index 97% rename from tensorflow/contrib/lite/delegates/eager/util_test.cc rename to tensorflow/contrib/lite/delegates/flex/util_test.cc index aebc91149ce0c4677f4cf48f87a9a2a92279fd8d..5f049e7b0a0c1f7be28d33b532157c6f9211c7c1 100644 --- a/tensorflow/contrib/lite/delegates/eager/util_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/util_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/testing/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using tensorflow::DT_FLOAT; @@ -132,7 +132,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/examples/android/BUILD b/tensorflow/contrib/lite/examples/android/BUILD index 4d2437e7d3714e1b8b427b0c6197b295c0355b07..d180cb478566a9e5df24b2e67445f24a2f623215 100644 --- a/tensorflow/contrib/lite/examples/android/BUILD +++ b/tensorflow/contrib/lite/examples/android/BUILD @@ -28,6 +28,7 @@ android_binary( srcs = glob([ "app/src/main/java/**/*.java", ]), + aapt_version = "aapt", # Package assets from assets dir as well as all model targets. # Remove undesired models (and corresponding Activities in source) # to reduce APK size. diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc index 0f165958112dbe1d49ae71a64bede9ca736b07a8..29f8701f53407dc47adfaca8c85c86210e4cb09a 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc @@ -21,9 +21,8 @@ limitations under the License. extern "C" { #endif // __cplusplus -TFL_Status TFL_InterpreterResetVariableTensorsToZero( - TFL_Interpreter* interpreter) { - return interpreter->impl->ResetVariableTensorsToZero(); +TFL_Status TFL_InterpreterResetVariableTensors(TFL_Interpreter* interpreter) { + return interpreter->impl->ResetVariableTensors(); } void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options, diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h index b8de7b9964641aa981debd53039a401532b36d36..fca5d92f77caff987f6a70c3a8fd03849bce1165 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h @@ -25,7 +25,7 @@ extern "C" { typedef TfLiteBuiltinOperator TFL_BuiltinOperator; // Resets all variable tensors to zero. -TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensorsToZero( +TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensors( TFL_Interpreter* interpreter); // Adds an op registration for a builtin operator. diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc index d86ad00d6d0da404ed8cf84f1a781e3a11b39a74..1b1bedb75470638d4b3cfac92819e18b8fe6e65a 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc @@ -44,7 +44,7 @@ TEST(CApiExperimentalSimple, Smoke) { TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); ASSERT_NE(interpreter, nullptr); ASSERT_EQ(TFL_InterpreterAllocateTensors(interpreter), kTfLiteOk); - EXPECT_EQ(TFL_InterpreterResetVariableTensorsToZero(interpreter), kTfLiteOk); + EXPECT_EQ(TFL_InterpreterResetVariableTensors(interpreter), kTfLiteOk); EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk); TFL_DeleteInterpreter(interpreter); diff --git a/tensorflow/contrib/lite/experimental/micro/BUILD b/tensorflow/contrib/lite/experimental/micro/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..df1036bc8b9cc84f4b63ae2a771e3aa8f8989060 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/BUILD @@ -0,0 +1,76 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl", + "tflite_micro_cc_test", +) + +cc_library( + name = "micro_framework", + srcs = [ + "micro_error_reporter.cc", + "micro_interpreter.cc", + "micro_mutable_op_resolver.cc", + "simple_tensor_allocator.cc", + ], + hdrs = [ + "compatibility.h", + "micro_error_reporter.h", + "micro_interpreter.h", + "micro_mutable_op_resolver.h", + "simple_tensor_allocator.h", + ], + deps = [ + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/core/api", + "//tensorflow/contrib/lite/schema:schema_fbs", + ], +) + +tflite_micro_cc_test( + name = "micro_error_reporter_test", + srcs = [ + "micro_error_reporter_test.cc", + ], + deps = [ + ":micro_framework", + ], +) + +tflite_micro_cc_test( + name = "micro_mutable_op_resolver_test", + srcs = [ + "micro_mutable_op_resolver_test.cc", + ], + deps = [ + ":micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) + +tflite_micro_cc_test( + name = "micro_interpreter_test", + srcs = [ + "micro_interpreter_test.cc", + ], + deps = [ + ":micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) + +tflite_micro_cc_test( + name = "simple_tensor_allocator_test", + srcs = [ + "simple_tensor_allocator_test.cc", + ], + deps = [ + ":micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) diff --git a/tensorflow/contrib/lite/experimental/micro/README.md b/tensorflow/contrib/lite/experimental/micro/README.md new file mode 100644 index 0000000000000000000000000000000000000000..414cafde4d489eac36f739f163033bf27f0fc818 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/README.md @@ -0,0 +1,114 @@ +# TensorFlow Lite for Microcontrollers + +This an experimental port of TensorFlow Lite aimed at micro controllers and other devices with only kilobytes of memory. It doesn't require any operating system support, any standard C or C++ libraries, or dynamic memory allocation, so it's designed to be portable even to 'bare metal' systems. The core runtime fits in 16KB on a Cortex M3, and with enough operators to run a speech keyword detection model, takes up a total of 22KB. + +The design goals are for the framework to be: + +- **Readable**: We want embedded software engineers to be able to understand what's required to run ML inference without having to study research papers. We've tried to keep the code base small, modular, and have reference implementations of all operations to help with this. + +- **Easy to modify**: We know that there are a lot of different platforms and requirements in the embedded world, and we don't expect to cover all of them in one framework. Instead, we're hoping that it can be a good starting point for developers to build on top of to meet their own needs. For example, we tried to make it easy to replace the implementations of key computational operators that are often crucial for performance, without having to touch the data flow and other runtime code. We want it to make more sense to use our workflow to handle things like model import and less-important operations, and customize the parts that matter, rather than having to reimplement everything in your own engine. + +- **Well-tested**: If you're modifying code, you need to know if your changes are correct. Having an easy way to test lets you develop much faster. To help there, we've written tests for all the components, and we've made sure that the tests can be run on almost any platform, with no dependencies apart from the ability to log text to a debug console somewhere. We also provide an easy way to run all the tests on-device as part of an automated test framework, and we use qemu/Renode emulation so that tests can be run even without physical devices present. + +- **Easy to integrate**: We want to be as open a system as possible, and use the best code available for each platform. To do that, we're going to rely on projects like [CMSIS-NN](https://www.keil.com/pack/doc/CMSIS/NN/html/index.html), [uTensor](https://github.com/uTensor/uTensor), and other vendor libraries to handle as much performance-critical code as possible. We know that there are an increasing number of options to accelerate neural networks on microcontrollers, so we're aiming to be a good host for deploying those hardware technologies too. + +- **Compatible**: We're using the same file schema, interpreter API, and kernel interface as regular TensorFlow Lite, so we leverage the large existing set of tools, documentation, and examples for the project. The biggest barrier to deploying ML models is getting them from a training environment into a form that's easy to run inference on, so we see reusing this rich ecosystem as being crucial to being easily usable. We also hope to integrate this experimental work back into the main codebase in the future. + +To meet those goals, we've made some tradeoffs: + +- **Simple C++**: To help with readability, our code is written in a modern version of C++, but we generally treat it as a "better C", rather relying on more complex features such as template meta-programming. As mentioned earlier, we avoid any use of dynamic memory allocation (new/delete) or the standard C/C++ libraries, so we believe this should still be fairly portable. It does mean that some older devices with C-only toolchains won't be supported, but we're hoping that the reference operator implementations (which are simple C-like functions) can still be useful in those cases. The interfaces are also designed to be C-only, so it should be possible to integrate the resulting library with pure C projects. + +- **Interpreted**: Code generation is a popular pattern for embedded code, because it gives standalone code that's easy to modify and step through, but we've chosen to go with an interpreted approach. In our internal microcontroller work we've found that using an extremely stripped-down interpreter with almost no dependencies gives us a lot of the same advantages, but is easier to maintain. For example, when new updates come out for the underlying library, you can just merge your local modifications in a single step, rather than having to regenerate new code and then patch in any changes you subsequently made. The coarse granularity of the interpreted primitives means that each operation call typically takes hundreds of thousands of instruction cycles at least, so we don't see noticeable performance gains from avoiding what's essentially a single switch statement at the interpreter level to call each operation. We're still working on improving the packaging though, for example we're considering having the ability to snapshot all the source files and headers used for a particular model, being able to compile the code and data together as a library, and then access it through a minimal set of C interface calls which hide the underlying complexity. + +- **Flatbuffers**: We represent our models using [the standard flatbuffer schema used by the rest of TensorFlow Lite](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema.fbs), with the difference that we always keep it in read-only program memory (typically flash) rather than relying on having a file system to read it from. This is a good fit because flatbuffer's serialized format is designed to be mapped into memory without requiring any extra memory allocations or modifications to access it. All of the functions to read model values work directly on the serialized bytes, and large sections of data like weights are directly accessible as sequential C-style arrays of their data type, with no strides or unpacking needed. We do get a lot of value from using flatbuffers, but there is a cost in complexity. The flat buffer library code is all inline [inside the main headers](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema_generated.h), but it isn't straightforward to inspect their implementations, and the model data structures aren't easy to comprehend from the debugger. The header for the schema itself also has to be periodically updated when new information is added to the file format, though we try to handle that transparently for most developers by checking in a pre-generated version. + +- **Code Duplication**: Some of the code in this prototype largely duplicates the logic in other parts of the TensorFlow Lite code base, for example the operator wrappers. We've tried to keep share as much as we can between the two interpreters, but there are some assumptions built into the original runtime that make this difficult. We'll be working on modularizing the main interpreter so that we can move to an entirely shared system. + +This initial preview release is designed to get early feedback, and is not intended to be a final product. It only includes enough operations to run a simple keyword recognition model, and the implementations are not optimized. We're hoping this will be a good way to get feedback and collaborate to improve the framework. + +## Getting Started + +Building requires a Linux or OS X machine. + + - Open a terminal + - Download the TensorFlow source with `git clone https://github.com/tensorflow` + - Enter the source root directory by running `cd tensorflow` + - Download the dependencies by running `tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh`. This may take a few minutes + - Build and test the library with `make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test` + +You should see a series of compilation steps, followed by "~~~ALL TESTS PASSED~~~" for the various tests of the code that it will run. If there's an error, you should get an informative message from make about what went wrong. + +These tests are all built as simple binaries with few dependencies, so you can run them manually. For example, here's how to run the depthwise convolution test, and its output: + +``` +tensorflow/contrib/lite/experimental/micro/tools/make/gen/linux_x86_64/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test + +Testing SimpleTest +Testing SimpleTestQuantized +Testing SimpleTestRelu +Testing SimpleTestReluQuantized +4/4 tests passed +~ALL TESTS PASSED~~~ +``` + +Looking at the [depthwise_conv_test.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc) code, you'll see a sequence that looks like this: + +``` +... +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { +... +} +... +TF_LITE_MICRO_TESTS_END +``` + +These macros work a lot like [the Google test framework](https://github.com/google/googletest), but they don't require any dependencies and just write results to stderr, rather than aborting the program. If all the tests pass, then "~~~ALL TESTS PASSED~~~" is output, and the test harness that runs the binary during the make process knows that everything ran correctly. If there's an error, the lack of the expected string lets the harness know that the test failed. + +So, why are we running tests in this complicated way? So far, we've been building binaries that run locally on the Mac OS or Linux machine you're building on, but this approach becomes important when we're targeting simple micro controller devices. + +## Building for the "Blue Pill" STM32F103 + +The goal of this library is to enable machine learning on resource-constrained micro controllers and DSPs, and as part of that we've targeted the ["Blue Pill" STM32F103-compatible development board](https://github.com/google/googletest) as a cheap and popular platform. It only has 20KB of RAM and 64KB of flash, so it's a good device to ensure we can run efficiently on small chips. + +It's fairly easy to [buy and wire up a physical board](https://github.com/google/stm32_bare_lib#wiring-up-your-blue-pill), but even if you don't have an actual device, the [Renode project](https://renode.io/) makes it easy to run a faithful emulation on your desktop machine. You'll need [Docker](https://www.docker.com/) installed, but once you have that set up, try running the following command: + +`make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test` + +You should see a similar set of outputs as you did in the previous section, with the addition of some extra Docker logging messages. These are because we're using Docker to run the Renode micro controller emulation tool, and the tests themselves are being run on a simulated STM32F103 device. The communication channels between an embedded device and the host are quite limited, so the test harness looks at the output of the debug log to see if tests have passed, just as it did in the previous section. This makes it a very flexible way to run cross-platform tests, even when a platform has no operating system facilities, as long as it can output debugging text logs. + +To understand what's happening here, try running the same depthwise convolution test, but through the emulated device test harness, with the following command: + +``` +tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh \ +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test + +``` + +You should see output that looks something like this: + +``` +Sending build context to Docker daemon 21.5kB +Step 1/2 : FROM antmicro/renode:latest + ---> 1b670a243e8f +Step 2/2 : LABEL maintainer="Pete Warden " + ---> Using cache + ---> 3afcd410846d +Successfully built 3afcd410846d +Successfully tagged renode_bluepill:latest +LOGS: +... +03:27:32.4340 [INFO] machine-0: Machine started. +03:27:32.4790 [DEBUG] cpu.uartSemihosting: [+0.22s host +0s virt 0s virt from start] Testing SimpleTest +03:27:32.4812 [DEBUG] cpu.uartSemihosting: [+2.21ms host +0s virt 0s virt from start] Testing SimpleTestQuantized +03:27:32.4833 [DEBUG] cpu.uartSemihosting: [+2.14ms host +0s virt 0s virt from start] Testing SimpleTestRelu +03:27:32.4834 [DEBUG] cpu.uartSemihosting: [+0.18ms host +0s virt 0s virt from start] Testing SimpleTestReluQuantized +03:27:32.4838 [DEBUG] cpu.uartSemihosting: [+0.4ms host +0s virt 0s virt from start] 4/4 tests passed +03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+41µs host +0s virt 0s virt from start] ~~~ALL TESTS PASSED~~~ +03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+5µs host +0s virt 0s virt from start] +... +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test: PASS +``` + +There's a lot of output here, but you should be able to see that the same tests that were covered when we ran locally on the development machine show up in the debug logs here, along with the magic string "~~~ALL TESTS PASSED~~~". This is the exact same code as before, just compiled and run on the STM32F103 rather than your desktop. We hope that the simplicity of this testing approach will help make adding support for new platforms as easy as possible. diff --git a/tensorflow/contrib/lite/experimental/micro/compatibility.h b/tensorflow/contrib/lite/experimental/micro/compatibility.h new file mode 100644 index 0000000000000000000000000000000000000000..4f0fd9f3120a5db74cdfb84e7b17a0f3656520bc --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/compatibility.h @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_ + +// C++ will automatically create class-specific delete operators for virtual +// objects, which by default call the global delete function. For embedded +// applications we want to avoid this, and won't be calling new/delete on these +// objects, so we need to override the default implementation with one that does +// nothing to avoid linking in ::delete(). +// This macro needs to be included in all subclasses of a virtual base class in +// the private section. +#ifdef TF_LITE_STATIC_MEMORY +#define TF_LITE_REMOVE_VIRTUAL_DELETE \ + void operator delete(void* p) {} +#else +#define TF_LITE_REMOVE_VIRTUAL_DELETE +#endif + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..dad58b6c1cc818d3ae68dd4fdf5ec47315e1b5cc --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD @@ -0,0 +1,31 @@ +# Description: +# TensorFlow Lite microcontroller example. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl", + "tflite_micro_cc_test", +) + +tflite_micro_cc_test( + name = "micro_speech_test", + srcs = [ + "micro_speech_test.cc", + "tiny_conv_model_data.cc", + "tiny_conv_model_data.h", + ], + tags = [ + "nomsan", + ], + deps = [ + "//tensorflow/contrib/lite:schema_fbs_version", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/experimental/micro/kernels:all_ops_resolver", + "//tensorflow/contrib/lite/experimental/micro/kernels:micro_ops", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + "//tensorflow/contrib/lite/schema:schema_fbs", + ], +) diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..86cd056a7216aa57126be3f6e660a7dcee0c6c44 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc @@ -0,0 +1,55 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" +#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/version.h" + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestInvoke) { + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = µ_error_reporter; + + const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + error_reporter->Report( + "Model provided is schema version %d not equal " + "to supported version %d.\n", + model->version(), TFLITE_SCHEMA_VERSION); + } + tflite::ops::micro::AllOpsResolver resolver; + + const int tensor_arena_size = 10 * 1024; + uint8_t tensor_arena[tensor_arena_size]; + tflite::SimpleTensorAllocator tensor_allocator(tensor_arena, + tensor_arena_size); + + tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator, + error_reporter); + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + error_reporter->Report("Invoke failed\n"); + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + + error_reporter->Report("Ran successfully\n"); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc new file mode 100644 index 0000000000000000000000000000000000000000..f1f9e0e21994b0a79241690e533e4edc8bfe5565 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc @@ -0,0 +1,1672 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Automatically created from a TensorFlow Lite flatbuffer using the command: +// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" + +const unsigned char g_tiny_conv_model_data[] = { + 0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x4d, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, + 0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00, + 0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00, + 0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff, + 0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff, + 0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff, + 0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83, + 0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac, + 0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6, + 0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2, + 0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9, + 0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e, + 0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57, + 0xbe, 0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac, + 0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a, + 0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 0xa1, 0xa4, 0xac, + 0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1, + 0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d, + 0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b, + 0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c, + 0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87, + 0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4, + 0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8, + 0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1, + 0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c, + 0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6, + 0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2, + 0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6, + 0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf, + 0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98, + 0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45, + 0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4, + 0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91, + 0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac, + 0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9, + 0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4, + 0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42, + 0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9, + 0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99, + 0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac, + 0x2f, 0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b, + 0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae, + 0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 0x4d, 0xc1, 0xa3, + 0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83, + 0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb, + 0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1, + 0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e, + 0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7, + 0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4, + 0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5, + 0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4, + 0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3, + 0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9, + 0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8, + 0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b, + 0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7, + 0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6, + 0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0, + 0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9, + 0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff, + 0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50, + 0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d, + 0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e, + 0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43, + 0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c, + 0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f, + 0x4c, 0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49, + 0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56, + 0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 0x44, 0x51, 0x45, + 0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a, + 0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f, + 0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e, + 0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45, + 0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53, + 0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44, + 0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f, + 0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c, + 0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54, + 0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b, + 0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a, + 0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50, + 0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a, + 0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48, + 0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40, + 0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44, + 0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e, + 0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51, + 0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49, + 0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a, + 0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55, + 0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48, + 0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52, + 0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b, + 0x40, 0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47, + 0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54, + 0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 0x3a, 0x4f, 0x5d, + 0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d, + 0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47, + 0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c, + 0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e, + 0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51, + 0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48, + 0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f, + 0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52, + 0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43, + 0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b, + 0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48, + 0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e, + 0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c, + 0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55, + 0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42, + 0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56, + 0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d, + 0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43, + 0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e, + 0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b, + 0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46, + 0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b, + 0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b, + 0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e, + 0x41, 0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d, + 0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48, + 0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 0x43, 0x50, 0x4c, + 0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51, + 0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e, + 0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f, + 0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e, + 0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41, + 0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47, + 0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b, + 0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42, + 0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44, + 0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43, + 0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54, + 0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47, + 0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47, + 0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46, + 0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42, + 0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41, + 0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62, + 0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50, + 0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a, + 0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45, + 0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f, + 0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45, + 0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45, + 0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c, + 0x48, 0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45, + 0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e, + 0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 0x26, 0x3a, 0x51, + 0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46, + 0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46, + 0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d, + 0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b, + 0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47, + 0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f, + 0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d, + 0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42, + 0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d, + 0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43, + 0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41, + 0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43, + 0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c, + 0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b, + 0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d, + 0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41, + 0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53, + 0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f, + 0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41, + 0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51, + 0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40, + 0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d, + 0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a, + 0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d, + 0x4f, 0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f, + 0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46, + 0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 0x50, 0x52, 0x49, + 0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52, + 0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50, + 0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49, + 0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52, + 0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c, + 0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50, + 0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e, + 0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d, + 0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d, + 0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c, + 0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48, + 0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44, + 0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48, + 0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d, + 0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49, + 0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a, + 0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52, + 0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40, + 0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d, + 0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e, + 0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e, + 0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48, + 0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50, + 0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43, + 0x49, 0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40, + 0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d, + 0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 0x4c, 0x49, 0x4f, + 0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e, + 0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45, + 0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47, + 0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54, + 0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d, + 0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d, + 0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52, + 0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45, + 0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c, + 0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53, + 0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e, + 0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36, + 0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49, + 0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44, + 0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c, + 0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41, + 0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47, + 0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f, + 0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48, + 0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45, + 0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47, + 0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49, + 0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d, + 0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a, + 0x46, 0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48, + 0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e, + 0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 0x4c, 0x48, 0x43, + 0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d, + 0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d, + 0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52, + 0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c, + 0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47, + 0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51, + 0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b, + 0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41, + 0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38, + 0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42, + 0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53, + 0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48, + 0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48, + 0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a, + 0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a, + 0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e, + 0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e, + 0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b, + 0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d, + 0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e, + 0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55, + 0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37, + 0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44, + 0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44, + 0x4f, 0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e, + 0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a, + 0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 0x3a, 0x4d, 0x57, + 0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e, + 0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41, + 0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49, + 0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e, + 0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44, + 0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49, + 0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d, + 0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52, + 0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42, + 0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48, + 0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a, + 0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41, + 0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48, + 0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61, + 0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f, + 0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31, + 0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49, + 0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e, + 0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c, + 0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a, + 0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47, + 0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44, + 0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45, + 0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46, + 0x47, 0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a, + 0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c, + 0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 0x43, 0x50, 0x43, + 0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e, + 0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f, + 0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d, + 0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b, + 0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f, + 0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49, + 0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49, + 0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51, + 0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a, + 0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51, + 0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59, + 0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41, + 0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47, + 0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45, + 0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47, + 0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48, + 0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c, + 0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a, + 0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38, + 0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50, + 0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c, + 0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50, + 0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48, + 0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42, + 0x4d, 0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57, + 0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44, + 0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 0x3f, 0x4b, 0x3e, + 0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42, + 0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d, + 0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45, + 0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f, + 0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49, + 0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37, + 0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48, + 0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45, + 0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52, + 0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49, + 0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48, + 0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54, + 0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49, + 0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f, + 0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e, + 0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41, + 0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42, + 0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53, + 0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43, + 0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b, + 0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49, + 0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48, + 0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43, + 0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48, + 0x4e, 0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42, + 0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46, + 0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 0x5e, 0x50, 0x45, + 0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a, + 0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44, + 0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51, + 0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47, + 0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46, + 0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50, + 0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57, + 0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e, + 0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44, + 0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42, + 0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45, + 0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42, + 0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51, + 0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48, + 0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d, + 0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49, + 0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54, + 0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41, + 0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d, + 0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51, + 0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b, + 0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45, + 0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a, + 0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49, + 0x3f, 0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44, + 0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a, + 0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 0x4c, 0x46, 0x4f, + 0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c, + 0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e, + 0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48, + 0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52, + 0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40, + 0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49, + 0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50, + 0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55, + 0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40, + 0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48, + 0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47, + 0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37, + 0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53, + 0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a, + 0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a, + 0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e, + 0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d, + 0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60, + 0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40, + 0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55, + 0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52, + 0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55, + 0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46, + 0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b, + 0x4b, 0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47, + 0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a, + 0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 0x4a, 0x4e, 0x42, + 0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c, + 0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51, + 0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49, + 0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47, + 0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e, + 0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49, + 0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61, + 0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46, + 0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f, + 0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49, + 0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e, + 0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a, + 0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46, + 0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49, + 0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e, + 0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c, + 0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e, + 0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a, + 0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42, + 0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c, + 0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f, + 0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e, + 0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46, + 0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d, + 0x4a, 0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f, + 0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a, + 0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 0x52, 0x47, 0x52, + 0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42, + 0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52, + 0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c, + 0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b, + 0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41, + 0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42, + 0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51, + 0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c, + 0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c, + 0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40, + 0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47, + 0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42, + 0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b, + 0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e, + 0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f, + 0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53, + 0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e, + 0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52, + 0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e, + 0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50, + 0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42, + 0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e, + 0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48, + 0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49, + 0x4b, 0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48, + 0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a, + 0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 0x47, 0x48, 0x43, + 0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e, + 0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44, + 0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55, + 0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47, + 0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b, + 0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48, + 0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a, + 0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49, + 0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43, + 0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f, + 0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b, + 0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50, + 0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b, + 0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e, + 0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44, + 0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48, + 0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58, + 0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b, + 0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62, + 0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56, + 0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c, + 0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53, + 0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52, + 0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46, + 0x54, 0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47, + 0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48, + 0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 0x4f, 0x48, 0x49, + 0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52, + 0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40, + 0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b, + 0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56, + 0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46, + 0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61, + 0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47, + 0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57, + 0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49, + 0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f, + 0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f, + 0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e, + 0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b, + 0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a, + 0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c, + 0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38, + 0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51, + 0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58, + 0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53, + 0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60, + 0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49, + 0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53, + 0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44, + 0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40, + 0x4b, 0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47, + 0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45, + 0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 0x3a, 0x58, 0x74, + 0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41, + 0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57, + 0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a, + 0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c, + 0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b, + 0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55, + 0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50, + 0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54, + 0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60, + 0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f, + 0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54, + 0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46, + 0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48, + 0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f, + 0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50, + 0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f, + 0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46, + 0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55, + 0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43, + 0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49, + 0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53, + 0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45, + 0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58, + 0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45, + 0x4a, 0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c, + 0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d, + 0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 0x51, 0x43, 0x47, + 0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a, + 0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c, + 0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47, + 0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59, + 0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c, + 0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43, + 0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e, + 0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d, + 0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52, + 0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58, + 0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46, + 0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55, + 0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f, + 0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52, + 0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45, + 0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e, + 0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70, + 0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46, + 0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50, + 0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48, + 0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45, + 0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a, + 0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b, + 0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51, + 0x49, 0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49, + 0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a, + 0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 0x53, 0x4e, 0x4e, + 0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f, + 0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50, + 0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52, + 0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48, + 0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57, + 0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48, + 0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43, + 0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52, + 0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42, + 0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47, + 0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47, + 0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a, + 0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54, + 0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45, + 0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e, + 0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43, + 0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51, + 0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44, + 0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50, + 0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c, + 0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56, + 0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c, + 0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49, + 0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b, + 0x4e, 0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d, + 0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46, + 0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 0x4c, 0x4b, 0x57, + 0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f, + 0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b, + 0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42, + 0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41, + 0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e, + 0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44, + 0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64, + 0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42, + 0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c, + 0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51, + 0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45, + 0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45, + 0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48, + 0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f, + 0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49, + 0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51, + 0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51, + 0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44, + 0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49, + 0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45, + 0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46, + 0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a, + 0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48, + 0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50, + 0x2c, 0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50, + 0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46, + 0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 0x40, 0x4d, 0x4f, + 0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41, + 0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d, + 0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51, + 0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49, + 0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a, + 0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45, + 0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e, + 0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51, + 0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56, + 0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49, + 0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d, + 0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57, + 0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a, + 0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d, + 0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a, + 0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c, + 0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50, + 0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50, + 0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a, + 0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b, + 0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e, + 0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47, + 0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b, + 0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e, + 0x51, 0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48, + 0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53, + 0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 0x4e, 0x58, 0x4a, + 0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52, + 0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49, + 0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43, + 0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40, + 0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f, + 0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a, + 0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47, + 0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d, + 0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52, + 0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52, + 0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51, + 0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60, + 0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47, + 0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59, + 0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d, + 0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47, + 0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d, + 0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39, + 0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46, + 0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b, + 0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c, + 0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47, + 0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44, + 0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42, + 0x4b, 0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54, + 0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53, + 0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 0x59, 0x4b, 0x56, + 0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a, + 0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a, + 0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54, + 0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48, + 0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44, + 0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45, + 0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47, + 0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b, + 0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49, + 0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51, + 0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50, + 0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51, + 0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a, + 0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a, + 0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48, + 0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57, + 0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e, + 0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48, + 0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41, + 0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51, + 0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38, + 0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42, + 0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c, + 0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a, + 0x3e, 0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43, + 0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48, + 0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 0x48, 0x50, 0x4a, + 0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47, + 0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a, + 0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44, + 0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50, + 0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56, + 0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46, + 0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b, + 0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a, + 0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51, + 0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d, + 0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41, + 0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b, + 0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e, + 0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e, + 0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52, + 0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56, + 0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a, + 0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a, + 0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e, + 0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e, + 0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49, + 0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47, + 0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54, + 0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c, + 0x48, 0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42, + 0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38, + 0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 0x46, 0x4a, 0x50, + 0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51, + 0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e, + 0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55, + 0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e, + 0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49, + 0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44, + 0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54, + 0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f, + 0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44, + 0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47, + 0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e, + 0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d, + 0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a, + 0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48, + 0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44, + 0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43, + 0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b, + 0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d, + 0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51, + 0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49, + 0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a, + 0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f, + 0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d, + 0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49, + 0x48, 0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37, + 0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a, + 0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 0x52, 0x40, 0x48, + 0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42, + 0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39, + 0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45, + 0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45, + 0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e, + 0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46, + 0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45, + 0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a, + 0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58, + 0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d, + 0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b, + 0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f, + 0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f, + 0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d, + 0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50, + 0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42, + 0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a, + 0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44, + 0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52, + 0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47, + 0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42, + 0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44, + 0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45, + 0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a, + 0x4a, 0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d, + 0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36, + 0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 0x46, 0x46, 0x50, + 0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39, + 0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b, + 0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40, + 0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42, + 0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43, + 0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b, + 0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44, + 0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e, + 0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b, + 0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43, + 0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43, + 0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f, + 0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41, + 0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42, + 0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43, + 0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f, + 0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a, + 0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41, + 0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45, + 0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a, + 0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42, + 0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46, + 0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b, + 0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b, + 0x4f, 0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c, + 0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49, + 0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 0x4e, 0x4a, 0x4a, + 0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46, + 0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d, + 0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51, + 0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b, + 0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45, + 0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43, + 0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53, + 0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d, + 0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b, + 0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44, + 0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a, + 0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51, + 0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c, + 0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59, + 0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46, + 0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d, + 0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a, + 0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b, + 0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54, + 0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e, + 0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42, + 0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a, + 0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d, + 0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47, + 0x4d, 0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42, + 0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37, + 0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 0x64, 0x4b, 0x66, + 0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57, + 0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d, + 0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43, + 0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50, + 0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26, + 0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48, + 0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33, + 0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b, + 0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31, + 0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43, + 0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d, + 0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d, + 0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45, + 0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46, + 0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62, + 0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42, + 0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46, + 0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47, + 0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a, + 0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44, + 0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b, + 0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52, + 0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36, + 0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56, + 0x50, 0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38, + 0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45, + 0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 0x53, 0x4a, 0x4c, + 0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e, + 0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51, + 0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58, + 0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e, + 0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b, + 0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f, + 0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f, + 0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34, + 0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e, + 0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35, + 0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48, + 0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32, + 0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44, + 0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b, + 0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49, + 0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e, + 0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63, + 0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59, + 0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a, + 0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49, + 0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c, + 0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46, + 0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a, + 0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43, + 0x4e, 0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50, + 0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40, + 0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 0x4f, 0x46, 0x4a, + 0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d, + 0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43, + 0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a, + 0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c, + 0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63, + 0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41, + 0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46, + 0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46, + 0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42, + 0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49, + 0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47, + 0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48, + 0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45, + 0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47, + 0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d, + 0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b, + 0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b, + 0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40, + 0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52, + 0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52, + 0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d, + 0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c, + 0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43, + 0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49, + 0x48, 0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b, + 0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c, + 0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 0x4a, 0x4f, 0x50, + 0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45, + 0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f, + 0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42, + 0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43, + 0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46, + 0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30, + 0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62, + 0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56, + 0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c, + 0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54, + 0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48, + 0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59, + 0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46, + 0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57, + 0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47, + 0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46, + 0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50, + 0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42, + 0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d, + 0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b, + 0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54, + 0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62, + 0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51, + 0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40, + 0x54, 0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47, + 0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e, + 0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 0x54, 0x4b, 0x46, + 0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64, + 0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f, + 0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c, + 0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a, + 0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47, + 0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49, + 0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d, + 0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43, + 0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55, + 0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51, + 0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64, + 0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46, + 0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44, + 0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50, + 0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56, + 0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e, + 0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c, + 0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52, + 0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b, + 0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c, + 0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f, + 0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c, + 0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a, + 0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a, + 0x4b, 0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e, + 0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43, + 0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 0x43, 0x48, 0x55, + 0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48, + 0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d, + 0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41, + 0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f, + 0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46, + 0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f, + 0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47, + 0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36, + 0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40, + 0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f, + 0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b, + 0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67, + 0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44, + 0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c, + 0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42, + 0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c, + 0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43, + 0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69, + 0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48, + 0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60, + 0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d, + 0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39, + 0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f, + 0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46, + 0x43, 0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47, + 0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50, + 0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 0x69, 0x48, 0x58, + 0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58, + 0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b, + 0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b, + 0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d, + 0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d, + 0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47, + 0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62, + 0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c, + 0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51, + 0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f, + 0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47, + 0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a, + 0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30, + 0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f, + 0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61, + 0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d, + 0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50, + 0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e, + 0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64, + 0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b, + 0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67, + 0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43, + 0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53, + 0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48, + 0x4b, 0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34, + 0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47, + 0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 0x4c, 0x4c, 0x59, + 0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47, + 0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d, + 0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44, + 0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40, + 0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b, + 0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52, + 0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d, + 0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74, + 0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e, + 0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56, + 0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40, + 0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e, + 0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42, + 0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47, + 0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c, + 0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52, + 0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51, + 0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57, + 0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44, + 0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51, + 0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d, + 0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66, + 0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f, + 0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66, + 0x4b, 0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54, + 0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55, + 0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 0x48, 0x52, 0x52, + 0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a, + 0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e, + 0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d, + 0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60, + 0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e, + 0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49, + 0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42, + 0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40, + 0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e, + 0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50, + 0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72, + 0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46, + 0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59, + 0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46, + 0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32, + 0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c, + 0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59, + 0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e, + 0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54, + 0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42, + 0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f, + 0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f, + 0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e, + 0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48, + 0x4f, 0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71, + 0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b, + 0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 0x53, 0x45, 0x65, + 0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55, + 0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e, + 0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44, + 0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d, + 0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f, + 0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c, + 0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a, + 0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51, + 0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b, + 0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52, + 0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c, + 0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e, + 0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44, + 0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69, + 0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c, + 0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53, + 0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46, + 0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48, + 0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47, + 0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23, + 0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f, + 0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49, + 0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43, + 0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46, + 0x43, 0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41, + 0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60, + 0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 0x45, 0x44, 0x55, + 0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71, + 0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d, + 0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58, + 0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49, + 0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45, + 0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43, + 0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b, + 0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e, + 0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c, + 0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56, + 0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b, + 0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45, + 0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f, + 0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55, + 0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75, + 0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52, + 0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63, + 0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40, + 0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39, + 0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e, + 0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42, + 0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c, + 0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41, + 0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a, + 0x44, 0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a, + 0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45, + 0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 0x53, 0x46, 0x4e, + 0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46, + 0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66, + 0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c, + 0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61, + 0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47, + 0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42, + 0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e, + 0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f, + 0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45, + 0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b, + 0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51, + 0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40, + 0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d, + 0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e, + 0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48, + 0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a, + 0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48, + 0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b, + 0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e, + 0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53, + 0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48, + 0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47, + 0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a, + 0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41, + 0x44, 0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49, + 0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40, + 0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 0x3d, 0x47, 0x51, + 0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c, + 0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45, + 0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a, + 0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54, + 0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d, + 0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53, + 0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51, + 0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47, + 0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39, + 0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46, + 0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48, + 0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34, + 0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40, + 0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48, + 0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46, + 0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52, + 0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44, + 0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52, + 0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f, + 0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e, + 0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b, + 0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49, + 0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f, + 0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56, + 0x3f, 0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47, + 0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47, + 0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 0x52, 0x43, 0x45, + 0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46, + 0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40, + 0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47, + 0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e, + 0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a, + 0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b, + 0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f, + 0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b, + 0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f, + 0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50, + 0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47, + 0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c, + 0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43, + 0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48, + 0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48, + 0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e, + 0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46, + 0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54, + 0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46, + 0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b, + 0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e, + 0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53, + 0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41, + 0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a, + 0x47, 0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49, + 0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58, + 0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 0x46, 0x46, 0x49, + 0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46, + 0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30, + 0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f, + 0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b, + 0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48, + 0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46, + 0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49, + 0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43, + 0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d, + 0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a, + 0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f, + 0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c, + 0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59, + 0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45, + 0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48, + 0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49, + 0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a, + 0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e, + 0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b, + 0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43, + 0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a, + 0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41, + 0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a, + 0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43, + 0x4b, 0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51, + 0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e, + 0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 0x43, 0x45, 0x4c, + 0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47, + 0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e, + 0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d, + 0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d, + 0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47, + 0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d, + 0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46, + 0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a, + 0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40, + 0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c, + 0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e, + 0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48, + 0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f, + 0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c, + 0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51, + 0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54, + 0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44, + 0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44, + 0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e, + 0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48, + 0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c, + 0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c, + 0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c, + 0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c, + 0x46, 0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f, + 0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38, + 0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 0x49, 0x45, 0x4f, + 0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43, + 0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46, + 0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f, + 0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e, + 0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44, + 0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c, + 0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52, + 0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f, + 0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41, + 0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41, + 0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a, + 0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48, + 0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30, + 0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46, + 0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46, + 0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49, + 0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d, + 0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41, + 0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63, + 0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42, + 0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e, + 0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48, + 0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28, + 0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37, + 0x49, 0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58, + 0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45, + 0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 0x3f, 0x46, 0x34, + 0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46, + 0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35, + 0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44, + 0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43, + 0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d, + 0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55, + 0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43, + 0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47, + 0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e, + 0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48, + 0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a, + 0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c, + 0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48, + 0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44, + 0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54, + 0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36, + 0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50, + 0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e, + 0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a, + 0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51, + 0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d, + 0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e, + 0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52, + 0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58, + 0x39, 0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47, + 0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a, + 0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 0x57, 0x4b, 0x45, + 0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53, + 0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44, + 0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30, + 0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51, + 0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38, + 0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c, + 0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43, + 0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42, + 0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d, + 0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45, + 0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55, + 0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a, + 0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d, + 0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41, + 0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e, + 0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40, + 0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34, + 0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d, + 0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35, + 0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45, + 0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39, + 0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e, + 0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56, + 0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43, + 0x4e, 0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c, + 0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d, + 0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 0x56, 0x46, 0x4c, + 0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41, + 0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40, + 0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e, + 0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47, + 0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49, + 0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b, + 0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a, + 0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31, + 0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c, + 0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b, + 0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41, + 0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e, + 0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50, + 0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c, + 0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47, + 0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30, + 0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43, + 0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46, + 0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47, + 0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e, + 0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43, + 0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37, + 0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c, + 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46, + 0x4e, 0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d, + 0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55, + 0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 0x50, 0x44, 0x4c, + 0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54, + 0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47, + 0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22, + 0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a, + 0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e, + 0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d, + 0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e, + 0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46, + 0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e, + 0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50, + 0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f, + 0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47, + 0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a, + 0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b, + 0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e, + 0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c, + 0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a, + 0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39, + 0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e, + 0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50, + 0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e, + 0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48, + 0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f, + 0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41, + 0x42, 0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37, + 0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47, + 0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 0x50, 0x42, 0x47, + 0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49, + 0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45, + 0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c, + 0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61, + 0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e, + 0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a, + 0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45, + 0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47, + 0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43, + 0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f, + 0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c, + 0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c, + 0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42, + 0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e, + 0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45, + 0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57, + 0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e, + 0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54, + 0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b, + 0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b, + 0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41, + 0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f, + 0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40, + 0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38, + 0x44, 0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40, + 0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a, + 0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 0x42, 0x43, 0x45, + 0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a, + 0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f, + 0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57, + 0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47, + 0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e, + 0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47, + 0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b, + 0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45, + 0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41, + 0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49, + 0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44, + 0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46, + 0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37, + 0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47, + 0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35, + 0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48, + 0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43, + 0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e, + 0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d, + 0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a, + 0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57, + 0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40, + 0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38, + 0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49, + 0x41, 0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42, + 0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f, + 0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 0x46, 0x47, 0x23, + 0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e, + 0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37, + 0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c, + 0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45, + 0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50, + 0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c, + 0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b, + 0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d, + 0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51, + 0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41, + 0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e, + 0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b, + 0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d, + 0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37, + 0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d, + 0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34, + 0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49, + 0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b, + 0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45, + 0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54, + 0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46, + 0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c, + 0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c, + 0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b, + 0x48, 0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42, + 0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42, + 0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 0x4e, 0x4c, 0x4f, + 0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c, + 0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f, + 0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30, + 0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50, + 0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45, + 0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a, + 0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a, + 0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49, + 0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49, + 0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42, + 0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a, + 0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47, + 0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42, + 0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55, + 0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f, + 0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47, + 0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30, + 0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53, + 0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d, + 0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f, + 0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44, + 0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52, + 0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58, + 0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e, + 0x4b, 0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44, + 0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51, + 0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 0x3d, 0x3c, 0x3e, + 0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40, + 0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54, + 0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48, + 0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f, + 0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b, + 0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43, + 0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44, + 0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40, + 0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f, + 0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50, + 0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48, + 0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46, + 0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46, + 0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44, + 0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a, + 0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46, + 0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e, + 0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b, + 0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41, + 0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a, + 0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f, + 0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f, + 0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53, + 0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e, + 0x4b, 0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45, + 0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42, + 0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 0x47, 0x41, 0x47, + 0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, + 0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, + 0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, + 0x14, 0x03, 0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00, + 0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76, + 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff, + 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, + 0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff, + 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, + 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, + 0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, + 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, + 0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff, + 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, + 0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00, + 0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f, + 0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3, + 0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73, + 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff, + 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, + 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, + 0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, + 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, + 0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, + 0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, + 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd, + 0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, + 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00, + 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00, + 0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00, + 0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04}; +const int g_tiny_conv_model_data_len = 19800; diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h new file mode 100644 index 0000000000000000000000000000000000000000..2953cc852d98fa9b5551ae5036048de9c2ebf674 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h @@ -0,0 +1,27 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This is a standard TensorFlow Lite model file that has been converted into a +// C data array, so it can be easily compiled into a binary for devices that +// don't have a file system. It was created using the command: +// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_ + +extern const unsigned char g_tiny_conv_model_data[]; +extern const int g_tiny_conv_model_data_len; + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/BUILD b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..a012f950e6f58f082d0a7c9ac0b4cd9018bcf40b --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD @@ -0,0 +1,107 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") +load( + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl", + "tflite_micro_cc_test", +) + +cc_library( + name = "micro_ops", + srcs = [ + "depthwise_conv.cc", + "fully_connected.cc", + "softmax.cc", + ], + hdrs = [ + ], + copts = tflite_copts(), + deps = [ + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/kernels:kernel_util", + "//tensorflow/contrib/lite/kernels:op_macros", + "//tensorflow/contrib/lite/kernels:padding", + "//tensorflow/contrib/lite/kernels/internal:quantization_util", + "//tensorflow/contrib/lite/kernels/internal:reference_base", + "//tensorflow/contrib/lite/kernels/internal:tensor", + ], +) + +cc_library( + name = "all_ops_resolver", + srcs = [ + "all_ops_resolver.cc", + ], + hdrs = [ + "all_ops_resolver.h", + ], + copts = tflite_copts(), + deps = [ + ":micro_ops", + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + ], +) + +cc_library( + name = "test_utils", + srcs = [ + ], + hdrs = [ + "test_utils.h", + ], + copts = tflite_copts(), + deps = [ + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/core/api", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) + +tflite_micro_cc_test( + name = "depthwise_conv_test", + srcs = [ + "depthwise_conv_test.cc", + ], + deps = [ + ":all_ops_resolver", + ":test_utils", + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) + +tflite_micro_cc_test( + name = "fully_connected_test", + srcs = [ + "fully_connected_test.cc", + ], + deps = [ + ":all_ops_resolver", + ":test_utils", + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) + +tflite_micro_cc_test( + name = "softmax_test", + srcs = [ + "softmax_test.cc", + ], + deps = [ + ":all_ops_resolver", + ":test_utils", + "//tensorflow/contrib/lite/c:c_api_internal", + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + "//tensorflow/contrib/lite/experimental/micro/testing:micro_test", + ], +) diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc new file mode 100644 index 0000000000000000000000000000000000000000..bd0a37badb8ab1e739fdee9c8be9c3f800e80e2e --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" + +namespace tflite { +namespace ops { +namespace micro { + +TfLiteRegistration* Register_DEPTHWISE_CONV_2D(); +TfLiteRegistration* Micro_Register_DEPTHWISE_CONV_2D() { + return Register_DEPTHWISE_CONV_2D(); +} + +TfLiteRegistration* Register_FULLY_CONNECTED(); +TfLiteRegistration* Micro_Register_FULLY_CONNECTED() { + return Register_FULLY_CONNECTED(); +} + +TfLiteRegistration* Register_SOFTMAX(); +TfLiteRegistration* Micro_Register_SOFTMAX() { return Register_SOFTMAX(); } + +AllOpsResolver::AllOpsResolver() { + AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, + Micro_Register_DEPTHWISE_CONV_2D()); + AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Micro_Register_FULLY_CONNECTED(), + /* min_version */ 1, + /* max_version */ 2); + AddBuiltin(BuiltinOperator_SOFTMAX, Micro_Register_SOFTMAX()); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h new file mode 100644 index 0000000000000000000000000000000000000000..f836064a3f63443ff577e7ac7a8b791cbb2c24c5 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_ + +#include "tensorflow/contrib/lite/experimental/micro/compatibility.h" +#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h" + +namespace tflite { +namespace ops { +namespace micro { + +class AllOpsResolver : public MicroMutableOpResolver { + public: + AllOpsResolver(); + + private: + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f17263181982afdaa1941194b88d58f0ef0ca74 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc @@ -0,0 +1,208 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/padding.h" + +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace depthwise_conv { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kFilterTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +struct OpData { + TfLitePaddingValues padding; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, int width, + int height, int filter_width, int filter_height, + int out_width, int out_height, + const TfLiteType data_type, OpData* data) { + data->padding.height = ComputePadding(params->stride_height, 1, height, + filter_height, out_height); + data->padding.width = + ComputePadding(params->stride_width, 1, width, filter_width, out_width); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. + if (data_type != kTfLiteFloat32) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + const TfLiteTensor* bias = + GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + int exponent; + QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); + data->output_shift = -exponent; + CalculateActivationRangeUint8(params->activation, output, + &data->output_activation_min, + &data->output_activation_max); + } + return kTfLiteOk; +} + +} // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return nullptr; +} + +void Free(TfLiteContext* context, void* buffer) {} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.depth_multiplier = params->depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + tflite::reference_ops::DepthwiseConv( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), GetTensorData(filter), + GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); +} + +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { + const int32_t input_offset = -input->params.zero_point; + const int32_t filter_offset = -filter->params.zero_point; + const int32_t output_offset = output->params.zero_point; + + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.depth_multiplier = params->depth_multiplier; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = data->output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = -data->output_shift; + + tflite::reference_ops::DepthwiseConv( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), GetTensorData(filter), + GetTensorShape(bias), GetTensorData(bias), + GetTensorShape(output), GetTensorData(output)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + const TfLiteTensor* bias = + (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; + + const TfLiteType data_type = input->type; + int width = SizeOfDimension(input, 2); + int height = SizeOfDimension(input, 1); + int filter_width = SizeOfDimension(filter, 2); + int filter_height = SizeOfDimension(filter, 1); + int out_width = ComputeOutSize(params->padding, width, filter_width, + params->stride_width); + int out_height = ComputeOutSize(params->padding, height, filter_height, + params->stride_height); + OpData local_data_object; + OpData* data = &local_data_object; + TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, + filter_width, filter_height, out_width, + out_height, data_type, data)); + + // TODO(aselle): Consider whether float conv and quantized conv should be + // separate ops to avoid dispatch overhead here. + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + EvalFloat(context, node, params, data, input, filter, bias, output); + break; + case kTfLiteUInt8: + EvalQuantized(context, node, params, data, input, filter, bias, output); + break; + default: + context->ReportError(context, "Type %d not currently supported.", + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace depthwise_conv + +TfLiteRegistration* Register_DEPTHWISE_CONV_2D() { + static TfLiteRegistration r = {depthwise_conv::Init, depthwise_conv::Free, + depthwise_conv::Prepare, depthwise_conv::Eval}; + return &r; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..169899c471dd44399b4d8a479cecbbbd78ba1215 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc @@ -0,0 +1,406 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h" +#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h" +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { +namespace { + +void TestDepthwiseConvFloat(std::initializer_list input_dims_data, + std::initializer_list input_data, + std::initializer_list filter_dims_data, + std::initializer_list filter_data, + std::initializer_list bias_dims_data, + std::initializer_list bias_data, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(filter_data, filter_dims, "filter_tensor"), + CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + int input_depth = input_dims->data[3]; + int output_depth = filter_dims->data[3]; + int depth_mul = output_depth / input_depth; + TfLiteDepthwiseConvParams builtin_data = { + kTfLitePaddingValid, 1, 1, depth_mul, activation, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +void TestDepthwiseConvQuantized( + std::initializer_list input_dims_data, + std::initializer_list input_data, float input_min, float input_max, + std::initializer_list filter_dims_data, + std::initializer_list filter_data, float filter_min, + float filter_max, std::initializer_list bias_dims_data, + std::initializer_list bias_data, float bias_min, float bias_max, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, float output_min, + float output_max, TfLiteFusedActivation activation, uint8_t* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(filter_data, filter_dims, "filter_tensor", + filter_min, filter_max), + CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min, + bias_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + int input_depth = input_dims->data[3]; + int output_depth = filter_dims->data[3]; + int depth_mul = output_depth / input_depth; + TfLiteDepthwiseConvParams builtin_data = { + kTfLitePaddingValid, 1, 1, depth_mul, activation, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { + const int output_dims_count = 8; + float output_data[output_dims_count]; + tflite::testing::TestDepthwiseConvFloat( // + {4, 1, 3, 2, 2}, // Input shape. + { + 1, 2, 7, 8, // Input values. + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }, + {4, 1, 2, 2, 4}, // Filters shape. + { + 1, 2, 3, 4, // Filters values. + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }, + {1, 4}, // Bias shape. + { + 1, 2, 3, 4, // Bias values. + }, + { + 71, -34, 99, -20, // Expected results. + 91, -26, 127, -4, // + }, + {4, 1, 2, 1, 4}, // Output shape. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantized) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float filter_min = -63.5f; + const float filter_max = 64.0f; + const float bias_min = 0.0f; + const float bias_max = 64.0f * (1 << 24); + const float output_min = -127.0f; + const float output_max = 128.0f; + const int output_dims_count = 8; + uint8_t output_data[output_dims_count]; + + tflite::testing::TestDepthwiseConvQuantized( // + {4, 1, 3, 2, 2}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(7, input_min, input_max), + F2Q(8, input_min, input_max), + F2Q(3, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(9, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(5, input_min, input_max), + F2Q(6, input_min, input_max), + F2Q(11, input_min, input_max), + F2Q(12, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {4, 1, 2, 2, 4}, // Filter shape. + { + // Filter values. + F2Q(1, filter_min, filter_max), + F2Q(2, filter_min, filter_max), + F2Q(3, filter_min, filter_max), + F2Q(4, filter_min, filter_max), + F2Q(-9, filter_min, filter_max), + F2Q(10, filter_min, filter_max), + F2Q(-11, filter_min, filter_max), + F2Q(12, filter_min, filter_max), + F2Q(5, filter_min, filter_max), + F2Q(6, filter_min, filter_max), + F2Q(7, filter_min, filter_max), + F2Q(8, filter_min, filter_max), + F2Q(13, filter_min, filter_max), + F2Q(-14, filter_min, filter_max), + F2Q(15, filter_min, filter_max), + F2Q(-16, filter_min, filter_max), + }, + filter_min, filter_max, // Filter quantization range. + {1, 4}, // Bias shape. + { + // Bias values. + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + F2Q32(4, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(71, output_min, output_max), + F2Q(-34, output_min, output_max), + F2Q(99, output_min, output_max), + F2Q(-20, output_min, output_max), + F2Q(91, output_min, output_max), + F2Q(-26, output_min, output_max), + F2Q(127, output_min, output_max), + F2Q(-4, output_min, output_max), + }, + {4, 1, 2, 1, 4}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestRelu) { + const int output_dims_count = 8; + float output_data[output_dims_count]; + tflite::testing::TestDepthwiseConvFloat( // + {4, 1, 3, 2, 2}, // Input shape. + { + 1, 2, 7, 8, // Input values. + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }, + {4, 1, 2, 2, 4}, // Filters shape. + { + 1, 2, 3, 4, // Filters values. + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }, + {1, 4}, // Bias shape. + { + 1, 2, 3, 4, // Bias values. + }, + { + 71, 0, 99, 0, // Expected results. + 91, 0, 127, 0, // + }, + {4, 1, 2, 1, 4}, // Output shape. + kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestReluQuantized) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float filter_min = -63.5f; + const float filter_max = 64.0f; + const float bias_min = 0.0f; + const float bias_max = 64.0f * (1 << 24); + const float output_min = -127.0f; + const float output_max = 128.0f; + const int output_dims_count = 8; + uint8_t output_data[output_dims_count]; + + tflite::testing::TestDepthwiseConvQuantized( // + {4, 1, 3, 2, 2}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(7, input_min, input_max), + F2Q(8, input_min, input_max), + F2Q(3, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(9, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(5, input_min, input_max), + F2Q(6, input_min, input_max), + F2Q(11, input_min, input_max), + F2Q(12, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {4, 1, 2, 2, 4}, // Filter shape. + { + // Filter values. + F2Q(1, filter_min, filter_max), + F2Q(2, filter_min, filter_max), + F2Q(3, filter_min, filter_max), + F2Q(4, filter_min, filter_max), + F2Q(-9, filter_min, filter_max), + F2Q(10, filter_min, filter_max), + F2Q(-11, filter_min, filter_max), + F2Q(12, filter_min, filter_max), + F2Q(5, filter_min, filter_max), + F2Q(6, filter_min, filter_max), + F2Q(7, filter_min, filter_max), + F2Q(8, filter_min, filter_max), + F2Q(13, filter_min, filter_max), + F2Q(-14, filter_min, filter_max), + F2Q(15, filter_min, filter_max), + F2Q(-16, filter_min, filter_max), + }, + filter_min, filter_max, // Filter quantization range. + {1, 4}, // Bias shape. + { + // Bias values. + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + F2Q32(4, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(71, output_min, output_max), + F2Q(0, output_min, output_max), + F2Q(99, output_min, output_max), + F2Q(0, output_min, output_max), + F2Q(91, output_min, output_max), + F2Q(0, output_min, output_max), + F2Q(127, output_min, output_max), + F2Q(0, output_min, output_max), + }, + {4, 1, 2, 1, 4}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e9e54cafb8c91af1b42d6d23396495ecad6e602 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace fully_connected { +namespace { + +struct OpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + // The index of the temporary tensor where the quantized inputs are cached. + int input_quantized_index; +}; + +constexpr int kInputTensor = 0; +constexpr int kWeightsTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +TfLiteStatus CalculateOpData(TfLiteContext* context, + TfLiteFullyConnectedParams* params, + TfLiteType data_type, const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output, + OpData* data) { + TfLiteStatus status = kTfLiteOk; + if (data_type != kTfLiteFloat32) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + int exponent; + QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); + data->output_shift = -exponent; + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } + return status; +} + +} // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return nullptr; +} + +void Free(TfLiteContext* context, void* buffer) {} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, OpData* data, + const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + TfLiteTensor* output) { + const int32_t input_offset = -input->params.zero_point; + const int32_t filter_offset = -filter->params.zero_point; + const int32_t output_offset = output->params.zero_point; + + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = data->output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = -data->output_shift; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + +#define TF_LITE_FULLY_CONNECTED(output_data_type) \ + reference_ops::FullyConnected( \ + op_params, GetTensorShape(input), GetTensorData(input), \ + GetTensorShape(filter), GetTensorData(filter), \ + GetTensorShape(bias), GetTensorData(bias), \ + GetTensorShape(output), GetTensorData(output), \ + nullptr) + switch (output->type) { + case kTfLiteUInt8: + TF_LITE_FULLY_CONNECTED(uint8_t); + break; + case kTfLiteInt16: + TF_LITE_FULLY_CONNECTED(int16_t); + break; + default: + context->ReportError( + context, + "Quantized FullyConnected expects output data type uint8 or int16"); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + tflite::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + tflite::reference_ops::FullyConnected( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), GetTensorData(filter), + GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TfLiteType data_type = input->type; + OpData local_data_object; + OpData* data = &local_data_object; + TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input, + filter, bias, output, data)); + + switch (filter->type) { // Already know in/out types are same. + case kTfLiteFloat32: + return EvalFloat(context, node, params, data, input, filter, bias, + output); + case kTfLiteUInt8: + return EvalQuantized(context, node, params, data, input, filter, bias, + output); + + default: + context->ReportError(context, "Type %d not currently supported.", + filter->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace fully_connected + +TfLiteRegistration* Register_FULLY_CONNECTED() { + static TfLiteRegistration r = {fully_connected::Init, fully_connected::Free, + fully_connected::Prepare, + fully_connected::Eval}; + return &r; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..b42bf4c3bca75572dbf8e1907e7fb94be24d41bd --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc @@ -0,0 +1,643 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h" +#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h" +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { +namespace { + +void TestFullyConnectedFloat(std::initializer_list input_dims_data, + std::initializer_list input_data, + std::initializer_list weights_dims_data, + std::initializer_list weights_data, + std::initializer_list bias_dims_data, + std::initializer_list bias_data, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(weights_data, weights_dims, "weights_tensor"), + CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteFullyConnectedParams builtin_data = { + activation, + kTfLiteFullyConnectedWeightsFormatDefault, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +void TestFullyConnectedQuantized( + std::initializer_list input_dims_data, + std::initializer_list input_data, float input_min, float input_max, + std::initializer_list weights_dims_data, + std::initializer_list weights_data, float weights_min, + float weights_max, std::initializer_list bias_dims_data, + std::initializer_list bias_data, float bias_min, float bias_max, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, float output_min, + float output_max, TfLiteFusedActivation activation, uint8_t* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(weights_data, weights_dims, "weights_tensor", + weights_min, weights_max), + CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min, + bias_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteFullyConnectedParams builtin_data = { + activation, + kTfLiteFullyConnectedWeightsFormatDefault, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( // + {2, 2, 10}, // Input shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }, + {2, 3, 10}, // Weights shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }, + {1, 3}, // Bias shape. + { + 1, 2, 3, // Bias values. + }, + { + 24, 25, 26, 58, 59, 60, // Expected results. + }, + {2, 2, 3}, // Output shape. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest2) { + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( // + {2, 2, 2}, // Input shape. + { + 1, 2, // b = 0 + 2, 1, // b = 1 + }, + {2, 1, 2}, // Weights shape. + { + 2, 4, // u = 0 + }, + {1, 1}, // Bias shape. + { + 1, // Bias values. + }, + { + 11, 9, // Expected results. + }, + {2, 2, 1}, // Output shape. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestRelu) { + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( // + {2, 2, 10}, // Input shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }, + {2, 3, 10}, // Weights shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }, + {1, 3}, // Bias shape. + { + 1, -2, 3, // Bias values. + }, + { + 24, 0, 26, 58, 0, 60, // Expected results. + }, + {2, 2, 3}, // Output shape. + kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantized) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_min = 0.0f; + const float bias_max = 64.0f * (1 << 24); + const float output_min = -127.0f; + const float output_max = 128.0f; + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( // + {2, 2, 10}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {2, 3, 10}, // Weights shape. + { + // Weight values. + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }, + weights_min, weights_max, // Weights quantization range. + {1, 3}, // Bias shape. + { + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(24, output_min, output_max), + F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), + F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), + F2Q(60, output_min, output_max), + }, + {2, 2, 3}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedRelu) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_min = 0.0f; + const float bias_max = 64.0f * (1 << 24); + const float output_min = -127.0f; + const float output_max = 128.0f; + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( // + {2, 2, 10}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {2, 3, 10}, // Weights shape. + { + // Weight values. + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(-1, weights_min, weights_max), F2Q(-2, weights_min, weights_max), + F2Q(-3, weights_min, weights_max), F2Q(-4, weights_min, weights_max), + F2Q(-5, weights_min, weights_max), F2Q(-6, weights_min, weights_max), + F2Q(-7, weights_min, weights_max), F2Q(-8, weights_min, weights_max), + F2Q(-9, weights_min, weights_max), F2Q(-10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }, + weights_min, weights_max, // Weights quantization range. + {1, 3}, // Bias shape. + { + F2Q32(1, bias_min, bias_max), + F2Q32(0, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(24, output_min, output_max), + F2Q(0, output_min, output_max), + F2Q(26, output_min, output_max), + F2Q(58, output_min, output_max), + F2Q(0, output_min, output_max), + F2Q(60, output_min, output_max), + }, + {2, 2, 3}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedOutputMultiplierGreaterThan1) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -127.0f; + const float weights_max = 128.0f; + const float bias_min = 0.0f; + const float bias_max = 256.0f * (1 << 24); + const float output_min = -63.5f; + const float output_max = 64.0f; + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( // + {2, 2, 10}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {2, 3, 10}, // Weights shape. + { + // Weight values. + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }, + weights_min, weights_max, // Weights quantization range. + {1, 3}, // Bias shape. + { + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(24, output_min, output_max), + F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), + F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), + F2Q(60, output_min, output_max), + }, + {2, 2, 3}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInput) { + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( // + {4, 1, 1, 5, 1}, // Input shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }, + {2, 3, 10}, // Weights shape. + { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }, + {1, 3}, // Bias shape. + { + 1, 2, 3, // Bias values. + }, + { + 24, 25, 26, 58, 59, 60, // Expected results. + }, + {2, 2, 3}, // Output shape. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInputQuantized) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_min = 0.0f; + const float bias_max = 64.0f * (1 << 24); + const float output_min = -127.0f; + const float output_max = 128.0f; + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( // + {4, 1, 1, 5, 1}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {2, 3, 10}, // Weights shape. + { + // Weight values. + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }, + weights_min, weights_max, // Weights quantization range. + {1, 3}, // Bias shape. + { + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(24, output_min, output_max), + F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), + F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), + F2Q(60, output_min, output_max), + }, + {2, 2, 3}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedOutputMultiplierGreaterThan1) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -127.0f; + const float weights_max = 128.0f; + const float bias_min = 0.0f; + const float bias_max = 256.0f * (1 << 24); + const float output_min = -63.5f; + const float output_max = 64.0f; + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( // + {4, 1, 1, 5, 1}, // Input shape. + { + // Input values. + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }, + input_min, input_max, // Input quantization range. + {2, 3, 10}, // Weights shape. + { + // Weight values. + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }, + weights_min, weights_max, // Weights quantization range. + {1, 3}, // Bias shape. + { + F2Q32(1, bias_min, bias_max), + F2Q32(2, bias_min, bias_max), + F2Q32(3, bias_min, bias_max), + }, + bias_min, bias_max, // Bias quantization range. + { + // Expected results. + F2Q(24, output_min, output_max), + F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), + F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), + F2Q(60, output_min, output_max), + }, + {2, 2, 3}, // Output shape. + output_min, output_max, // Output quantization range. + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc new file mode 100644 index 0000000000000000000000000000000000000000..a4019a067c563cac25d9918e4bdf75913bdfa3d6 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc @@ -0,0 +1,213 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace activations { +namespace { + +struct OpData { + int32_t input_multiplier = 0; + int input_left_shift = 0; + int32_t input_range_radius = 0; + int diff_min = 0; +}; + +TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteTensor* output, + const TfLiteSoftmaxParams* params, + OpData* data) { + if (input->type == kTfLiteUInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + TF_LITE_ENSURE(context, output->params.scale == 1. / 256); + + static const int kScaledDiffIntegerBits = 5; + + tflite::PreprocessSoftmaxScaling( + params->beta, input->params.scale, kScaledDiffIntegerBits, + &data->input_multiplier, &data->input_left_shift); + data->diff_min = -1.0 * tflite::CalculateInputRadius( + kScaledDiffIntegerBits, data->input_left_shift); + } + return kTfLiteOk; +} + +} // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return nullptr; +} + +void Free(TfLiteContext* context, void* buffer) {} + +TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +// Takes a 1D tensor and performs softmax along it. +void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + const int input_size = input->dims->data[0]; + tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta, + output->data.f); +} + +// Takes a 2D tensor and perform softmax along the last dimension. +void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + tflite::reference_ops::Softmax(input->data.f, input_size, batch_size, + params->beta, output->data.f); +} + +void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + // TODO(ahentz): this is arguably a dirty trick. Since the implementation + // always traverses the last dimension of a 4D tensor, we will pretend our 1D + // tensor is 4D in a special way. We will convert a (Y) shape into a (1, + // 1, 1, Y) shape. + const int input_size = input->dims->data[0]; + const int32_t shape_data[4] = {1, 1, 1, input_size}; + RuntimeShape shape(4, shape_data); + SoftmaxParams op_params; + op_params.input_multiplier = data->input_multiplier; + op_params.input_left_shift = data->input_left_shift; + op_params.diff_min = data->diff_min; + tflite::reference_ops::Softmax(op_params, shape, + GetTensorData(input), shape, + GetTensorData(output)); +} + +void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + // TODO(ahentz): this is arguably a dirty trick. Since the implementation + // always traverses the last dimension of a 4D tensor, we will pretend our 2D + // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X, + // 1, 1, Y) shape. + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + const int32_t shape_data[4] = {batch_size, 1, 1, input_size}; + RuntimeShape shape(4, shape_data); + SoftmaxParams op_params; + op_params.input_multiplier = data->input_multiplier; + op_params.input_left_shift = data->input_left_shift; + op_params.diff_min = data->diff_min; + tflite::reference_ops::Softmax(op_params, shape, + GetTensorData(input), shape, + GetTensorData(output)); +} + +// Takes a 4D tensor and perform softmax along the forth dimension. +void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + SoftmaxParams op_params; + op_params.beta = params->beta; + tflite::reference_ops::Softmax( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +} + +void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + SoftmaxParams op_params; + op_params.input_multiplier = data->input_multiplier; + op_params.input_left_shift = data->input_left_shift; + op_params.diff_min = data->diff_min; + tflite::reference_ops::Softmax( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +} + +TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + + OpData local_data_object; + OpData* data = &local_data_object; + TF_LITE_ENSURE_STATUS( + CalculateSoftmaxOpData(context, input, output, params, data)); + + // TODO(ahentz): consider an implementation that works for many (all?) + // dimensions. + switch (input->type) { + case kTfLiteFloat32: { + if (NumDimensions(input) == 1) { + Softmax1DFloat(input, output, params); + return kTfLiteOk; + } + if (NumDimensions(input) == 2) { + Softmax2DFloat(input, output, params); + return kTfLiteOk; + } + if (NumDimensions(input) == 4) { + Softmax4DFloat(input, output, params); + return kTfLiteOk; + } + context->ReportError( + context, "Only 1D, 2D and 4D tensors supported currently, got %dD.", + NumDimensions(input)); + return kTfLiteError; + } + case kTfLiteUInt8: { + if (NumDimensions(input) == 1) { + Softmax1DQuantized(input, output, params, data); + return kTfLiteOk; + } + if (NumDimensions(input) == 2) { + Softmax2DQuantized(input, output, params, data); + return kTfLiteOk; + } + if (NumDimensions(input) == 4) { + Softmax4DQuantized(input, output, params, data); + return kTfLiteOk; + } + context->ReportError( + context, "Only 2D and 4D tensors supported currently, got %dD.", + NumDimensions(input)); + return kTfLiteError; + } + default: + context->ReportError( + context, "Only float32 and uint8_t supported currently, got %d.", + input->type); + return kTfLiteError; + } +} +} // namespace activations + +TfLiteRegistration* Register_SOFTMAX() { + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::SoftmaxPrepare, + activations::SoftmaxEval}; + return &r; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..694456d8ace5182578f9b59c2de8bbad0447b4ee --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc @@ -0,0 +1,220 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h" +#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h" +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { +namespace { + +void TestSoftmaxFloat(std::initializer_list input_dims_data, + std::initializer_list input_data, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteSoftmaxParams builtin_data = {1.0f}; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +void TestSoftmaxQuantized(std::initializer_list input_dims_data, + std::initializer_list input_data, + float input_min, float input_max, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + float output_min, float output_max, + uint8_t* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteSoftmaxParams builtin_data = {1.0f}; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { + const int output_dims_count = 10; + float output_data[output_dims_count]; + tflite::testing::TestSoftmaxFloat( // + {2, 2, 5}, // Input shape. + { + 1.0, 2.0, 3.0, 4.0, 5.0, // b = 0 + -1.0, -2.0, -3.0, -4.0, -5.0, // b = 0 + }, + { + // Expected results. + 0.011656231, + 0.031684921, + 0.086128544, + 0.234121657, + 0.636408647, + 0.636408647, + 0.234121657, + 0.086128544, + 0.031684921, + 0.011656231, + }, + {2, 2, 5}, // Output shape. + output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantized) { + using tflite::testing::F2Q; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float output_min = 0.0f; + const float output_max = (255.0f / 256.0f); + const int output_dims_count = 5; + uint8_t output_data[output_dims_count]; + tflite::testing::TestSoftmaxQuantized( // + {2, 1, 5}, // Input shape. + { + F2Q(1.0, input_min, input_max), + F2Q(2.0, input_min, input_max), + F2Q(3.0, input_min, input_max), + F2Q(4.0, input_min, input_max), + F2Q(5.0, input_min, input_max), + }, + input_min, input_max, // Input quantized range. + { + // Expected results. + F2Q(0.011656231, output_min, output_max), + F2Q(0.031684921, output_min, output_max), + F2Q(0.086128544, output_min, output_max), + F2Q(0.234121657, output_min, output_max), + F2Q(0.636408647, output_min, output_max), + }, + {2, 1, 5}, // Output shape. + output_min, output_max, // Output quantized range. + output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..789a48ece8bd68544649fb05548355cb796ccabb --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h @@ -0,0 +1,170 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_ + +#include +#include +#include + +#include "tensorflow/contrib/lite/c/builtin_op_data.h" +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/core/api/error_reporter.h" +#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h" +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { + +// How many elements are in the array with this shape. +inline int ElementCount(const TfLiteIntArray& dims) { + int result = 1; + for (int i = 0; i < dims.size; ++i) { + result *= dims.data[i]; + } + return result; +} + +// Wrapper to forward kernel errors to the interpreter's error reporter. +inline void ReportOpError(struct TfLiteContext* context, const char* format, + ...) { + ErrorReporter* error_reporter = static_cast(context->impl_); + va_list args; + va_start(args, format); + error_reporter->Report(format, args); + va_end(args); +} + +// Derives the quantization scaling factor from a min and max range. +template +inline float ScaleFromMinMax(const float min, const float max) { + return (max - min) / ((std::numeric_limits::max() * 1.0) - + std::numeric_limits::min()); +} + +// Derives the quantization zero point from a min and max range. +template +inline int ZeroPointFromMinMax(const float min, const float max) { + return static_cast((-min / ScaleFromMinMax(min, max)) + 0.5f); +} + +// Converts a float value into an unsigned eight-bit quantized value. +inline uint8_t F2Q(const float value, const float min, const float max) { + int32_t result = ZeroPointFromMinMax(min, max) + + (value / ScaleFromMinMax(min, max)) + 0.5f; + if (result < 0) { + result = 0; + } + if (result > 256) { + result = 256; + } + return result; +} + +// Converts a float value into a signed thirty-two-bit quantized value. +inline uint8_t F2Q32(const float value, const float min, const float max) { + return static_cast((value - ZeroPointFromMinMax(min, max)) / + ScaleFromMinMax(min, max)); +} + +inline void PopulateContext(TfLiteTensor* tensors, int tensors_size, + TfLiteContext* context) { + context->tensors_size = tensors_size; + context->tensors = tensors; + context->impl_ = static_cast(micro_test::reporter); + context->GetExecutionPlan = nullptr; + context->ResizeTensor = nullptr; + context->ReportError = ReportOpError; + context->AddTensors = nullptr; + context->GetNodeAndRegistration = nullptr; + context->ReplaceSubgraphsWithDelegateKernels = nullptr; + context->recommended_num_threads = 1; + context->GetExternalContext = nullptr; + context->SetExternalContext = nullptr; +} + +inline TfLiteIntArray* IntArrayFromInts(const int* int_array) { + return const_cast( + reinterpret_cast(int_array)); +} + +inline TfLiteIntArray* IntArrayFromInitializer( + std::initializer_list int_initializer) { + return IntArrayFromInts(int_initializer.begin()); +} + +inline TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims, + const char* name) { + const size_t bytes = ElementCount(*dims) * sizeof(float); + return { + kTfLiteFloat32, {const_cast(reinterpret_cast(data))}, + dims, {}, + kTfLiteMemNone, bytes, + nullptr, name}; +} + +inline TfLiteTensor CreateFloatTensor(std::initializer_list data, + TfLiteIntArray* dims, const char* name) { + return CreateFloatTensor(data.begin(), dims, name); +} + +inline TfLiteTensor CreateQuantizedTensor(const uint8_t* data, + TfLiteIntArray* dims, + const char* name, float min, + float max) { + const size_t bytes = ElementCount(*dims) * sizeof(uint8_t); + const TfLiteQuantizationParams q_params = { + ScaleFromMinMax(min, max), + ZeroPointFromMinMax(min, max)}; + return { + kTfLiteUInt8, {const_cast(reinterpret_cast(data))}, + dims, q_params, + kTfLiteMemNone, bytes, + nullptr, name}; +} + +inline TfLiteTensor CreateQuantizedTensor(std::initializer_list data, + TfLiteIntArray* dims, + const char* name, float min, + float max) { + return CreateQuantizedTensor(data.begin(), dims, name, min, max); +} + +inline TfLiteTensor CreateQuantized32Tensor(const int32_t* data, + TfLiteIntArray* dims, + const char* name, float min, + float max) { + const size_t bytes = ElementCount(*dims) * sizeof(int32_t); + const TfLiteQuantizationParams q_params = { + ScaleFromMinMax(min, max), + ZeroPointFromMinMax(min, max)}; + return { + kTfLiteUInt8, {const_cast(reinterpret_cast(data))}, + dims, q_params, + kTfLiteMemNone, bytes, + nullptr, name}; +} + +inline TfLiteTensor CreateQuantized32Tensor(std::initializer_list data, + TfLiteIntArray* dims, + const char* name, float min, + float max) { + return CreateQuantized32Tensor(data.begin(), dims, name, min, max); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc new file mode 100644 index 0000000000000000000000000000000000000000..99dd8836611c287b7f76104c29c12a73d219ccb3 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" + +#ifdef TF_LITE_MCU_DEBUG_LOG +#include +#else // TF_LITE_MCU_DEBUG_LOG +#include +#include +void DebugLog(const char* s) { fprintf(stderr, "%s", s); } +void DebugLogInt32(int32_t i) { fprintf(stderr, "%d", i); } +void DebugLogUInt32(uint32_t i) { fprintf(stderr, "%d", i); } +void DebugLogHex(uint32_t i) { fprintf(stderr, "0x%8x", i); } +void DebugLogFloat(float i) { fprintf(stderr, "%f", i); } +#endif // TF_LITE_MCU_DEBUG_LOG + +namespace tflite { +namespace { +void DebugLogPrintf(const char* format, va_list args) { + const int output_cache_size = 64; + char output_cache[output_cache_size + 1]; + int output_cache_index = 0; + const char* current = format; + while (*current != 0) { + if (*current == '%') { + const char next = *(current + 1); + if ((next == 'd') || (next == 's')) { + current += 1; + if (output_cache_index > 0) { + output_cache[output_cache_index] = 0; + DebugLog(output_cache); + output_cache_index = 0; + } + if (next == 'd') { + DebugLogInt32(va_arg(args, int)); + } else if (next == 's') { + DebugLog(va_arg(args, char*)); + } + } + } else { + output_cache[output_cache_index] = *current; + output_cache_index += 1; + } + if (output_cache_index >= output_cache_size) { + output_cache[output_cache_index] = 0; + DebugLog(output_cache); + output_cache_index = 0; + } + current += 1; + } + if (output_cache_index > 0) { + output_cache[output_cache_index] = 0; + DebugLog(output_cache); + output_cache_index = 0; + } + DebugLog("\n"); +} +} // namespace + +int MicroErrorReporter::Report(const char* format, va_list args) { + DebugLogPrintf(format, args); + return 0; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h new file mode 100644 index 0000000000000000000000000000000000000000..33e54f7990af6cff4f8706d2889c335087581af4 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_ + +#include "tensorflow/contrib/lite/core/api/error_reporter.h" +#include "tensorflow/contrib/lite/experimental/micro/compatibility.h" + +namespace tflite { + +class MicroErrorReporter : public ErrorReporter { + public: + ~MicroErrorReporter() {} + int Report(const char* format, va_list args) override; + + private: + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc similarity index 53% rename from tensorflow/compiler/xla/service/gpu/gpu_options.h rename to tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc index 498d4a94955cb2c50e0b165f28ded44ac1c0bfff..ef3c32050c0e826c005f185553974170da7e486a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.h +++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc @@ -13,21 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ - -#include "tensorflow/compiler/xla/service/hlo_module_config.h" - -// Helper functions for querying options that are specific to the GPU backend. - -namespace xla { -namespace gpu { - -// Returns true if we should use heuristics to assign convolution layouts, as -// opposed to always assigning NCHW. -bool ConvUseLayoutHeuristic(const HloModuleConfig& config); - -} // namespace gpu -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ +#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" + +int main(int argc, char** argv) { + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = µ_error_reporter; + error_reporter->Report("Number: %d", 42); + error_reporter->Report("Badly-formed format string %"); + error_reporter->Report("Another % badly-formed %% format string"); + error_reporter->Report("~~~%s~~~", "ALL TESTS PASSED"); +} diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc new file mode 100644 index 0000000000000000000000000000000000000000..0f38991bb0ef3d0134b4d9a1eb6e148a140fe6f9 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc @@ -0,0 +1,310 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" + +#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h" +#include "tensorflow/contrib/lite/experimental/micro/compatibility.h" + +namespace tflite { +namespace { +const int kStackDataAllocatorSize = 128; +class StackDataAllocator : public BuiltinDataAllocator { + public: + void* Allocate(size_t size) override { + if (size > kStackDataAllocatorSize) { + return nullptr; + } else { + return data_; + } + } + void Deallocate(void* data) override { + // Do nothing. + } + + private: + uint8_t data_[kStackDataAllocatorSize]; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +const char* OpNameFromRegistration(const TfLiteRegistration* registration) { + if (registration->builtin_code == BuiltinOperator_CUSTOM) { + return registration->custom_name; + } else { + return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code)); + } +} + +void ReportOpError(struct TfLiteContext* context, const char* format, ...) { + MicroInterpreter* interpreter = + static_cast(context->impl_); + va_list args; + va_start(args, format); + interpreter->error_reporter()->Report(format, args); + va_end(args); +} + +} // namespace + +MicroInterpreter::MicroInterpreter(const Model* model, + const OpResolver& op_resolver, + SimpleTensorAllocator* tensor_allocator, + ErrorReporter* error_reporter) + : model_(model), + op_resolver_(op_resolver), + tensor_allocator_(tensor_allocator), + error_reporter_(error_reporter), + initialization_status_(kTfLiteOk) { + const flatbuffers::Vector>* buffers = + model->buffers(); + auto* subgraphs = model->subgraphs(); + if (subgraphs->size() != 1) { + error_reporter->Report("Only 1 subgraph is currently supported.\n"); + initialization_status_ = kTfLiteError; + return; + } + subgraph_ = (*subgraphs)[0]; + tensors_ = subgraph_->tensors(); + operators_ = subgraph_->operators(); + + context_.tensors_size = tensors_->Length(); + context_.tensors = + reinterpret_cast(tensor_allocator_->AllocateMemory( + sizeof(TfLiteTensor) * context_.tensors_size)); + for (int i = 0; i < subgraph_->inputs()->Length(); ++i) { + const int tensor_index = subgraph_->inputs()->Get(i); + const auto* tensor = tensors_->Get(tensor_index); + initialization_status_ = tensor_allocator_->AllocateTensor( + *tensor, 0, operators_->Length(), buffers, error_reporter, + &context_.tensors[tensor_index]); + if (initialization_status_ != kTfLiteOk) { + return; + } + } + + int* first_created = reinterpret_cast( + tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length())); + int* last_used = reinterpret_cast( + tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length())); + for (int i = 0; i < tensors_->Length(); ++i) { + first_created[i] = -1; + last_used[i] = -1; + } + + for (int i = (operators_->Length() - 1); i >= 0; --i) { + const auto* op = operators_->Get(i); + for (int n = 0; n < op->inputs()->Length(); ++n) { + const int tensor_index = op->inputs()->Get(n); + if ((last_used[tensor_index] == -1) || (last_used[tensor_index] < i)) { + last_used[tensor_index] = i; + } + } + for (int n = 0; n < op->outputs()->Length(); ++n) { + const int tensor_index = op->outputs()->Get(n); + const int create_before = i; + int destroy_after = last_used[tensor_index]; + if (destroy_after == -1) { + destroy_after = operators_->Length(); + } + const auto* tensor = tensors_->Get(tensor_index); + if (!tensor->is_variable()) { + initialization_status_ = tensor_allocator_->AllocateTensor( + *tensor, create_before, destroy_after, buffers, error_reporter, + &context_.tensors[tensor_index]); + if (initialization_status_ != kTfLiteOk) { + return; + } + first_created[tensor_index] = i; + } + } + } + + for (int i = 0; i < tensors_->Length(); ++i) { + const auto* tensor = tensors_->Get(i); + const bool is_read_only = (first_created[i] == -1) && (last_used[i] != -1); + if (tensor->is_variable() || is_read_only) { + initialization_status_ = tensor_allocator_->AllocateTensor( + *tensor, 0, operators_->Length(), buffers, error_reporter, + &context_.tensors[i]); + if (initialization_status_ != kTfLiteOk) { + return; + } + } + } + context_.impl_ = static_cast(this); + context_.GetExecutionPlan = nullptr; + context_.ResizeTensor = nullptr; + context_.ReportError = ReportOpError; + context_.AddTensors = nullptr; + context_.GetNodeAndRegistration = nullptr; + context_.ReplaceSubgraphsWithDelegateKernels = nullptr; + context_.recommended_num_threads = 1; + context_.GetExternalContext = nullptr; + context_.SetExternalContext = nullptr; +} + +TfLiteStatus MicroInterpreter::Invoke() { + if (initialization_status_ != kTfLiteOk) { + error_reporter_->Report("Invoke() called after initialization failed\n"); + return kTfLiteError; + } + TfLiteStatus status = kTfLiteOk; + auto opcodes = model_->operator_codes(); + for (int i = 0; i < operators_->Length(); ++i) { + const auto* op = operators_->Get(i); + int index = op->opcode_index(); + if (index < 0 || index >= opcodes->size()) { + error_reporter_->Report("Missing registration for opcode_index %d\n", + index); + return kTfLiteError; + } + auto opcode = (*opcodes)[index]; + const TfLiteRegistration* registration = nullptr; + status = GetRegistrationFromOpCode(opcode, op_resolver_, error_reporter_, + ®istration); + if (status != kTfLiteOk) { + return status; + } + if (registration == nullptr) { + error_reporter_->Report("Skipping op for opcode_index %d\n", index); + return kTfLiteError; + } + BuiltinOperator op_type = + static_cast(registration->builtin_code); + + if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) { + error_reporter_->Report( + "Found builtin operator %s with custom options.\n", + EnumNameBuiltinOperator(op_type)); + } + StackDataAllocator stack_data_allocator; + const char* custom_data = nullptr; + size_t custom_data_size = 0; + unsigned char* builtin_data = nullptr; + if (op->custom_options()) { + custom_data = reinterpret_cast(op->custom_options()->data()); + custom_data_size = op->custom_options()->size(); + } else { + TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_, + &stack_data_allocator, + (void**)(&builtin_data))); + } + + const char* init_data; + size_t init_data_size; + if (registration->builtin_code == BuiltinOperator_CUSTOM) { + init_data = custom_data; + init_data_size = custom_data_size; + } else { + init_data = reinterpret_cast(builtin_data); + init_data_size = 0; + } + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context_, init_data, init_data_size); + } + + const int kMaxInputs = 16; + int inputs_data[kMaxInputs + 1]; + TfLiteIntArray* inputs_array = + reinterpret_cast(inputs_data); + if (op->inputs()->Length() >= kMaxInputs) { + error_reporter_->Report("Too many inputs (%d)\n", op->inputs()->Length()); + return kTfLiteError; + } + inputs_array->size = op->inputs()->Length(); + for (int n = 0; n < op->inputs()->Length(); ++n) { + inputs_array->data[n] = op->inputs()->Get(n); + } + + const int kMaxOutputs = 16; + int outputs_data[kMaxOutputs + 1]; + TfLiteIntArray* outputs_array = + reinterpret_cast(outputs_data); + if (op->outputs()->Length() >= kMaxOutputs) { + error_reporter_->Report("Too many outputs (%d)\n", + op->outputs()->Length()); + return kTfLiteError; + } + outputs_array->size = op->outputs()->Length(); + for (int n = 0; n < op->outputs()->Length(); ++n) { + outputs_array->data[n] = op->outputs()->Get(n); + } + + const int kMaxTemporaries = 16; + int temporaries_data[kMaxTemporaries + 1]; + TfLiteIntArray* temporaries_array = + reinterpret_cast(temporaries_data); + temporaries_array->size = 0; + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(builtin_data); + node.custom_initial_data = custom_data; + node.custom_initial_data_size = custom_data_size; + node.delegate = nullptr; + if (registration->prepare) { + TfLiteStatus prepare_status = registration->prepare(&context_, &node); + if (prepare_status != kTfLiteOk) { + error_reporter_->Report( + "Node %s (number %d) failed to prepare with status %d", + OpNameFromRegistration(registration), i, prepare_status); + return kTfLiteError; + } + } + + if (registration->invoke) { + TfLiteStatus invoke_status = registration->invoke(&context_, &node); + if (invoke_status != kTfLiteOk) { + error_reporter_->Report( + "Node %s (number %d) failed to invoke with status %d", + OpNameFromRegistration(registration), i, invoke_status); + return kTfLiteError; + } + } + + if (registration->free) { + registration->free(&context_, user_data); + } + } + return status; +} + +TfLiteTensor* MicroInterpreter::input(int index) { + const flatbuffers::Vector* inputs = subgraph_->inputs(); + const size_t length = inputs->Length(); + if ((index < 0) || (index >= length)) { + error_reporter_->Report("Input index %d out of range (length is %d)", index, + length); + return nullptr; + } + return &(context_.tensors[inputs->Get(index)]); +} + +TfLiteTensor* MicroInterpreter::output(int index) { + const flatbuffers::Vector* outputs = subgraph_->outputs(); + const size_t length = outputs->Length(); + if ((index < 0) || (index >= outputs->Length())) { + error_reporter_->Report("Output index %d out of range (length is %d)", + index, length); + return nullptr; + } + return &(context_.tensors[outputs->Get(index)]); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h new file mode 100644 index 0000000000000000000000000000000000000000..a88514cde849595244d36a31900e6d1c2ae1714b --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h @@ -0,0 +1,71 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_ + +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/core/api/error_reporter.h" +#include "tensorflow/contrib/lite/core/api/op_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { + +class MicroInterpreter { + public: + // The lifetime of the model, op resolver, allocator, and error reporter must + // be at least as long as that of the interpreter object, since the + // interpreter may need to access them at any time. This means that you should + // usually create them with the same scope as each other, for example having + // them all allocated on the stack as local variables through a top-level + // function. + // The interpreter doesn't do any deallocation of any of the pointed-to + // objects, ownership remains with the caller. + MicroInterpreter(const Model* model, const OpResolver& op_resolver, + SimpleTensorAllocator* tensor_allocator, + ErrorReporter* error_reporter); + + TfLiteStatus Invoke(); + + size_t tensors_size() const { return context_.tensors_size; } + TfLiteTensor* tensor(int tensor_index); + + TfLiteTensor* input(int index); + size_t inputs_size() const { return subgraph_->inputs()->Length(); } + + TfLiteTensor* output(int index); + size_t outputs_size() const { return subgraph_->outputs()->Length(); } + + TfLiteStatus initialization_status() const { return initialization_status_; } + + ErrorReporter* error_reporter() { return error_reporter_; } + + private: + const Model* model_; + const OpResolver& op_resolver_; + SimpleTensorAllocator* tensor_allocator_; + ErrorReporter* error_reporter_; + + TfLiteStatus initialization_status_; + const flatbuffers::Vector>* tensors_; + const flatbuffers::Vector>* operators_; + TfLiteContext context_; + + const SubGraph* subgraph_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..251e5f72037717f74bc3472b69144cff299f0668 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc @@ -0,0 +1,197 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" + +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace { +void* MockInit(TfLiteContext* context, const char* buffer, size_t length) { + // Do nothing. + return nullptr; +} + +void MockFree(TfLiteContext* context, void* buffer) { + // Do nothing. +} + +TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = &context->tensors[node->inputs->data[0]]; + const int32_t* input_data = input->data.i32; + const TfLiteTensor* weight = &context->tensors[node->inputs->data[1]]; + const uint8_t* weight_data = weight->data.uint8; + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + int32_t* output_data = output->data.i32; + output_data[0] = input_data[0] + weight_data[0]; + return kTfLiteOk; +} + +class MockOpResolver : public OpResolver { + public: + const TfLiteRegistration* FindOp(BuiltinOperator op, + int version) const override { + return nullptr; + } + const TfLiteRegistration* FindOp(const char* op, int version) const override { + if (strcmp(op, "mock_custom") == 0) { + static TfLiteRegistration r = {MockInit, MockFree, MockPrepare, + MockInvoke}; + return &r; + } else { + return nullptr; + } + } +}; + +class StackAllocator : public flatbuffers::Allocator { + public: + StackAllocator() : data_(data_backing_), data_size_(0) {} + + uint8_t* allocate(size_t size) override { + if ((data_size_ + size) > kStackAllocatorSize) { + // TODO(petewarden): Add error reporting beyond returning null! + return nullptr; + } + uint8_t* result = data_; + data_ += size; + data_size_ += size; + return result; + } + + void deallocate(uint8_t* p, size_t) override {} + + static StackAllocator& instance() { + // Avoid using true dynamic memory allocation to be portable to bare metal. + static char inst_memory[sizeof(StackAllocator)]; + static StackAllocator* inst = new (inst_memory) StackAllocator; + return *inst; + } + + static constexpr int kStackAllocatorSize = 4096; + + private: + uint8_t data_backing_[kStackAllocatorSize]; + uint8_t* data_; + int data_size_; +}; + +const Model* BuildMockModel() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder builder(StackAllocator::kStackAllocatorSize, + &StackAllocator::instance()); + constexpr size_t buffer_data_size = 1; + const uint8_t buffer_data[buffer_data_size] = {21}; + constexpr size_t buffers_size = 2; + const Offset buffers[buffers_size] = { + CreateBuffer(builder), + CreateBuffer(builder, + builder.CreateVector(buffer_data, buffer_data_size))}; + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {1}; + constexpr size_t tensors_size = 3; + const Offset tensors[tensors_size] = { + CreateTensor(builder, + builder.CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder.CreateString("test_input_tensor"), 0, false), + CreateTensor(builder, + builder.CreateVector(tensor_shape, tensor_shape_size), + TensorType_UINT8, 1, + builder.CreateString("test_weight_tensor"), 0, false), + CreateTensor(builder, + builder.CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder.CreateString("test_output_tensor"), 0, false), + }; + constexpr size_t inputs_size = 1; + const int32_t inputs[inputs_size] = {0}; + constexpr size_t outputs_size = 1; + const int32_t outputs[outputs_size] = {2}; + constexpr size_t operator_inputs_size = 2; + const int32_t operator_inputs[operator_inputs_size] = {0, 1}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {2}; + constexpr size_t operators_size = 1; + const Offset operators[operators_size] = {CreateOperator( + builder, 0, builder.CreateVector(operator_inputs, operator_inputs_size), + builder.CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE)}; + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(builder, builder.CreateVector(tensors, tensors_size), + builder.CreateVector(inputs, inputs_size), + builder.CreateVector(outputs, outputs_size), + builder.CreateVector(operators, operators_size), + builder.CreateString("test_subgraph"))}; + constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(builder, BuiltinOperator_CUSTOM, "mock_custom", + 0)}; + const Offset model_offset = CreateModel( + builder, 0, builder.CreateVector(operator_codes, operator_codes_size), + builder.CreateVector(subgraphs, subgraphs_size), + builder.CreateString("test_model"), + builder.CreateVector(buffers, buffers_size)); + FinishModelBuffer(builder, model_offset); + void* model_pointer = builder.GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +} // namespace +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestInterpreter) { + const tflite::Model* model = tflite::BuildMockModel(); + TF_LITE_MICRO_EXPECT_NE(nullptr, model); + tflite::MockOpResolver mock_resolver; + constexpr size_t allocator_buffer_size = 1024; + uint8_t allocator_buffer[allocator_buffer_size]; + tflite::SimpleTensorAllocator simple_tensor_allocator(allocator_buffer, + allocator_buffer_size); + tflite::MicroInterpreter interpreter( + model, mock_resolver, &simple_tensor_allocator, micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size()); + + TfLiteTensor* input = interpreter.input(0); + TF_LITE_MICRO_EXPECT_NE(nullptr, input); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input->type); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, input->bytes); + TF_LITE_MICRO_EXPECT_NE(nullptr, input->data.i32); + input->data.i32[0] = 21; + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter.Invoke()); + + TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_NE(nullptr, output); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output->type); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, output->bytes); + TF_LITE_MICRO_EXPECT_NE(nullptr, output->data.i32); + TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc new file mode 100644 index 0000000000000000000000000000000000000000..40c21c6448c39f27c12e95ae36038510cb346362 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc @@ -0,0 +1,80 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h" + +namespace tflite { + +const TfLiteRegistration* MicroMutableOpResolver::FindOp( + tflite::BuiltinOperator op, int version) const { + for (int i = 0; i < registrations_len_; ++i) { + const TfLiteRegistration& registration = registrations_[i]; + if ((registration.builtin_code == op) && + (registration.version == version)) { + return ®istration; + } + } + return nullptr; +} + +const TfLiteRegistration* MicroMutableOpResolver::FindOp(const char* op, + int version) const { + for (int i = 0; i < registrations_len_; ++i) { + const TfLiteRegistration& registration = registrations_[i]; + if ((registration.builtin_code == -1) && + (strcmp(registration.custom_name, op) == 0) && + (registration.version == version)) { + return ®istration; + } + } + return nullptr; +} + +void MicroMutableOpResolver::AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration, + int min_version, int max_version) { + for (int version = min_version; version <= max_version; ++version) { + if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) { + // TODO(petewarden) - Add error reporting hooks so we can report this! + return; + } + TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; + registrations_len_ += 1; + + *new_registration = *registration; + new_registration->builtin_code = op; + new_registration->version = version; + } +} + +void MicroMutableOpResolver::AddCustom(const char* name, + TfLiteRegistration* registration, + int min_version, int max_version) { + for (int version = min_version; version <= max_version; ++version) { + if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) { + // TODO(petewarden) - Add error reporting hooks so we can report this! + return; + } + TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; + registrations_len_ += 1; + + *new_registration = *registration; + new_registration->builtin_code = -1; + new_registration->custom_name = name; + new_registration->version = version; + } +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h new file mode 100644 index 0000000000000000000000000000000000000000..f3750a248416cc7244e0dea82be167562fd59ee7 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ + +#include "tensorflow/contrib/lite/core/api/op_resolver.h" +#include "tensorflow/contrib/lite/experimental/micro/compatibility.h" + +#ifndef TFLITE_REGISTRATIONS_MAX +#define TFLITE_REGISTRATIONS_MAX (128) +#endif + +namespace tflite { + +class MicroMutableOpResolver : public OpResolver { + public: + const TfLiteRegistration* FindOp(tflite::BuiltinOperator op, + int version) const override; + const TfLiteRegistration* FindOp(const char* op, int version) const override; + void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration, + int min_version = 1, int max_version = 1); + void AddCustom(const char* name, TfLiteRegistration* registration, + int min_version = 1, int max_version = 1); + + private: + TfLiteRegistration registrations_[TFLITE_REGISTRATIONS_MAX]; + int registrations_len_ = 0; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..5420a33e8778d93d5aad2150438fdba80df372b8 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc @@ -0,0 +1,83 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h" + +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace { +void* MockInit(TfLiteContext* context, const char* buffer, size_t length) { + // Do nothing. + return nullptr; +} + +void MockFree(TfLiteContext* context, void* buffer) { + // Do nothing. +} + +TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} +} // namespace +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestOperations) { + using tflite::BuiltinOperator_CONV_2D; + using tflite::BuiltinOperator_RELU; + using tflite::MicroMutableOpResolver; + using tflite::OpResolver; + + static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, + tflite::MockPrepare, tflite::MockInvoke}; + + MicroMutableOpResolver micro_mutable_op_resolver; + micro_mutable_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r, 0, 2); + micro_mutable_op_resolver.AddCustom("mock_custom", &r, 0, 3); + OpResolver* resolver = µ_mutable_op_resolver; + + const TfLiteRegistration* registration = + resolver->FindOp(BuiltinOperator_CONV_2D, 0); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); + + registration = resolver->FindOp(BuiltinOperator_CONV_2D, 10); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); + + registration = resolver->FindOp(BuiltinOperator_RELU, 0); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); + + registration = resolver->FindOp("mock_custom", 0); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); + + registration = resolver->FindOp("mock_custom", 10); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); + + registration = resolver->FindOp("nonexistent_custom", 0); + TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..8c090a20a5fb9e6cb330a40c86236c549c28539e --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc @@ -0,0 +1,149 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h" + +#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h" + +namespace tflite { +namespace { + +TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size, + ErrorReporter* reporter) { + switch (type) { + case kTfLiteFloat32: + *size = sizeof(float); + break; + case kTfLiteInt16: + *size = sizeof(int16_t); + break; + case kTfLiteInt32: + *size = sizeof(int32_t); + break; + case kTfLiteUInt8: + *size = sizeof(uint8_t); + break; + case kTfLiteInt64: + *size = sizeof(int64_t); + break; + case kTfLiteBool: + *size = sizeof(bool); + break; + case kTfLiteComplex64: + *size = sizeof(float) * 2; + break; + default: + reporter->Report( + "Only float32, int16, int32, int64, uint8, bool, complex64 " + "supported currently."); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus BytesRequired(const tflite::Tensor& flatbuffer_tensor, + size_t dims_size, size_t* bytes, + ErrorReporter* error_reporter) { + TfLiteType tf_lite_type; + TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), + &tf_lite_type, error_reporter)); + size_t type_size; + TF_LITE_ENSURE_STATUS( + TfLiteTypeSizeOf(tf_lite_type, &type_size, error_reporter)); + *bytes = dims_size * type_size; + return kTfLiteOk; +} + +} // namespace + +TfLiteStatus SimpleTensorAllocator::AllocateTensor( + const tflite::Tensor& flatbuffer_tensor, int create_before, + int destroy_after, + const flatbuffers::Vector>* buffers, + ErrorReporter* error_reporter, TfLiteTensor* result) { + TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), + &result->type, error_reporter)); + result->is_variable = flatbuffer_tensor.is_variable(); + + result->data.raw = nullptr; + result->bytes = 0; + if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) { + if (auto* array = buffer->data()) { + if (size_t array_size = array->size()) { + result->data.raw = + const_cast(reinterpret_cast(array->data())); + TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, array_size, + &result->bytes, error_reporter)); + } + } + } + if (result->data.raw) { + result->allocation_type = kTfLiteMmapRo; + } else { + int data_size = 1; + for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) { + data_size *= flatbuffer_tensor.shape()->Get(n); + } + TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, data_size, + &result->bytes, error_reporter)); + result->data.raw = reinterpret_cast(AllocateMemory(result->bytes)); + if (result->data.raw == nullptr) { + const char* tensor_name = flatbuffer_tensor.name()->c_str(); + if (tensor_name == nullptr) { + tensor_name = ""; + } + error_reporter->Report( + "Couldn't allocate memory for tensor '%s', wanted %d bytes but only " + "%d were available", + tensor_name, result->bytes, (data_size_max_ - data_size_)); + return kTfLiteError; + } + result->allocation_type = kTfLiteArenaRw; + } + result->dims = reinterpret_cast( + AllocateMemory(sizeof(int) * (flatbuffer_tensor.shape()->Length() + 1))); + result->dims->size = flatbuffer_tensor.shape()->Length(); + for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) { + result->dims->data[n] = flatbuffer_tensor.shape()->Get(n); + } + if (flatbuffer_tensor.quantization()) { + result->params.scale = flatbuffer_tensor.quantization()->scale()->Get(0); + result->params.zero_point = + flatbuffer_tensor.quantization()->zero_point()->Get(0); + } + result->allocation = nullptr; + if (flatbuffer_tensor.name()) { + result->name = flatbuffer_tensor.name()->c_str(); + } else { + result->name = ""; + } + result->delegate = nullptr; + result->buffer_handle = 0; + result->data_is_stale = false; + return kTfLiteOk; +} + +uint8_t* SimpleTensorAllocator::AllocateMemory(size_t size) { + if ((data_size_ + size) > data_size_max_) { + // TODO(petewarden): Add error reporting beyond returning null! + return nullptr; + } + uint8_t* result = data_; + data_ += size; + data_size_ += size; + return result; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..4f16a9d0e54cba6fb3b635ceeb39ab10ff59ae73 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_ + +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/core/api/error_reporter.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { + +// TODO(petewarden): This allocator never frees up or reuses any memory, even +// though we have enough information about lifetimes of the tensors to do so. +// This makes it pretty wasteful, so we should use a more intelligent method. +class SimpleTensorAllocator { + public: + SimpleTensorAllocator(uint8_t* buffer, int buffer_size) + : data_size_(0), data_size_max_(buffer_size), data_(buffer) {} + + TfLiteStatus AllocateTensor( + const tflite::Tensor& flatbuffer_tensor, int create_before, + int destroy_after, + const flatbuffers::Vector>* buffers, + ErrorReporter* error_reporter, TfLiteTensor* result); + + uint8_t* AllocateMemory(size_t size); + + int GetDataSize() const { return data_size_; } + + private: + int data_size_; + int data_size_max_; + uint8_t* data_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..c83542724395328cb6a5e038b64dba4b9f4f655b --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc @@ -0,0 +1,144 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" + +#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h" + +namespace tflite { +namespace { +class StackAllocator : public flatbuffers::Allocator { + public: + StackAllocator() : data_(data_backing_), data_size_(0) {} + + uint8_t* allocate(size_t size) override { + if ((data_size_ + size) > kStackAllocatorSize) { + // TODO(petewarden): Add error reporting beyond returning null! + return nullptr; + } + uint8_t* result = data_; + data_ += size; + data_size_ += size; + return result; + } + + void deallocate(uint8_t* p, size_t) override {} + + static StackAllocator& instance() { + // Avoid using true dynamic memory allocation to be portable to bare metal. + static char inst_memory[sizeof(StackAllocator)]; + static StackAllocator* inst = new (inst_memory) StackAllocator; + return *inst; + } + + static constexpr int kStackAllocatorSize = 4096; + + private: + uint8_t data_backing_[kStackAllocatorSize]; + uint8_t* data_; + int data_size_; +}; + +flatbuffers::FlatBufferBuilder* BuilderInstance() { + static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)]; + static flatbuffers::FlatBufferBuilder* inst = + new (inst_memory) flatbuffers::FlatBufferBuilder( + StackAllocator::kStackAllocatorSize, &StackAllocator::instance()); + return inst; +} + +const Tensor* Create1dTensor(int size) { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {size}; + const Offset tensor_offset = CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_tensor"), 0, false); + builder->Finish(tensor_offset); + void* tensor_pointer = builder->GetBufferPointer(); + const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); + return tensor; +} + +const flatbuffers::Vector>* CreateBuffers() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + const flatbuffers::Offset>> + buffers_offset = builder->CreateVector(buffers, buffers_size); + builder->Finish(buffers_offset); + void* buffers_pointer = builder->GetBufferPointer(); + const flatbuffers::Vector>* result = + flatbuffers::GetRoot>>( + buffers_pointer); + return result; +} + +} // namespace +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestAllocateTensor) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleTensorAllocator allocator(arena, arena_size); + + const tflite::Tensor* tensor = tflite::Create1dTensor(100); + const flatbuffers::Vector>* buffers = + tflite::CreateBuffers(); + + TfLiteTensor allocated_tensor; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter, + &allocated_tensor)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); + TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); + TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocated_tensor.data.i32); +} + +TF_LITE_MICRO_TEST(TestTooLarge) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleTensorAllocator allocator(arena, arena_size); + + const tflite::Tensor* tensor = tflite::Create1dTensor(10000); + const flatbuffers::Vector>* buffers = + tflite::CreateBuffers(); + + TfLiteTensor allocated_tensor; + TF_LITE_MICRO_EXPECT_NE( + kTfLiteOk, + allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter, + &allocated_tensor)); +} + +TF_LITE_MICRO_TEST(TestJustFits) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleTensorAllocator allocator(arena, arena_size); + + uint8_t* result = allocator.AllocateMemory(arena_size); + TF_LITE_MICRO_EXPECT_NE(nullptr, result); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/contrib/lite/experimental/micro/testing/BUILD b/tensorflow/contrib/lite/experimental/micro/testing/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..0d23be5712ad1bc6d81cc467cce8c9927caece3d --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/BUILD @@ -0,0 +1,17 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["test_linux_binary.sh"]) + +cc_library( + name = "micro_test", + hdrs = [ + "micro_test.h", + ], + deps = [ + "//tensorflow/contrib/lite/experimental/micro:micro_framework", + ], +) diff --git a/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill new file mode 100644 index 0000000000000000000000000000000000000000..7d6d81af0f482afb7a9f0624b5262a5277112976 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill @@ -0,0 +1,21 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# This docker configuration file lets you emulate a Blue Pill board +# on an x86 desktop or laptop, which can be useful for debugging and +# automated testing. +FROM antmicro/renode:latest + +LABEL maintainer="Pete Warden " \ No newline at end of file diff --git a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc new file mode 100644 index 0000000000000000000000000000000000000000..9333dc42bfbfbc0c6185a88db096b2cb2102d5be --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc @@ -0,0 +1,36 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +using sysbus + +mach create +machine LoadPlatformDescription @platforms/cpus/stm32f103.repl + +# These lines are needed to show the results of DebugLog calls in the output. +machine LoadPlatformDescriptionFromString "uartSemihosting: UART.SemihostingUart @ cpu" +showAnalyzer cpu.uartSemihosting Antmicro.Renode.Analyzers.LoggingUartAnalyzer + +logFile @/tmp/renode_bluepill_log.txt + +macro reset +""" + sysbus LoadELF $bin +""" + +runMacro $reset + +emulation RunFor @1 + +quit \ No newline at end of file diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl new file mode 100644 index 0000000000000000000000000000000000000000..916e3eeac394f9a815d7c1785d253fd54ca7aa0e --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl @@ -0,0 +1,67 @@ +"""Rules for simple testing without dependencies by parsing output logs.""" + +def tflite_micro_cc_test( + name, + expected_in_logs = "~~~ALL TESTS PASSED~~~", + srcs = [], + includes = [], + defines = [], + copts = [], + nocopts = "", + linkopts = [], + deps = [], + tags = [], + visibility = None): + """Tests a C/C++ binary without testing framework dependencies`. + + Runs a C++ binary, and tests that the output logs contain the + expected value. This is a deliberately spartan way of testing, to match + what's available when testing microcontroller binaries. + + Args: + name: a unique name for this rule. + expected_in_logs: A regular expression that is required to be + present in the binary's logs for the test to pass. + srcs: sources to compile (C, C++, ld scripts). + includes: include paths to add to this rule and its dependents. + defines: list of `VAR` or `VAR=VAL` to pass to CPP for this rule and + its dependents. + copts: gcc compilation flags for this rule only. + nocopts: list of gcc compilation flags to remove for this rule + only. No regexp like for `cc_library`. + linkopts: `gcc` flags to add to the linking phase. For "pure" ld flags, + prefix them with the `-Wl,` prefix here. + deps: dependencies. only `tflite_bare_metal_cc_library()` dependencies + allowed. + visibility: visibility. + """ + native.cc_binary( + name = name + "_binary", + srcs = srcs, + includes = includes, + defines = defines, + copts = copts, + nocopts = nocopts, + linkopts = linkopts, + deps = deps, + tags = tags, + visibility = visibility, + ) + native.sh_test( + name = name, + size = "medium", + srcs = [ + "//tensorflow/contrib/lite/experimental/micro/testing:test_linux_binary.sh", + ], + args = [ + native.package_name() + "/" + name + "_binary", + "'" + expected_in_logs + "'", + ], + data = [ + name + "_binary", + # Internal test dependency placeholder + ], + deps = [ + ], + tags = tags, + ) diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h new file mode 100644 index 0000000000000000000000000000000000000000..104509c9dc6123e84c45f26d03465f608f100310 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h @@ -0,0 +1,138 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// An ultra-lightweight testing framework designed for use with microcontroller +// applications. Its only dependency is on TensorFlow Lite's ErrorReporter +// interface, where log messages are output. This is designed to be usable even +// when no standard C or C++ libraries are available, and without any dynamic +// memory allocation or reliance on global constructors. +// +// To build a test, you use syntax similar to gunit, but with some extra +// decoration to create a hidden 'main' function containing each of the tests to +// be run. Your code should look something like: +// ---------------------------------------------------------------------------- +// #include "path/to/this/header" +// +// TF_LITE_MICRO_TESTS_BEGIN +// +// TF_LITE_MICRO_TEST(SomeTest) { +// TF_LITE_LOG_EXPECT_EQ(true, true); +// } +// +// TF_LITE_MICRO_TESTS_END +// ---------------------------------------------------------------------------- +// If you compile this for your platform, you'll get a normal binary that you +// should be able to run. Executing it will output logging information like this +// to stderr (or whatever equivalent is available and written to by +// ErrorReporter): +// ---------------------------------------------------------------------------- +// Testing SomeTest +// 1/1 tests passed +// ~~~ALL TESTS PASSED~~~ +// ---------------------------------------------------------------------------- +// This is designed to be human-readable, so you can just run tests manually, +// but the string "~~~ALL TESTS PASSED~~~" should only appear if all of the +// tests do pass. This makes it possible to integrate with automated test +// systems by scanning the output logs and looking for that magic value. +// +// This framework is intended to be a rudimentary alternative to no testing at +// all on systems that struggle to run more conventional approaches, so use with +// caution! + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_ + +#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" + +namespace micro_test { +extern int tests_passed; +extern int tests_failed; +extern bool is_test_complete; +extern bool did_test_fail; +extern tflite::ErrorReporter* reporter; +} // namespace micro_test + +#define TF_LITE_MICRO_TESTS_BEGIN \ + namespace micro_test { \ + int tests_passed; \ + int tests_failed; \ + bool is_test_complete; \ + bool did_test_fail; \ + tflite::ErrorReporter* reporter; \ + } \ + \ + int main(int argc, char** argv) { \ + micro_test::tests_passed = 0; \ + micro_test::tests_failed = 0; \ + tflite::MicroErrorReporter error_reporter; \ + micro_test::reporter = &error_reporter; + +#define TF_LITE_MICRO_TESTS_END \ + micro_test::reporter->Report( \ + "%d/%d tests passed", micro_test::tests_passed, \ + (micro_test::tests_failed + micro_test::tests_passed)); \ + if (micro_test::tests_failed == 0) { \ + micro_test::reporter->Report("~~~ALL TESTS PASSED~~~\n"); \ + } else { \ + micro_test::reporter->Report("~~~SOME TESTS FAILED~~~\n"); \ + } \ + } + +// TODO(petewarden): I'm going to hell for what I'm doing to this poor for loop. +#define TF_LITE_MICRO_TEST(name) \ + micro_test::reporter->Report("Testing %s", #name); \ + for (micro_test::is_test_complete = false, \ + micro_test::did_test_fail = false; \ + !micro_test::is_test_complete; micro_test::is_test_complete = true, \ + micro_test::tests_passed += (micro_test::did_test_fail) ? 0 : 1, \ + micro_test::tests_failed += (micro_test::did_test_fail) ? 1 : 0) + +#define TF_LITE_MICRO_EXPECT(x) \ + do { \ + if (!(x)) { \ + micro_test::reporter->Report(#x " failed at %s:%d", __FILE__, __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#define TF_LITE_MICRO_EXPECT_EQ(x, y) \ + do { \ + if ((x) != (y)) { \ + micro_test::reporter->Report(#x " == " #y " failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#define TF_LITE_MICRO_EXPECT_NE(x, y) \ + do { \ + if ((x) == (y)) { \ + micro_test::reporter->Report(#x " != " #y " failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon) \ + do { \ + auto delta = ((x) > (y)) ? ((x) - (y)) : ((y) - (x)); \ + if (delta > epsilon) { \ + micro_test::reporter->Report(#x " near " #y " failed at %s:%d", \ + __FILE__, __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh new file mode 100755 index 0000000000000000000000000000000000000000..07742a8262f8cdf5981be2a057631d975cd04d33 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh @@ -0,0 +1,54 @@ +#!/bin/bash -e +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Tests a 'bluepill' STM32F103 ELF by parsing the log output of Renode emulation. +# +# First argument is the ELF location. +# Second argument is a regular expression that's required to be in the output logs +# for the test to pass. + +declare -r ROOT_DIR=`pwd` +declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/ +declare -r MICRO_LOG_PATH=${TEST_TMPDIR} +declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt +mkdir -p ${MICRO_LOG_PATH} + +docker build -t renode_bluepill \ + -f ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill \ + ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/ + +docker run \ + --log-driver=none -a stdout -a stderr \ + -v ${ROOT_DIR}:/workspace \ + -v /tmp:/tmp \ + -it renode_bluepill \ + /bin/bash -c "renode -P 5000 --disable-xwt -e ' +\$bin?=@/workspace/$1 +s @/workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc +' 2>&1 >${MICRO_LOG_FILENAME}" + +echo "LOGS:" +cat ${MICRO_LOG_FILENAME} + +if grep -q "$2" ${MICRO_LOG_FILENAME} +then + echo "$1: PASS" + exit 0 +else + echo "$1: FAIL - '$2' not found in logs." + exit 1 +fi + diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh new file mode 100755 index 0000000000000000000000000000000000000000..24131a6d2df6c0187696b7c21efba2323ef1a305 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh @@ -0,0 +1,39 @@ +#!/bin/bash -e +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Tests a Linux binary by parsing the log output. +# +# First argument is the binary location. +# Second argument is a regular expression that's required to be in the output logs +# for the test to pass. + +declare -r ROOT_DIR=`pwd` +declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/ +declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1 +declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt +mkdir -p ${MICRO_LOG_PATH} + +$1 2>&1 | tee ${MICRO_LOG_FILENAME} + +if grep -q "$2" ${MICRO_LOG_FILENAME} +then + echo "$1: PASS" + exit 0 +else + echo "$1: FAIL - '$2' not found in logs." + exit 1 +fi + diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..880bb4763cbbaf58db286ff142a822fbab60dfd8 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile @@ -0,0 +1,166 @@ +MAKEFILE_DIR := tensorflow/contrib/lite/experimental/micro/tools/make + +# Try to figure out the host system +HOST_OS := +ifeq ($(OS),Windows_NT) + HOST_OS = windows +else + UNAME_S := $(shell uname -s) + ifeq ($(UNAME_S),Linux) + HOST_OS := linux + endif + ifeq ($(UNAME_S),Darwin) + HOST_OS := osx + endif +endif + +HOST_ARCH := $(shell if [[ $(shell uname -m) =~ i[345678]86 ]]; then echo x86_32; else echo $(shell uname -m); fi) + +# Override these on the make command line to target a specific architecture. For example: +# make -f tensorflow/contrib/lite/Makefile TARGET=rpi TARGET_ARCH=armv7l +TARGET := $(HOST_OS) +TARGET_ARCH := $(HOST_ARCH) + +INCLUDES := \ +-I. \ +-I$(MAKEFILE_DIR)/../../../../../ \ +-I$(MAKEFILE_DIR)/../../../../../../ \ +-I$(MAKEFILE_DIR)/downloads/ \ +-I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/flatbuffers/include \ +-I$(OBJDIR) +# This is at the end so any globally-installed frameworks like protobuf don't +# override local versions in the source tree. +INCLUDES += -I/usr/local/include + +TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh + +MICROLITE_LIBS := -lm + +# There are no rules for compiling objects for the host system (since we don't +# generate things like the protobuf compiler that require that), so all of +# these settings are for the target compiler. +CXXFLAGS := -O3 -DNDEBUG +CXXFLAGS += --std=c++11 -g -DTF_LITE_STATIC_MEMORY +CCFLAGS := -DNDEBUG -g -DTF_LITE_STATIC_MEMORY +LDOPTS := -L/usr/local/lib +ARFLAGS := -r +TARGET_TOOLCHAIN_PREFIX := +CC_PREFIX := + +# This library is the main target for this makefile. It will contain a minimal +# runtime that can be linked in to other programs. +MICROLITE_LIB_NAME := libtensorflow-microlite.a + +# Test binary for the microcontroller speech model. +MICRO_SPEECH_TEST_SRCS := \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc + +MICROLITE_TEST_SRCS := \ +$(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \ +$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*test.cc) + +MICROLITE_CC_BASE_SRCS := \ +$(wildcard tensorflow/contrib/lite/experimental/micro/*.cc) \ +$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*.cc) \ +tensorflow/contrib/lite/c/c_api_internal.c \ +tensorflow/contrib/lite/core/api/error_reporter.cc \ +tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc \ +tensorflow/contrib/lite/core/api/op_resolver.cc \ +tensorflow/contrib/lite/kernels/kernel_util.cc \ +tensorflow/contrib/lite/kernels/internal/quantization_util.cc +MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS)) + +# These target-specific makefiles should modify or replace options like +# CXXFLAGS or LIBS to work for a specific targetted architecture. All logic +# based on platforms or architectures should happen within these files, to +# keep this main makefile focused on the sources and dependencies. +include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc) + +ALL_SRCS := \ + $(MICRO_SPEECH_TEST_SRCS) \ + $(MICROLITE_CC_SRCS) \ + $(MICROLITE_TEST_SRCS) + +# Where compiled objects are stored. +GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)/ +OBJDIR := $(GENDIR)obj/ +BINDIR := $(GENDIR)bin/ +LIBDIR := $(GENDIR)lib/ + +MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME) + +MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test + +CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++ +CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc +AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar + +MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS)))) + +MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS)))) + +MICROLITE_TEST_TARGETS := $(addprefix $(BINDIR), \ +$(patsubst %_test.cc,%.test_target,$(MICROLITE_TEST_SRCS))) + +# For normal manually-created TensorFlow C++ source files. +$(OBJDIR)%.o: %.cc + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + +# For normal manually-created TensorFlow C source files. +$(OBJDIR)%.o: %.c + @mkdir -p $(dir $@) + $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ + +# The target that's compiled if there's no command-line arguments. +all: $(MICROLITE_LIB_PATH) $(MICRO_SPEECH_TEST_BINARY) + +microlite: $(MICROLITE_LIB_PATH) + +# Hack for generating schema file bypassing flatbuffer parsing +tensorflow/contrib/lite/schema/schema_generated.h: + @cp -u tensorflow/contrib/lite/schema/schema_generated.h.OPENSOURCE tensorflow/contrib/lite/schema/schema_generated.h + +# Gathers together all the objects we've compiled into a single '.a' archive. +$(MICROLITE_LIB_PATH): tensorflow/contrib/lite/schema/schema_generated.h $(MICROLITE_LIB_OBJS) + @mkdir -p $(dir $@) + $(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS) + +$(MICRO_SPEECH_TEST_BINARY): $(MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(INCLUDES) \ + -o $(MICRO_SPEECH_TEST_BINARY) $(MICRO_SPEECH_TEST_OBJS) \ + $(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS) + +micro_speech_test: $(MICRO_SPEECH_TEST_BINARY) +micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin + +test_micro_speech: $(MICRO_SPEECH_TEST_BINARY) + $(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~' + +$(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(INCLUDES) \ + -o $@ $< \ + $(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS) + +$(BINDIR)%.test_target: $(BINDIR)%_test + $(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~' + +$(info $(MICROLITE_TEST_TARGETS)) + +test: test_micro_speech $(MICROLITE_TEST_TARGETS) + +# Gets rid of all generated files. +clean: + rm -rf $(MAKEFILE_DIR)/gen + +$(DEPDIR)/%.d: ; +.PRECIOUS: $(DEPDIR)/%.d +.PRECIOUS: $(BINDIR)%_test + +-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(ALL_SRCS))) diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh new file mode 100755 index 0000000000000000000000000000000000000000..4c2ff8545dbdcc426bf62aaeb07ca22d8b17cc69 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR/../../../../../../.." + +DOWNLOADS_DIR=tensorflow/contrib/lite/experimental/micro/tools/make/downloads +BZL_FILE_PATH=tensorflow/workspace.bzl + +# Ensure it is being run from repo root +if [ ! -f $BZL_FILE_PATH ]; then + echo "Could not find ${BZL_FILE_PATH}": + echo "Likely you are not running this from the root directory of the repository."; + exit 1; +fi + +GEMMLOWP_URL="https://github.com/google/gemmlowp/archive/719139ce755a0f31cbf1c37f7f98adcc7fc9f425.zip" +FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz" +CMSIS_URL="https://github.com/ARM-software/CMSIS_5/archive/5.4.0.zip" +STM32_BARE_LIB_URL="https://github.com/google/stm32_bare_lib/archive/50e0da307a2821bb54af1f57b969e6b76cb89d32.zip" + +download_and_extract() { + local usage="Usage: download_and_extract URL DIR" + local url="${1:?${usage}}" + local dir="${2:?${usage}}" + echo "downloading ${url}" >&2 + mkdir -p "${dir}" + if [[ "${url}" == *gz ]]; then + curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz + elif [[ "${url}" == *zip ]]; then + tempdir=$(mktemp -d) + tempdir2=$(mktemp -d) + + curl -L ${url} > ${tempdir}/zipped.zip + unzip ${tempdir}/zipped.zip -d ${tempdir2} + + # If the zip file contains nested directories, extract the files from the + # inner directory. + if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then + # unzip has no strip components, so unzip to a temp dir, and move the + # files we want from the tempdir to destination. + cp -R ${tempdir2}/*/* ${dir}/ + else + cp -R ${tempdir2}/* ${dir}/ + fi + rm -rf ${tempdir2} ${tempdir} + fi + + # Delete any potential BUILD files, which would interfere with Bazel builds. + find "${dir}" -type f -name '*BUILD' -delete +} + +download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp" +download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers" +download_and_extract "${CMSIS_URL}" "${DOWNLOADS_DIR}/cmsis" +download_and_extract "${STM32_BARE_LIB_URL}" "${DOWNLOADS_DIR}/stm32_bare_lib" + +echo "download_dependencies.sh completed successfully." >&2 diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc new file mode 100644 index 0000000000000000000000000000000000000000..022a8422dc89c048797d0f9ba224f67060d210d7 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc @@ -0,0 +1,65 @@ +# Settings for Blue Pill platforms. +ifeq ($(TARGET), bluepill) + TARGET_ARCH := cortex-m3 + TARGET_TOOLCHAIN_PREFIX := arm-none-eabi- + + PLATFORM_FLAGS = \ + -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \ + -DTF_LITE_STATIC_MEMORY \ + -DTF_LITE_MCU_DEBUG_LOG \ + -fno-rtti \ + -fmessage-length=0 \ + -fno-exceptions \ + -fno-unwind-tables \ + -fno-builtin \ + -ffunction-sections \ + -fdata-sections \ + -funsigned-char \ + -MMD \ + -mcpu=cortex-m3 \ + -mthumb \ + -std=gnu++11 \ + -Wvla \ + -Wall \ + -Wextra \ + -Wno-unused-parameter \ + -Wno-missing-field-initializers \ + -Wno-write-strings \ + -Wno-sign-compare \ + -fno-delete-null-pointer-checks \ + -fomit-frame-pointer \ + -fpermissive \ + -nostdlib \ + -g \ + -Os + CXXFLAGS += $(PLATFORM_FLAGS) + CCFLAGS += $(PLATFORM_FLAGS) + LDFLAGS += \ + -T $(MAKEFILE_DIR)/downloads/stm32_bare_lib/stm32_linker_layout.lds \ + -Wl,-Map=$(MAKEFILE_DIR)/gen/$(TARGET).map,--cref \ + -Wl,--gc-sections + BUILD_TYPE := micro + MICROLITE_LIBS := \ + -lm + INCLUDES += \ + -isystem$(MAKEFILE_DIR)/downloads/cmsis/CMSIS/Core/Include/ \ + -I$(MAKEFILE_DIR)/downloads/stm32_bare_lib/include + MICROLITE_CC_SRCS += \ + $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.c) \ + $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.cc) + TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh + # These are tests that don't currently work on the blue pill. + EXCLUDED_TESTS := \ + tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc \ + tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc + MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) + +# These are microcontroller-specific rules for converting the ELF output +# of the linker into a binary image that can be loaded directly. +OBJCOPY := $(TARGET_TOOLCHAIN_PREFIX)objcopy + +$(BINDIR)/%.bin: $(BINDIR)/% + @mkdir -p $(dir $@) + $(OBJCOPY) $< $@ -O binary + +endif \ No newline at end of file diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..44d0ccd3128dea1c947e57ccbc4e18b2d34cef88 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png differ diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..94a6310612828db2370d19a094795341478e90f8 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png differ diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md index 0ae9400068887745f3064409bd39cf41eea212ca..ed114527166da79dba2d92c3ffad78e9885f9e94 100644 --- a/tensorflow/contrib/lite/g3doc/performance.md +++ b/tensorflow/contrib/lite/g3doc/performance.md @@ -3,36 +3,43 @@ Mobile and embedded devices have limited computational resources and it is important to keep your application resource efficient. We have compiled a list of best practices and strategies you can use to optimize your model and application when using Tensorflow Lite. -## Choose the most efficient model for the problem -Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices. +## Choose the best model for the task +Depending on the task you will need to make a tradeoff between model complexity and size. If your task requires high accuracy then you may need a large and complex model. Some tasks may work with a less precise model, for these tasks it is better to use a smaller but less precise model. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. For example, graphs below show accuracy and latency tradeoff for some common image classification models. + +![accuracy vs model size](images/performance/model_size_vs_accuracy.png "Accuracy vs Model size") + + +![latency vs model size](images/performance/model_size_vs_latency.png "Latency vs Model size") + +One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices. You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for -[image classification] (https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and +[image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and [object detection](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193). ## Profile your model -Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](../tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time. +Once you have selected a candidate model that is right for your task, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time. ## Profile and optimize operators in the graph If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator. This scenario should be rare as Tensorflow Lite has optimized versions for most ops. However you may be able to write a faster version of a custom op, if you know the constraints in which the operator is executed. Check out our [custom operator documentation](custom_operators.md). ## Quantize your model -If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well. +If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. ## Tweak the number of threads -Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](../interpreter.h) threads. +Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads. Multi-threaded execution however comes at the cost of increased performance variability depending on what else is been executed concurrently. This is particularly the case for mobile apps. For example, isolated tests may show 2x speed up vs single-threaded but if another app is executing at the same time may result in worst performance than single-threaded. ## Eliminate redundant copies -Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151). +If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151). ## Profile your application with platform specific tools Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform. -## Use hardware accelerators available on the device -Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/) on Android. -You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable NNAPI call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance. +## Evaluate whether your model benefits from using hardware accelerators available on the device +Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android. +You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance. ## Need more help The Tensorflow team is happy to help diagnose and address specific performance issues you may be facing. Please file a bug on [github](https://github.com/tensorflow/tensorflow/issues) with details of the issue. diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md index b0f32a8d6ca91229489c73c2c6f52d9c82d37b37..2eb776d10cf8ec68987d13b580eddf2f1bda8e78 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on Android +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ To get you started working with TensorFlow on Android, we'll walk through two ways to build our TensorFlow mobile demos and deploying them on an Android device. The first is Android Studio, which lets you build and deploy in an diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md index 49ad35d4e6a18f266d88e330626bae8bf1fc499f..15f0fd396134e40e89266182cb308080d9d250cb 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md @@ -1,6 +1,22 @@ - # Overview +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ TensorFlow was designed to be a good deep learning solution for mobile platforms. Currently we have two solutions for deploying machine learning applications on mobile and embedded devices: TensorFlow for Mobile and diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md index be8b4100c89f4b02e651b1585faf438881c9119d..d922907cdc5fe5ccec8864b456586fce0293a0af 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on iOS +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ ## Using CocoaPods The simplest way to get started with TensorFlow on iOS is using the CocoaPods diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md index 4d4bb3bc081d613714271f8b0bf7461cb1e0f4d5..fd0e322c93493ed835ae7ec9766a708885c6ac88 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md @@ -1,6 +1,22 @@ - # Integrating TensorFlow libraries +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ Once you have made some progress on a model that addresses the problem you’re trying to solve, it’s important to test it out inside your application immediately. There are often unexpected differences between your training data diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md index 7436594fd8580151ba66562eccd408cc7e6c4201..59ff8e774c6c63a01668aee7d6caeea01171468d 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md @@ -1,6 +1,22 @@ - # Optimizing for mobile +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ There are some special issues that you have to deal with when you’re trying to ship on mobile or embedded devices, and you’ll need to think about these as you’re developing your model. diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md index d1c67d4c61608bcbc9b0bcee5b60f46a73b44692..1d373251ddf3ba6a0119bd57bf14caf100ef371a 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md @@ -1,6 +1,22 @@ - # Preparing models for mobile deployment +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ The requirements for storing model information during training are very different from when you want to release it as part of a mobile app. This section covers the tools involved in converting from a training model to something diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 2657bcd42b364149dd860f420883407231f82636..88e41ffc55d2b666bb4837c12dccb2ebcdcaac33 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -451,16 +451,15 @@ TfLiteStatus Interpreter::AllocateTensors() { // Reset the variable tensors to zero after (re)allocating the tensors. // Developers shouldn't rely on the side effect of this function to reset - // variable tesnsors. They should call `ResetVariableTensorsToZero` directly + // variable tesnsors. They should call `ResetVariableTensors` directly // instead. - ResetVariableTensorsToZero(); + ResetVariableTensors(); return kTfLiteOk; } -// TODO(ycling): Consider to provide other functions to initialize variable -// tensors to non-zero values. -TfLiteStatus Interpreter::ResetVariableTensorsToZero() { +// TODO(ycling): Support non-zero default values. +TfLiteStatus Interpreter::ResetVariableTensors() { for (auto& tensor : tensors_) { if (!tensor.is_variable) { continue; diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index aa2bc4def666eee8993c9b3198e864dd6ed642de..651a97e9dc84350569514528ae5635ec040d607f 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -349,6 +349,10 @@ class Interpreter { return context_.allow_fp32_relax_to_fp16; } + // Owning handle to a TfLiteDelegate instance. + using TfLiteDelegatePtr = + std::unique_ptr; + // Allow a delegate to look at the graph and modify the graph to handle // parts of the graph themselves. After this is called, the graph may // contain new nodes that replace 1 more nodes. @@ -421,9 +425,12 @@ class Interpreter { allow_buffer_handle_output_ = allow_buffer_handle_output; } - // Reset all variable tensors to zero. + // Reset all variable tensors to the default value. + // If a variable tensor doesn't have a buffer, reset it to zero. + // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it + // to the value of the buffer. // WARNING: This is an experimental API and subject to change. - TfLiteStatus ResetVariableTensorsToZero(); + TfLiteStatus ResetVariableTensors(); // Retrieve an operator's description of its work, for profiling purposes. const char* OpProfilingString(const TfLiteRegistration& op_reg, @@ -571,19 +578,11 @@ class Interpreter { TfLiteExternalContextType type, TfLiteExternalContext* ctx); - using TfLiteDelegatePtr = - std::unique_ptr; - // Variant of the public ModifyGraphWithDelegate method that additionally // Assumes ownership of the provided delegate. // WARNING: This is an experimental API and subject to change. - template - TfLiteStatus ModifyGraphWithDelegate(std::unique_ptr typed_delegate, + TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegatePtr delegate, bool allow_dynamic_tensors = false) { - TfLiteDelegatePtr delegate(typed_delegate.release(), - [](TfLiteDelegate* delegate) { - delete static_cast(delegate); - }); // Note that we retain ownership of the delegate even if graph modification // fails, as delegate use will be in an indeterminate state at that point. owned_delegates_.push_back(std::move(delegate)); @@ -673,6 +672,7 @@ class Interpreter { // List of delegates that have been installed and are owned by this // interpreter instance. Useful if client delegate ownership is burdensome. // WARNING: This is an experimental API and subject to change. + // TODO(b/116667551): Use TfLiteExternalContext for storing state. std::vector owned_delegates_; std::unique_ptr memory_planner_; diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index cdede430e29be7b18939f55a8bb06b66f1a3ea33..6c71d5a8d7bb3e275379637b151ab8f998b04f41 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -30,7 +30,11 @@ class InterpreterTest : public ::testing::Test { template static TfLiteStatus ModifyGraphWithDelegate( Interpreter* interpreter, std::unique_ptr delegate) { - return interpreter->ModifyGraphWithDelegate(std::move(delegate)); + Interpreter::TfLiteDelegatePtr tflite_delegate( + delegate.release(), [](TfLiteDelegate* delegate) { + delete reinterpret_cast(delegate); + }); + return interpreter->ModifyGraphWithDelegate(std::move(tflite_delegate)); } protected: diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 098ba7e7731d833678fbd5eab9cce3f022570f23..e68cd26f8124c24cf25d07376b0ebe49da1c149b 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -11,6 +11,10 @@ load("//tensorflow/java:build_defs.bzl", "JAVACOPTS") load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary") load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni") +JAVA_SRCS = glob([ + "src/main/java/org/tensorflow/lite/*.java", +]) + # Building tensorflow-lite.aar including 4 variants of .so # To build an aar for release, run below command: # bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \ @@ -20,28 +24,38 @@ aar_with_jni( android_library = ":tensorflowlite", ) +# EXPERIMENTAL: AAR target that supports TensorFlow op execution with TFLite. +aar_with_jni( + name = "tensorflow-lite-flex", + android_library = ":tensorflowlite_flex", +) + android_library( name = "tensorflowlite", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, + manifest = "AndroidManifest.xml", + visibility = ["//visibility:public"], + deps = [ + ":tensorflowlite_native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Android target that supports TensorFlow op execution with TFLite. +android_library( + name = "tensorflowlite_flex", + srcs = JAVA_SRCS, manifest = "AndroidManifest.xml", visibility = ["//visibility:public"], deps = [ - ":tflite_runtime", + ":tensorflowlite_native_flex", "@org_checkerframework_qual", ], ) android_library( name = "tensorflowlite_java", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, visibility = ["//visibility:public"], deps = [ "@org_checkerframework_qual", @@ -50,16 +64,23 @@ android_library( java_library( name = "tensorflowlitelib", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, javacopts = JAVACOPTS, visibility = ["//visibility:public"], deps = [ ":libtensorflowlite_jni.so", - "//tensorflow/contrib/lite/java/src/main/native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Java target that supports TensorFlow op execution with TFLite. +java_library( + name = "tensorflowlitelib_flex", + srcs = JAVA_SRCS, + javacopts = JAVACOPTS, + visibility = ["//visibility:public"], + deps = [ + ":libtensorflowlite_flex_jni.so", "@org_checkerframework_qual", ], ) @@ -72,7 +93,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.TensorFlowLiteTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -87,7 +107,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.DataTypeTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -110,7 +129,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -125,19 +143,37 @@ java_test( data = [ "src/testdata/add.bin", "src/testdata/mobilenet.tflite.bin", + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", ], javacopts = JAVACOPTS, tags = ["no_oss"], test_class = "org.tensorflow.lite.InterpreterTest", visibility = ["//visibility:private"], deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", ], ) +java_test( + name = "InterpreterFlexTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/InterpreterFlexTest.java"], + data = [ + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", + ], + javacopts = JAVACOPTS, + tags = ["no_oss"], + test_class = "org.tensorflow.lite.InterpreterFlexTest", + visibility = ["//visibility:private"], + deps = [ + ":tensorflowlitelib_flex", + "@com_google_truth", + "@junit", + ], +) + java_test( name = "TensorTest", size = "small", @@ -164,14 +200,29 @@ filegroup( ) cc_library( - name = "tflite_runtime", + name = "tensorflowlite_native", srcs = ["libtensorflowlite_jni.so"], visibility = ["//visibility:public"], ) +cc_library( + name = "tensorflowlite_native_flex", + srcs = ["libtensorflowlite_flex_jni.so"], + visibility = ["//visibility:public"], +) + tflite_jni_binary( name = "libtensorflowlite_jni.so", deps = [ "//tensorflow/contrib/lite/java/src/main/native", ], ) + +# EXPERIMENTAL: Native target that supports TensorFlow op execution with TFLite. +tflite_jni_binary( + name = "libtensorflowlite_flex_jni.so", + deps = [ + "//tensorflow/contrib/lite/delegates/flex:delegate", + "//tensorflow/contrib/lite/java/src/main/native", + ], +) diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl index db837cf29edfc0ffe9950ffedc02cca1389b0fdf..360d622b1bcf5cf379987ceefc43c74b1b6ce5fb 100644 --- a/tensorflow/contrib/lite/java/aar_with_jni.bzl +++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl @@ -3,12 +3,12 @@ load("@build_bazel_rules_android//android:rules.bzl", "android_binary") def aar_with_jni(name, android_library): - # Generate dummy AndroidManifest.xml for dummy apk usage - # (dummy apk is generated by _dummy_app_for_so target below) - native.genrule( - name = name + "_binary_manifest_generator", - outs = [name + "_generated_AndroidManifest.xml"], - cmd = """ + # Generate dummy AndroidManifest.xml for dummy apk usage + # (dummy apk is generated by _dummy_app_for_so target below) + native.genrule( + name = name + "_binary_manifest_generator", + outs = [name + "_generated_AndroidManifest.xml"], + cmd = """ cat > $(OUTS) < $(OUTS) < EOF """, - ) + ) - # Generate dummy apk including .so files and later we extract out - # .so files and throw away the apk. - android_binary( - name = name + "_dummy_app_for_so", - manifest = name + "_generated_AndroidManifest.xml", - custom_package = "dummy.package.for.so", - deps = [android_library], - # In some platforms we don't have an Android SDK/NDK and this target - # can't be built. We need to prevent the build system from trying to - # use the target in that case. - tags = ["manual"], - ) + # Generate dummy apk including .so files and later we extract out + # .so files and throw away the apk. + android_binary( + name = name + "_dummy_app_for_so", + aapt_version = "aapt", + manifest = name + "_generated_AndroidManifest.xml", + custom_package = "dummy.package.for.so", + deps = [android_library], + # In some platforms we don't have an Android SDK/NDK and this target + # can't be built. We need to prevent the build system from trying to + # use the target in that case. + tags = [ + "manual", + "no_cuda_on_cpu_tap", + ], + ) - native.genrule( - name = name, - srcs = [android_library + ".aar", name + "_dummy_app_for_so_unsigned.apk"], - outs = [name + ".aar"], - tags = ["manual"], - cmd = """ + native.genrule( + name = name, + srcs = [android_library + ".aar", name + "_dummy_app_for_so_unsigned.apk"], + outs = [name + ".aar"], + tags = ["manual"], + cmd = """ cp $(location {}.aar) $(location :{}.aar) chmod +w $(location :{}.aar) origdir=$$PWD @@ -46,4 +50,4 @@ unzip $$origdir/$(location :{}_dummy_app_for_so_unsigned.apk) "lib/*" cp -r lib jni zip -r $$origdir/$(location :{}.aar) jni/*/*.so """.format(android_library, name, name, name, name), - ) + ) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 220d6c2159b56f6349e93132418fa0f6c69d1ab3..5ad738389eb8bc1d875fc888c1336fb3fa140eee 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -7,6 +7,7 @@ licenses(["notice"]) # Apache 2.0 android_binary( name = "TfLiteCameraDemo", srcs = glob(["java/**/*.java"]), + aapt_version = "aapt", assets = [ "//tensorflow/contrib/lite/java/demo/app/src/main/assets:labels_mobilenet_quant_v1_224.txt", "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite", diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD index bb0be04ca2a659dfb5e0c73bcf0485fb425d5ed0..ea9b9ed4b66a601981f4c402f7f8a4f6749e07fd 100644 --- a/tensorflow/contrib/lite/java/ovic/BUILD +++ b/tensorflow/contrib/lite/java/ovic/BUILD @@ -9,6 +9,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow/java:build_defs.bzl", "JAVACOPTS") +# Build targets for OVIC classification. java_test( name = "OvicClassifierTest", size = "medium", @@ -45,8 +46,9 @@ android_library( name = "ovicbenchmarkerlib", srcs = [ "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java", + "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java", "src/main/java/org/tensorflow/ovic/OvicClassifier.java", - "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", + "src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java", ], manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml", tags = ["no_oss"], @@ -60,8 +62,8 @@ android_library( java_library( name = "ovicbenchmarkerlib_java", srcs = [ + "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java", "src/main/java/org/tensorflow/ovic/OvicClassifier.java", - "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", ], javacopts = JAVACOPTS, tags = ["no_oss"], @@ -73,3 +75,58 @@ java_library( "@org_checkerframework_qual", ], ) + +# Build targets for OVIC detection. +java_test( + name = "OvicDetectorTest", + size = "medium", + srcs = ["src/test/java/org/tensorflow/ovic/OvicDetectorTest.java"], + data = [ + "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt", + "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata", + "@tflite_mobilenet_ssd_quant//:detect.tflite", + ], + javacopts = JAVACOPTS, + tags = ["no_oss"], + test_class = "org.tensorflow.ovic.OvicDetectorTest", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib_java", + "@com_google_truth", + "@junit", + ], +) + +android_library( + name = "ovicdetectionbenchmarkerlib", + srcs = [ + "src/main/java/org/tensorflow/ovic/BoundingBox.java", + "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java", + "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java", + "src/main/java/org/tensorflow/ovic/OvicDetector.java", + "src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java", + ], + manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml", + deps = [ + "//tensorflow/contrib/lite/java:tensorflowlite", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", + "@org_checkerframework_qual", + ], +) + +java_library( + name = "ovicdetectionbenchmarkerlib_java", + srcs = [ + "src/main/java/org/tensorflow/ovic/BoundingBox.java", + "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java", + "src/main/java/org/tensorflow/ovic/OvicDetector.java", + ], + javacopts = JAVACOPTS, + deps = [ + "//tensorflow/contrib/lite/java:libtensorflowlite_jni.so", + "//tensorflow/contrib/lite/java:tensorflowlite_java", + "//tensorflow/contrib/lite/java/src/main/native", + "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper", + "@org_checkerframework_qual", + ], +) diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD index b2e3a9bd7dfa314a572b0e921b53dd7e08c351aa..f567358ea33966ea8fdb422749662e22111c5fcc 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD +++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD @@ -8,9 +8,12 @@ android_binary( srcs = [ "OvicBenchmarkerActivity.java", ], + aapt_version = "aapt", assets = [ - "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata", + "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt", "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt", + "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata", + "@tflite_mobilenet_ssd_quant//:detect.tflite", ], assets_dir = "", custom_package = "ovic.demo.app", @@ -24,6 +27,7 @@ android_binary( deps = [ "//tensorflow/contrib/lite/java:tensorflowlite", "//tensorflow/contrib/lite/java/ovic:ovicbenchmarkerlib", + "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib", "@androidsdk//com.android.support:support-v13-25.2.0", "@androidsdk//com.android.support:support-v4-25.2.0", ], diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java index 4adf94aeb6a431adc3c574bd4d3f0418538884f2..48c29ecebeed42ac9a2e0bc801cab1fb1f9201e8 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java +++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java @@ -35,19 +35,18 @@ import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.text.DecimalFormat; import org.tensorflow.ovic.OvicBenchmarker; -import org.tensorflow.ovic.OvicSingleImageResult; - +import org.tensorflow.ovic.OvicClassifierBenchmarker; +import org.tensorflow.ovic.OvicDetectorBenchmarker; /** Class that benchmark image classifier models. */ public class OvicBenchmarkerActivity extends Activity { /** Tag for the {@link Log}. */ private static final String TAG = "OvicBenchmarkerActivity"; - /** Name of the label file stored in Assets. */ - private static final String LABEL_PATH = "labels.txt"; - - private static final String TEST_IMAGE_PATH = "test_image_224.jpg"; - private static final String MODEL_PATH = "float_model.lite"; + /** Name of the task-dependent data files stored in Assets. */ + private static String labelPath = null; + private static String testImagePath = null; + private static String modelPath = null; /** * Each bottom press will launch a benchmarking experiment. The experiment stops when either the * total native latency reaches WALL_TIME or the number of iterations reaches MAX_ITERATIONS, @@ -66,8 +65,6 @@ public class OvicBenchmarkerActivity extends Activity { private MappedByteBuffer model = null; private InputStream labelInputStream = null; private OvicBenchmarker benchmarker; - /** Inference result of each iteration. */ - OvicSingleImageResult iterResult = null; private TextView textView = null; // private Button startButton = null; @@ -83,21 +80,31 @@ public class OvicBenchmarkerActivity extends Activity { } private Bitmap loadTestBitmap() throws IOException { - InputStream imageStream = getAssets().open(TEST_IMAGE_PATH); + InputStream imageStream = getAssets().open(testImagePath); return BitmapFactory.decodeStream(imageStream); } - public void initializeTest() throws IOException { + public void initializeTest(boolean benchmarkClassification) throws IOException { Log.i(TAG, "Initializing benchmarker."); - benchmarker = new OvicBenchmarker(WALL_TIME); + if (benchmarkClassification) { + benchmarker = new OvicClassifierBenchmarker(WALL_TIME); + labelPath = "labels.txt"; + testImagePath = "test_image_224.jpg"; + modelPath = "quantized_model.lite"; + } else { // Benchmarking detection. + benchmarker = new OvicDetectorBenchmarker(WALL_TIME); + labelPath = "coco_labels.txt"; + testImagePath = "test_image_224.jpg"; + modelPath = "detect.tflite"; + } AssetManager am = getAssets(); - AssetFileDescriptor fileDescriptor = am.openFd(MODEL_PATH); + AssetFileDescriptor fileDescriptor = am.openFd(modelPath); FileInputStream modelInputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); FileChannel fileChannel = modelInputStream.getChannel(); long startOffset = fileDescriptor.getStartOffset(); long declaredLength = fileDescriptor.getDeclaredLength(); model = fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); - labelInputStream = am.open(LABEL_PATH); + labelInputStream = am.open(labelPath); } public Boolean doTestIteration() throws IOException, InterruptedException { @@ -117,24 +124,44 @@ public class OvicBenchmarkerActivity extends Activity { Log.i(TAG, "Going to do test iter."); // Start testing. Bitmap testImageBitmap = loadTestBitmap(); - iterResult = benchmarker.doTestIteration(testImageBitmap); - testImageBitmap.recycle(); - if (iterResult == null) { + try { + if (!benchmarker.processBitmap(testImageBitmap)) { + throw new RuntimeException("Failed to run test."); + } + } catch (Exception e) { + e.printStackTrace(); + throw e; + } finally { + testImageBitmap.recycle(); + } + String iterResultString = benchmarker.getLastResultString(); + if (iterResultString == null) { throw new RuntimeException("Inference failed to produce a result."); } - Log.i(TAG, iterResult.toString()); + Log.i(TAG, iterResultString); return true; } - public void startPressed(View view) throws IOException { - Log.i(TAG, "Start pressed"); + public void detectPressed(View view) throws IOException { + benchmarkSession(false); + } + public void classifyPressed(View view) throws IOException { + benchmarkSession(true); + } + + private void benchmarkSession(boolean benchmarkClassification) throws IOException { try { - initializeTest(); + initializeTest(benchmarkClassification); } catch (IOException e) { Log.e(TAG, "Can't initialize benchmarker.", e); throw e; } String displayText = ""; + if (benchmarkClassification) { + displayText = "Classification benchmark: "; + } else { + displayText = "Detection benchmark: "; + } try { setProcessorAffinity(BIG_CORE_MASK); } catch (IOException e) { @@ -144,7 +171,6 @@ public class OvicBenchmarkerActivity extends Activity { Log.i(TAG, "Successfully initialized benchmarker."); int testIter = 0; Boolean iterSuccess = false; - double totalLatency = 0.0f; while (testIter < MAX_ITERATIONS) { try { iterSuccess = doTestIteration(); @@ -153,23 +179,22 @@ public class OvicBenchmarkerActivity extends Activity { throw e; } catch (InterruptedException e) { Log.e(TAG, "Interrupted at iteration " + testIter); + displayText += e.getMessage() + "\n"; } if (!iterSuccess) { break; } testIter++; - totalLatency += (double) iterResult.latency; } - ; Log.i(TAG, "Benchmarking finished"); if (textView != null) { if (testIter > 0) { textView.setText( displayText - + MODEL_PATH + + modelPath + ": Average latency=" - + df2.format(totalLatency / testIter) + + df2.format(benchmarker.getTotalRunTime() / testIter) + "ms after " + testIter + " runs."); diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml index e9d83bae543ae62ba8749c4c91b36b20bf09a176..1bce60ff7def2b0df9c93a4106a9aafff0009a2f 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml +++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml @@ -30,14 +30,14 @@ android:layout_height="wrap_content" android:text="@string/initial_status_msg" android:id="@+id/textView" - android:layout_above="@+id/button_start" + android:layout_above="@+id/button_clf_start" android:layout_alignParentTop="true"/>